aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- grep.c 45
-rw-r--r-- quote.c 37
-rw-r--r-- quote.h 1
-rwxr-xr-x t/t7812-grep-icase-non-ascii.sh 26
4 files changed, 108 insertions, 1 deletions
diff --git a/grep.c b/grep.c
index 451275d29..627ae3e3e 100644
--- a/grep.c
+++ b/grep.c
@@ -5,6 +5,7 @@
#include "diff.h"
#include "diffcore.h"
#include "commit.h"
+#include "quote.h"
static int grep_source_load(struct grep_source *gs);
static int grep_source_is_binary(struct grep_source *gs);
@@ -397,6 +398,28 @@ static int is_fixed(const char *s, size_t len)
return 1;
}
+/*
+ * Compile p->pattern as a literal (fixed-string) match: the pattern is
+ * quoted with basic_regex_quote_buf() and the result is handed to
+ * regcomp(), which stores the compiled expression in p->regexp.
+ *
+ * REG_EXTENDED is masked out of opt->regflags because the quoting is
+ * done for basic regexp syntax; REG_ICASE is added when
+ * opt->ignore_case is set.  With opt->debug set, the quoted pattern is
+ * printed to stderr as "fixed <pattern>".  A compilation failure is
+ * reported through compile_regexp_failed() with the regerror() text.
+ */
+static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt)
+{
+	struct strbuf sb = STRBUF_INIT;
+	int err;
+	int regflags;
+
+	basic_regex_quote_buf(&sb, p->pattern);
+	regflags = opt->regflags & ~REG_EXTENDED;
+	if (opt->ignore_case)
+		regflags |= REG_ICASE;
+	err = regcomp(&p->regexp, sb.buf, regflags);
+	if (opt->debug)
+		fprintf(stderr, "fixed %s\n", sb.buf);
+	strbuf_release(&sb);
+	if (err) {
+		char errbuf[1024];
+
+		/*
+		 * POSIX leaves the contents of the regex_t undefined
+		 * when regcomp() fails, so it must not be passed to
+		 * regfree() here; regerror() is the only call that
+		 * may still be given it.
+		 */
+		regerror(err, &p->regexp, errbuf, sizeof(errbuf));
+		compile_regexp_failed(p, errbuf);
+	}
+}
+
static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
{
int icase, ascii_only;
@@ -407,8 +430,20 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
icase = opt->regflags & REG_ICASE || p->ignore_case;
ascii_only = !has_non_ascii(p->pattern);
+ /*
+ * Even when -F (fixed) asks us to do a non-regexp search, we
+ * may not be able to case-fold correctly when -i (ignore-case)
+ * is also given. In that case we synthesize a regexp that
+ * matches the pattern's regexp special characters literally
+ * while ignoring case differences. On
+ * the other hand, even without -F, if the pattern does not
+ * have any regexp special characters and there is no need for
+ * case-folding search, we can internally turn it into a
+ * simple string match using kws. p->fixed tells us if we
+ * want to use kws.
+ */
if (opt->fixed)
- p->fixed = 1;
+ p->fixed = !icase || ascii_only;
else if ((!icase || ascii_only) &&
is_fixed(p->pattern, p->patternlen))
p->fixed = 1;
@@ -423,6 +458,14 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
kwsincr(p->kws, p->pattern, p->patternlen);
kwsprep(p->kws);
return;
+ } else if (opt->fixed) {
+ /*
+ * We come here when the pattern has non-ASCII
+ * characters that we cannot case-fold and we have
+ * been asked to ignore case.
+ */
+ compile_fixed_regexp(p, opt);
+ return;
}
if (opt->pcre) {
diff --git a/quote.c b/quote.c
index fe884d245..c67adb718 100644
--- a/quote.c
+++ b/quote.c
@@ -440,3 +440,40 @@ void tcl_quote_buf(struct strbuf *sb, const char *src)
}
strbuf_addch(sb, '"');
}
+
+/*
+ * Append to sb a copy of src quoted so that, when compiled as a POSIX
+ * basic regular expression, it matches src literally.
+ *
+ * '[', '.', '\' and '*' are escaped wherever they appear.  '^' is
+ * escaped only when it is the first character and '$' only when it is
+ * the last one; elsewhere they are copied through unchanged.  Every
+ * other character is copied through unchanged.
+ */
+void basic_regex_quote_buf(struct strbuf *sb, const char *src)
+{
+	char c;
+
+	if (*src == '^') {
+		/* only beginning '^' is special and needs quoting */
+		strbuf_addch(sb, '\\');
+		strbuf_addch(sb, *src++);
+	}
+
+	/*
+	 * A '*' that opens the pattern gets no pass-through: once a
+	 * leading '^' has been emitted as "\^", an unquoted '*' right
+	 * behind it would repeat that literal '^' instead of matching
+	 * an asterisk.  "\*" is literal in every position, so the loop
+	 * below quotes all of them.
+	 */
+	while ((c = *src++)) {
+		switch (c) {
+		case '[':
+		case '.':
+		case '\\':
+		case '*':
+			strbuf_addch(sb, '\\');
+			strbuf_addch(sb, c);
+			break;
+
+		case '$':
+			/* only the end '$' is special and needs quoting */
+			if (*src == '\0')
+				strbuf_addch(sb, '\\');
+			strbuf_addch(sb, c);
+			break;
+
+		default:
+			strbuf_addch(sb, c);
+			break;
+		}
+	}
+}
diff --git a/quote.h b/quote.h
index 99e04d34b..362d315be 100644
--- a/quote.h
+++ b/quote.h
@@ -67,5 +67,6 @@ extern char *quote_path_relative(const char *in, const char *prefix,
extern void perl_quote_buf(struct strbuf *sb, const char *src);
extern void python_quote_buf(struct strbuf *sb, const char *src);
extern void tcl_quote_buf(struct strbuf *sb, const char *src);
+extern void basic_regex_quote_buf(struct strbuf *sb, const char *src);
#endif
diff --git a/t/t7812-grep-icase-non-ascii.sh b/t/t7812-grep-icase-non-ascii.sh
index b78a774da..1929809d4 100755
--- a/t/t7812-grep-icase-non-ascii.sh
+++ b/t/t7812-grep-icase-non-ascii.sh
@@ -20,4 +20,30 @@ test_expect_success REGEX_LOCALE 'grep literal string, no -F' '
git grep -i "TILRAUN: HALLÓ HEIMUR!"
'
+test_expect_success REGEX_LOCALE 'grep literal string, with -F' '
+ git grep --debug -i -F "TILRAUN: Halló Heimur!" 2>&1 >/dev/null |
+ grep fixed >debug1 &&
+ test_write_lines "fixed TILRAUN: Halló Heimur!" >expect1 &&
+ test_cmp expect1 debug1 &&
+
+ git grep --debug -i -F "TILRAUN: HALLÓ HEIMUR!" 2>&1 >/dev/null |
+ grep fixed >debug2 &&
+ test_write_lines "fixed TILRAUN: HALLÓ HEIMUR!" >expect2 &&
+ test_cmp expect2 debug2
+'
+
+test_expect_success REGEX_LOCALE 'grep string with regex, with -F' '
+ test_write_lines "^*TILR^AUN:.* \\Halló \$He[]imur!\$" >file &&
+
+ git grep --debug -i -F "^*TILR^AUN:.* \\Halló \$He[]imur!\$" 2>&1 >/dev/null |
+ grep fixed >debug1 &&
+ test_write_lines "fixed \\^\\*TILR^AUN:\\.\\* \\\\Halló \$He\\[]imur!\\\$" >expect1 &&
+ test_cmp expect1 debug1 &&
+
+ git grep --debug -i -F "^*TILR^AUN:.* \\HALLÓ \$HE[]IMUR!\$" 2>&1 >/dev/null |
+ grep fixed >debug2 &&
+ test_write_lines "fixed \\^\\*TILR^AUN:\\.\\* \\\\HALLÓ \$HE\\[]IMUR!\\\$" >expect2 &&
+ test_cmp expect2 debug2
+'
+
test_done