aboutsummaryrefslogtreecommitdiff
path: root/grep.c
diff options
context:
space:
mode:
authorNguyễn Thái Ngọc Duy <pclouds@gmail.com>2016-06-25 07:22:31 +0200
committerJunio C Hamano <gitster@pobox.com>2016-07-01 12:44:30 -0700
commit793dc676e08394ed15bffe0ed66606ff9ced1c6a (patch)
tree8176b0281abf2fb31e70be8439ad4a3ec4fe56de /grep.c
parent5c1ebcca4d1df3b2a84d01b3f32ec13bf8f760f8 (diff)
downloadgit-793dc676e08394ed15bffe0ed66606ff9ced1c6a.tar.gz
git-793dc676e08394ed15bffe0ed66606ff9ced1c6a.tar.xz
grep/icase: avoid kwsset when -F is specified
Similar to the previous commit, we can't use kws for an icase search outside the ASCII range. But we can't simply pass the pattern to regcomp/pcre as the previous commit did, because it may contain regex special characters, so we need to quote the pattern first. To avoid misquoting traps that could lead to undefined behavior, we always stick to the basic regex engine in this case. We don't need fancy features for grepping a literal string anyway. basic_regex_quote_buf() assumes that if the pattern is in a multibyte encoding, ASCII chars must be unambiguously encoded as single bytes. This is true at least for UTF-8. For others, let's wait until people speak up. Chances are nobody uses multibyte, non-UTF-8 charsets anymore. Noticed-by: Plamen Totev <plamen.totev@abv.bg> Helped-by: René Scharfe <l.s.r@web.de> Helped-by: Eric Sunshine <sunshine@sunshineco.com> Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 'grep.c')
-rw-r--r--grep.c45
1 files changed, 44 insertions, 1 deletions
diff --git a/grep.c b/grep.c
index 451275d29..627ae3e3e 100644
--- a/grep.c
+++ b/grep.c
@@ -5,6 +5,7 @@
#include "diff.h"
#include "diffcore.h"
#include "commit.h"
+#include "quote.h"
static int grep_source_load(struct grep_source *gs);
static int grep_source_is_binary(struct grep_source *gs);
@@ -397,6 +398,28 @@ static int is_fixed(const char *s, size_t len)
return 1;
}
+static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt) /* compile p->pattern into p->regexp as a quoted, literal-matching basic regex */
+{
+ struct strbuf sb = STRBUF_INIT; /* receives the quoted copy of p->pattern */
+ int err; /* regcomp() result; non-zero means compilation failed */
+ int regflags; /* flags handed to regcomp(), derived from opt->regflags */
+
+ basic_regex_quote_buf(&sb, p->pattern); /* quote regex special characters so the pattern is matched literally */
+ regflags = opt->regflags & ~REG_EXTENDED; /* always use the basic regex syntax, whatever the caller asked for */
+ if (opt->ignore_case)
+ regflags |= REG_ICASE; /* case-insensitive matching was requested */
+ err = regcomp(&p->regexp, sb.buf, regflags);
+ if (opt->debug)
+ fprintf(stderr, "fixed %s\n", sb.buf); /* debug aid: show the quoted pattern that was compiled */
+ strbuf_release(&sb); /* sb is not referenced again below */
+ if (err) {
+ char errbuf[1024]; /* holds the message produced by regerror() */
+ regerror(err, &p->regexp, errbuf, sizeof(errbuf));
+ regfree(&p->regexp);
+ compile_regexp_failed(p, errbuf); /* defined outside this hunk; presumably reports the bad pattern and aborts -- confirm */
+ }
+}
+
static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
{
int icase, ascii_only;
@@ -407,8 +430,20 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
icase = opt->regflags & REG_ICASE || p->ignore_case;
ascii_only = !has_non_ascii(p->pattern);
+ /*
+ * Even when -F (fixed) asks us to do a non-regexp search, we
+ * may not be able to correctly case-fold when -i
+ * (ignore-case) is asked (in which case, we'll synthesize a
+ * regexp to match the pattern that matches regexp special
+ * characters literally, while ignoring case differences). On
+ * the other hand, even without -F, if the pattern does not
+ * have any regexp special characters and there is no need for
+ * case-folding search, we can internally turn it into a
+ * simple string match using kws. p->fixed tells us if we
+ * want to use kws.
+ */
if (opt->fixed)
- p->fixed = 1;
+ p->fixed = !icase || ascii_only;
else if ((!icase || ascii_only) &&
is_fixed(p->pattern, p->patternlen))
p->fixed = 1;
@@ -423,6 +458,14 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
kwsincr(p->kws, p->pattern, p->patternlen);
kwsprep(p->kws);
return;
+ } else if (opt->fixed) {
+ /*
+ * We come here when the pattern has the non-ascii
+ * characters we cannot case-fold, and asked to
+ * ignore-case.
+ */
+ compile_fixed_regexp(p, opt);
+ return;
}
if (opt->pcre) {