aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile36
-rw-r--r--configure.ac77
-rw-r--r--grep.c145
-rw-r--r--grep.h17
-rw-r--r--t/test-lib.sh2
5 files changed, 256 insertions, 21 deletions
diff --git a/Makefile b/Makefile
index 502c07246..96ebbed87 100644
--- a/Makefile
+++ b/Makefile
@@ -29,6 +29,11 @@ all::
# Perl-compatible regular expressions instead of standard or extended
# POSIX regular expressions.
#
+# Currently USE_LIBPCRE is a synonym for USE_LIBPCRE1, define
+# USE_LIBPCRE2 instead if you'd like to use version 2 of the PCRE
+# library. The USE_LIBPCRE flag will likely be changed to mean v2 by
+# default in future releases.
+#
# When using USE_LIBPCRE1, define NO_LIBPCRE1_JIT if the PCRE v1
# library is compiled without --enable-jit. We will auto-detect
# whether the version of the PCRE v1 library in use has JIT support at
@@ -37,8 +42,10 @@ all::
# you have link-time errors about a missing `pcre_jit_exec` define
# this, or recompile PCRE v1 with --enable-jit.
#
-# Define LIBPCREDIR=/foo/bar if your libpcre header and library files are in
-# /foo/bar/include and /foo/bar/lib directories.
+# Define LIBPCREDIR=/foo/bar if your PCRE header and library files are
+# in /foo/bar/include and /foo/bar/lib directories. Which version of
+# PCRE this points to is determined by the USE_LIBPCRE1 and USE_LIBPCRE2
+# variables.
#
# Define HAVE_ALLOCA_H if you have working alloca(3) defined in that header.
#
@@ -1095,12 +1102,14 @@ ifdef NO_LIBGEN_H
COMPAT_OBJS += compat/basename.o
endif
-ifdef USE_LIBPCRE
- BASIC_CFLAGS += -DUSE_LIBPCRE1
- ifdef LIBPCREDIR
- BASIC_CFLAGS += -I$(LIBPCREDIR)/include
- EXTLIBS += -L$(LIBPCREDIR)/$(lib) $(CC_LD_DYNPATH)$(LIBPCREDIR)/$(lib)
+USE_LIBPCRE1 ?= $(USE_LIBPCRE)
+
+ifneq (,$(USE_LIBPCRE1))
+ ifdef USE_LIBPCRE2
+$(error Only set USE_LIBPCRE1 (or its alias USE_LIBPCRE) or USE_LIBPCRE2, not both!)
endif
+
+ BASIC_CFLAGS += -DUSE_LIBPCRE1
EXTLIBS += -lpcre
ifdef NO_LIBPCRE1_JIT
@@ -1108,6 +1117,16 @@ ifdef NO_LIBPCRE1_JIT
endif
endif
+ifdef USE_LIBPCRE2
+ BASIC_CFLAGS += -DUSE_LIBPCRE2
+ EXTLIBS += -lpcre2-8
+endif
+
+ifdef LIBPCREDIR
+ BASIC_CFLAGS += -I$(LIBPCREDIR)/include
+ EXTLIBS += -L$(LIBPCREDIR)/$(lib) $(CC_LD_DYNPATH)$(LIBPCREDIR)/$(lib)
+endif
+
ifdef HAVE_ALLOCA_H
BASIC_CFLAGS += -DHAVE_ALLOCA_H
endif
@@ -2252,7 +2271,8 @@ GIT-BUILD-OPTIONS: FORCE
@echo TAR=\''$(subst ','\'',$(subst ','\'',$(TAR)))'\' >>$@+
@echo NO_CURL=\''$(subst ','\'',$(subst ','\'',$(NO_CURL)))'\' >>$@+
@echo NO_EXPAT=\''$(subst ','\'',$(subst ','\'',$(NO_EXPAT)))'\' >>$@+
- @echo USE_LIBPCRE1=\''$(subst ','\'',$(subst ','\'',$(USE_LIBPCRE)))'\' >>$@+
+ @echo USE_LIBPCRE1=\''$(subst ','\'',$(subst ','\'',$(USE_LIBPCRE1)))'\' >>$@+
+ @echo USE_LIBPCRE2=\''$(subst ','\'',$(subst ','\'',$(USE_LIBPCRE2)))'\' >>$@+
@echo NO_LIBPCRE1_JIT=\''$(subst ','\'',$(subst ','\'',$(NO_LIBPCRE1_JIT)))'\' >>$@+
@echo NO_PERL=\''$(subst ','\'',$(subst ','\'',$(NO_PERL)))'\' >>$@+
@echo NO_PTHREADS=\''$(subst ','\'',$(subst ','\'',$(NO_PTHREADS)))'\' >>$@+
diff --git a/configure.ac b/configure.ac
index deeb968da..11d083fbe 100644
--- a/configure.ac
+++ b/configure.ac
@@ -255,21 +255,61 @@ GIT_PARSE_WITH([openssl]))
# Perl-compatible regular expressions instead of standard or extended
# POSIX regular expressions.
#
-# Define LIBPCREDIR=/foo/bar if your libpcre header and library files are in
+# Currently USE_LIBPCRE is a synonym for USE_LIBPCRE1, define
+# USE_LIBPCRE2 instead if you'd like to use version 2 of the PCRE
+# library. The USE_LIBPCRE flag will likely be changed to mean v2 by
+# default in future releases.
+#
+# Define LIBPCREDIR=/foo/bar if your PCRE header and library files are in
# /foo/bar/include and /foo/bar/lib directories.
#
AC_ARG_WITH(libpcre,
-AS_HELP_STRING([--with-libpcre],[support Perl-compatible regexes (default is NO)])
+AS_HELP_STRING([--with-libpcre],[synonym for --with-libpcre1]),
+ if test "$withval" = "no"; then
+ USE_LIBPCRE1=
+ elif test "$withval" = "yes"; then
+ USE_LIBPCRE1=YesPlease
+ else
+ USE_LIBPCRE1=YesPlease
+ LIBPCREDIR=$withval
+ AC_MSG_NOTICE([Setting LIBPCREDIR to $LIBPCREDIR])
+ dnl USE_LIBPCRE1 can still be modified below, so don't substitute
+ dnl it yet.
+ GIT_CONF_SUBST([LIBPCREDIR])
+ fi)
+
+AC_ARG_WITH(libpcre1,
+AS_HELP_STRING([--with-libpcre1],[support Perl-compatible regexes via libpcre1 (default is NO)])
+AS_HELP_STRING([], [ARG can be also prefix for libpcre library and headers]),
+ if test "$withval" = "no"; then
+ USE_LIBPCRE1=
+ elif test "$withval" = "yes"; then
+ USE_LIBPCRE1=YesPlease
+ else
+ USE_LIBPCRE1=YesPlease
+ LIBPCREDIR=$withval
+ AC_MSG_NOTICE([Setting LIBPCREDIR to $LIBPCREDIR])
+ dnl USE_LIBPCRE1 can still be modified below, so don't substitute
+ dnl it yet.
+ GIT_CONF_SUBST([LIBPCREDIR])
+ fi)
+
+AC_ARG_WITH(libpcre2,
+AS_HELP_STRING([--with-libpcre2],[support Perl-compatible regexes via libpcre2 (default is NO)])
AS_HELP_STRING([], [ARG can be also prefix for libpcre library and headers]),
+ if test -n "$USE_LIBPCRE1"; then
+ AC_MSG_ERROR([Only supply one of --with-libpcre1 or --with-libpcre2!])
+ fi
+
if test "$withval" = "no"; then
- USE_LIBPCRE=
+ USE_LIBPCRE2=
elif test "$withval" = "yes"; then
- USE_LIBPCRE=YesPlease
+ USE_LIBPCRE2=YesPlease
else
- USE_LIBPCRE=YesPlease
+ USE_LIBPCRE2=YesPlease
LIBPCREDIR=$withval
AC_MSG_NOTICE([Setting LIBPCREDIR to $LIBPCREDIR])
- dnl USE_LIBPCRE can still be modified below, so don't substitute
+ dnl USE_LIBPCRE2 can still be modified below, so don't substitute
dnl it yet.
GIT_CONF_SUBST([LIBPCREDIR])
fi)
@@ -501,13 +541,11 @@ GIT_CONF_SUBST([NEEDS_SSL_WITH_CRYPTO])
GIT_CONF_SUBST([NO_OPENSSL])
#
-# Define USE_LIBPCRE if you have and want to use libpcre. Various
-# commands such as log and grep offer runtime options to use
-# Perl-compatible regular expressions instead of standard or extended
-# POSIX regular expressions.
+# Handle the USE_LIBPCRE1 and USE_LIBPCRE2 options potentially set
+# above.
#
-if test -n "$USE_LIBPCRE"; then
+if test -n "$USE_LIBPCRE1"; then
GIT_STASH_FLAGS($LIBPCREDIR)
@@ -517,7 +555,22 @@ AC_CHECK_LIB([pcre], [pcre_version],
GIT_UNSTASH_FLAGS($LIBPCREDIR)
-GIT_CONF_SUBST([USE_LIBPCRE])
+GIT_CONF_SUBST([USE_LIBPCRE1])
+
+fi
+
+
+if test -n "$USE_LIBPCRE2"; then
+
+GIT_STASH_FLAGS($LIBPCREDIR)
+
+AC_CHECK_LIB([pcre2-8], [pcre2_config_8],
+[USE_LIBPCRE2=YesPlease],
+[USE_LIBPCRE2=])
+
+GIT_UNSTASH_FLAGS($LIBPCREDIR)
+
+GIT_CONF_SUBST([USE_LIBPCRE2])
fi
diff --git a/grep.c b/grep.c
index 19fa67c34..d0bf37858 100644
--- a/grep.c
+++ b/grep.c
@@ -179,22 +179,37 @@ static void grep_set_pattern_type_option(enum grep_pattern_type pattern_type, st
case GREP_PATTERN_TYPE_BRE:
opt->fixed = 0;
opt->pcre1 = 0;
+ opt->pcre2 = 0;
break;
case GREP_PATTERN_TYPE_ERE:
opt->fixed = 0;
opt->pcre1 = 0;
+ opt->pcre2 = 0;
opt->regflags |= REG_EXTENDED;
break;
case GREP_PATTERN_TYPE_FIXED:
opt->fixed = 1;
opt->pcre1 = 0;
+ opt->pcre2 = 0;
break;
case GREP_PATTERN_TYPE_PCRE:
opt->fixed = 0;
+#ifdef USE_LIBPCRE2
+ opt->pcre1 = 0;
+ opt->pcre2 = 1;
+#else
+ /*
+ * It's important that pcre1 always be assigned to
+ * even when there's no USE_LIBPCRE* defined. We still
+ * call the PCRE stub function, it just dies with
+ * "cannot use Perl-compatible regexes[...]".
+ */
opt->pcre1 = 1;
+ opt->pcre2 = 0;
+#endif
break;
}
}
@@ -446,6 +461,127 @@ static void free_pcre1_regexp(struct grep_pat *p)
}
#endif /* !USE_LIBPCRE1 */
+#ifdef USE_LIBPCRE2
+static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt)
+{
+	int error;
+	PCRE2_UCHAR errbuf[256];
+	PCRE2_SIZE erroffset;
+	int options = PCRE2_MULTILINE;
+	const uint8_t *character_tables = NULL;
+	int jitret;
+	/* Compile p->pattern with PCRE2; set up match data and, if available, JIT state. */
+	assert(opt->pcre2);
+
+	p->pcre2_compile_context = NULL;
+	/* Caseless non-ASCII patterns get character tables built by pcre2_maketables(). */
+	if (opt->ignore_case) {
+		if (has_non_ascii(p->pattern)) {
+			character_tables = pcre2_maketables(NULL);
+			p->pcre2_compile_context = pcre2_compile_context_create(NULL);
+			pcre2_set_character_tables(p->pcre2_compile_context, character_tables);
+		}
+		options |= PCRE2_CASELESS;
+	}
+	if (is_utf8_locale() && has_non_ascii(p->pattern))
+		options |= PCRE2_UTF;
+
+	p->pcre2_pattern = pcre2_compile((PCRE2_SPTR)p->pattern,
+					 p->patternlen, options, &error, &erroffset,
+					 p->pcre2_compile_context);
+
+	if (p->pcre2_pattern) {
+		p->pcre2_match_data = pcre2_match_data_create_from_pattern(p->pcre2_pattern, NULL);
+		if (!p->pcre2_match_data)
+			die("Couldn't allocate PCRE2 match data");
+	} else {
+		pcre2_get_error_message(error, errbuf, sizeof(errbuf));
+		compile_regexp_failed(p, (const char *)errbuf);
+	}
+	/* Only JIT-compile when this PCRE2 build reports JIT support. */
+	pcre2_config(PCRE2_CONFIG_JIT, &p->pcre2_jit_on);
+	if (p->pcre2_jit_on == 1) {
+		jitret = pcre2_jit_compile(p->pcre2_pattern, PCRE2_JIT_COMPLETE);
+		if (jitret)
+			die("Couldn't JIT the PCRE2 pattern '%s', got '%d'\n", p->pattern, jitret);
+		p->pcre2_jit_stack = pcre2_jit_stack_create(1, 1024 * 1024, NULL);
+		if (!p->pcre2_jit_stack)
+			die("Couldn't allocate PCRE2 JIT stack");
+		p->pcre2_match_context = pcre2_match_context_create(NULL);
+		if (!p->pcre2_match_context) /* check the context, not the stack again */
+			die("Couldn't allocate PCRE2 match context");
+		pcre2_jit_stack_assign(p->pcre2_match_context, NULL, p->pcre2_jit_stack);
+	} else if (p->pcre2_jit_on != 0) {
+		die("BUG: The pcre2_jit_on variable should be 0 or 1, not %d",
+		    (int)p->pcre2_jit_on);
+	}
+}
+
+static int pcre2match(struct grep_pat *p, const char *line, const char *eol,
+		regmatch_t *match, int eflags)
+{
+	int ret, flags = 0;
+	PCRE2_SIZE *ovector;
+	PCRE2_UCHAR errbuf[256];
+	/* Match [line, eol); a positive PCRE2 result fills *match and returns 0. */
+	if (eflags & REG_NOTBOL)
+		flags |= PCRE2_NOTBOL;
+	/* The assigned JIT stack is only used if its match context is passed in. */
+	if (p->pcre2_jit_on)
+		ret = pcre2_jit_match(p->pcre2_pattern, (unsigned char *)line,
+				      eol - line, 0, flags, p->pcre2_match_data,
+				      p->pcre2_match_context);
+	else
+		ret = pcre2_match(p->pcre2_pattern, (unsigned char *)line,
+				  eol - line, 0, flags, p->pcre2_match_data,
+				  NULL);
+
+	if (ret < 0 && ret != PCRE2_ERROR_NOMATCH) {
+		pcre2_get_error_message(ret, errbuf, sizeof(errbuf));
+		die("%s failed with error code %d: %s",
+		    (p->pcre2_jit_on ? "pcre2_jit_match" : "pcre2_match"), ret,
+		    (const char *)errbuf);
+	}
+	if (ret > 0) {
+		ovector = pcre2_get_ovector_pointer(p->pcre2_match_data);
+		ret = 0;
+		match->rm_so = (int)ovector[0];
+		match->rm_eo = (int)ovector[1];
+	}
+
+	return ret;
+}
+
+static void free_pcre2_pattern(struct grep_pat *p)
+{ /* Release every PCRE2 object hanging off this pattern. */
+	pcre2_match_context_free(p->pcre2_match_context);
+	pcre2_jit_stack_free(p->pcre2_jit_stack);
+	pcre2_match_data_free(p->pcre2_match_data);
+	pcre2_code_free(p->pcre2_pattern);
+	pcre2_compile_context_free(p->pcre2_compile_context);
+}
+#else /* !USE_LIBPCRE2 */
+static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt)
+{
+ /*
+ * Stub for builds without USE_LIBPCRE2. Unreachable until USE_LIBPCRE2
+ * becomes synonymous with USE_LIBPCRE, as opt->pcre2 is only ever set
+ * under USE_LIBPCRE2; see grep_set_pattern_type_option().
+ */
+ die("cannot use Perl-compatible regexes when not compiled with USE_LIBPCRE");
+}
+
+static int pcre2match(struct grep_pat *p, const char *line, const char *eol,
+ regmatch_t *match, int eflags)
+{
+ return 1; /* stub: non-zero is "no match" to patmatch(), which negates it */
+}
+
+static void free_pcre2_pattern(struct grep_pat *p)
+{ /* stub for builds without USE_LIBPCRE2: intentionally does nothing */
+}
+#endif /* !USE_LIBPCRE2 */
+
static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt)
{
struct strbuf sb = STRBUF_INIT;
@@ -511,6 +647,11 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
return;
}
+ if (opt->pcre2) {
+ compile_pcre2_pattern(p, opt);
+ return;
+ }
+
if (opt->pcre1) {
compile_pcre1_regexp(p, opt);
return;
@@ -870,6 +1011,8 @@ void free_grep_patterns(struct grep_opt *opt)
kwsfree(p->kws);
else if (p->pcre1_regexp)
free_pcre1_regexp(p);
+ else if (p->pcre2_pattern)
+ free_pcre2_pattern(p);
else
regfree(&p->regexp);
free(p->pattern);
@@ -950,6 +1093,8 @@ static int patmatch(struct grep_pat *p, char *line, char *eol,
hit = !fixmatch(p, line, eol, match);
else if (p->pcre1_regexp)
hit = !pcre1match(p, line, eol, match, eflags);
+ else if (p->pcre2_pattern)
+ hit = !pcre2match(p, line, eol, match, eflags);
else
hit = !regexec_buf(&p->regexp, line, eol - line, 1, match,
eflags);
diff --git a/grep.h b/grep.h
index 3bff0870b..6f3d4e195 100644
--- a/grep.h
+++ b/grep.h
@@ -21,6 +21,16 @@ typedef int pcre;
typedef int pcre_extra;
typedef int pcre_jit_stack;
#endif
+#ifdef USE_LIBPCRE2
+#define PCRE2_CODE_UNIT_WIDTH 8
+#include <pcre2.h>
+#else
+typedef int pcre2_code;
+typedef int pcre2_match_data;
+typedef int pcre2_compile_context;
+typedef int pcre2_match_context;
+typedef int pcre2_jit_stack;
+#endif
#include "kwset.h"
#include "thread-utils.h"
#include "userdiff.h"
@@ -65,6 +75,12 @@ struct grep_pat {
pcre_jit_stack *pcre1_jit_stack;
const unsigned char *pcre1_tables;
int pcre1_jit_on;
+ pcre2_code *pcre2_pattern;
+ pcre2_match_data *pcre2_match_data;
+ pcre2_compile_context *pcre2_compile_context;
+ pcre2_match_context *pcre2_match_context;
+ pcre2_jit_stack *pcre2_jit_stack;
+ uint32_t pcre2_jit_on;
kwset_t kws;
unsigned fixed:1;
unsigned ignore_case:1;
@@ -128,6 +144,7 @@ struct grep_opt {
int extended;
int use_reflog_filter;
int pcre1;
+ int pcre2;
int relative;
int pathname;
int null_following_name;
diff --git a/t/test-lib.sh b/t/test-lib.sh
index ab92c0eba..44d467938 100644
--- a/t/test-lib.sh
+++ b/t/test-lib.sh
@@ -1011,7 +1011,7 @@ esac
test -z "$NO_PERL" && test_set_prereq PERL
test -z "$NO_PTHREADS" && test_set_prereq PTHREADS
test -z "$NO_PYTHON" && test_set_prereq PYTHON
-test -n "$USE_LIBPCRE1" && test_set_prereq PCRE
+test -n "$USE_LIBPCRE1$USE_LIBPCRE2" && test_set_prereq PCRE
test -z "$NO_GETTEXT" && test_set_prereq GETTEXT
# Can we rely on git's output in the C locale?