From fb95e2e38dd4e8301b3992bacfac4c87b8262181 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Thu, 1 Jun 2017 18:20:55 +0000 Subject: grep: un-break building with PCRE >= 8.32 without --enable-jit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Amend my change earlier in this series ("grep: add support for the PCRE v1 JIT API", 2017-04-11) to un-break the build on PCRE v1 versions later than 8.31 compiled without --enable-jit. As explained in that change and a later compatibility change in this series ("grep: un-break building with PCRE < 8.32", 2017-05-10) the pcre_jit_exec() function is a faster path to execute the JIT. Unfortunately there's no compatibility stub for that function compiled into the library if pcre_config(PCRE_CONFIG_JIT, &ret) would return 0, and no macro that can be used to check for it, so the only portable option to support builds without --enable-jit is via a new NO_LIBPCRE1_JIT=UnfortunatelyYes Makefile option[1]. Another option would be to make the JIT opt-in via USE_LIBPCRE1_JIT=YesPlease, after all it's not a default option of PCRE v1. I think it makes more sense to make it opt-out since even though it's not a default option, most packagers of PCRE seem to turn it on by default, with the notable exception of the MinGW package. Make the MinGW platform work by default by changing the build defaults to turn on NO_LIBPCRE1_JIT=UnfortunatelyYes. It is the only platform that turns on USE_LIBPCRE=YesPlease by default, see commit df5218b4c3 ("config.mak.uname: support MSys2", 2016-01-13) for that change. 1. "How do I support pcre1 JIT on all versions?" (https://lists.exim.org/lurker/thread/20170601.103148.10253788.en.html) 2. https://github.com/Alexpux/MINGW-packages/blob/master/mingw-w64-pcre/PKGBUILD (referenced from "Re: PCRE v2 compile error, was Re: What's cooking in git.git (May 2017, #01; Mon, 1)"; ) Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- Makefile | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'Makefile') diff --git a/Makefile b/Makefile index a79274e5e..502c07246 100644 --- a/Makefile +++ b/Makefile @@ -29,6 +29,14 @@ all:: # Perl-compatible regular expressions instead of standard or extended # POSIX regular expressions. # +# When using USE_LIBPCRE1, define NO_LIBPCRE1_JIT if the PCRE v1 +# library is compiled without --enable-jit. We will auto-detect +# whether the version of the PCRE v1 library in use has JIT support at +# all, but we unfortunately can't auto-detect whether JIT support +# hasn't been compiled in in an otherwise JIT-supporting version. If +# you have link-time errors about a missing `pcre_jit_exec` define +# this, or recompile PCRE v1 with --enable-jit. +# # Define LIBPCREDIR=/foo/bar if your libpcre header and library files are in # /foo/bar/include and /foo/bar/lib directories. # @@ -1094,6 +1102,10 @@ ifdef USE_LIBPCRE EXTLIBS += -L$(LIBPCREDIR)/$(lib) $(CC_LD_DYNPATH)$(LIBPCREDIR)/$(lib) endif EXTLIBS += -lpcre + +ifdef NO_LIBPCRE1_JIT + BASIC_CFLAGS += -DNO_LIBPCRE1_JIT +endif endif ifdef HAVE_ALLOCA_H @@ -2241,6 +2253,7 @@ GIT-BUILD-OPTIONS: FORCE @echo NO_CURL=\''$(subst ','\'',$(subst ','\'',$(NO_CURL)))'\' >>$@+ @echo NO_EXPAT=\''$(subst ','\'',$(subst ','\'',$(NO_EXPAT)))'\' >>$@+ @echo USE_LIBPCRE1=\''$(subst ','\'',$(subst ','\'',$(USE_LIBPCRE)))'\' >>$@+ + @echo NO_LIBPCRE1_JIT=\''$(subst ','\'',$(subst ','\'',$(NO_LIBPCRE1_JIT)))'\' >>$@+ @echo NO_PERL=\''$(subst ','\'',$(subst ','\'',$(NO_PERL)))'\' >>$@+ @echo NO_PTHREADS=\''$(subst ','\'',$(subst ','\'',$(NO_PTHREADS)))'\' >>$@+ @echo NO_PYTHON=\''$(subst ','\'',$(subst ','\'',$(NO_PYTHON)))'\' >>$@+ -- cgit v1.2.1 From 94da9193a6eb8f1085d611c04ff8bbb4f5ae1e0a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Thu, 1 Jun 2017 18:20:56 +0000 Subject: grep: add support for PCRE v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for v2 of the PCRE API. This is a new major version of PCRE that came out in early 2015[1]. The regular expression syntax is the same, but while the API is similar, pretty much every function is either renamed or takes different arguments. Thus using it via entirely new functions makes sense, as opposed to trying to e.g. have one compile_pcre_pattern() that would call either PCRE v1 or v2 functions. Git can now be compiled with either USE_LIBPCRE1=YesPlease or USE_LIBPCRE2=YesPlease, with USE_LIBPCRE=YesPlease currently being a synonym for the former. Providing both is a compile-time error. With earlier patches to enable JIT for PCRE v1 the performance of the release versions of both libraries is almost exactly the same, with PCRE v2 being around 1% slower. However after I reported this to the pcre-dev mailing list[2] I got a lot of help with the API use from Zoltán Herczeg, he subsequently optimized some of the JIT functionality in v2 of the library. Running the p7820-grep-engines.sh performance test against the latest Subversion trunk of both, with both them and git compiled as -O3, and the test run against linux.git, gives the following results. Just the /perl/ tests shown: $ GIT_PERF_REPEAT_COUNT=30 GIT_PERF_LARGE_REPO=~/g/linux GIT_PERF_MAKE_COMMAND='grep -q LIBPCRE2 Makefile && make -j8 USE_LIBPCRE2=YesPlease CC=~/perl5/installed/bin/gcc NO_R_TO_GCC_LINKER=YesPlease CFLAGS=-O3 LIBPCREDIR=/home/avar/g/pcre2/inst LDFLAGS=-Wl,-rpath,/home/avar/g/pcre2/inst/lib || make -j8 USE_LIBPCRE=YesPlease CC=~/perl5/installed/bin/gcc NO_R_TO_GCC_LINKER=YesPlease CFLAGS=-O3 LIBPCREDIR=/home/avar/g/pcre/inst LDFLAGS=-Wl,-rpath,/home/avar/g/pcre/inst/lib' ./run HEAD~5 HEAD~ HEAD p7820-grep-engines.sh [...] Test HEAD~5 HEAD~ HEAD ----------------------------------------------------------------------------------------------------------------- 7820.3: perl grep 'how.to' 0.31(1.10+0.48) 0.21(0.35+0.56) -32.3% 0.21(0.34+0.55) -32.3% 7820.7: perl grep '^how to' 0.56(2.70+0.40) 0.24(0.64+0.52) -57.1% 0.20(0.28+0.60) -64.3% 7820.11: perl grep '[how] to' 0.56(2.66+0.38) 0.29(0.95+0.45) -48.2% 0.23(0.45+0.54) -58.9% 7820.15: perl grep '(e.t[^ ]*|v.ry) rare' 1.02(5.77+0.42) 0.31(1.02+0.54) -69.6% 0.23(0.50+0.54) -77.5% 7820.19: perl grep 'm(ú|u)lt.b(æ|y)te' 0.38(1.57+0.42) 0.27(0.85+0.46) -28.9% 0.21(0.33+0.57) -44.7% See commit ("perf: add a comparison test of grep regex engines", 2017-04-19) for details on the machine the above test run was executed on. Here HEAD~2 is git with PCRE v1 without JIT, HEAD~ is PCRE v1 with JIT, and HEAD is PCRE v2 (also with JIT). See previous commits of mine mentioning p7820-grep-engines.sh for more details on the test setup. For ease of readability, a different run just of HEAD~ (PCRE v1 with JIT v.s. PCRE v2), again with just the /perl/ tests shown: [...] Test HEAD~ HEAD ---------------------------------------------------------------------------------------- 7820.3: perl grep 'how.to' 0.21(0.42+0.52) 0.21(0.31+0.58) +0.0% 7820.7: perl grep '^how to' 0.25(0.65+0.50) 0.20(0.31+0.57) -20.0% 7820.11: perl grep '[how] to' 0.30(0.90+0.50) 0.23(0.46+0.53) -23.3% 7820.15: perl grep '(e.t[^ ]*|v.ry) rare' 0.30(1.19+0.38) 0.23(0.51+0.51) -23.3% 7820.19: perl grep 'm(ú|u)lt.b(æ|y)te' 0.27(0.84+0.48) 0.21(0.34+0.57) -22.2% I.e. the two are either neck-to-neck, but PCRE v2 usually pulls ahead, when it does it's around 20% faster. A brief note on thread safety: As noted in pcre2api(3) & pcre2jit(3) the compiled pattern can be shared between threads, but not some of the JIT context, however the grep threading support does all pattern & JIT compilation in separate threads, so this code doesn't need to concern itself with thread safety. See commit 63e7e9d8b6 ("git-grep: Learn PCRE", 2011-05-09) for the initial addition of PCRE v1. This change follows some of the same patterns it did (and which were discussed on list at the time), e.g. mocking up types with typedef instead of ifdef-ing them out when USE_LIBPCRE2 isn't defined. This adds some trivial memory use to the program, but makes the code look nicer. 1. https://lists.exim.org/lurker/message/20150105.162835.0666407a.en.html 2. https://lists.exim.org/lurker/thread/20170419.172322.833ee099.en.html Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- Makefile | 36 ++++++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) (limited to 'Makefile') diff --git a/Makefile b/Makefile index 502c07246..96ebbed87 100644 --- a/Makefile +++ b/Makefile @@ -29,6 +29,11 @@ all:: # Perl-compatible regular expressions instead of standard or extended # POSIX regular expressions. # +# Currently USE_LIBPCRE is a synonym for USE_LIBPCRE1, define +# USE_LIBPCRE2 instead if you'd like to use version 2 of the PCRE +# library. The USE_LIBPCRE flag will likely be changed to mean v2 by +# default in future releases. +# # When using USE_LIBPCRE1, define NO_LIBPCRE1_JIT if the PCRE v1 # library is compiled without --enable-jit. We will auto-detect # whether the version of the PCRE v1 library in use has JIT support at @@ -37,8 +42,10 @@ all:: # you have link-time errors about a missing `pcre_jit_exec` define # this, or recompile PCRE v1 with --enable-jit. # -# Define LIBPCREDIR=/foo/bar if your libpcre header and library files are in -# /foo/bar/include and /foo/bar/lib directories. +# Define LIBPCREDIR=/foo/bar if your PCRE header and library files are +# in /foo/bar/include and /foo/bar/lib directories. Which version of +# PCRE this points to determined by the USE_LIBPCRE1 and USE_LIBPCRE2 +# variables. # # Define HAVE_ALLOCA_H if you have working alloca(3) defined in that header. # @@ -1095,12 +1102,14 @@ ifdef NO_LIBGEN_H COMPAT_OBJS += compat/basename.o endif -ifdef USE_LIBPCRE - BASIC_CFLAGS += -DUSE_LIBPCRE1 - ifdef LIBPCREDIR - BASIC_CFLAGS += -I$(LIBPCREDIR)/include - EXTLIBS += -L$(LIBPCREDIR)/$(lib) $(CC_LD_DYNPATH)$(LIBPCREDIR)/$(lib) +USE_LIBPCRE1 ?= $(USE_LIBPCRE) + +ifneq (,$(USE_LIBPCRE1)) + ifdef USE_LIBPCRE2 +$(error Only set USE_LIBPCRE1 (or its alias USE_LIBPCRE) or USE_LIBPCRE2, not both!) endif + + BASIC_CFLAGS += -DUSE_LIBPCRE1 EXTLIBS += -lpcre ifdef NO_LIBPCRE1_JIT @@ -1108,6 +1117,16 @@ ifdef NO_LIBPCRE1_JIT endif endif +ifdef USE_LIBPCRE2 + BASIC_CFLAGS += -DUSE_LIBPCRE2 + EXTLIBS += -lpcre2-8 +endif + +ifdef LIBPCREDIR + BASIC_CFLAGS += -I$(LIBPCREDIR)/include + EXTLIBS += -L$(LIBPCREDIR)/$(lib) $(CC_LD_DYNPATH)$(LIBPCREDIR)/$(lib) +endif + ifdef HAVE_ALLOCA_H BASIC_CFLAGS += -DHAVE_ALLOCA_H endif @@ -2252,7 +2271,8 @@ GIT-BUILD-OPTIONS: FORCE @echo TAR=\''$(subst ','\'',$(subst ','\'',$(TAR)))'\' >>$@+ @echo NO_CURL=\''$(subst ','\'',$(subst ','\'',$(NO_CURL)))'\' >>$@+ @echo NO_EXPAT=\''$(subst ','\'',$(subst ','\'',$(NO_EXPAT)))'\' >>$@+ - @echo USE_LIBPCRE1=\''$(subst ','\'',$(subst ','\'',$(USE_LIBPCRE)))'\' >>$@+ + @echo USE_LIBPCRE1=\''$(subst ','\'',$(subst ','\'',$(USE_LIBPCRE1)))'\' >>$@+ + @echo USE_LIBPCRE2=\''$(subst ','\'',$(subst ','\'',$(USE_LIBPCRE2)))'\' >>$@+ @echo NO_LIBPCRE1_JIT=\''$(subst ','\'',$(subst ','\'',$(NO_LIBPCRE1_JIT)))'\' >>$@+ @echo NO_PERL=\''$(subst ','\'',$(subst ','\'',$(NO_PERL)))'\' >>$@+ @echo NO_PTHREADS=\''$(subst ','\'',$(subst ','\'',$(NO_PTHREADS)))'\' >>$@+ -- cgit v1.2.1