From 5ec8274b8424f76bf998059e66facff1b241337e Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Wed, 25 Oct 2017 11:49:11 -0700 Subject: xdiff-interface: export comparing and hashing strings This will turn out to be useful in a later patch. xdl_recmatch is exported in xdiff/xutils.h, to be used by various xdiff/*.c files, but not outside of xdiff/. This one makes it available to the outside, too. While at it, add documentation. Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano --- xdiff-interface.c | 12 ++++++++++++ xdiff-interface.h | 16 ++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/xdiff-interface.c b/xdiff-interface.c index 018e03308..770e1f7f8 100644 --- a/xdiff-interface.c +++ b/xdiff-interface.c @@ -5,6 +5,7 @@ #include "xdiff/xdiffi.h" #include "xdiff/xemit.h" #include "xdiff/xmacros.h" +#include "xdiff/xutils.h" struct xdiff_emit_state { xdiff_emit_consume_fn consume; @@ -296,6 +297,17 @@ void xdiff_clear_find_func(xdemitconf_t *xecfg) } } +unsigned long xdiff_hash_string(const char *s, size_t len, long flags) +{ + return xdl_hash_record(&s, s + len, flags); +} + +int xdiff_compare_lines(const char *l1, long s1, + const char *l2, long s2, long flags) +{ + return xdl_recmatch(l1, s1, l2, s2, flags); +} + int git_xmerge_style = -1; int git_xmerge_config(const char *var, const char *value, void *cb) diff --git a/xdiff-interface.h b/xdiff-interface.h index 6f6ba9095..135fc05d7 100644 --- a/xdiff-interface.h +++ b/xdiff-interface.h @@ -29,4 +29,20 @@ extern void xdiff_clear_find_func(xdemitconf_t *xecfg); extern int git_xmerge_config(const char *var, const char *value, void *cb); extern int git_xmerge_style; +/* + * Compare the strings l1 with l2 which are of size s1 and s2 respectively. + * Returns 1 if the strings are deemed equal, 0 otherwise. + * The `flags` given as XDF_WHITESPACE_FLAGS determine how white spaces + * are treated for the comparison. 
+ */ +extern int xdiff_compare_lines(const char *l1, long s1, + const char *l2, long s2, long flags); + +/* + * Returns a hash of the string s of length len. + * The `flags` given as XDF_WHITESPACE_FLAGS determine how white spaces + * are treated for the hash. + */ +extern unsigned long xdiff_hash_string(const char *s, size_t len, long flags); + #endif -- cgit v1.2.1 From 01be97c2b285e8ba377ba58385ef6ad2e7815c93 Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Wed, 25 Oct 2017 11:49:12 -0700 Subject: diff.c: get rid of duplicate implementation The implementation in diff.c to detect moved lines needs to compare strings and hash strings, which is implemented in that file, as well as in the xdiff library. Remove the rather recent implementation in diff.c and rely on the well exercised code in the xdiff lib. With this change the hash used for bucketing the strings for the moved line detection changes from FNV32 (that is provided via the hashmap's memhash) to DJB2 (which is used internally in xdiff). Benchmarks found on the web[1] do not indicate that these hashes are different in performance for readable strings. [1] https://softwareengineering.stackexchange.com/questions/49550/which-hashing-algorithm-is-best-for-uniqueness-and-speed Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano --- diff.c | 82 ++++-------------------------------------------------------------- 1 file changed, 4 insertions(+), 78 deletions(-) diff --git a/diff.c b/diff.c index c4a669ffa..e6814b9e9 100644 --- a/diff.c +++ b/diff.c @@ -707,88 +707,14 @@ struct moved_entry { struct moved_entry *next_line; }; -static int next_byte(const char **cp, const char **endp, - const struct diff_options *diffopt) -{ - int retval; - - if (*cp >= *endp) - return -1; - - if (isspace(**cp)) { - if (DIFF_XDL_TST(diffopt, IGNORE_WHITESPACE_CHANGE)) { - while (*cp < *endp && isspace(**cp)) - (*cp)++; - /* - * After skipping a couple of whitespaces, - * we still have to account for one space. 
- */ - return (int)' '; - } - - if (DIFF_XDL_TST(diffopt, IGNORE_WHITESPACE)) { - while (*cp < *endp && isspace(**cp)) - (*cp)++; - /* - * return the first non-ws character via the usual - * below, unless we ate all of the bytes - */ - if (*cp >= *endp) - return -1; - } - } - - retval = (unsigned char)(**cp); - (*cp)++; - return retval; -} - static int moved_entry_cmp(const struct diff_options *diffopt, const struct moved_entry *a, const struct moved_entry *b, const void *keydata) { - const char *ap = a->es->line, *ae = a->es->line + a->es->len; - const char *bp = b->es->line, *be = b->es->line + b->es->len; - - if (!(diffopt->xdl_opts & XDF_WHITESPACE_FLAGS)) - return a->es->len != b->es->len || memcmp(ap, bp, a->es->len); - - if (DIFF_XDL_TST(diffopt, IGNORE_WHITESPACE_AT_EOL)) { - while (ae > ap && isspace(ae[-1])) - ae--; - while (be > bp && isspace(be[-1])) - be--; - } - - while (1) { - int ca, cb; - ca = next_byte(&ap, &ae, diffopt); - cb = next_byte(&bp, &be, diffopt); - if (ca != cb) - return 1; - if (ca < 0) - return 0; - } -} - -static unsigned get_string_hash(struct emitted_diff_symbol *es, struct diff_options *o) -{ - if (o->xdl_opts & XDF_WHITESPACE_FLAGS) { - static struct strbuf sb = STRBUF_INIT; - const char *ap = es->line, *ae = es->line + es->len; - int c; - - strbuf_reset(&sb); - while (ae > ap && isspace(ae[-1])) - ae--; - while ((c = next_byte(&ap, &ae, o)) >= 0) - strbuf_addch(&sb, c); - - return memhash(sb.buf, sb.len); - } else { - return memhash(es->line, es->len); - } + return !xdiff_compare_lines(a->es->line, a->es->len, + b->es->line, b->es->len, + diffopt->xdl_opts); } static struct moved_entry *prepare_entry(struct diff_options *o, @@ -797,7 +723,7 @@ static struct moved_entry *prepare_entry(struct diff_options *o, struct moved_entry *ret = xmalloc(sizeof(*ret)); struct emitted_diff_symbol *l = &o->emitted_symbols->buf[line_no]; - ret->ent.hash = get_string_hash(l, o); + ret->ent.hash = xdiff_hash_string(l->line, l->len, o->xdl_opts); 
ret->es = l; ret->next_line = NULL; -- cgit v1.2.1