aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile5
-rw-r--r--count-delta.c93
-rw-r--r--count-delta.h9
-rw-r--r--diffcore-rename.c16
4 files changed, 116 insertions, 7 deletions
diff --git a/Makefile b/Makefile
index b957cec1a..92d0e8753 100644
--- a/Makefile
+++ b/Makefile
@@ -46,8 +46,9 @@ LIB_H=cache.h object.h blob.h tree.h commit.h tag.h delta.h
LIB_H += strbuf.h
LIB_OBJS += strbuf.o
-LIB_H += diff.h
-LIB_OBJS += diff.o diffcore-rename.o diffcore-pickaxe.o diffcore-pathspec.o
+LIB_H += diff.h count-delta.h
+LIB_OBJS += diff.o diffcore-rename.o diffcore-pickaxe.o diffcore-pathspec.o \
+ count-delta.o
LIB_OBJS += gitenv.o
diff --git a/count-delta.c b/count-delta.c
new file mode 100644
index 000000000..dd81e9296
--- /dev/null
+++ b/count-delta.c
@@ -0,0 +1,93 @@
+/*
+ * Copyright (C) 2005 Junio C Hamano
+ * The delta-parsing part is almost straight copy of patch-delta.c
+ * which is (C) 2005 Nicolas Pitre <nico@cam.org>.
+ */
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include "delta.h"
+#include "count-delta.h"
+
/*
 * Decode one size field of the delta header and advance *datap past it.
 *
 * The field starts with a bitmap byte.  Bit N of the bitmap being set
 * means that byte N of the size (least significant first) follows in
 * the stream; a clear bit means that byte of the size is zero.
 *
 * No bounds are checked here: the caller must make sure the bitmap
 * byte and one byte per set bit are readable.
 */
static unsigned long get_hdr_size(const unsigned char **datap)
{
	const unsigned char *data = *datap;
	unsigned long size = 0;
	unsigned char cmd = *data++;
	unsigned int shift;

	for (shift = 0; cmd; shift += 8, cmd >>= 1) {
		if (!(cmd & 1))
			continue;
		/*
		 * Widen before shifting.  Left as is, the byte is promoted
		 * to int, so a value >= 0x80 shifted by 24 overflows (and
		 * sign-extends into the result), and a shift of 32 or more
		 * is undefined.  Bytes that cannot fit in unsigned long are
		 * still consumed, but dropped.
		 */
		if (shift < sizeof(size) * CHAR_BIT)
			size |= (unsigned long)*data << shift;
		data++;
	}
	*datap = data;
	return size;
}
+
/*
 * Return nonzero when a complete header size field (the bitmap byte
 * plus one byte per set bit) lies inside [data, top).
 */
static int hdr_size_fits(const unsigned char *data, const unsigned char *top)
{
	size_t need = 1;
	unsigned char cmd;

	if (data >= top)
		return 0;
	for (cmd = *data; cmd; cmd >>= 1)
		need += cmd & 1;
	return need <= (size_t)(top - data);
}

/*
 * Estimate the size of the edit a delta describes.
 *
 * NOTE. We do not _interpret_ the delta fully.  As an approximation,
 * we just count the number of bytes that are copied from the source
 * and the number of literal data bytes that are inserted.  The number
 * of bytes that are _not_ copied from the source is the deletion, and
 * the number of inserted literal bytes is the addition, so the sum of
 * the two is what we return.  xdelta can express an edit that copies
 * data inside of the destination which originally came from the
 * source.  We do not count that here, so we undercount the source
 * material that remains in the final output.
 *
 * delta_buf points at delta_size bytes of delta data.  UINT_MAX is
 * returned when the delta is too short, is truncated in the middle of
 * the header or of an opcode, or does not produce the number of bytes
 * its header promises.
 */
unsigned long count_delta(void *delta_buf, unsigned long delta_size)
{
	unsigned long copied_from_source = 0, added_literal = 0, out = 0;
	unsigned long src_size, dst_size;
	const unsigned char *data, *top;

	/* the smallest delta size possible is 6 bytes */
	if (delta_size < 6)
		return UINT_MAX;

	/* Do the arithmetic on a byte pointer; void * has no element size. */
	data = delta_buf;
	top = data + delta_size;

	/* get_hdr_size() does not check bounds, so check before each call. */
	if (!hdr_size_fits(data, top))
		return UINT_MAX;
	src_size = get_hdr_size(&data);
	if (!hdr_size_fits(data, top))
		return UINT_MAX;
	dst_size = get_hdr_size(&data);

	while (data < top) {
		unsigned char cmd = *data++;

		if (cmd & 0x80) {
			unsigned long cp_size = 0;
			size_t operand_bytes = 0;
			unsigned int bits;

			/* One operand byte follows per set bit in 0x01..0x20. */
			for (bits = cmd & 0x3f; bits; bits >>= 1)
				operand_bytes += bits & 1;
			if (operand_bytes > (size_t)(top - data))
				return UINT_MAX;

			/* The copy offset does not matter for counting; skip it. */
			for (bits = cmd & 0x0f; bits; bits >>= 1)
				data += bits & 1;
			if (cmd & 0x10)
				cp_size = *data++;
			if (cmd & 0x20)
				cp_size |= (unsigned long)*data++ << 8;
			if (cp_size == 0)
				cp_size = 0x10000;

			/* 0x40 means "copy from dst", which is not source material. */
			if (!(cmd & 0x40))
				copied_from_source += cp_size;
			out += cp_size;
		} else {
			/* write literal into dst */
			if (cmd > (size_t)(top - data))
				return UINT_MAX;
			added_literal += cmd;
			out += cmd;
			data += cmd;
		}
	}

	/* sanity check; data == top is guaranteed by the checks above */
	if (out != dst_size)
		return UINT_MAX;

	/*
	 * delete size is what was _not_ copied from source.
	 * edit size is that and literal additions.
	 * The same source bytes may be copied more than once, so clamp
	 * instead of letting the subtraction wrap around.
	 */
	if (src_size < copied_from_source)
		return added_literal;
	return (src_size - copied_from_source) + added_literal;
}
diff --git a/count-delta.h b/count-delta.h
new file mode 100644
index 000000000..4e6b584f4
--- /dev/null
+++ b/count-delta.h
@@ -0,0 +1,9 @@
+/*
+ * Copyright (C) 2005 Junio C Hamano
+ */
#ifndef COUNT_DELTA_H
#define COUNT_DELTA_H

/*
 * Estimate the size of the edit described by the delta stored in the
 * delta_size bytes at delta_buf: the number of source bytes that are
 * not copied into the result plus the number of literal bytes added.
 * Returns UINT_MAX when the delta cannot be parsed.
 */
unsigned long count_delta(void *delta_buf, unsigned long delta_size);

#endif
diff --git a/diffcore-rename.c b/diffcore-rename.c
index 34e83dac8..07782f4b7 100644
--- a/diffcore-rename.c
+++ b/diffcore-rename.c
@@ -5,6 +5,7 @@
#include "diff.h"
#include "diffcore.h"
#include "delta.h"
+#include "count-delta.h"
/* Table of rename/copy destinations */
@@ -158,13 +159,18 @@ static int estimate_similarity(struct diff_filespec *src,
delta = diff_delta(src->data, src->size,
dst->data, dst->size,
&delta_size);
- /*
- * We currently punt here, but we may later end up parsing the
- * delta to really assess the extent of damage. A big consecutive
- * remove would produce small delta_size that affects quite a
- * big portion of the file.
+
+ /* A delta that has a lot of literal additions would have
+ * big delta_size no matter what else it does.
*/
+ if (minimum_score < MAX_SCORE * delta_size / base_size)
+ return 0;
+
+ /* Estimate the edit size by interpreting delta. */
+ delta_size = count_delta(delta, delta_size);
free(delta);
+ if (delta_size == UINT_MAX)
+ return 0;
/*
* Now we will give some score to it. 100% edit gets 0 points