aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJunio C Hamano <junkio@cox.net>2006-03-04 00:31:28 -0800
committerJunio C Hamano <junkio@cox.net>2006-03-04 00:31:28 -0800
commit5aad948f6500d29a755c81358ea9a07eaea26beb (patch)
tree0ae7c20386e15903665b9848e01f9af92545b1c3
parented19f367220a50e4e2a5c1a00b03c14eafcaba89 (diff)
parent1706306a54cfb5f1bf65f2b054aab2a5a7dba8e7 (diff)
downloadgit-5aad948f6500d29a755c81358ea9a07eaea26beb.tar.gz
git-5aad948f6500d29a755c81358ea9a07eaea26beb.tar.xz
Merge branch 'jc/diff' into next
* jc/diff: diffcore-rename: similarity estimator fix. diffcore-delta: stop using deltifier for packing.
-rw-r--r--diffcore-delta.c141
-rw-r--r--diffcore-rename.c20
2 files changed, 121 insertions, 40 deletions
diff --git a/diffcore-delta.c b/diffcore-delta.c
index 1e6a6911e..d03787be6 100644
--- a/diffcore-delta.c
+++ b/diffcore-delta.c
@@ -1,43 +1,128 @@
#include "cache.h"
#include "diff.h"
#include "diffcore.h"
-#include "delta.h"
-#include "count-delta.h"
-
-static int diffcore_count_changes_1(void *src, unsigned long src_size,
- void *dst, unsigned long dst_size,
- unsigned long delta_limit,
- unsigned long *src_copied,
- unsigned long *literal_added)
-{
- void *delta;
- unsigned long delta_size;
-
- delta = diff_delta(src, src_size,
- dst, dst_size,
- &delta_size, delta_limit);
- if (!delta)
- /* If delta_limit is exceeded, we have too much differences */
- return -1;
- /* Estimate the edit size by interpreting delta. */
- if (count_delta(delta, delta_size, src_copied, literal_added)) {
- free(delta);
- return -1;
+struct linehash {
+ unsigned long bytes;
+ unsigned long hash;
+};
+
+static unsigned long hash_extended_line(const unsigned char **buf_p,
+ unsigned long left)
+{
+ /* An extended line is zero or more whitespace letters (including LF)
+ * followed by one non whitespace letter followed by zero or more
+ * non LF, and terminated with by a LF (or EOF).
+ */
+ const unsigned char *bol = *buf_p;
+ const unsigned char *buf = bol;
+ unsigned long hashval = 0;
+ while (left) {
+ unsigned c = *buf++;
+ if (!c)
+ goto binary;
+ left--;
+ if (' ' < c) {
+ hashval = c;
+ break;
+ }
+ }
+ while (left) {
+ unsigned c = *buf++;
+ if (!c)
+ goto binary;
+ left--;
+ if (c == '\n')
+ break;
+ if (' ' < c)
+ hashval = hashval * 11 + c;
}
- free(delta);
+ *buf_p = buf;
+ return hashval;
+
+ binary:
+ *buf_p = NULL;
+ return 0;
+}
+
+static int linehash_compare(const void *a_, const void *b_)
+{
+ struct linehash *a = (struct linehash *) a_;
+ struct linehash *b = (struct linehash *) b_;
+ if (a->hash < b->hash) return -1;
+ if (a->hash > b->hash) return 1;
return 0;
}
+static struct linehash *hash_lines(const unsigned char *buf,
+ unsigned long size)
+{
+ const unsigned char *eobuf = buf + size;
+ struct linehash *line = NULL;
+ int alloc = 0, used = 0;
+
+ while (buf < eobuf) {
+ const unsigned char *ptr = buf;
+ unsigned long hash = hash_extended_line(&buf, eobuf-ptr);
+ if (!buf) {
+ free(line);
+ return NULL;
+ }
+ if (alloc <= used) {
+ alloc = alloc_nr(alloc);
+ line = xrealloc(line, sizeof(*line) * alloc);
+ }
+ line[used].bytes = buf - ptr;
+ line[used].hash = hash;
+ used++;
+ }
+ qsort(line, used, sizeof(*line), linehash_compare);
+
+ /* Terminate the list */
+ if (alloc <= used)
+ line = xrealloc(line, sizeof(*line) * (used+1));
+ line[used].bytes = line[used].hash = 0;
+ return line;
+}
+
int diffcore_count_changes(void *src, unsigned long src_size,
void *dst, unsigned long dst_size,
unsigned long delta_limit,
unsigned long *src_copied,
unsigned long *literal_added)
{
- return diffcore_count_changes_1(src, src_size,
- dst, dst_size,
- delta_limit,
- src_copied,
- literal_added);
+ struct linehash *src_lines, *dst_lines;
+ unsigned long sc, la;
+
+ src_lines = hash_lines(src, src_size);
+ if (!src_lines)
+ return -1;
+ dst_lines = hash_lines(dst, dst_size);
+ if (!dst_lines) {
+ free(src_lines);
+ return -1;
+ }
+ sc = la = 0;
+ while (src_lines->bytes && dst_lines->bytes) {
+ int cmp = linehash_compare(src_lines, dst_lines);
+ if (!cmp) {
+ sc += src_lines->bytes;
+ src_lines++;
+ dst_lines++;
+ continue;
+ }
+ if (cmp < 0) {
+ src_lines++;
+ continue;
+ }
+ la += dst_lines->bytes;
+ dst_lines++;
+ }
+ while (dst_lines->bytes) {
+ la += dst_lines->bytes;
+ dst_lines++;
+ }
+ *src_copied = sc;
+ *literal_added = la;
+ return 0;
}
diff --git a/diffcore-rename.c b/diffcore-rename.c
index 55cf1c37f..625b589fb 100644
--- a/diffcore-rename.c
+++ b/diffcore-rename.c
@@ -170,19 +170,15 @@ static int estimate_similarity(struct diff_filespec *src,
&src_copied, &literal_added))
return 0;
- /* Extent of damage */
- if (src->size + literal_added < src_copied)
- delta_size = 0;
- else
- delta_size = (src->size - src_copied) + literal_added;
-
- /*
- * Now we will give some score to it. 100% edit gets 0 points
- * and 0% edit gets MAX_SCORE points.
+ /* How similar are they?
+ * what percentage of material in dst are from source?
*/
- score = MAX_SCORE - (MAX_SCORE * delta_size / base_size);
- if (score < 0) return 0;
- if (MAX_SCORE < score) return MAX_SCORE;
+ if (dst->size < src_copied)
+ score = MAX_SCORE;
+ else if (!dst->size)
+ score = 0; /* should not happen */
+ else
+ score = src_copied * MAX_SCORE / dst->size;
return score;
}