aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJunio C Hamano <gitster@pobox.com>2007-06-28 23:14:13 -0700
committerJunio C Hamano <gitster@pobox.com>2007-06-30 20:51:31 -0700
commitb9905fed7a028cc9749cf8ad479cbb07940c8638 (patch)
tree62122a630a8871f2f3e63a54a744b25285a7ccb6
parentaf3abef94af9c821a0c192c693c3e5342ab8729f (diff)
downloadgit-b9905fed7a028cc9749cf8ad479cbb07940c8638.tar.gz
git-b9905fed7a028cc9749cf8ad479cbb07940c8638.tar.xz
diffcore-delta.c: Ignore CR in CRLF for text files
This ignores CR byte in CRLF sequence in text file when computing similarity of two blobs. Usually this should not matter as nobody sane would be checking in a file with CRLF line endings to the repository (they would use autocrlf so that the repository copy would have LF line endings). Signed-off-by: Junio C Hamano <gitster@pobox.com>
-rw-r--r--diffcore-delta.c14
-rwxr-xr-xt/t0022-crlf-rename.sh33
2 files changed, 44 insertions, 3 deletions
diff --git a/diffcore-delta.c b/diffcore-delta.c
index 7116df0b8..a038b166c 100644
--- a/diffcore-delta.c
+++ b/diffcore-delta.c
@@ -122,11 +122,14 @@ static struct spanhash_top *add_spanhash(struct spanhash_top *top,
}
}
-static struct spanhash_top *hash_chars(unsigned char *buf, unsigned int sz)
+static struct spanhash_top *hash_chars(struct diff_filespec *one)
{
int i, n;
unsigned int accum1, accum2, hashval;
struct spanhash_top *hash;
+ unsigned char *buf = one->data;
+ unsigned int sz = one->size;
+ int is_text = !one->is_binary;
i = INITIAL_HASH_SIZE;
hash = xmalloc(sizeof(*hash) + sizeof(struct spanhash) * (1<<i));
@@ -140,6 +143,11 @@ static struct spanhash_top *hash_chars(unsigned char *buf, unsigned int sz)
unsigned int c = *buf++;
unsigned int old_1 = accum1;
sz--;
+
+ /* Ignore CR in CRLF sequence if text */
+ if (is_text && c == '\r' && sz && *buf == '\n')
+ continue;
+
accum1 = (accum1 << 7) ^ (accum2 >> 25);
accum2 = (accum2 << 7) ^ (old_1 >> 25);
accum1 += c;
@@ -169,14 +177,14 @@ int diffcore_count_changes(struct diff_filespec *src,
if (src_count_p)
src_count = *src_count_p;
if (!src_count) {
- src_count = hash_chars(src->data, src->size);
+ src_count = hash_chars(src);
if (src_count_p)
*src_count_p = src_count;
}
if (dst_count_p)
dst_count = *dst_count_p;
if (!dst_count) {
- dst_count = hash_chars(dst->data, dst->size);
+ dst_count = hash_chars(dst);
if (dst_count_p)
*dst_count_p = dst_count;
}
diff --git a/t/t0022-crlf-rename.sh b/t/t0022-crlf-rename.sh
new file mode 100755
index 000000000..430a1d1d3
--- /dev/null
+++ b/t/t0022-crlf-rename.sh
@@ -0,0 +1,33 @@
+#!/bin/sh
+
+test_description='ignore CR in CRLF sequence while computing similiarity'
+
+. ./test-lib.sh
+
+test_expect_success setup '
+
+ cat ../t0022-crlf-rename.sh >sample &&
+ git add sample &&
+
+ test_tick &&
+ git commit -m Initial &&
+
+ sed -e "s/\$/ /" ../t0022-crlf-rename.sh >elpmas &&
+ git add elpmas &&
+ rm -f sample &&
+
+ test_tick &&
+ git commit -a -m Second
+
+'
+
+test_expect_success 'diff -M' '
+
+ git diff-tree -M -r --name-status HEAD^ HEAD |
+ sed -e "s/R[0-9]*/RNUM/" >actual &&
+ echo "RNUM sample elpmas" >expect &&
+ diff -u expect actual
+
+'
+
+test_done