diff options
author | Junio C Hamano <junkio@cox.net> | 2006-02-23 02:59:24 -0800 |
---|---|---|
committer | Junio C Hamano <junkio@cox.net> | 2006-02-23 02:59:24 -0800 |
commit | d2540f0203f52e4888a06372765443ce5717eb0f (patch) | |
tree | eb56e9bff985f9ce9b019195fb3b01af441245b5 | |
parent | 63c2fcefd8b786ebe99d0beaa1ca3590cdc24609 (diff) | |
parent | 907380eeff27e9a07d6f1c03847c3d80f9e0e79a (diff) | |
download | git-d2540f0203f52e4888a06372765443ce5717eb0f.tar.gz git-d2540f0203f52e4888a06372765443ce5717eb0f.tar.xz |
Merge branch 'np/delta' into next
* np/delta:
count-delta: tweak counting of copied source material.
diff-delta: produce optimal pack data
-rw-r--r-- | count-delta.c | 77 | ||||
-rw-r--r-- | diff-delta.c | 77 |
2 files changed, 101 insertions, 53 deletions
diff --git a/count-delta.c b/count-delta.c index 058a2aadb..4e4d2f4fc 100644 --- a/count-delta.c +++ b/count-delta.c @@ -3,11 +3,74 @@ * The delta-parsing part is almost straight copy of patch-delta.c * which is (C) 2005 Nicolas Pitre <nico@cam.org>. */ +#include "cache.h" +#include "delta.h" +#include "count-delta.h" #include <stdlib.h> #include <string.h> #include <limits.h> -#include "delta.h" -#include "count-delta.h" + +struct span { + struct span *next; + unsigned long ofs; + unsigned long end; +}; + +static void touch_range(struct span **span, + unsigned long ofs, unsigned long end) +{ + struct span *e = *span; + struct span *p = NULL; + + while (e && e->ofs <= ofs) { + again: + if (ofs < e->end) { + while (e->end < end) { + if (e->next) { + e->end = e->next->ofs; + e = e->next; + } + else { + e->end = end; + return; + } + } + return; + } + p = e; + e = e->next; + } + if (e && e->ofs <= end) { + e->ofs = ofs; + goto again; + } + else { + e = xmalloc(sizeof(*e)); + e->ofs = ofs; + e->end = end; + if (p) { + e->next = p->next; + p->next = e; + } + else { + e->next = *span; + *span = e; + } + } +} + +static unsigned long count_range(struct span *s) +{ + struct span *t; + unsigned long sz = 0; + while (s) { + t = s; + sz += s->end - s->ofs; + s = s->next; + free(t); + } + return sz; +} /* * NOTE. We do not _interpret_ delta fully. As an approximation, we @@ -21,10 +84,11 @@ int count_delta(void *delta_buf, unsigned long delta_size, unsigned long *src_copied, unsigned long *literal_added) { - unsigned long copied_from_source, added_literal; + unsigned long added_literal; const unsigned char *data, *top; unsigned char cmd; unsigned long src_size, dst_size, out; + struct span *span = NULL; if (delta_size < DELTA_SIZE_MIN) return -1; @@ -35,7 +99,7 @@ int count_delta(void *delta_buf, unsigned long delta_size, src_size = get_delta_hdr_size(&data); dst_size = get_delta_hdr_size(&data); - added_literal = copied_from_source = out = 0; + added_literal = out = 0; while (data < top) { cmd = *data++; if (cmd & 0x80) { @@ -49,7 +113,7 @@ int count_delta(void *delta_buf, unsigned long delta_size, if (cmd & 0x40) cp_size |= (*data++ << 16); if (cp_size == 0) cp_size = 0x10000; - copied_from_source += cp_size; + touch_range(&span, cp_off, cp_off+cp_size); out += cp_size; } else { /* write literal into dst */ @@ -59,6 +123,8 @@ int count_delta(void *delta_buf, unsigned long delta_size, } } + *src_copied = count_range(span); + /* sanity check */ if (data != top || out != dst_size) return -1; @@ -66,7 +132,6 @@ int count_delta(void *delta_buf, unsigned long delta_size, /* delete size is what was _not_ copied from source. * edit size is that and literal additions. */ - *src_copied = copied_from_source; *literal_added = added_literal; return 0; } diff --git a/diff-delta.c b/diff-delta.c index 2ed5984b1..27f83a085 100644 --- a/diff-delta.c +++ b/diff-delta.c @@ -20,21 +20,11 @@ #include <stdlib.h> #include <string.h> -#include <zlib.h> #include "delta.h" -/* block size: min = 16, max = 64k, power of 2 */ -#define BLK_SIZE 16 - -#define MIN(a, b) ((a) < (b) ? (a) : (b)) - -#define GR_PRIME 0x9e370001 -#define HASH(v, shift) (((unsigned int)(v) * GR_PRIME) >> (shift)) - struct index { const unsigned char *ptr; - unsigned int val; struct index *next; }; @@ -42,21 +32,21 @@ static struct index ** delta_index(const unsigned char *buf, unsigned long bufsize, unsigned int *hash_shift) { - unsigned int hsize, hshift, entries, blksize, i; + unsigned long hsize; + unsigned int hshift, i; const unsigned char *data; struct index *entry, **hash; void *mem; /* determine index hash size */ - entries = (bufsize + BLK_SIZE - 1) / BLK_SIZE; - hsize = entries / 4; - for (i = 4; (1 << i) < hsize && i < 16; i++); + hsize = bufsize / 4; + for (i = 8; (1 << i) < hsize && i < 16; i++); hsize = 1 << i; - hshift = 32 - i; + hshift = i - 8; *hash_shift = hshift; /* allocate lookup index */ - mem = malloc(hsize * sizeof(*hash) + entries * sizeof(*entry)); + mem = malloc(hsize * sizeof(*hash) + bufsize * sizeof(*entry)); if (!mem) return NULL; hash = mem; @@ -64,17 +54,12 @@ static struct index ** delta_index(const unsigned char *buf, memset(hash, 0, hsize * sizeof(*hash)); /* then populate it */ - data = buf + entries * BLK_SIZE - BLK_SIZE; - blksize = bufsize - (data - buf); - while (data >= buf) { - unsigned int val = adler32(0, data, blksize); - i = HASH(val, hshift); - entry->ptr = data; - entry->val = val; + data = buf + bufsize - 2; + while (data > buf) { + entry->ptr = --data; + i = data[0] ^ data[1] ^ (data[2] << hshift); entry->next = hash[i]; hash[i] = entry++; - blksize = BLK_SIZE; - data -= BLK_SIZE; } return hash; @@ -141,29 +126,27 @@ void *diff_delta(void *from_buf, unsigned long from_size, while (data < top) { unsigned int moff = 0, msize = 0; - unsigned int blksize = MIN(top - data, BLK_SIZE); - unsigned int val = adler32(0, data, blksize); - i = HASH(val, hash_shift); - for (entry = hash[i]; entry; entry = entry->next) { - const unsigned char *ref = entry->ptr; - const unsigned char *src = data; - unsigned int ref_size = ref_top - ref; - if (entry->val != val) - continue; - if (ref_size > top - src) - ref_size = top - src; - while (ref_size && *src++ == *ref) { - ref++; - ref_size--; - } - ref_size = ref - entry->ptr; - if (ref_size > msize) { - /* this is our best match so far */ - moff = entry->ptr - ref_data; - msize = ref_size; - if (msize >= 0x10000) { - msize = 0x10000; + if (data + 2 < top) { + i = data[0] ^ data[1] ^ (data[2] << hash_shift); + for (entry = hash[i]; entry; entry = entry->next) { + const unsigned char *ref = entry->ptr; + const unsigned char *src = data; + unsigned int ref_size = ref_top - ref; + if (ref_size > top - src) + ref_size = top - src; + if (ref_size > 0x10000) + ref_size = 0x10000; + if (ref_size <= msize) break; + while (ref_size && *src++ == *ref) { + ref++; + ref_size--; + } + ref_size = ref - entry->ptr; + if (msize < ref - entry->ptr) { + /* this is our best match so far */ + msize = ref - entry->ptr; + moff = entry->ptr - ref_data; } } } |