aboutsummaryrefslogtreecommitdiff
path: root/diff.c
diff options
context:
space:
mode:
author: Junio C Hamano <junkio@cox.net> 2006-05-05 02:41:53 -0700
committer: Junio C Hamano <junkio@cox.net> 2006-05-05 15:24:32 -0700
commit: 0660626caff6ac3745cd7b7908a2ca540141a2ec (patch)
tree: ff3efcf56efaa6a04d0259164d6bf4ae2ac48ffd /diff.c
parent: 051308f6e9cebeb76b8fb4f52b7e9e7ce064445c (diff)
download: git-0660626caff6ac3745cd7b7908a2ca540141a2ec.tar.gz
download: git-0660626caff6ac3745cd7b7908a2ca540141a2ec.tar.xz
binary diff: further updates.
This updates the user interface and the generated diff data format.

* "diff --binary" is used to signal that we want an e-mailable binary patch. It implies --full-index and -p.

* "apply --allow-binary-replacement" acquired a short synonym "apply --binary".

* After the "GIT binary patch\n" header line there is a token to record which binary patch mechanism was used, so that we can extend it later. Currently there are two mechanisms defined: "literal" and "delta". The former records the deflated postimage and the latter records the deflated delta from the preimage to the postimage.

Purely for implementation convenience, I added the length of the data before deflation (i.e. the size it inflates to) after these "literal/delta" tokens (otherwise the decoding side needs to guess and reallocate the buffer while inflating). Improvement patches are very welcome.

Signed-off-by: Junio C Hamano <junkio@cox.net>
Diffstat (limited to 'diff.c')
-rw-r--r-- diff.c 134
1 files changed, 71 insertions, 63 deletions
diff --git a/diff.c b/diff.c
index b14d897f1..bfe54c3e0 100644
--- a/diff.c
+++ b/diff.c
@@ -392,78 +392,78 @@ static void show_stats(struct diffstat_t* data)
total_files, adds, dels);
}
-static void *encode_delta_size(void *data, unsigned long size)
+static unsigned char *deflate_it(char *data,
+ unsigned long size,
+ unsigned long *result_size)
{
- unsigned char *cp = data;
- *cp++ = size;
- size >>= 7;
- while (size) {
- cp[-1] |= 0x80;
- *cp++ = size;
- size >>= 7;
- }
- return cp;
+ int bound;
+ unsigned char *deflated;
+ z_stream stream;
+
+ memset(&stream, 0, sizeof(stream));
+ deflateInit(&stream, Z_BEST_COMPRESSION);
+ bound = deflateBound(&stream, size);
+ deflated = xmalloc(bound);
+ stream.next_out = deflated;
+ stream.avail_out = bound;
+
+ stream.next_in = (unsigned char *)data;
+ stream.avail_in = size;
+ while (deflate(&stream, Z_FINISH) == Z_OK)
+ ; /* nothing */
+ deflateEnd(&stream);
+ *result_size = stream.total_out;
+ return deflated;
}
-static void *safe_diff_delta(const unsigned char *src, unsigned long src_size,
- const unsigned char *dst, unsigned long dst_size,
- unsigned long *delta_size)
+static void emit_binary_diff(mmfile_t *one, mmfile_t *two)
{
- unsigned long bufsize;
- unsigned char *data;
- unsigned char *cp;
-
- if (src_size && dst_size)
- return diff_delta(src, src_size, dst, dst_size, delta_size, 0);
+ void *cp;
+ void *delta;
+ void *deflated;
+ void *data;
+ unsigned long orig_size;
+ unsigned long delta_size;
+ unsigned long deflate_size;
+ unsigned long data_size;
- /* diff-delta does not like to do delta with empty, so
- * we do that by hand here. Sigh...
+ printf("GIT binary patch\n");
+ /* We could do deflated delta, or we could do just deflated two,
+ * whichever is smaller.
*/
-
- if (!src_size)
- /* literal copy can be done only 127-byte at a time.
- */
- bufsize = dst_size + (dst_size / 127) + 40;
- else
- bufsize = 40;
- data = xmalloc(bufsize);
- cp = encode_delta_size(data, src_size);
- cp = encode_delta_size(cp, dst_size);
-
- if (dst_size) {
- /* copy out literally */
- while (dst_size) {
- int sz = (127 < dst_size) ? 127 : dst_size;
- *cp++ = sz;
- dst_size -= sz;
- while (sz) {
- *cp++ = *dst++;
- sz--;
- }
+ delta = NULL;
+ deflated = deflate_it(two->ptr, two->size, &deflate_size);
+ if (one->size && two->size) {
+ delta = diff_delta(one->ptr, one->size,
+ two->ptr, two->size,
+ &delta_size, deflate_size);
+ if (delta) {
+ void *to_free = delta;
+ orig_size = delta_size;
+ delta = deflate_it(delta, delta_size, &delta_size);
+ free(to_free);
}
}
- *delta_size = (cp - data);
- return data;
-}
-static void emit_binary_diff(mmfile_t *one, mmfile_t *two)
-{
- void *delta, *cp;
- unsigned long delta_size;
+ if (delta && delta_size < deflate_size) {
+ printf("delta %lu\n", orig_size);
+ free(deflated);
+ data = delta;
+ data_size = delta_size;
+ }
+ else {
+ printf("literal %lu\n", two->size);
+ free(delta);
+ data = deflated;
+ data_size = deflate_size;
+ }
- printf("GIT binary patch\n");
- delta = safe_diff_delta(one->ptr, one->size,
- two->ptr, two->size,
- &delta_size);
- if (!delta)
- die("unable to generate binary diff");
-
- /* emit delta encoded in base85 */
- cp = delta;
- while (delta_size) {
- int bytes = (52 < delta_size) ? 52 : delta_size;
+ /* emit data encoded in base85 */
+ cp = data;
+ while (data_size) {
+ int bytes = (52 < data_size) ? 52 : data_size;
char line[70];
- delta_size -= bytes;
+ data_size -= bytes;
if (bytes <= 26)
line[0] = bytes + 'A' - 1;
else
@@ -473,7 +473,7 @@ static void emit_binary_diff(mmfile_t *one, mmfile_t *two)
puts(line);
}
printf("\n");
- free(delta);
+ free(data);
}
#define FIRST_FEW_BYTES 8000
@@ -538,7 +538,11 @@ static void builtin_diff(const char *name_a,
die("unable to read files to diff");
if (mmfile_is_binary(&mf1) || mmfile_is_binary(&mf2)) {
- if (o->full_index)
+ /* Quite common confusing case */
+ if (mf1.size == mf2.size &&
+ !memcmp(mf1.ptr, mf2.ptr, mf1.size))
+ goto free_ab_and_return;
+ if (o->binary)
emit_binary_diff(&mf1, &mf2);
else
printf("Binary files %s and %s differ\n",
@@ -1239,6 +1243,10 @@ int diff_opt_parse(struct diff_options *options, const char **av, int ac)
options->rename_limit = strtoul(arg+2, NULL, 10);
else if (!strcmp(arg, "--full-index"))
options->full_index = 1;
+ else if (!strcmp(arg, "--binary")) {
+ options->output_format = DIFF_FORMAT_PATCH;
+ options->full_index = options->binary = 1;
+ }
else if (!strcmp(arg, "--name-only"))
options->output_format = DIFF_FORMAT_NAME;
else if (!strcmp(arg, "--name-status"))