diff options
-rw-r--r-- | index-pack.c | 163 | ||||
-rwxr-xr-x | t/t5302-pack-index.sh | 92 |
2 files changed, 146 insertions, 109 deletions
diff --git a/index-pack.c b/index-pack.c index 6f89bb9ac..fe75332a9 100644 --- a/index-pack.c +++ b/index-pack.c @@ -221,17 +221,23 @@ static void bad_object(unsigned long offset, const char *format, ...) die("pack has bad object at offset %lu: %s", offset, buf); } +static void free_base_data(struct base_data *c) +{ + if (c->data) { + free(c->data); + c->data = NULL; + base_cache_used -= c->size; + } +} + static void prune_base_data(struct base_data *retain) { struct base_data *b = base_cache; for (b = base_cache; base_cache_used > delta_base_cache_limit && b; b = b->child) { - if (b->data && b != retain) { - free(b->data); - b->data = NULL; - base_cache_used -= b->size; - } + if (b->data && b != retain) + free_base_data(b); } } @@ -244,7 +250,8 @@ static void link_base_data(struct base_data *base, struct base_data *c) c->base = base; c->child = NULL; - base_cache_used += c->size; + if (c->data) + base_cache_used += c->size; prune_base_data(c); } @@ -255,10 +262,7 @@ static void unlink_base_data(struct base_data *c) base->child = NULL; else base_cache = NULL; - if (c->data) { - free(c->data); - base_cache_used -= c->size; - } + free_base_data(c); } static void *unpack_entry_data(unsigned long offset, unsigned long size) @@ -408,22 +412,24 @@ static int find_delta(const union delta_base *base) return -first-1; } -static int find_delta_children(const union delta_base *base, - int *first_index, int *last_index) +static void find_delta_children(const union delta_base *base, + int *first_index, int *last_index) { int first = find_delta(base); int last = first; int end = nr_deltas - 1; - if (first < 0) - return -1; + if (first < 0) { + *first_index = 0; + *last_index = -1; + return; + } while (first > 0 && !memcmp(&deltas[first - 1].base, base, UNION_BASE_SZ)) --first; while (last < end && !memcmp(&deltas[last + 1].base, base, UNION_BASE_SZ)) ++last; *first_index = first; *last_index = last; - return 0; } static void sha1_object(const void *data, unsigned long size, @@ -494,8 +500,10 @@ static void *get_base_data(struct base_data *c) free(raw); if (!c->data) bad_object(obj->idx.offset, "failed to apply delta"); - } else + } else { c->data = get_data_from_pack(obj); + c->size = obj->size; + } base_cache_used += c->size; prune_base_data(c); @@ -504,49 +512,74 @@ static void *get_base_data(struct base_data *c) } static void resolve_delta(struct object_entry *delta_obj, - struct base_data *base_obj, enum object_type type) + struct base_data *base, struct base_data *result) { - void *delta_data; - unsigned long delta_size; - union delta_base delta_base; - int j, first, last; - struct base_data result; + void *base_data, *delta_data; - delta_obj->real_type = type; + delta_obj->real_type = base->obj->real_type; delta_data = get_data_from_pack(delta_obj); - delta_size = delta_obj->size; - result.data = patch_delta(get_base_data(base_obj), base_obj->size, - delta_data, delta_size, - &result.size); + base_data = get_base_data(base); + result->obj = delta_obj; + result->data = patch_delta(base_data, base->size, + delta_data, delta_obj->size, &result->size); free(delta_data); - if (!result.data) + if (!result->data) bad_object(delta_obj->idx.offset, "failed to apply delta"); - sha1_object(result.data, result.size, type, delta_obj->idx.sha1); + sha1_object(result->data, result->size, delta_obj->real_type, + delta_obj->idx.sha1); nr_resolved_deltas++; +} + +static void find_unresolved_deltas(struct base_data *base, + struct base_data *prev_base) +{ + int i, ref_first, ref_last, ofs_first, ofs_last; + + /* + * This is a recursive function. Those brackets should help reducing + * stack usage by limiting the scope of the delta_base union. + */ + { + union delta_base base_spec; + + hashcpy(base_spec.sha1, base->obj->idx.sha1); + find_delta_children(&base_spec, &ref_first, &ref_last); + + memset(&base_spec, 0, sizeof(base_spec)); + base_spec.offset = base->obj->idx.offset; + find_delta_children(&base_spec, &ofs_first, &ofs_last); + } - result.obj = delta_obj; - link_base_data(base_obj, &result); + if (ref_last == -1 && ofs_last == -1) { + free(base->data); + return; + } + + link_base_data(prev_base, base); - hashcpy(delta_base.sha1, delta_obj->idx.sha1); - if (!find_delta_children(&delta_base, &first, &last)) { - for (j = first; j <= last; j++) { - struct object_entry *child = objects + deltas[j].obj_no; - if (child->real_type == OBJ_REF_DELTA) - resolve_delta(child, &result, type); + for (i = ref_first; i <= ref_last; i++) { + struct object_entry *child = objects + deltas[i].obj_no; + if (child->real_type == OBJ_REF_DELTA) { + struct base_data result; + resolve_delta(child, base, &result); + if (i == ref_last && ofs_last == -1) + free_base_data(base); + find_unresolved_deltas(&result, base); } } - memset(&delta_base, 0, sizeof(delta_base)); - delta_base.offset = delta_obj->idx.offset; - if (!find_delta_children(&delta_base, &first, &last)) { - for (j = first; j <= last; j++) { - struct object_entry *child = objects + deltas[j].obj_no; - if (child->real_type == OBJ_OFS_DELTA) - resolve_delta(child, &result, type); + for (i = ofs_first; i <= ofs_last; i++) { + struct object_entry *child = objects + deltas[i].obj_no; + if (child->real_type == OBJ_OFS_DELTA) { + struct base_data result; + resolve_delta(child, base, &result); + if (i == ofs_last) + free_base_data(base); + find_unresolved_deltas(&result, base); } } - unlink_base_data(&result); + unlink_base_data(base); } static int compare_delta_entry(const void *a, const void *b) @@ -622,37 +655,13 @@ static void parse_pack_objects(unsigned char *sha1) progress = start_progress("Resolving deltas", nr_deltas); for (i = 0; i < nr_objects; i++) { struct object_entry *obj = &objects[i]; - union delta_base base; - int j, ref, ref_first, ref_last, ofs, ofs_first, ofs_last; struct base_data base_obj; if (obj->type == OBJ_REF_DELTA || obj->type == OBJ_OFS_DELTA) continue; - hashcpy(base.sha1, obj->idx.sha1); - ref = !find_delta_children(&base, &ref_first, &ref_last); - memset(&base, 0, sizeof(base)); - base.offset = obj->idx.offset; - ofs = !find_delta_children(&base, &ofs_first, &ofs_last); - if (!ref && !ofs) - continue; - base_obj.data = get_data_from_pack(obj); - base_obj.size = obj->size; base_obj.obj = obj; - link_base_data(NULL, &base_obj); - - if (ref) - for (j = ref_first; j <= ref_last; j++) { - struct object_entry *child = objects + deltas[j].obj_no; - if (child->real_type == OBJ_REF_DELTA) - resolve_delta(child, &base_obj, obj->type); - } - if (ofs) - for (j = ofs_first; j <= ofs_last; j++) { - struct object_entry *child = objects + deltas[j].obj_no; - if (child->real_type == OBJ_OFS_DELTA) - resolve_delta(child, &base_obj, obj->type); - } - unlink_base_data(&base_obj); + base_obj.data = NULL; + find_unresolved_deltas(&base_obj, NULL); display_progress(progress, nr_resolved_deltas); } } @@ -745,7 +754,6 @@ static void fix_unresolved_deltas(struct sha1file *f, int nr_unresolved) for (i = 0; i < n; i++) { struct delta_entry *d = sorted_by_pos[i]; enum object_type type; - int j, first, last; struct base_data base_obj; if (objects[d->obj_no].real_type != OBJ_REF_DELTA) @@ -759,16 +767,7 @@ static void fix_unresolved_deltas(struct sha1file *f, int nr_unresolved) die("local object %s is corrupt", sha1_to_hex(d->base.sha1)); base_obj.obj = append_obj_to_pack(f, d->base.sha1, base_obj.data, base_obj.size, type); - link_base_data(NULL, &base_obj); - - find_delta_children(&d->base, &first, &last); - for (j = first; j <= last; j++) { - struct object_entry *child = objects + deltas[j].obj_no; - if (child->real_type == OBJ_REF_DELTA) - resolve_delta(child, &base_obj, type); - } - - unlink_base_data(&base_obj); + find_unresolved_deltas(&base_obj, NULL); display_progress(progress, nr_resolved_deltas); } free(sorted_by_pos); diff --git a/t/t5302-pack-index.sh b/t/t5302-pack-index.sh index 344ab25b8..b0b0fdaca 100755 --- a/t/t5302-pack-index.sh +++ b/t/t5302-pack-index.sh @@ -11,13 +11,18 @@ test_expect_success \ 'rm -rf .git git init && i=1 && - while test $i -le 100 + while test $i -le 100 do - i=`printf '%03i' $i` - echo $i >file_$i && - test-genrandom "$i" 8192 >>file_$i && - git update-index --add file_$i && - i=`expr $i + 1` || return 1 + iii=`printf '%03i' $i` + test-genrandom "bar" 200 > wide_delta_$iii && + test-genrandom "baz $iii" 50 >> wide_delta_$iii && + test-genrandom "foo"$i 100 > deep_delta_$iii && + test-genrandom "foo"`expr $i + 1` 100 >> deep_delta_$iii && + test-genrandom "foo"`expr $i + 2` 100 >> deep_delta_$iii && + echo $iii >file_$iii && + test-genrandom "$iii" 8192 >>file_$iii && + git update-index --add file_$iii deep_delta_$iii wide_delta_$iii && + i=`expr $i + 1` || return 1 done && { echo 101 && test-genrandom 100 8192; } >file_101 && git update-index --add file_101 && @@ -92,6 +97,31 @@ test_expect_success \ '64-bit offsets: index-pack result should match pack-objects one' \ 'cmp "test-3-${pack3}.idx" "3.idx"' +# returns the object number for given object in given pack index +index_obj_nr() +{ + idx_file=$1 + object_sha1=$2 + nr=0 + git show-index < $idx_file | + while read offs sha1 extra + do + nr=$(($nr + 1)) + test "$sha1" = "$object_sha1" || continue + echo "$(($nr - 1))" + break + done +} + +# returns the pack offset for given object as found in given pack index +index_obj_offset() +{ + idx_file=$1 + object_sha1=$2 + git show-index < $idx_file | grep $object_sha1 | + ( read offs extra && echo "$offs" ) +} + test_expect_success \ '[index v1] 1) stream pack to repository' \ 'git index-pack --index-version=1 --stdin < "test-1-${pack1}.pack" && @@ -102,19 +132,22 @@ test_expect_success \ test_expect_success \ '[index v1] 2) create a stealth corruption in a delta base reference' \ - '# this test assumes a delta smaller than 16 bytes at the end of the pack - git show-index <1.idx | sort -n | sed -ne \$p | ( - read delta_offs delta_sha1 && - git cat-file blob "$delta_sha1" > blob_1 && - chmod +w ".git/objects/pack/pack-${pack1}.pack" && - dd of=".git/objects/pack/pack-${pack1}.pack" seek=$(($delta_offs + 1)) \ - if=".git/objects/pack/pack-${pack1}.idx" skip=$((256 * 4 + 4)) \ - bs=1 count=20 conv=notrunc && - git cat-file blob "$delta_sha1" > blob_2 )' + '# This test assumes file_101 is a delta smaller than 16 bytes. + # It should be against file_100 but we substitute its base for file_099 + sha1_101=`git hash-object file_101` && + sha1_099=`git hash-object file_099` && + offs_101=`index_obj_offset 1.idx $sha1_101` && + nr_099=`index_obj_nr 1.idx $sha1_099` && + chmod +w ".git/objects/pack/pack-${pack1}.pack" && + dd of=".git/objects/pack/pack-${pack1}.pack" seek=$(($offs_101 + 1)) \ + if=".git/objects/pack/pack-${pack1}.idx" \ + skip=$((4 + 256 * 4 + $nr_099 * 24)) \ + bs=1 count=20 conv=notrunc && + git cat-file blob $sha1_101 > file_101_foo1' test_expect_success \ '[index v1] 3) corrupted delta happily returned wrong data' \ - '! cmp blob_1 blob_2' + 'test -f file_101_foo1 && ! cmp file_101 file_101_foo1' test_expect_success \ '[index v1] 4) confirm that the pack is actually corrupted' \ @@ -140,19 +173,22 @@ test_expect_success \ test_expect_success \ '[index v2] 2) create a stealth corruption in a delta base reference' \ - '# this test assumes a delta smaller than 16 bytes at the end of the pack - git show-index <1.idx | sort -n | sed -ne \$p | ( - read delta_offs delta_sha1 delta_crc && - git cat-file blob "$delta_sha1" > blob_3 && - chmod +w ".git/objects/pack/pack-${pack1}.pack" && - dd of=".git/objects/pack/pack-${pack1}.pack" seek=$(($delta_offs + 1)) \ - if=".git/objects/pack/pack-${pack1}.idx" skip=$((8 + 256 * 4)) \ - bs=1 count=20 conv=notrunc && - git cat-file blob "$delta_sha1" > blob_4 )' + '# This test assumes file_101 is a delta smaller than 16 bytes. + # It should be against file_100 but we substitute its base for file_099 + sha1_101=`git hash-object file_101` && + sha1_099=`git hash-object file_099` && + offs_101=`index_obj_offset 1.idx $sha1_101` && + nr_099=`index_obj_nr 1.idx $sha1_099` && + chmod +w ".git/objects/pack/pack-${pack1}.pack" && + dd of=".git/objects/pack/pack-${pack1}.pack" seek=$(($offs_101 + 1)) \ + if=".git/objects/pack/pack-${pack1}.idx" \ + skip=$((8 + 256 * 4 + $nr_099 * 20)) \ + bs=1 count=20 conv=notrunc && + git cat-file blob $sha1_101 > file_101_foo2' test_expect_success \ '[index v2] 3) corrupted delta happily returned wrong data' \ - '! cmp blob_3 blob_4' + 'test -f file_101_foo2 && ! cmp file_101 file_101_foo2' test_expect_success \ '[index v2] 4) confirm that the pack is actually corrupted' \ @@ -167,9 +203,11 @@ test_expect_success \ 'rm -f .git/objects/pack/* && git index-pack --index-version=2 --stdin < "test-1-${pack1}.pack" && git verify-pack ".git/objects/pack/pack-${pack1}.pack" && + obj=`git hash-object file_001` && + nr=`index_obj_nr ".git/objects/pack/pack-${pack1}.idx" $obj` && chmod +w ".git/objects/pack/pack-${pack1}.idx" && dd if=/dev/zero of=".git/objects/pack/pack-${pack1}.idx" conv=notrunc \ - bs=1 count=4 seek=$((8 + 256 * 4 + `wc -l <obj-list` * 20 + 0)) && + bs=1 count=4 seek=$((8 + 256 * 4 + `wc -l <obj-list` * 20 + $nr * 4)) && ( while read obj do git cat-file -p $obj >/dev/null || exit 1 done <obj-list ) && |