From c649657501bada28794a30102d9c13cc28ca0e5e Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sun, 19 Feb 2006 03:32:31 -0800 Subject: rev-list --objects-edge This new flag is similar to --objects, but causes rev-list to show list of "uninteresting" commits that appear on the edge commit prefixed with '-'. Downstream pack-objects will be changed to take these as hints to use the trees and blobs contained with them as base objects of resulting pack, producing an incomplete (not self-contained) pack. Such a pack cannot be used in .git/objects/pack (it is prevented by git-index-pack erroring out if it is fed to git-fetch-pack -k or git-clone-pack), but would be useful when transferring only small changes to huge blobs. Signed-off-by: Junio C Hamano --- rev-list.c | 34 ++++++++++++++++++++++++++++------ rev-parse.c | 1 + 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/rev-list.c b/rev-list.c index f2d1105ca..373549e59 100644 --- a/rev-list.c +++ b/rev-list.c @@ -30,7 +30,7 @@ static const char rev_list_usage[] = " --date-order\n" " formatting output:\n" " --parents\n" -" --objects\n" +" --objects | --objects-edge\n" " --unpacked\n" " --header | --pretty\n" " --abbrev=nr | --no-abbrev\n" @@ -44,6 +44,7 @@ static int bisect_list = 0; static int tag_objects = 0; static int tree_objects = 0; static int blob_objects = 0; +static int edge_hint = 0; static int verbose_header = 0; static int abbrev = DEFAULT_ABBREV; static int show_parents = 0; @@ -430,16 +431,30 @@ static struct commit_list *find_bisection(struct commit_list *list) return best; } +static void mark_edge_parents_uninteresting(struct commit *commit) +{ + struct commit_list *parents; + + for (parents = commit->parents; parents; parents = parents->next) { + struct commit *parent = parents->item; + if (!(parent->object.flags & UNINTERESTING)) + continue; + mark_tree_uninteresting(parent->tree); + if (edge_hint) + printf("-%s\n", sha1_to_hex(parent->object.sha1)); + } +} + static void mark_edges_uninteresting(struct commit_list *list) { for ( ; list; list = list->next) { - struct commit_list *parents = list->item->parents; + struct commit *commit = list->item; - for ( ; parents; parents = parents->next) { - struct commit *commit = parents->item; - if (commit->object.flags & UNINTERESTING) - mark_tree_uninteresting(commit->tree); + if (commit->object.flags & UNINTERESTING) { + mark_tree_uninteresting(commit->tree); + continue; } + mark_edge_parents_uninteresting(commit); } } @@ -843,6 +858,13 @@ int main(int argc, const char **argv) blob_objects = 1; continue; } + if (!strcmp(arg, "--objects-edge")) { + tag_objects = 1; + tree_objects = 1; + blob_objects = 1; + edge_hint = 1; + continue; + } if (!strcmp(arg, "--unpacked")) { unpacked = 1; limited = 1; diff --git a/rev-parse.c b/rev-parse.c index a5fb93c3c..610eacb35 100644 --- a/rev-parse.c +++ b/rev-parse.c @@ -43,6 +43,7 @@ static int is_rev_argument(const char *arg) "--min-age=", "--no-merges", "--objects", + "--objects-edge", "--parents", "--pretty", "--show-breaks", -- cgit v1.2.1 From 7a979d99bafae3576d9480007f958eed0c9e0278 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sun, 19 Feb 2006 14:47:21 -0800 Subject: Thin pack - create packfile with missing delta base. This goes together with "rev-list --object-edge" change, to feed pack-objects list of edge commits in addition to the usual object list. Upon seeing such list, pack-objects loosens the usual "self contained delta" constraints, and can produce delta against blobs and trees contained in the edge commits without storing the delta base objects themselves. The resulting packfile is not usable in .git/object/packs, but is a good way to implement "delta-only" transfer. Signed-off-by: Junio C Hamano --- pack-objects.c | 300 +++++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 219 insertions(+), 81 deletions(-) diff --git a/pack-objects.c b/pack-objects.c index 0c9f4c9d2..ceb107f63 100644 --- a/pack-objects.c +++ b/pack-objects.c @@ -3,6 +3,7 @@ #include "delta.h" #include "pack.h" #include "csum-file.h" +#include "diff.h" #include static const char pack_usage[] = "git-pack-objects [-q] [--no-reuse-delta] [--non-empty] [--local] [--incremental] [--window=N] [--depth=N] {--stdout | base-name} < object-list"; @@ -26,6 +27,13 @@ struct object_entry { struct object_entry *delta_sibling; /* other deltified objects who * uses the same base as me */ + int preferred_base; /* we do not pack this, but is encouraged to + * be used as the base objectto delta huge + * objects against. + */ + int based_on_preferred; /* current delta candidate is a preferred + * one, or delta against a preferred one. + */ }; /* @@ -48,7 +56,7 @@ static int local = 0; static int incremental = 0; static struct object_entry **sorted_by_sha, **sorted_by_type; static struct object_entry *objects = NULL; -static int nr_objects = 0, nr_alloc = 0; +static int nr_objects = 0, nr_alloc = 0, nr_result = 0; static const char *base_name; static unsigned char pack_file_sha1[20]; static int progress = 1; @@ -229,7 +237,8 @@ static int encode_header(enum object_type type, unsigned long size, unsigned cha return n; } -static unsigned long write_object(struct sha1file *f, struct object_entry *entry) +static unsigned long write_object(struct sha1file *f, + struct object_entry *entry) { unsigned long size; char type[10]; @@ -239,6 +248,9 @@ static unsigned long write_object(struct sha1file *f, struct object_entry *entry enum object_type obj_type; int to_reuse = 0; + if (entry->preferred_base) + return 0; + obj_type = entry->type; if (! entry->in_pack) to_reuse = 0; /* can't reuse what we don't have */ @@ -326,10 +338,11 @@ static void write_pack_file(void) if (!base_name) f = sha1fd(1, ""); else - f = sha1create("%s-%s.%s", base_name, sha1_to_hex(object_list_sha1), "pack"); + f = sha1create("%s-%s.%s", base_name, + sha1_to_hex(object_list_sha1), "pack"); hdr.hdr_signature = htonl(PACK_SIGNATURE); hdr.hdr_version = htonl(PACK_VERSION); - hdr.hdr_entries = htonl(nr_objects); + hdr.hdr_entries = htonl(nr_result); sha1write(f, &hdr, sizeof(hdr)); offset = sizeof(hdr); for (i = 0; i < nr_objects; i++) @@ -341,9 +354,10 @@ static void write_pack_file(void) static void write_index_file(void) { int i; - struct sha1file *f = sha1create("%s-%s.%s", base_name, sha1_to_hex(object_list_sha1), "idx"); + struct sha1file *f = sha1create("%s-%s.%s", base_name, + sha1_to_hex(object_list_sha1), "idx"); struct object_entry **list = sorted_by_sha; - struct object_entry **last = list + nr_objects; + struct object_entry **last = list + nr_result; unsigned int array[256]; /* @@ -368,7 +382,7 @@ static void write_index_file(void) * Write the actual SHA1 entries.. */ list = sorted_by_sha; - for (i = 0; i < nr_objects; i++) { + for (i = 0; i < nr_result; i++) { struct object_entry *entry = *list++; unsigned int offset = htonl(entry->offset); sha1write(f, &offset, 4); @@ -378,27 +392,87 @@ static void write_index_file(void) sha1close(f, NULL, 1); } -static int add_object_entry(unsigned char *sha1, unsigned int hash) +static int locate_object_entry_hash(const unsigned char *sha1) +{ + int i; + unsigned int ui; + memcpy(&ui, sha1, sizeof(unsigned int)); + i = ui % object_ix_hashsz; + while (0 < object_ix[i]) { + if (!memcmp(sha1, objects[object_ix[i]-1].sha1, 20)) + return i; + if (++i == object_ix_hashsz) + i = 0; + } + return -1 - i; +} + +static struct object_entry *locate_object_entry(const unsigned char *sha1) +{ + int i; + + if (!object_ix_hashsz) + return NULL; + + i = locate_object_entry_hash(sha1); + if (0 <= i) + return &objects[object_ix[i]-1]; + return NULL; +} + +static void rehash_objects(void) { + int i; + struct object_entry *oe; + + object_ix_hashsz = nr_objects * 3; + if (object_ix_hashsz < 1024) + object_ix_hashsz = 1024; + object_ix = xrealloc(object_ix, sizeof(int) * object_ix_hashsz); + object_ix = memset(object_ix, 0, sizeof(int) * object_ix_hashsz); + for (i = 0, oe = objects; i < nr_objects; i++, oe++) { + int ix = locate_object_entry_hash(oe->sha1); + if (0 <= ix) + continue; + ix = -1 - ix; + object_ix[ix] = i + 1; + } +} + +static int add_object_entry(const unsigned char *sha1, const char *name, int exclude) +{ + unsigned int hash = 0; unsigned int idx = nr_objects; struct object_entry *entry; struct packed_git *p; unsigned int found_offset = 0; struct packed_git *found_pack = NULL; - - for (p = packed_git; p; p = p->next) { - struct pack_entry e; - if (find_pack_entry_one(sha1, &e, p)) { - if (incremental) - return 0; - if (local && !p->pack_local) - return 0; - if (!found_pack) { - found_offset = e.offset; - found_pack = e.p; + int ix; + + if (!exclude) { + for (p = packed_git; p; p = p->next) { + struct pack_entry e; + if (find_pack_entry_one(sha1, &e, p)) { + if (incremental) + return 0; + if (local && !p->pack_local) + return 0; + if (!found_pack) { + found_offset = e.offset; + found_pack = e.p; + } } } } + if ((entry = locate_object_entry(sha1)) != NULL) + goto already_added; + + while (*name) { + unsigned char c = *name++; + if (isspace(c)) + continue; + hash = hash * 11 + c; + } if (idx >= nr_alloc) { unsigned int needed = (idx + 1024) * 3 / 2; @@ -406,45 +480,79 @@ static int add_object_entry(unsigned char *sha1, unsigned int hash) nr_alloc = needed; } entry = objects + idx; + nr_objects = idx + 1; memset(entry, 0, sizeof(*entry)); memcpy(entry->sha1, sha1, 20); entry->hash = hash; - if (found_pack) { - entry->in_pack = found_pack; - entry->in_pack_offset = found_offset; + + if (object_ix_hashsz * 3 <= nr_objects * 4) + rehash_objects(); + else { + ix = locate_object_entry_hash(entry->sha1); + if (0 <= ix) + die("internal error in object hashing."); + object_ix[-1 - ix] = idx + 1; + } + + already_added: + if (exclude) + entry->preferred_base = 1; + else { + if (found_pack) { + entry->in_pack = found_pack; + entry->in_pack_offset = found_offset; + } } - nr_objects = idx+1; return 1; } -static int locate_object_entry_hash(unsigned char *sha1) +static void add_pbase_tree(struct tree_desc *tree) { - int i; - unsigned int ui; - memcpy(&ui, sha1, sizeof(unsigned int)); - i = ui % object_ix_hashsz; - while (0 < object_ix[i]) { - if (!memcmp(sha1, objects[object_ix[i]-1].sha1, 20)) - return i; - if (++i == object_ix_hashsz) - i = 0; + while (tree->size) { + const unsigned char *sha1; + const char *name; + unsigned mode; + unsigned long size; + char type[20]; + + sha1 = tree_entry_extract(tree, &name, &mode); + update_tree_entry(tree); + if (!has_sha1_file(sha1)) + continue; + if (sha1_object_info(sha1, type, &size)) + continue; + add_object_entry(sha1, name, 1); + if (!strcmp(type, "tree")) { + struct tree_desc sub; + void *elem; + elem = read_sha1_file(sha1, type, &sub.size); + sub.buf = elem; + if (sub.buf) { + add_pbase_tree(&sub); + free(elem); + } + } } - return -1 - i; } -static struct object_entry *locate_object_entry(unsigned char *sha1) +static void add_preferred_base(unsigned char *sha1) { - int i = locate_object_entry_hash(sha1); - if (0 <= i) - return &objects[object_ix[i]-1]; - return NULL; + struct tree_desc tree; + void *elem; + elem = read_object_with_reference(sha1, "tree", &tree.size, NULL); + tree.buf = elem; + if (!tree.buf) + return; + add_object_entry(sha1, "", 1); + add_pbase_tree(&tree); + free(elem); } static void check_object(struct object_entry *entry) { char type[20]; - if (entry->in_pack) { + if (entry->in_pack && !entry->preferred_base) { unsigned char base[20]; unsigned long size; struct object_entry *base_entry; @@ -463,7 +571,8 @@ static void check_object(struct object_entry *entry) */ if (!no_reuse_delta && entry->in_pack_type == OBJ_DELTA && - (base_entry = locate_object_entry(base))) { + (base_entry = locate_object_entry(base)) && + (!base_entry->preferred_base)) { /* Depth value does not matter - find_deltas() * will never consider reused delta as the @@ -501,25 +610,6 @@ static void check_object(struct object_entry *entry) sha1_to_hex(entry->sha1), type); } -static void hash_objects(void) -{ - int i; - struct object_entry *oe; - - object_ix_hashsz = nr_objects * 2; - object_ix = xcalloc(sizeof(int), object_ix_hashsz); - for (i = 0, oe = objects; i < nr_objects; i++, oe++) { - int ix = locate_object_entry_hash(oe->sha1); - if (0 <= ix) { - error("the same object '%s' added twice", - sha1_to_hex(oe->sha1)); - continue; - } - ix = -1 - ix; - object_ix[ix] = i + 1; - } -} - static unsigned int check_delta_limit(struct object_entry *me, unsigned int n) { struct object_entry *child = me->delta_child; @@ -538,7 +628,6 @@ static void get_object_details(void) int i; struct object_entry *entry; - hash_objects(); prepare_pack_ix(); for (i = 0, entry = objects; i < nr_objects; i++, entry++) check_object(entry); @@ -576,6 +665,24 @@ static int sha1_sort(const struct object_entry *a, const struct object_entry *b) return memcmp(a->sha1, b->sha1, 20); } +static struct object_entry **create_final_object_list() +{ + struct object_entry **list; + int i, j; + + for (i = nr_result = 0; i < nr_objects; i++) + if (!objects[i].preferred_base) + nr_result++; + list = xmalloc(nr_result * sizeof(struct object_entry *)); + for (i = j = 0; i < nr_objects; i++) { + if (!objects[i].preferred_base) + list[j++] = objects + i; + } + current_sort = sha1_sort; + qsort(list, nr_result, sizeof(struct object_entry *), sort_comparator); + return list; +} + static int type_size_sort(const struct object_entry *a, const struct object_entry *b) { if (a->type < b->type) @@ -586,6 +693,10 @@ static int type_size_sort(const struct object_entry *a, const struct object_entr return -1; if (a->hash > b->hash) return 1; + if (a->preferred_base < b->preferred_base) + return -1; + if (a->preferred_base > b->preferred_base) + return 1; if (a->size < b->size) return -1; if (a->size > b->size) @@ -610,6 +721,8 @@ static int try_delta(struct unpacked *cur, struct unpacked *old, unsigned max_de { struct object_entry *cur_entry = cur->entry; struct object_entry *old_entry = old->entry; + int old_preferred = (old_entry->preferred_base || + old_entry->based_on_preferred); unsigned long size, oldsize, delta_size, sizediff; long max_size; void *delta_buf; @@ -618,9 +731,15 @@ static int try_delta(struct unpacked *cur, struct unpacked *old, unsigned max_de if (cur_entry->type != old_entry->type) return -1; - /* If the current object is at edge, take the depth the objects - * that depend on the current object into account -- otherwise - * they would become too deep. + /* We do not compute delta to *create* objects we are not + * going to pack. + */ + if (cur_entry->preferred_base) + return -1; + + /* If the current object is at pack edge, take the depth the + * objects that depend on the current object into account -- + * otherwise they would become too deep. */ if (cur_entry->delta_child) { if (max_depth <= cur_entry->delta_limit) @@ -646,8 +765,27 @@ static int try_delta(struct unpacked *cur, struct unpacked *old, unsigned max_de * delete). */ max_size = size / 2 - 20; - if (cur_entry->delta) - max_size = cur_entry->delta_size-1; + if (cur_entry->delta) { + if (cur_entry->based_on_preferred) { + if (old_preferred) + max_size = cur_entry->delta_size-1; + else + /* trying with non-preferred one when we + * already have a delta based on preferred + * one is pointless. + */ + return 0; + } + else if (!old_preferred) + max_size = cur_entry->delta_size-1; + else + /* otherwise... even if delta with a + * preferred one produces a bigger result than + * what we currently have, which is based on a + * non-preferred one, it is OK. + */ + ; + } if (sizediff >= max_size) return -1; delta_buf = diff_delta(old->data, oldsize, @@ -657,6 +795,7 @@ static int try_delta(struct unpacked *cur, struct unpacked *old, unsigned max_de cur_entry->delta = old_entry; cur_entry->delta_size = delta_size; cur_entry->depth = old_entry->depth + 1; + cur_entry->based_on_preferred = old_preferred; free(delta_buf); return 0; } @@ -722,7 +861,7 @@ static void find_deltas(struct object_entry **list, int window, int depth) static void prepare_pack(int window, int depth) { if (progress) - fprintf(stderr, "Packing %d objects", nr_objects); + fprintf(stderr, "Packing %d objects", nr_result); get_object_details(); if (progress) fputc('.', stderr); @@ -861,8 +1000,6 @@ int main(int argc, char **argv) gettimeofday(&prev_tv, NULL); } while (fgets(line, sizeof(line), stdin) != NULL) { - unsigned int hash; - char *p; unsigned char sha1[20]; if (progress && (eye_candy <= nr_objects)) { @@ -881,31 +1018,32 @@ int main(int argc, char **argv) } eye_candy += eye_candy_incr; } + if (line[0] == '-') { + if (get_sha1_hex(line+1, sha1)) + die("expected edge sha1, got garbage:\n %s", + line+1); + add_preferred_base(sha1); + continue; + } if (get_sha1_hex(line, sha1)) die("expected sha1, got garbage:\n %s", line); - hash = 0; - p = line+40; - while (*p) { - unsigned char c = *p++; - if (isspace(c)) - continue; - hash = hash * 11 + c; - } - add_object_entry(sha1, hash); + add_object_entry(sha1, line+40, 0); } if (progress) fprintf(stderr, "Done counting %d objects.\n", nr_objects); if (non_empty && !nr_objects) return 0; - sorted_by_sha = create_sorted_list(sha1_sort); + sorted_by_sha = create_final_object_list(); SHA1_Init(&ctx); list = sorted_by_sha; - for (i = 0; i < nr_objects; i++) { + for (i = 0; i < nr_result; i++) { struct object_entry *entry = *list++; SHA1_Update(&ctx, entry->sha1, 20); } SHA1_Final(object_list_sha1, &ctx); + if (progress && (nr_objects != nr_result)) + fprintf(stderr, "Result has %d objects.\n", nr_result); if (reuse_cached_pack(object_list_sha1, pack_to_stdout)) ; @@ -918,6 +1056,6 @@ int main(int argc, char **argv) } if (progress) fprintf(stderr, "Total %d, written %d (delta %d), reused %d (delta %d)\n", - nr_objects, written, written_delta, reused, reused_delta); + nr_result, written, written_delta, reused, reused_delta); return 0; } -- cgit v1.2.1 From 2245be3e7a5a2999ebf7d38e569c98994b0cda31 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sun, 19 Feb 2006 15:03:49 -0800 Subject: send-pack --thin: use "thin pack" delta transfer. The new flag loosens the usual "self containedness" requirment of packfiles, and sends deltified representation of objects when we know the other side has the base objects needed to unpack them. This would help reducing the transfer size. Signed-off-by: Junio C Hamano --- send-pack.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/send-pack.c b/send-pack.c index 990be3f1a..ad22da56e 100644 --- a/send-pack.c +++ b/send-pack.c @@ -12,6 +12,7 @@ static const char *exec = "git-receive-pack"; static int verbose = 0; static int send_all = 0; static int force_update = 0; +static int use_thin_pack = 0; static int is_zero_sha1(const unsigned char *sha1) { @@ -41,7 +42,10 @@ static void exec_rev_list(struct ref *refs) int i = 0; args[i++] = "rev-list"; /* 0 */ - args[i++] = "--objects"; /* 1 */ + if (use_thin_pack) /* 1 */ + args[i++] = "--objects-edge"; + else + args[i++] = "--objects"; while (refs) { char *buf = malloc(100); if (i > 900) @@ -361,6 +365,10 @@ int main(int argc, char **argv) verbose = 1; continue; } + if (!strcmp(arg, "--thin")) { + use_thin_pack = 1; + continue; + } usage(send_pack_usage); } if (!dest) { -- cgit v1.2.1 From a79a27636098be2b9652f59bd447ac074f741e26 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Mon, 20 Feb 2006 00:09:41 -0800 Subject: Add git-push --thin. Maybe we would want to make this default before it graduates to the master branch, but in the meantime to help testing things, this allows you to say "git push --thin destination". Signed-off-by: Junio C Hamano --- git-push.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/git-push.sh b/git-push.sh index 706db9933..73dcf067c 100755 --- a/git-push.sh +++ b/git-push.sh @@ -8,6 +8,7 @@ USAGE='[--all] [--tags] [--force] [...]' has_all= has_force= has_exec= +has_thin= remote= do_tags= @@ -22,6 +23,8 @@ do has_force=--force ;; --exec=*) has_exec="$1" ;; + --thin) + has_thin="$1" ;; -*) usage ;; *) @@ -72,6 +75,7 @@ set x "$remote" "$@"; shift test "$has_all" && set x "$has_all" "$@" && shift test "$has_force" && set x "$has_force" "$@" && shift test "$has_exec" && set x "$has_exec" "$@" && shift +test "$has_thin" && set x "$has_thin" "$@" && shift case "$remote" in http://* | https://*) -- cgit v1.2.1 From b19696c2e7c3e753777189100b2ac09c9e04080b Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Mon, 20 Feb 2006 00:38:39 -0800 Subject: Use thin pack transfer in "git fetch". Signed-off-by: Junio C Hamano --- fetch-pack.c | 15 +++++++++++---- git-fetch.sh | 2 +- upload-pack.c | 11 ++++++++--- 3 files changed, 20 insertions(+), 8 deletions(-) diff --git a/fetch-pack.c b/fetch-pack.c index aa6f42ae1..09738fee9 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -8,7 +8,7 @@ static int keep_pack; static int quiet; static int verbose; static const char fetch_pack_usage[] = -"git-fetch-pack [-q] [-v] [-k] [--exec=upload-pack] [host:]directory ..."; +"git-fetch-pack [-q] [-v] [-k] [--thin] [--exec=upload-pack] [host:]directory ..."; static const char *exec = "git-upload-pack"; #define COMPLETE (1U << 0) @@ -18,7 +18,7 @@ static const char *exec = "git-upload-pack"; #define POPPED (1U << 4) static struct commit_list *rev_list = NULL; -static int non_common_revs = 0, multi_ack = 0; +static int non_common_revs = 0, multi_ack = 0, use_thin_pack = 0; static void rev_list_push(struct commit *commit, int mark) { @@ -156,8 +156,9 @@ static int find_common(int fd[2], unsigned char *result_sha1, continue; } - packet_write(fd[1], "want %s%s\n", sha1_to_hex(remote), - multi_ack ? " multi_ack" : ""); + packet_write(fd[1], "want %s%s%s\n", sha1_to_hex(remote), + (multi_ack ? " multi_ack" : ""), + (use_thin_pack ? " thin-pack" : "")); fetching++; } packet_flush(fd[1]); @@ -421,6 +422,10 @@ int main(int argc, char **argv) keep_pack = 1; continue; } + if (!strcmp("--thin", arg)) { + use_thin_pack = 1; + continue; + } if (!strcmp("-v", arg)) { verbose = 1; continue; @@ -434,6 +439,8 @@ int main(int argc, char **argv) } if (!dest) usage(fetch_pack_usage); + if (keep_pack) + use_thin_pack = 0; pid = git_connect(fd, dest, exec); if (pid < 0) return 1; diff --git a/git-fetch.sh b/git-fetch.sh index b4325d9d9..23d965f32 100755 --- a/git-fetch.sh +++ b/git-fetch.sh @@ -320,7 +320,7 @@ fetch_main () { ( : subshell because we muck with IFS IFS=" $LF" ( - git-fetch-pack $exec $keep "$remote" $rref || echo failed "$remote" + git-fetch-pack $exec $keep --thin "$remote" $rref || echo failed "$remote" ) | while read sha1 remote_name do diff --git a/upload-pack.c b/upload-pack.c index d1980556c..3cdf4288b 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -14,6 +14,7 @@ static const char upload_pack_usage[] = "git-upload-pack [--strict] [--timeout=n #define MAX_HAS 256 #define MAX_NEEDS 256 static int nr_has = 0, nr_needs = 0, multi_ack = 0, nr_our_refs = 0; +static int use_thin_pack = 0; static unsigned char has_sha1[MAX_HAS][20]; static unsigned char needs_sha1[MAX_NEEDS][20]; static unsigned int timeout = 0; @@ -49,8 +50,10 @@ static void create_pack_file(void) char *buf; char **p; - if (create_full_pack) + if (create_full_pack) { args = 10; + use_thin_pack = 0; /* no point doing it */ + } else args = nr_has + nr_needs + 5; argv = xmalloc(args * sizeof(char *)); @@ -62,7 +65,7 @@ static void create_pack_file(void) close(fd[0]); close(fd[1]); *p++ = "rev-list"; - *p++ = "--objects"; + *p++ = use_thin_pack ? "--objects-edge" : "--objects"; if (create_full_pack || MAX_NEEDS <= nr_needs) *p++ = "--all"; else { @@ -192,6 +195,8 @@ static int receive_needs(void) "expected to get sha, not '%s'", line); if (strstr(line+45, "multi_ack")) multi_ack = 1; + if (strstr(line+45, "thin-pack")) + use_thin_pack = 1; /* We have sent all our refs already, and the other end * should have chosen out of them; otherwise they are @@ -213,7 +218,7 @@ static int receive_needs(void) static int send_ref(const char *refname, const unsigned char *sha1) { - static char *capabilities = "multi_ack"; + static char *capabilities = "multi_ack thin-pack"; struct object *o = parse_object(sha1); if (capabilities) -- cgit v1.2.1 From b925410d10fce5e0d4182847f99e8c2df048bde1 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 22 Feb 2006 21:45:45 -0800 Subject: pack-objects: thin pack micro-optimization. Since we sort objects by type, hash, preferredness and then size, after we have a delta against preferred base, there is no point trying a delta with non-preferred base. This seems to save expensive calls to diff-delta and it also seems to save the output space as well. Signed-off-by: Junio C Hamano --- pack-objects.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/pack-objects.c b/pack-objects.c index ceb107f63..af3bdf5d3 100644 --- a/pack-objects.c +++ b/pack-objects.c @@ -447,7 +447,7 @@ static int add_object_entry(const unsigned char *sha1, const char *name, int exc struct packed_git *p; unsigned int found_offset = 0; struct packed_git *found_pack = NULL; - int ix; + int ix, status = 0; if (!exclude) { for (p = packed_git; p; p = p->next) { @@ -493,6 +493,7 @@ static int add_object_entry(const unsigned char *sha1, const char *name, int exc die("internal error in object hashing."); object_ix[-1 - ix] = idx + 1; } + status = 1; already_added: if (exclude) @@ -503,7 +504,7 @@ static int add_object_entry(const unsigned char *sha1, const char *name, int exc entry->in_pack_offset = found_offset; } } - return 1; + return status; } static void add_pbase_tree(struct tree_desc *tree) @@ -521,7 +522,10 @@ static void add_pbase_tree(struct tree_desc *tree) continue; if (sha1_object_info(sha1, type, &size)) continue; - add_object_entry(sha1, name, 1); + + if (!add_object_entry(sha1, name, 1)) + continue; + if (!strcmp(type, "tree")) { struct tree_desc sub; void *elem; @@ -543,8 +547,8 @@ static void add_preferred_base(unsigned char *sha1) tree.buf = elem; if (!tree.buf) return; - add_object_entry(sha1, "", 1); - add_pbase_tree(&tree); + if (add_object_entry(sha1, "", 1)) + add_pbase_tree(&tree); free(elem); } @@ -774,7 +778,7 @@ static int try_delta(struct unpacked *cur, struct unpacked *old, unsigned max_de * already have a delta based on preferred * one is pointless. */ - return 0; + return -1; } else if (!old_preferred) max_size = cur_entry->delta_size-1; -- cgit v1.2.1 From 1d6b38cc76c348e2477506ca9759fc241e3d0d46 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 22 Feb 2006 22:10:24 -0800 Subject: pack-objects: use full pathname to help hashing with "thin" pack. This uses the same hashing algorithm to the "preferred base tree" objects and the incoming pathnames, to group the same files from different revs together, while spreading files with the same basename in different directories. Signed-off-by: Junio C Hamano --- pack-objects.c | 60 ++++++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 44 insertions(+), 16 deletions(-) diff --git a/pack-objects.c b/pack-objects.c index af3bdf5d3..3a16b7e4c 100644 --- a/pack-objects.c +++ b/pack-objects.c @@ -439,9 +439,37 @@ static void rehash_objects(void) } } -static int add_object_entry(const unsigned char *sha1, const char *name, int exclude) +struct name_path { + struct name_path *up; + const char *elem; + int len; +}; + +static unsigned name_hash(struct name_path *path, const char *name) +{ + struct name_path *p = path; + const char *n = name + strlen(name); + unsigned hash = 0; + + if (n != name && n[-1] == '\n') + n--; + while (name <= --n) { + unsigned char c = *n; + hash = hash * 11 + c; + } + for (p = path; p; p = p->up) { + hash = hash * 11 + '/'; + n = p->elem + p->len; + while (p->elem <= --n) { + unsigned char c = *n; + hash = hash * 11 + c; + } + } + return hash; +} + +static int add_object_entry(const unsigned char *sha1, unsigned hash, int exclude) { - unsigned int hash = 0; unsigned int idx = nr_objects; struct object_entry *entry; struct packed_git *p; @@ -467,13 +495,6 @@ static int add_object_entry(const unsigned char *sha1, const char *name, int exc if ((entry = locate_object_entry(sha1)) != NULL) goto already_added; - while (*name) { - unsigned char c = *name++; - if (isspace(c)) - continue; - hash = hash * 11 + c; - } - if (idx >= nr_alloc) { unsigned int needed = (idx + 1024) * 3 / 2; objects = xrealloc(objects, needed * sizeof(*entry)); @@ -507,12 +528,12 @@ static int add_object_entry(const unsigned char *sha1, const char *name, int exc return status; } -static void add_pbase_tree(struct tree_desc *tree) +static void add_pbase_tree(struct tree_desc *tree, struct name_path *up) { while (tree->size) { const unsigned char *sha1; const char *name; - unsigned mode; + unsigned mode, hash; unsigned long size; char type[20]; @@ -523,16 +544,22 @@ static void add_pbase_tree(struct tree_desc *tree) if (sha1_object_info(sha1, type, &size)) continue; - if (!add_object_entry(sha1, name, 1)) + hash = name_hash(up, name); + if (!add_object_entry(sha1, hash, 1)) continue; if (!strcmp(type, "tree")) { struct tree_desc sub; void *elem; + struct name_path me; + elem = read_sha1_file(sha1, type, &sub.size); sub.buf = elem; if (sub.buf) { - add_pbase_tree(&sub); + me.up = up; + me.elem = name; + me.len = strlen(name); + add_pbase_tree(&sub, &me); free(elem); } } @@ -543,12 +570,13 @@ static void add_preferred_base(unsigned char *sha1) { struct tree_desc tree; void *elem; + elem = read_object_with_reference(sha1, "tree", &tree.size, NULL); tree.buf = elem; if (!tree.buf) return; - if (add_object_entry(sha1, "", 1)) - add_pbase_tree(&tree); + if (add_object_entry(sha1, name_hash(NULL, ""), 1)) + add_pbase_tree(&tree, NULL); free(elem); } @@ -1031,7 +1059,7 @@ int main(int argc, char **argv) } if (get_sha1_hex(line, sha1)) die("expected sha1, got garbage:\n %s", line); - add_object_entry(sha1, line+40, 0); + add_object_entry(sha1, name_hash(NULL, line+41), 0); } if (progress) fprintf(stderr, "Done counting %d objects.\n", nr_objects); -- cgit v1.2.1 From b76f6b627802d0a3c8bbf66fba0c090dbe56d509 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 23 Feb 2006 23:04:52 -0800 Subject: pack-objects: allow "thin" packs to exceed depth limits When creating a new pack to be used in .git/objects/pack/ directory, we carefully count the depth of deltified objects to be reused, so that the generated pack does not to exceed the specified depth limit for runtime efficiency. However, when we are generating a thin pack that does not contain base objects, such a pack can only be used during network transfer that is expanded on the other end upon reception, so being careful and artificially cutting the delta chain does not buy us anything except increased bandwidth requirement. This patch disables the delta chain depth limit check when reusing an existing delta. Signed-off-by: Junio C Hamano --- pack-objects.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/pack-objects.c b/pack-objects.c index 3a16b7e4c..2320bcf31 100644 --- a/pack-objects.c +++ b/pack-objects.c @@ -663,10 +663,23 @@ static void get_object_details(void) prepare_pack_ix(); for (i = 0, entry = objects; i < nr_objects; i++, entry++) check_object(entry); - for (i = 0, entry = objects; i < nr_objects; i++, entry++) - if (!entry->delta && entry->delta_child) - entry->delta_limit = - check_delta_limit(entry, 1); + + if (nr_objects == nr_result) { + /* + * Depth of objects that depend on the entry -- this + * is subtracted from depth-max to break too deep + * delta chain because of delta data reusing. + * However, we loosen this restriction when we know we + * are creating a thin pack -- it will have to be + * expanded on the other end anyway, so do not + * artificially cut the delta chain and let it go as + * deep as it wants. + */ + for (i = 0, entry = objects; i < nr_objects; i++, entry++) + if (!entry->delta && entry->delta_child) + entry->delta_limit = + check_delta_limit(entry, 1); + } } typedef int (*entry_sort_t)(const struct object_entry *, const struct object_entry *); -- cgit v1.2.1 From eb38cc689e84a8fd01c1856e889fe8d3b4f1bfb4 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 23 Feb 2006 23:44:15 -0800 Subject: rev-list --objects-edge: remove duplicated edge commit output. Signed-off-by: Junio C Hamano --- rev-list.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/rev-list.c b/rev-list.c index 373549e59..b5de0759f 100644 --- a/rev-list.c +++ b/rev-list.c @@ -440,8 +440,10 @@ static void mark_edge_parents_uninteresting(struct commit *commit) if (!(parent->object.flags & UNINTERESTING)) continue; mark_tree_uninteresting(parent->tree); - if (edge_hint) + if (edge_hint && !(parent->object.flags & SHOWN)) { + parent->object.flags |= SHOWN; printf("-%s\n", sha1_to_hex(parent->object.sha1)); + } } } -- cgit v1.2.1 From e646de0d14bac20ef6e156c1742b9e62fb0b9020 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 22 Feb 2006 22:10:24 -0800 Subject: rev-list --objects: use full pathname to help hashing. This helps to group the same files from different revs together, while spreading files with the same basename in different directories, to help pack-object. Signed-off-by: Junio C Hamano --- rev-list.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 56 insertions(+), 13 deletions(-) diff --git a/rev-list.c b/rev-list.c index b5de0759f..dda6fcaa9 100644 --- a/rev-list.c +++ b/rev-list.c @@ -63,6 +63,36 @@ static int no_merges = 0; static const char **paths = NULL; static int remove_empty_trees = 0; +struct name_path { + struct name_path *up; + int elem_len; + const char *elem; +}; + +static char *path_name(struct name_path *path, const char *name) +{ + struct name_path *p; + char *n, *m; + int nlen = strlen(name); + int len = nlen + 1; + + for (p = path; p; p = p->up) { + if (p->elem_len) + len += p->elem_len + 1; + } + n = xmalloc(len); + m = n + len - (nlen + 1); + strcpy(m, name); + for (p = path; p; p = p->up) { + if (p->elem_len) { + m -= p->elem_len + 1; + memcpy(m, p->elem, p->elem_len); + m[p->elem_len] = '/'; + } + } + return n; +} + static void show_commit(struct commit *commit) { commit->object.flags |= SHOWN; @@ -174,17 +204,23 @@ static int process_commit(struct commit * commit) return CONTINUE; } -static struct object_list **add_object(struct object *obj, struct object_list **p, const char *name) +static struct object_list **add_object(struct object *obj, + struct object_list **p, + struct name_path *path, + const char *name) { struct object_list *entry = xmalloc(sizeof(*entry)); entry->item = obj; entry->next = *p; - entry->name = name; + entry->name = path_name(path, name); *p = entry; return &entry->next; } -static struct object_list **process_blob(struct blob *blob, struct object_list **p, const char *name) +static struct object_list **process_blob(struct blob *blob, + struct object_list **p, + struct name_path *path, + const char *name) { struct object *obj = &blob->object; @@ -193,13 +229,17 @@ static struct object_list **process_blob(struct blob *blob, struct object_list * if (obj->flags & (UNINTERESTING | SEEN)) return p; obj->flags |= SEEN; - return add_object(obj, p, name); + return add_object(obj, p, path, name); } -static struct object_list **process_tree(struct tree *tree, struct object_list **p, const char *name) +static struct object_list **process_tree(struct tree *tree, + struct object_list **p, + struct name_path *path, + const char *name) { struct object *obj = &tree->object; struct tree_entry_list *entry; + struct name_path me; if (!tree_objects) return p; @@ -208,15 +248,18 @@ static struct object_list **process_tree(struct tree *tree, struct object_list * if (parse_tree(tree) < 0) die("bad tree object %s", sha1_to_hex(obj->sha1)); obj->flags |= SEEN; - p = add_object(obj, p, name); + p = add_object(obj, p, path, name); + me.up = path; + me.elem = name; + me.elem_len = strlen(name); entry = tree->entries; tree->entries = NULL; while (entry) { struct tree_entry_list *next = entry->next; if (entry->directory) - p = process_tree(entry->item.tree, p, entry->name); + p = process_tree(entry->item.tree, p, &me, entry->name); else - p = process_blob(entry->item.blob, p, entry->name); + p = process_blob(entry->item.blob, p, &me, entry->name); free(entry); entry = next; } @@ -231,7 +274,7 @@ static void show_commit_list(struct commit_list *list) while (list) { struct commit *commit = pop_most_recent_commit(&list, SEEN); - p = process_tree(commit->tree, p, ""); + p = process_tree(commit->tree, p, NULL, ""); if (process_commit(commit) == STOP) break; } @@ -242,15 +285,15 @@ static void show_commit_list(struct commit_list *list) continue; if (obj->type == tag_type) { obj->flags |= SEEN; - p = add_object(obj, p, name); + p = add_object(obj, p, NULL, name); continue; } if (obj->type == tree_type) { - p = process_tree((struct tree *)obj, p, name); + p = process_tree((struct tree *)obj, p, NULL, name); continue; } if (obj->type == blob_type) { - p = process_blob((struct blob *)obj, p, name); + p = process_blob((struct blob *)obj, p, NULL, name); continue; } die("unknown pending object %s (%s)", sha1_to_hex(obj->sha1), name); @@ -674,7 +717,7 @@ static struct commit_list *limit_list(struct commit_list *list) static void add_pending_object(struct object *obj, const char *name) { - add_object(obj, &pending_objects, name); + add_object(obj, &pending_objects, NULL, name); } static struct commit *get_commit_reference(const char *name, const unsigned char *sha1, unsigned int flags) -- cgit v1.2.1 From eeef7135fed9b8784627c4c96e125241c06c65e1 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 23 Feb 2006 23:27:49 -0800 Subject: pack-objects: hash basename and direname a bit differently. ...so that "Makefile"s from different revs are sorted together, separate from "t/Makefile"s, but close enough. Signed-off-by: Junio C Hamano --- pack-objects.c | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/pack-objects.c b/pack-objects.c index 2320bcf31..095bcb828 100644 --- a/pack-objects.c +++ b/pack-objects.c @@ -445,18 +445,29 @@ struct name_path { int len; }; +#define DIRBITS 12 + static unsigned name_hash(struct name_path *path, const char *name) { struct name_path *p = path; const char *n = name + strlen(name); - unsigned hash = 0; + unsigned hash = 0, name_hash = 0, name_done = 0; if (n != name && n[-1] == '\n') n--; while (name <= --n) { unsigned char c = *n; + if (c == '/' && !name_done) { + name_hash = hash; + name_done = 1; + hash = 0; + } hash = hash * 11 + c; } + if (!name_done) { + name_hash = hash; + hash = 0; + } for (p = path; p; p = p->up) { hash = hash * 11 + '/'; n = p->elem + p->len; @@ -465,6 +476,26 @@ static unsigned name_hash(struct name_path *path, const char *name) hash = hash * 11 + c; } } + /* + * Make sure "Makefile" and "t/Makefile" are hashed separately + * but close enough. + */ + hash = (name_hash<up) { + fputc('/', stderr); + n = p->elem + p->len; + while (p->elem <= --n) + fputc(*n, stderr); + } + fprintf(stderr, "\t%08x\n", hash); + } return hash; } -- cgit v1.2.1