From ccc1297226b184c40459e9d373cc9eebfb7bd898 Mon Sep 17 00:00:00 2001 From: Brandon Casey Date: Fri, 9 May 2008 23:01:55 -0500 Subject: repack: modify behavior of -A option to leave unreferenced objects unpacked The previous behavior of the -A option was to retain any previously packed objects which had become unreferenced, and place them into the newly created pack file. Since git-gc, when run automatically with the --auto option, calls repack with the -A option, this had the effect of retaining unreferenced packed objects indefinitely. To avoid this scenario, the user was required to run git-gc with the little known --prune option or to manually run repack with the -a option. This patch changes the behavior of the -A option so that unreferenced objects that exist in any pack file being replaced, will be unpacked into the repository. The unreferenced loose objects can then be garbage collected by git-gc (i.e. git-prune) based on the gc.pruneExpire setting. Also add new tests for checking whether unreferenced objects which were previously packed are properly left in the repository unpacked after repacking. Signed-off-by: Brandon Casey Signed-off-by: Junio C Hamano --- git-repack.sh | 18 ++++++++++---- t/t7701-repack-unpack-unreachable.sh | 47 ++++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 5 deletions(-) create mode 100755 t/t7701-repack-unpack-unreachable.sh diff --git a/git-repack.sh b/git-repack.sh index e18eb3f5d..a0e06ed07 100755 --- a/git-repack.sh +++ b/git-repack.sh @@ -30,7 +30,7 @@ do -n) no_update_info=t ;; -a) all_into_one=t ;; -A) all_into_one=t - keep_unreachable=--keep-unreachable ;; + keep_unreachable=t ;; -d) remove_redundant=t ;; -q) quiet=-q ;; -f) no_reuse=--no-reuse-object ;; @@ -78,9 +78,6 @@ case ",$all_into_one," in if test -z "$args" then args='--unpacked --incremental' - elif test -n "$keep_unreachable" - then - args="$args $keep_unreachable" fi ;; esac @@ -130,7 +127,18 @@ then do case " $fullbases " in *" $e "*) ;; - *) rm -f "$e.pack" "$e.idx" "$e.keep" ;; + *) + rm -f "$e.idx" "$e.keep" + if test -n "$keep_unreachable" && + test -f "$e.pack" + then + git unpack-objects < "$e.pack" || { + echo >&2 "Failed unpacking unreachable objects from redundant pack file $e.pack" + exit 1 + } + fi + rm -f "$e.pack" + ;; esac done ) diff --git a/t/t7701-repack-unpack-unreachable.sh b/t/t7701-repack-unpack-unreachable.sh new file mode 100755 index 000000000..6a5211f18 --- /dev/null +++ b/t/t7701-repack-unpack-unreachable.sh @@ -0,0 +1,47 @@ +#!/bin/sh + +test_description='git-repack works correctly' + +. ./test-lib.sh + +test_expect_success '-A option leaves unreachable objects unpacked' ' + echo content > file1 && + git add . && + git commit -m initial_commit && + # create a transient branch with unique content + git checkout -b transient_branch && + echo more content >> file1 && + # record the objects created in the database for file, commit, tree + fsha1=$(git hash-object file1) && + git commit -a -m more_content && + csha1=$(git rev-parse HEAD^{commit}) && + tsha1=$(git rev-parse HEAD^{tree}) && + git checkout master && + echo even more content >> file1 && + git commit -a -m even_more_content && + # delete the transient branch + git branch -D transient_branch && + # pack the repo + git repack -A -d -l && + # verify objects are packed in repository + test 3 = $(git verify-pack -v -- .git/objects/pack/*.idx | + grep -e "^$fsha1 " -e "^$csha1 " -e "^$tsha1 " | + sort | uniq | wc -l) && + git show $fsha1 && + git show $csha1 && + git show $tsha1 && + # now expire the reflog + sleep 1 && + git reflog expire --expire-unreachable=now --all && + # and repack + git repack -A -d -l && + # verify objects are retained unpacked + test 0 = $(git verify-pack -v -- .git/objects/pack/*.idx | + grep -e "^$fsha1 " -e "^$csha1 " -e "^$tsha1 " | + sort | uniq | wc -l) && + git show $fsha1 && + git show $csha1 && + git show $tsha1 +' + +test_done -- cgit v1.2.1 From a37cce3b23afa2c88baa2f1369c585e05bcf4526 Mon Sep 17 00:00:00 2001 From: Brandon Casey Date: Fri, 9 May 2008 23:01:56 -0500 Subject: git-gc: always use -A when manually repacking Now that repack -A will leave unreferenced objects unpacked, there is no reason to use the -a option to repack (which will discard unreferenced objects). The unpacked unreferenced objects will not be repacked by a subsequent repack, and will eventually be pruned by git-gc based on the gc.pruneExpire config option. --- builtin-gc.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/builtin-gc.c b/builtin-gc.c index f99ebc792..6db2f513b 100644 --- a/builtin-gc.c +++ b/builtin-gc.c @@ -256,17 +256,8 @@ int cmd_gc(int argc, const char **argv, const char *prefix) "performance. You may also\n" "run \"git gc\" manually. See " "\"git help gc\" for more information.\n"); - } else { - /* - * Use safer (for shared repos) "-A" option to - * repack when not pruning. Auto-gc makes its - * own decision. - */ - if (prune) - append_option(argv_repack, "-a", MAX_ADD); - else - append_option(argv_repack, "-A", MAX_ADD); - } + } else + append_option(argv_repack, "-A", MAX_ADD); if (pack_refs && run_command_v_opt(argv_pack_refs, RUN_GIT_CMD)) return error(FAILED_RUN, argv_pack_refs[0]); -- cgit v1.2.1 From 9e7d501990a9c63db264f3185fc311f446196427 Mon Sep 17 00:00:00 2001 From: Brandon Casey Date: Fri, 9 May 2008 23:01:57 -0500 Subject: builtin-gc.c: deprecate --prune, it now really has no effect --- builtin-gc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/builtin-gc.c b/builtin-gc.c index 6db2f513b..48f7d95f9 100644 --- a/builtin-gc.c +++ b/builtin-gc.c @@ -219,7 +219,7 @@ int cmd_gc(int argc, const char **argv, const char *prefix) char buf[80]; struct option builtin_gc_options[] = { - OPT_BOOLEAN(0, "prune", &prune, "prune unreferenced objects"), + OPT_BOOLEAN(0, "prune", &prune, "prune unreferenced objects (deprecated)"), OPT_BOOLEAN(0, "aggressive", &aggressive, "be more thorough (increased runtime)"), OPT_BOOLEAN(0, "auto", &auto_gc, "enable auto-gc mode"), OPT_BOOLEAN('q', "quiet", &quiet, "suppress progress reports"), @@ -249,7 +249,6 @@ int cmd_gc(int argc, const char **argv, const char *prefix) /* * Auto-gc should be least intrusive as possible. */ - prune = 0; if (!need_to_gc()) return 0; fprintf(stderr, "Auto packing your repository for optimum " -- cgit v1.2.1 From bbac73117ebed9f02ccae3df45f4baa0c793dab7 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 14 May 2008 01:32:48 -0400 Subject: add a force_object_loose() function This is meant to force the creation of a loose object even if it already exists packed. Needed for the next commit. Signed-off-by: Nicolas Pitre Signed-off-by: Junio C Hamano --- cache.h | 1 + sha1_file.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++------------- 2 files changed, 48 insertions(+), 13 deletions(-) diff --git a/cache.h b/cache.h index 7fb8f3359..dcc7e9882 100644 --- a/cache.h +++ b/cache.h @@ -506,6 +506,7 @@ extern void * read_sha1_file(const unsigned char *sha1, enum object_type *type, extern int hash_sha1_file(const void *buf, unsigned long len, const char *type, unsigned char *sha1); extern int write_sha1_file(void *buf, unsigned long len, const char *type, unsigned char *return_sha1); extern int pretend_sha1_file(void *, unsigned long, enum object_type, unsigned char *); +extern int force_object_loose(const unsigned char *sha1, time_t mtime); extern int check_sha1_signature(const unsigned char *sha1, void *buf, unsigned long size, const char *type); diff --git a/sha1_file.c b/sha1_file.c index 3516777bc..141f5a713 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -2102,26 +2102,16 @@ int hash_sha1_file(const void *buf, unsigned long len, const char *type, return 0; } -int write_sha1_file(void *buf, unsigned long len, const char *type, unsigned char *returnsha1) +static int write_loose_object(const unsigned char *sha1, char *hdr, int hdrlen, + void *buf, unsigned long len, time_t mtime) { - int size, ret; + int fd, size, ret; unsigned char *compressed; z_stream stream; - unsigned char sha1[20]; char *filename; static char tmpfile[PATH_MAX]; - char hdr[32]; - int fd, hdrlen; - /* Normally if we have it in the pack then we do not bother writing - * it out into .git/objects/??/?{38} file. - */ - write_sha1_file_prepare(buf, len, type, sha1, hdr, &hdrlen); filename = sha1_file_name(sha1); - if (returnsha1) - hashcpy(returnsha1, sha1); - if (has_sha1_file(sha1)) - return 0; fd = open(filename, O_RDONLY); if (fd >= 0) { /* @@ -2182,9 +2172,53 @@ int write_sha1_file(void *buf, unsigned long len, const char *type, unsigned cha die("unable to write sha1 file"); free(compressed); + if (mtime) { + struct utimbuf utb; + utb.actime = mtime; + utb.modtime = mtime; + if (utime(tmpfile, &utb) < 0) + warning("failed utime() on %s: %s", + tmpfile, strerror(errno)); + } + return move_temp_to_file(tmpfile, filename); } +int write_sha1_file(void *buf, unsigned long len, const char *type, unsigned char *returnsha1) +{ + unsigned char sha1[20]; + char hdr[32]; + int hdrlen; + + /* Normally if we have it in the pack then we do not bother writing + * it out into .git/objects/??/?{38} file. + */ + write_sha1_file_prepare(buf, len, type, sha1, hdr, &hdrlen); + if (returnsha1) + hashcpy(returnsha1, sha1); + if (has_sha1_file(sha1)) + return 0; + return write_loose_object(sha1, hdr, hdrlen, buf, len, 0); +} + +int force_object_loose(const unsigned char *sha1, time_t mtime) +{ + struct stat st; + void *buf; + unsigned long len; + enum object_type type; + char hdr[32]; + int hdrlen; + + if (find_sha1_file(sha1, &st)) + return 0; + buf = read_packed_sha1(sha1, &type, &len); + if (!buf) + return error("cannot read sha1_file for %s", sha1_to_hex(sha1)); + hdrlen = sprintf(hdr, "%s %lu", typename(type), len) + 1; + return write_loose_object(sha1, hdr, hdrlen, buf, len, mtime); +} + /* * We need to unpack and recompress the object for writing * it out to a different file. -- cgit v1.2.1 From ca11b212eb3e31d6fee12e9974c67dc774c1bc7c Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 14 May 2008 01:33:53 -0400 Subject: let pack-objects do the writing of unreachable objects as loose objects Commit ccc1297226b184c40459e9d373cc9eebfb7bd898 changed the behavior of 'git repack -A' so unreachable objects are stored as loose objects. However it did so in a naive and inn efficient way by making packs about to be deleted inaccessible and feeding their content through 'git unpack-objects'. While this works, there are major flaws with this approach: - It is unacceptably sloooooooooooooow. In the Linux kernel repository with no actual unreachable objects, doing 'git repack -A -d' before: real 2m33.220s user 2m21.675s sys 0m3.510s And with this change: real 0m36.849s user 0m24.365s sys 0m1.950s For reference, here's the timing for 'git repack -a -d': real 0m35.816s user 0m22.571s sys 0m2.011s This is explained by the fact that 'git unpack-objects' was used to unpack _every_ objects even if (almost) 100% of them were thrown away. - There is a black out period. Between the removal of the .idx file for the redundant pack and the completion of its unpacking, the unreachable objects become completely unaccessible. This is not a big issue as we're talking about unreachable objects, but some consistency is always good. - There is no way to easily set a sensible mtime for the newly created unreachable loose objects. So, while having a command called "pack-objects" to perform object unpacking looks really odd, this is probably the best compromize to be able to solve the above issues in an efficient way. Signed-off-by: Nicolas Pitre Signed-off-by: Junio C Hamano --- builtin-pack-objects.c | 40 ++++++++++++++++++++++++++++++++++++++-- git-repack.sh | 22 +++++++--------------- 2 files changed, 45 insertions(+), 17 deletions(-) diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c index 777f27266..5a10119fb 100644 --- a/builtin-pack-objects.c +++ b/builtin-pack-objects.c @@ -28,7 +28,8 @@ git-pack-objects [{ -q | --progress | --all-progress }] \n\ [--window=N] [--window-memory=N] [--depth=N] \n\ [--no-reuse-delta] [--no-reuse-object] [--delta-base-offset] \n\ [--threads=N] [--non-empty] [--revs [--unpacked | --all]*] [--reflog] \n\ - [--stdout | base-name] [--include-tag] [--keep-unreachable] \n\ + [--stdout | base-name] [--include-tag] \n\ + [--keep-unreachable | --unpack-unreachable] \n\ [next) { + for (i = 0; i < revs->num_ignore_packed; i++) { + if (matches_pack_name(p, revs->ignore_packed[i])) + break; + } + if (revs->num_ignore_packed <= i) + continue; + + if (open_pack_index(p)) + die("cannot open pack index"); + + for (i = 0; i < p->num_objects; i++) { + sha1 = nth_packed_object_sha1(p, i); + if (!locate_object_entry(sha1)) + if (force_object_loose(sha1, p->mtime)) + die("unable to force loose object"); + } + } +} + static void get_object_list(int ac, const char **av) { struct rev_info revs; @@ -1939,6 +1966,8 @@ static void get_object_list(int ac, const char **av) if (keep_unreachable) add_objects_in_unpacked_packs(&revs); + if (unpack_unreachable) + loosen_unused_packed_objects(&revs); } static int adjust_perm(const char *path, mode_t mode) @@ -2073,6 +2102,10 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) keep_unreachable = 1; continue; } + if (!strcmp("--unpack-unreachable", arg)) { + unpack_unreachable = 1; + continue; + } if (!strcmp("--include-tag", arg)) { include_tag = 1; continue; @@ -2138,6 +2171,9 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) if (!pack_to_stdout && thin) die("--thin cannot be used to build an indexable pack."); + if (keep_unreachable && unpack_unreachable) + die("--keep-unreachable and --unpack-unreachable are incompatible."); + #ifdef THREADED_DELTA_SEARCH if (!delta_search_threads) /* --threads=0 means autodetect */ delta_search_threads = online_cpus(); diff --git a/git-repack.sh b/git-repack.sh index a0e06ed07..607f217b7 100755 --- a/git-repack.sh +++ b/git-repack.sh @@ -8,7 +8,7 @@ OPTIONS_SPEC="\ git-repack [options] -- a pack everything in a single pack -A same as -a, and keep unreachable objects too +A same as -a, and turn unreachable objects loose d remove redundant packs, and run git-prune-packed f pass --no-reuse-delta to git-pack-objects q,quiet be quiet @@ -22,7 +22,7 @@ max-pack-size= maximum size of each packfile SUBDIRECTORY_OK='Yes' . git-sh-setup -no_update_info= all_into_one= remove_redundant= keep_unreachable= +no_update_info= all_into_one= remove_redundant= unpack_unreachable= local= quiet= no_reuse= extra= while test $# != 0 do @@ -30,7 +30,7 @@ do -n) no_update_info=t ;; -a) all_into_one=t ;; -A) all_into_one=t - keep_unreachable=t ;; + unpack_unreachable=--unpack-unreachable ;; -d) remove_redundant=t ;; -q) quiet=-q ;; -f) no_reuse=--no-reuse-object ;; @@ -78,6 +78,9 @@ case ",$all_into_one," in if test -z "$args" then args='--unpacked --incremental' + elif test -n "$unpack_unreachable" + then + args="$args $unpack_unreachable" fi ;; esac @@ -127,18 +130,7 @@ then do case " $fullbases " in *" $e "*) ;; - *) - rm -f "$e.idx" "$e.keep" - if test -n "$keep_unreachable" && - test -f "$e.pack" - then - git unpack-objects < "$e.pack" || { - echo >&2 "Failed unpacking unreachable objects from redundant pack file $e.pack" - exit 1 - } - fi - rm -f "$e.pack" - ;; + *) rm -f "$e.pack" "$e.idx" "$e.keep" ;; esac done ) -- cgit v1.2.1 From bbefaa1f38a47e06c32612edc2f64a0168cc40b2 Mon Sep 17 00:00:00 2001 From: Chris Frey Date: Thu, 15 May 2008 22:37:31 -0400 Subject: Documentation/git-repack.txt: document new -A behaviour Add paragraph for the -A option, and describe the new behaviour that makes unreachable objects loose. Signed-off-by: Chris Frey Signed-off-by: Junio C Hamano --- Documentation/git-repack.txt | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/Documentation/git-repack.txt b/Documentation/git-repack.txt index 3d957492f..906d3c705 100644 --- a/Documentation/git-repack.txt +++ b/Documentation/git-repack.txt @@ -8,7 +8,7 @@ git-repack - Pack unpacked objects in a repository SYNOPSIS -------- -'git-repack' [-a] [-d] [-f] [-l] [-n] [-q] [--window=N] [--depth=N] +'git-repack' [-a] [-A] [-d] [-f] [-l] [-n] [-q] [--window=N] [--depth=N] DESCRIPTION ----------- @@ -37,6 +37,18 @@ OPTIONS leaves behind, but `git fsck --full` shows as dangling. +-A:: + Same as `-a`, but any unreachable objects in a previous + pack become loose, unpacked objects, instead of being + left in the old pack. Unreachable objects are never + intentionally added to a pack, even when repacking. + When used with '-d', this option + prevents unreachable objects from being immediately + deleted by way of being left in the old pack and then + removed. Instead, the loose unreachable objects + will be pruned according to normal expiry rules + with the next linkgit:git-gc[1]. + -d:: After packing, if the newly created packs make some existing packs redundant, remove the redundant packs. -- cgit v1.2.1