aboutsummaryrefslogtreecommitdiff
path: root/builtin
diff options
context:
space:
mode:
authorJunio C Hamano <gitster@pobox.com>2014-10-29 10:07:56 -0700
committerJunio C Hamano <gitster@pobox.com>2014-10-29 10:07:56 -0700
commitd70e331c0e8eaeb0bd75ae3020c3be71de075ff7 (patch)
tree645e25685926321704394063059a928d7492a9e8 /builtin
parent853878d520cd4be5a8568b5abd1826e090d3b9d9 (diff)
parent189a1222493f73977291f57d0f2030e982aff282 (diff)
downloadgit-d70e331c0e8eaeb0bd75ae3020c3be71de075ff7.tar.gz
git-d70e331c0e8eaeb0bd75ae3020c3be71de075ff7.tar.xz
Merge branch 'jk/prune-mtime'
Tighten the logic to decide that an unreachable cruft is sufficiently old by covering corner cases such as an ancient object becoming reachable and then going unreachable again, in which case its retention period should be prolonged. * jk/prune-mtime: (28 commits) drop add_object_array_with_mode revision: remove definition of unused 'add_object' function pack-objects: double-check options before discarding objects repack: pack objects mentioned by the index pack-objects: use argv_array reachable: use revision machinery's --indexed-objects code rev-list: add --indexed-objects option rev-list: document --reflog option t5516: test pushing a tag of an otherwise unreferenced blob traverse_commit_list: support pending blobs/trees with paths make add_object_array_with_context interface more sane write_sha1_file: freshen existing objects pack-objects: match prune logic for discarding objects pack-objects: refactor unpack-unreachable expiration check prune: keep objects reachable from recent objects sha1_file: add for_each iterators for loose and packed objects count-objects: use for_each_loose_file_in_objdir count-objects: do not use xsize_t when counting object size prune-packed: use for_each_loose_file_in_objdir reachable: mark index blobs as SEEN ...
Diffstat (limited to 'builtin')
-rw-r--r--builtin/count-objects.c101
-rw-r--r--builtin/grep.c8
-rw-r--r--builtin/pack-objects.c86
-rw-r--r--builtin/prune-packed.c69
-rw-r--r--builtin/prune.c89
-rw-r--r--builtin/reflog.c2
-rw-r--r--builtin/repack.c1
7 files changed, 157 insertions, 199 deletions
diff --git a/builtin/count-objects.c b/builtin/count-objects.c
index a7f70cb85..e47ef0b1a 100644
--- a/builtin/count-objects.c
+++ b/builtin/count-objects.c
@@ -11,6 +11,9 @@
static unsigned long garbage;
static off_t size_garbage;
+static int verbose;
+static unsigned long loose, packed, packed_loose;
+static off_t loose_size;
static void real_report_garbage(const char *desc, const char *path)
{
@@ -21,61 +24,31 @@ static void real_report_garbage(const char *desc, const char *path)
garbage++;
}
-static void count_objects(DIR *d, char *path, int len, int verbose,
- unsigned long *loose,
- off_t *loose_size,
- unsigned long *packed_loose)
+static void loose_garbage(const char *path)
{
- struct dirent *ent;
- while ((ent = readdir(d)) != NULL) {
- char hex[41];
- unsigned char sha1[20];
- const char *cp;
- int bad = 0;
+ if (verbose)
+ report_garbage("garbage found", path);
+}
- if (is_dot_or_dotdot(ent->d_name))
- continue;
- for (cp = ent->d_name; *cp; cp++) {
- int ch = *cp;
- if (('0' <= ch && ch <= '9') ||
- ('a' <= ch && ch <= 'f'))
- continue;
- bad = 1;
- break;
- }
- if (cp - ent->d_name != 38)
- bad = 1;
- else {
- struct stat st;
- memcpy(path + len + 3, ent->d_name, 38);
- path[len + 2] = '/';
- path[len + 41] = 0;
- if (lstat(path, &st) || !S_ISREG(st.st_mode))
- bad = 1;
- else
- (*loose_size) += xsize_t(on_disk_bytes(st));
- }
- if (bad) {
- if (verbose) {
- struct strbuf sb = STRBUF_INIT;
- strbuf_addf(&sb, "%.*s/%s",
- len + 2, path, ent->d_name);
- report_garbage("garbage found", sb.buf);
- strbuf_release(&sb);
- }
- continue;
- }
- (*loose)++;
- if (!verbose)
- continue;
- memcpy(hex, path+len, 2);
- memcpy(hex+2, ent->d_name, 38);
- hex[40] = 0;
- if (get_sha1_hex(hex, sha1))
- die("internal error");
- if (has_sha1_pack(sha1))
- (*packed_loose)++;
+static int count_loose(const unsigned char *sha1, const char *path, void *data)
+{
+ struct stat st;
+
+ if (lstat(path, &st) || !S_ISREG(st.st_mode))
+ loose_garbage(path);
+ else {
+ loose_size += on_disk_bytes(st);
+ loose++;
+ if (verbose && has_sha1_pack(sha1))
+ packed_loose++;
}
+ return 0;
+}
+
+static int count_cruft(const char *basename, const char *path, void *data)
+{
+ loose_garbage(path);
+ return 0;
}
static char const * const count_objects_usage[] = {
@@ -85,12 +58,7 @@ static char const * const count_objects_usage[] = {
int cmd_count_objects(int argc, const char **argv, const char *prefix)
{
- int i, verbose = 0, human_readable = 0;
- const char *objdir = get_object_directory();
- int len = strlen(objdir);
- char *path = xmalloc(len + 50);
- unsigned long loose = 0, packed = 0, packed_loose = 0;
- off_t loose_size = 0;
+ int human_readable = 0;
struct option opts[] = {
OPT__VERBOSE(&verbose, N_("be verbose")),
OPT_BOOL('H', "human-readable", &human_readable,
@@ -104,19 +72,10 @@ int cmd_count_objects(int argc, const char **argv, const char *prefix)
usage_with_options(count_objects_usage, opts);
if (verbose)
report_garbage = real_report_garbage;
- memcpy(path, objdir, len);
- if (len && objdir[len-1] != '/')
- path[len++] = '/';
- for (i = 0; i < 256; i++) {
- DIR *d;
- sprintf(path + len, "%02x", i);
- d = opendir(path);
- if (!d)
- continue;
- count_objects(d, path, len, verbose,
- &loose, &loose_size, &packed_loose);
- closedir(d);
- }
+
+ for_each_loose_file_in_objdir(get_object_directory(),
+ count_loose, count_cruft, NULL, NULL);
+
if (verbose) {
struct packed_git *p;
unsigned long num_pack = 0;
diff --git a/builtin/grep.c b/builtin/grep.c
index c86a142f3..4063882f0 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -456,10 +456,10 @@ static int grep_tree(struct grep_opt *opt, const struct pathspec *pathspec,
}
static int grep_object(struct grep_opt *opt, const struct pathspec *pathspec,
- struct object *obj, const char *name, struct object_context *oc)
+ struct object *obj, const char *name, const char *path)
{
if (obj->type == OBJ_BLOB)
- return grep_sha1(opt, obj->sha1, name, 0, oc ? oc->path : NULL);
+ return grep_sha1(opt, obj->sha1, name, 0, path);
if (obj->type == OBJ_COMMIT || obj->type == OBJ_TREE) {
struct tree_desc tree;
void *data;
@@ -501,7 +501,7 @@ static int grep_objects(struct grep_opt *opt, const struct pathspec *pathspec,
for (i = 0; i < nr; i++) {
struct object *real_obj;
real_obj = deref_tag(list->objects[i].item, NULL, 0);
- if (grep_object(opt, pathspec, real_obj, list->objects[i].name, list->objects[i].context)) {
+ if (grep_object(opt, pathspec, real_obj, list->objects[i].name, list->objects[i].path)) {
hit = 1;
if (opt->status_only)
break;
@@ -821,7 +821,7 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
struct object *object = parse_object_or_die(sha1, arg);
if (!seen_dashdash)
verify_non_filename(prefix, arg);
- add_object_array_with_context(object, arg, &list, xmemdupz(&oc, sizeof(struct object_context)));
+ add_object_array_with_path(object, arg, &list, oc.mode, oc.path);
continue;
}
if (!strcmp(arg, "--")) {
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 78c659a6b..3f9f5c776 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -20,6 +20,9 @@
#include "streaming.h"
#include "thread-utils.h"
#include "pack-bitmap.h"
+#include "reachable.h"
+#include "sha1-array.h"
+#include "argv-array.h"
static const char *pack_usage[] = {
N_("git pack-objects --stdout [options...] [< ref-list | < object-list]"),
@@ -2406,6 +2409,27 @@ static int has_sha1_pack_kept_or_nonlocal(const unsigned char *sha1)
return 0;
}
+/*
+ * Store a list of sha1s that are should not be discarded
+ * because they are either written too recently, or are
+ * reachable from another object that was.
+ *
+ * This is filled by get_object_list.
+ */
+static struct sha1_array recent_objects;
+
+static int loosened_object_can_be_discarded(const unsigned char *sha1,
+ unsigned long mtime)
+{
+ if (!unpack_unreachable_expiration)
+ return 0;
+ if (mtime > unpack_unreachable_expiration)
+ return 0;
+ if (sha1_array_lookup(&recent_objects, sha1) >= 0)
+ return 0;
+ return 1;
+}
+
static void loosen_unused_packed_objects(struct rev_info *revs)
{
struct packed_git *p;
@@ -2416,17 +2440,14 @@ static void loosen_unused_packed_objects(struct rev_info *revs)
if (!p->pack_local || p->pack_keep)
continue;
- if (unpack_unreachable_expiration &&
- p->mtime < unpack_unreachable_expiration)
- continue;
-
if (open_pack_index(p))
die("cannot open pack index");
for (i = 0; i < p->num_objects; i++) {
sha1 = nth_packed_object_sha1(p, i);
if (!packlist_find(&to_pack, sha1, NULL) &&
- !has_sha1_pack_kept_or_nonlocal(sha1))
+ !has_sha1_pack_kept_or_nonlocal(sha1) &&
+ !loosened_object_can_be_discarded(sha1, p->mtime))
if (force_object_loose(sha1, p->mtime))
die("unable to force loose object");
}
@@ -2462,6 +2483,19 @@ static int get_object_list_from_bitmap(struct rev_info *revs)
return 0;
}
+static void record_recent_object(struct object *obj,
+ const struct name_path *path,
+ const char *last,
+ void *data)
+{
+ sha1_array_append(&recent_objects, obj->sha1);
+}
+
+static void record_recent_commit(struct commit *commit, void *data)
+{
+ sha1_array_append(&recent_objects, commit->object.sha1);
+}
+
static void get_object_list(int ac, const char **av)
{
struct rev_info revs;
@@ -2509,10 +2543,23 @@ static void get_object_list(int ac, const char **av)
mark_edges_uninteresting(&revs, show_edge);
traverse_commit_list(&revs, show_commit, show_object, NULL);
+ if (unpack_unreachable_expiration) {
+ revs.ignore_missing_links = 1;
+ if (add_unseen_recent_objects_to_traversal(&revs,
+ unpack_unreachable_expiration))
+ die("unable to add recent objects");
+ if (prepare_revision_walk(&revs))
+ die("revision walk setup failed");
+ traverse_commit_list(&revs, record_recent_commit,
+ record_recent_object, NULL);
+ }
+
if (keep_unreachable)
add_objects_in_unpacked_packs(&revs);
if (unpack_unreachable)
loosen_unused_packed_objects(&revs);
+
+ sha1_array_clear(&recent_objects);
}
static int option_parse_index_version(const struct option *opt,
@@ -2567,9 +2614,9 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
int use_internal_rev_list = 0;
int thin = 0;
int all_progress_implied = 0;
- const char *rp_av[6];
- int rp_ac = 0;
+ struct argv_array rp = ARGV_ARRAY_INIT;
int rev_list_unpacked = 0, rev_list_all = 0, rev_list_reflog = 0;
+ int rev_list_index = 0;
struct option pack_objects_options[] = {
OPT_SET_INT('q', "quiet", &progress,
N_("do not show progress meter"), 0),
@@ -2616,6 +2663,9 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
{ OPTION_SET_INT, 0, "reflog", &rev_list_reflog, NULL,
N_("include objects referred by reflog entries"),
PARSE_OPT_NOARG | PARSE_OPT_NONEG, NULL, 1 },
+ { OPTION_SET_INT, 0, "indexed-objects", &rev_list_index, NULL,
+ N_("include objects referred to by the index"),
+ PARSE_OPT_NOARG | PARSE_OPT_NONEG, NULL, 1 },
OPT_BOOL(0, "stdout", &pack_to_stdout,
N_("output pack to stdout")),
OPT_BOOL(0, "include-tag", &include_tag,
@@ -2658,24 +2708,28 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
if (pack_to_stdout != !base_name || argc)
usage_with_options(pack_usage, pack_objects_options);
- rp_av[rp_ac++] = "pack-objects";
+ argv_array_push(&rp, "pack-objects");
if (thin) {
use_internal_rev_list = 1;
- rp_av[rp_ac++] = "--objects-edge";
+ argv_array_push(&rp, "--objects-edge");
} else
- rp_av[rp_ac++] = "--objects";
+ argv_array_push(&rp, "--objects");
if (rev_list_all) {
use_internal_rev_list = 1;
- rp_av[rp_ac++] = "--all";
+ argv_array_push(&rp, "--all");
}
if (rev_list_reflog) {
use_internal_rev_list = 1;
- rp_av[rp_ac++] = "--reflog";
+ argv_array_push(&rp, "--reflog");
+ }
+ if (rev_list_index) {
+ use_internal_rev_list = 1;
+ argv_array_push(&rp, "--indexed-objects");
}
if (rev_list_unpacked) {
use_internal_rev_list = 1;
- rp_av[rp_ac++] = "--unpacked";
+ argv_array_push(&rp, "--unpacked");
}
if (!reuse_object)
@@ -2706,6 +2760,8 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
if (keep_unreachable && unpack_unreachable)
die("--keep-unreachable and --unpack-unreachable are incompatible.");
+ if (!rev_list_all || !rev_list_reflog || !rev_list_index)
+ unpack_unreachable_expiration = 0;
if (!use_internal_rev_list || !pack_to_stdout || is_repository_shallow())
use_bitmap_index = 0;
@@ -2723,8 +2779,8 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
if (!use_internal_rev_list)
read_object_list_from_stdin();
else {
- rp_av[rp_ac] = NULL;
- get_object_list(rp_ac, rp_av);
+ get_object_list(rp.argc, rp.argv);
+ argv_array_clear(&rp);
}
cleanup_preferred_base();
if (include_tag && nr_result)
diff --git a/builtin/prune-packed.c b/builtin/prune-packed.c
index d430731d7..f24a2c2bd 100644
--- a/builtin/prune-packed.c
+++ b/builtin/prune-packed.c
@@ -10,65 +10,42 @@ static const char * const prune_packed_usage[] = {
static struct progress *progress;
-static void prune_dir(int i, DIR *dir, struct strbuf *pathname, int opts)
+static int prune_subdir(int nr, const char *path, void *data)
{
- struct dirent *de;
- char hex[40];
- int top_len = pathname->len;
+ int *opts = data;
+ display_progress(progress, nr + 1);
+ if (!(*opts & PRUNE_PACKED_DRY_RUN))
+ rmdir(path);
+ return 0;
+}
+
+static int prune_object(const unsigned char *sha1, const char *path,
+ void *data)
+{
+ int *opts = data;
- sprintf(hex, "%02x", i);
- while ((de = readdir(dir)) != NULL) {
- unsigned char sha1[20];
- if (strlen(de->d_name) != 38)
- continue;
- memcpy(hex + 2, de->d_name, 38);
- if (get_sha1_hex(hex, sha1))
- continue;
- if (!has_sha1_pack(sha1))
- continue;
+ if (!has_sha1_pack(sha1))
+ return 0;
- strbuf_add(pathname, de->d_name, 38);
- if (opts & PRUNE_PACKED_DRY_RUN)
- printf("rm -f %s\n", pathname->buf);
- else
- unlink_or_warn(pathname->buf);
- display_progress(progress, i + 1);
- strbuf_setlen(pathname, top_len);
- }
+ if (*opts & PRUNE_PACKED_DRY_RUN)
+ printf("rm -f %s\n", path);
+ else
+ unlink_or_warn(path);
+ return 0;
}
void prune_packed_objects(int opts)
{
- int i;
- const char *dir = get_object_directory();
- struct strbuf pathname = STRBUF_INIT;
- int top_len;
-
- strbuf_addstr(&pathname, dir);
if (opts & PRUNE_PACKED_VERBOSE)
progress = start_progress_delay(_("Removing duplicate objects"),
256, 95, 2);
- if (pathname.len && pathname.buf[pathname.len - 1] != '/')
- strbuf_addch(&pathname, '/');
-
- top_len = pathname.len;
- for (i = 0; i < 256; i++) {
- DIR *d;
+ for_each_loose_file_in_objdir(get_object_directory(),
+ prune_object, NULL, prune_subdir, &opts);
- display_progress(progress, i + 1);
- strbuf_setlen(&pathname, top_len);
- strbuf_addf(&pathname, "%02x/", i);
- d = opendir(pathname.buf);
- if (!d)
- continue;
- prune_dir(i, d, &pathname, opts);
- closedir(d);
- strbuf_setlen(&pathname, top_len + 2);
- rmdir(pathname.buf);
- }
+ /* Ensure we show 100% before finishing progress */
+ display_progress(progress, 256);
stop_progress(&progress);
- strbuf_release(&pathname);
}
int cmd_prune_packed(int argc, const char **argv, const char *prefix)
diff --git a/builtin/prune.c b/builtin/prune.c
index 144a3bdb3..04d3b12ae 100644
--- a/builtin/prune.c
+++ b/builtin/prune.c
@@ -31,11 +31,23 @@ static int prune_tmp_file(const char *fullpath)
return 0;
}
-static int prune_object(const char *fullpath, const unsigned char *sha1)
+static int prune_object(const unsigned char *sha1, const char *fullpath,
+ void *data)
{
struct stat st;
- if (lstat(fullpath, &st))
- return error("Could not stat '%s'", fullpath);
+
+ /*
+ * Do we know about this object?
+ * It must have been reachable
+ */
+ if (lookup_object(sha1))
+ return 0;
+
+ if (lstat(fullpath, &st)) {
+ /* report errors, but do not stop pruning */
+ error("Could not stat '%s'", fullpath);
+ return 0;
+ }
if (st.st_mtime > expire)
return 0;
if (show_only || verbose) {
@@ -48,68 +60,20 @@ static int prune_object(const char *fullpath, const unsigned char *sha1)
return 0;
}
-static int prune_dir(int i, struct strbuf *path)
+static int prune_cruft(const char *basename, const char *path, void *data)
{
- size_t baselen = path->len;
- DIR *dir = opendir(path->buf);
- struct dirent *de;
-
- if (!dir)
- return 0;
-
- while ((de = readdir(dir)) != NULL) {
- char name[100];
- unsigned char sha1[20];
-
- if (is_dot_or_dotdot(de->d_name))
- continue;
- if (strlen(de->d_name) == 38) {
- sprintf(name, "%02x", i);
- memcpy(name+2, de->d_name, 39);
- if (get_sha1_hex(name, sha1) < 0)
- break;
-
- /*
- * Do we know about this object?
- * It must have been reachable
- */
- if (lookup_object(sha1))
- continue;
-
- strbuf_addf(path, "/%s", de->d_name);
- prune_object(path->buf, sha1);
- strbuf_setlen(path, baselen);
- continue;
- }
- if (starts_with(de->d_name, "tmp_obj_")) {
- strbuf_addf(path, "/%s", de->d_name);
- prune_tmp_file(path->buf);
- strbuf_setlen(path, baselen);
- continue;
- }
- fprintf(stderr, "bad sha1 file: %s/%s\n", path->buf, de->d_name);
- }
- closedir(dir);
- if (!show_only)
- rmdir(path->buf);
+ if (starts_with(basename, "tmp_obj_"))
+ prune_tmp_file(path);
+ else
+ fprintf(stderr, "bad sha1 file: %s\n", path);
return 0;
}
-static void prune_object_dir(const char *path)
+static int prune_subdir(int nr, const char *path, void *data)
{
- struct strbuf buf = STRBUF_INIT;
- size_t baselen;
- int i;
-
- strbuf_addstr(&buf, path);
- strbuf_addch(&buf, '/');
- baselen = buf.len;
-
- for (i = 0; i < 256; i++) {
- strbuf_addf(&buf, "%02x", i);
- prune_dir(i, &buf);
- strbuf_setlen(&buf, baselen);
- }
+ if (!show_only)
+ rmdir(path);
+ return 0;
}
/*
@@ -171,9 +135,10 @@ int cmd_prune(int argc, const char **argv, const char *prefix)
if (show_progress)
progress = start_progress_delay(_("Checking connectivity"), 0, 0, 2);
- mark_reachable_objects(&revs, 1, progress);
+ mark_reachable_objects(&revs, 1, expire, progress);
stop_progress(&progress);
- prune_object_dir(get_object_directory());
+ for_each_loose_file_in_objdir(get_object_directory(), prune_object,
+ prune_cruft, prune_subdir, NULL);
prune_packed_objects(show_only ? PRUNE_PACKED_DRY_RUN : 0);
remove_temporary_files(get_object_directory());
diff --git a/builtin/reflog.c b/builtin/reflog.c
index b6388f75b..2d85d260c 100644
--- a/builtin/reflog.c
+++ b/builtin/reflog.c
@@ -649,7 +649,7 @@ static int cmd_reflog_expire(int argc, const char **argv, const char *prefix)
init_revisions(&cb.revs, prefix);
if (cb.verbose)
printf("Marking reachable objects...");
- mark_reachable_objects(&cb.revs, 0, NULL);
+ mark_reachable_objects(&cb.revs, 0, 0, NULL);
if (cb.verbose)
putchar('\n');
}
diff --git a/builtin/repack.c b/builtin/repack.c
index 2aae05d36..28456206c 100644
--- a/builtin/repack.c
+++ b/builtin/repack.c
@@ -209,6 +209,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
argv_array_push(&cmd_args, "--non-empty");
argv_array_push(&cmd_args, "--all");
argv_array_push(&cmd_args, "--reflog");
+ argv_array_push(&cmd_args, "--indexed-objects");
if (window)
argv_array_pushf(&cmd_args, "--window=%s", window);
if (window_memory)