aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile1
-rw-r--r--builtin/pack-objects.c40
-rw-r--r--cache.h8
-rw-r--r--mru.c50
-rw-r--r--mru.h45
-rw-r--r--pack-write.c1
-rw-r--r--sha1_file.c66
-rwxr-xr-xt/perf/p5303-many-packs.sh87
8 files changed, 248 insertions, 50 deletions
diff --git a/Makefile b/Makefile
index 6a13386c2..ad3624d95 100644
--- a/Makefile
+++ b/Makefile
@@ -755,6 +755,7 @@ LIB_OBJS += merge.o
LIB_OBJS += merge-blobs.o
LIB_OBJS += merge-recursive.o
LIB_OBJS += mergesort.o
+LIB_OBJS += mru.o
LIB_OBJS += name-hash.o
LIB_OBJS += notes.o
LIB_OBJS += notes-cache.o
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 92e2e5f7a..4a6339896 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -46,6 +46,7 @@ static int keep_unreachable, unpack_unreachable, include_tag;
static unsigned long unpack_unreachable_expiration;
static int pack_loose_unreachable;
static int local;
+static int have_non_local_packs;
static int incremental;
static int ignore_packed_keep;
static int allow_ofs_delta;
@@ -978,6 +979,23 @@ static int want_object_in_pack(const unsigned char *sha1,
return 1;
if (incremental)
return 0;
+
+ /*
+ * When asked to do --local (do not include an
+ * object that appears in a pack we borrow
+ * from elsewhere) or --honor-pack-keep (do not
+ * include an object that appears in a pack marked
+ * with .keep), we need to make sure no copy of this
+ * object come from in _any_ pack that causes us to
+ * omit it, and need to complete this loop. When
+ * neither option is in effect, we know the object
+ * we just found is going to be packed, so break
+ * out of the loop to return 1 now.
+ */
+ if (!ignore_packed_keep &&
+ (!local || !have_non_local_packs))
+ break;
+
if (local && !p->pack_local)
return 0;
if (ignore_packed_keep && p->pack_local && p->pack_keep)
@@ -2784,6 +2802,28 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
progress = 2;
prepare_packed_git();
+ if (ignore_packed_keep) {
+ struct packed_git *p;
+ for (p = packed_git; p; p = p->next)
+ if (p->pack_local && p->pack_keep)
+ break;
+ if (!p) /* no keep-able packs found */
+ ignore_packed_keep = 0;
+ }
+ if (local) {
+ /*
+ * unlike ignore_packed_keep above, we do not want to
+ * unset "local" based on looking at packs, as it
+ * also covers non-local objects
+ */
+ struct packed_git *p;
+ for (p = packed_git; p; p = p->next) {
+ if (!p->pack_local) {
+ have_non_local_packs = 1;
+ break;
+ }
+ }
+ }
if (progress)
progress_state = start_progress(_("Counting objects"), 0);
diff --git a/cache.h b/cache.h
index e8128fc5d..7c8051be0 100644
--- a/cache.h
+++ b/cache.h
@@ -1378,6 +1378,13 @@ extern struct packed_git {
char pack_name[FLEX_ARRAY]; /* more */
} *packed_git;
+/*
+ * A most-recently-used ordered version of the packed_git list, which can
+ * be iterated instead of packed_git (and marked via mru_mark).
+ */
+struct mru;
+extern struct mru *packed_git_mru;
+
struct pack_entry {
off_t offset;
unsigned char sha1[20];
@@ -1417,7 +1424,6 @@ extern unsigned char *use_pack(struct packed_git *, struct pack_window **, off_t
extern void close_pack_windows(struct packed_git *);
extern void close_all_packs(void);
extern void unuse_pack(struct pack_window **);
-extern void free_pack_by_name(const char *);
extern void clear_delta_base_cache(void);
extern struct packed_git *add_packed_git(const char *path, size_t path_len, int local);
diff --git a/mru.c b/mru.c
new file mode 100644
index 000000000..9dedae028
--- /dev/null
+++ b/mru.c
@@ -0,0 +1,50 @@
+#include "cache.h"
+#include "mru.h"
+
+void mru_append(struct mru *mru, void *item)
+{
+ struct mru_entry *cur = xmalloc(sizeof(*cur));
+ cur->item = item;
+ cur->prev = mru->tail;
+ cur->next = NULL;
+
+ if (mru->tail)
+ mru->tail->next = cur;
+ else
+ mru->head = cur;
+ mru->tail = cur;
+}
+
+void mru_mark(struct mru *mru, struct mru_entry *entry)
+{
+ /* If we're already at the front of the list, nothing to do */
+ if (mru->head == entry)
+ return;
+
+ /* Otherwise, remove us from our current slot... */
+ if (entry->prev)
+ entry->prev->next = entry->next;
+ if (entry->next)
+ entry->next->prev = entry->prev;
+ else
+ mru->tail = entry->prev;
+
+ /* And insert us at the beginning. */
+ entry->prev = NULL;
+ entry->next = mru->head;
+ if (mru->head)
+ mru->head->prev = entry;
+ mru->head = entry;
+}
+
+void mru_clear(struct mru *mru)
+{
+ struct mru_entry *p = mru->head;
+
+ while (p) {
+ struct mru_entry *to_free = p;
+ p = p->next;
+ free(to_free);
+ }
+ mru->head = mru->tail = NULL;
+}
diff --git a/mru.h b/mru.h
new file mode 100644
index 000000000..42e4aeaa1
--- /dev/null
+++ b/mru.h
@@ -0,0 +1,45 @@
+#ifndef MRU_H
+#define MRU_H
+
+/**
+ * A simple most-recently-used cache, backed by a doubly-linked list.
+ *
+ * Usage is roughly:
+ *
+ * // Create a list. Zero-initialization is required.
+ * static struct mru cache;
+ * mru_append(&cache, item);
+ * ...
+ *
+ * // Iterate in MRU order.
+ * struct mru_entry *p;
+ * for (p = cache.head; p; p = p->next) {
+ * if (matches(p->item))
+ * break;
+ * }
+ *
+ * // Mark an item as used, moving it to the front of the list.
+ * mru_mark(&cache, p);
+ *
+ * // Reset the list to empty, cleaning up all resources.
+ * mru_clear(&cache);
+ *
+ * Note that you SHOULD NOT call mru_mark() and then continue traversing the
+ * list; it reorders the marked item to the front of the list, and therefore
+ * you will begin traversing the whole list again.
+ */
+
+struct mru_entry {
+ void *item;
+ struct mru_entry *prev, *next;
+};
+
+struct mru {
+ struct mru_entry *head, *tail;
+};
+
+void mru_append(struct mru *mru, void *item);
+void mru_mark(struct mru *mru, struct mru_entry *entry);
+void mru_clear(struct mru *mru);
+
+#endif /* MRU_H */
diff --git a/pack-write.c b/pack-write.c
index 33293ce2a..ea0b78813 100644
--- a/pack-write.c
+++ b/pack-write.c
@@ -354,7 +354,6 @@ void finish_tmp_packfile(struct strbuf *name_buffer,
die_errno("unable to make temporary index file readable");
strbuf_addf(name_buffer, "%s.pack", sha1_to_hex(sha1));
- free_pack_by_name(name_buffer->buf);
if (rename(pack_tmp_name, name_buffer->buf))
die_errno("unable to rename temporary pack file");
diff --git a/sha1_file.c b/sha1_file.c
index cb571ac6e..3066b5f71 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -23,6 +23,7 @@
#include "bulk-checkin.h"
#include "streaming.h"
#include "dir.h"
+#include "mru.h"
#ifndef O_NOATIME
#if defined(__linux__) && (defined(__i386__) || defined(__PPC__))
@@ -59,14 +60,6 @@ static struct cached_object empty_tree = {
0
};
-/*
- * A pointer to the last packed_git in which an object was found.
- * When an object is sought, we look in this packfile first, because
- * objects that are looked up at similar times are often in the same
- * packfile as one another.
- */
-static struct packed_git *last_found_pack;
-
static struct cached_object *find_cached_object(const unsigned char *sha1)
{
int i;
@@ -522,6 +515,9 @@ static size_t peak_pack_mapped;
static size_t pack_mapped;
struct packed_git *packed_git;
+static struct mru packed_git_mru_storage;
+struct mru *packed_git_mru = &packed_git_mru_storage;
+
void pack_report(void)
{
fprintf(stderr,
@@ -891,36 +887,6 @@ void close_pack_index(struct packed_git *p)
}
}
-/*
- * This is used by git-repack in case a newly created pack happens to
- * contain the same set of objects as an existing one. In that case
- * the resulting file might be different even if its name would be the
- * same. It is best to close any reference to the old pack before it is
- * replaced on disk. Of course no index pointers or windows for given pack
- * must subsist at this point. If ever objects from this pack are requested
- * again, the new version of the pack will be reinitialized through
- * reprepare_packed_git().
- */
-void free_pack_by_name(const char *pack_name)
-{
- struct packed_git *p, **pp = &packed_git;
-
- while (*pp) {
- p = *pp;
- if (strcmp(pack_name, p->pack_name) == 0) {
- clear_delta_base_cache();
- close_pack(p);
- free(p->bad_object_sha1);
- *pp = p->next;
- if (last_found_pack == p)
- last_found_pack = NULL;
- free(p);
- return;
- }
- pp = &p->next;
- }
-}
-
static unsigned int get_max_fd_limit(void)
{
#ifdef RLIMIT_NOFILE
@@ -1385,6 +1351,15 @@ static void rearrange_packed_git(void)
free(ary);
}
+static void prepare_packed_git_mru(void)
+{
+ struct packed_git *p;
+
+ mru_clear(packed_git_mru);
+ for (p = packed_git; p; p = p->next)
+ mru_append(packed_git_mru, p);
+}
+
static int prepare_packed_git_run_once = 0;
void prepare_packed_git(void)
{
@@ -1400,6 +1375,7 @@ void prepare_packed_git(void)
alt->name[-1] = '/';
}
rearrange_packed_git();
+ prepare_packed_git_mru();
prepare_packed_git_run_once = 1;
}
@@ -2604,21 +2580,15 @@ static int fill_pack_entry(const unsigned char *sha1,
*/
static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e)
{
- struct packed_git *p;
+ struct mru_entry *p;
prepare_packed_git();
if (!packed_git)
return 0;
- if (last_found_pack && fill_pack_entry(sha1, e, last_found_pack))
- return 1;
-
- for (p = packed_git; p; p = p->next) {
- if (p == last_found_pack)
- continue; /* we already checked this one */
-
- if (fill_pack_entry(sha1, e, p)) {
- last_found_pack = p;
+ for (p = packed_git_mru->head; p; p = p->next) {
+ if (fill_pack_entry(sha1, e, p->item)) {
+ mru_mark(packed_git_mru, p);
return 1;
}
}
diff --git a/t/perf/p5303-many-packs.sh b/t/perf/p5303-many-packs.sh
new file mode 100755
index 000000000..377985194
--- /dev/null
+++ b/t/perf/p5303-many-packs.sh
@@ -0,0 +1,87 @@
+#!/bin/sh
+
+test_description='performance with large numbers of packs'
+. ./perf-lib.sh
+
+test_perf_large_repo
+
+# A real many-pack situation would probably come from having a lot of pushes
+# over time. We don't know how big each push would be, but we can fake it by
+# just walking the first-parent chain and having every 5 commits be their own
+# "push". This isn't _entirely_ accurate, as real pushes would have some
+# duplicate objects due to thin-pack fixing, but it's a reasonable
+# approximation.
+#
+# And then all of the rest of the objects can go in a single packfile that
+# represents the state before any of those pushes (actually, we'll generate
+# that first because in such a setup it would be the oldest pack, and we sort
+# the packs by reverse mtime inside git).
+repack_into_n () {
+ rm -rf staging &&
+ mkdir staging &&
+
+ git rev-list --first-parent HEAD |
+ sed -n '1~5p' |
+ head -n "$1" |
+ perl -e 'print reverse <>' \
+ >pushes
+
+ # create base packfile
+ head -n 1 pushes |
+ git pack-objects --delta-base-offset --revs staging/pack
+
+ # and then incrementals between each pair of commits
+ last= &&
+ while read rev
+ do
+ if test -n "$last"; then
+ {
+ echo "$rev" &&
+ echo "^$last"
+ } |
+ git pack-objects --delta-base-offset --revs \
+ staging/pack || return 1
+ fi
+ last=$rev
+ done <pushes &&
+
+ # and install the whole thing
+ rm -f .git/objects/pack/* &&
+ mv staging/* .git/objects/pack/
+}
+
+# Pretend we just have a single branch and no reflogs, and that everything is
+# in objects/pack; that makes our fake pack-building via repack_into_n()
+# much simpler.
+test_expect_success 'simplify reachability' '
+ tip=$(git rev-parse --verify HEAD) &&
+ git for-each-ref --format="option no-deref%0adelete %(refname)" |
+ git update-ref --stdin &&
+ rm -rf .git/logs &&
+ git update-ref refs/heads/master $tip &&
+ git symbolic-ref HEAD refs/heads/master &&
+ git repack -ad
+'
+
+for nr_packs in 1 50 1000
+do
+ test_expect_success "create $nr_packs-pack scenario" '
+ repack_into_n $nr_packs
+ '
+
+ test_perf "rev-list ($nr_packs)" '
+ git rev-list --objects --all >/dev/null
+ '
+
+ # This simulates the interesting part of the repack, which is the
+ # actual pack generation, without smudging the on-disk setup
+ # between trials.
+ test_perf "repack ($nr_packs)" '
+ git pack-objects --keep-true-parents \
+ --honor-pack-keep --non-empty --all \
+ --reflog --indexed-objects --delta-base-offset \
+ --stdout </dev/null >/dev/null
+ '
+done
+
+test_done