aboutsummaryrefslogtreecommitdiff
path: root/builtin
diff options
context:
space:
mode:
authorJeff King <peff@peff.net>2016-08-11 05:26:36 -0400
committerJunio C Hamano <gitster@pobox.com>2016-08-11 10:44:13 -0700
commit4cf2143e02993440d6661ddccbd896bee7756c02 (patch)
tree3320029fcb1d4d211a6dc883e59eafda221d7453 /builtin
parentca79c985727653656443da8d92c9f5c2b4da93ed (diff)
downloadgit-4cf2143e02993440d6661ddccbd896bee7756c02.tar.gz
git-4cf2143e02993440d6661ddccbd896bee7756c02.tar.xz
pack-objects: break delta cycles before delta-search phase
We do not allow cycles in the delta graph of a pack (i.e., A is a delta of B which is a delta of A) for the obvious reason that you cannot actually access any of the objects in such a case. There's a last-ditch attempt to notice cycles during the write phase, during which we issue a warning to the user and write one of the objects out in full. However, this is "last-ditch" for two reasons: 1. By this time, it's too late to find another delta for the object, so the resulting pack is larger than it otherwise could be. 2. The warning is there because this is something that _shouldn't_ ever happen. If it does, then either: a. a pack we are reusing deltas from had its own cycle b. we are reusing deltas from multiple packs, and we found a cycle among them (i.e., A is a delta of B in one pack, but B is a delta of A in another, and we choose to use both deltas). c. there is a bug in the delta-search code So this code serves as a final check that none of these things has happened, warns the user, and prevents us from writing a bogus pack. Right now, (2b) should never happen because of the static ordering of packs in want_object_in_pack(). If two objects have a delta relationship, then they must be in the same pack, and therefore we will find them from that same pack. However, a future patch would like to change that static ordering, which will make (2b) a common occurrence. In preparation, we should be able to handle those kinds of cycles better. This patch does by introducing a cycle-breaking step during the get_object_details() phase, when we are deciding which deltas can be reused. That gives us the chance to feed the objects into the delta search as if the cycle did not exist. We'll leave the detection and warning in the write_object() phase in place, as it still serves as a check for case (2c). This does mean we will stop warning for (2a). That case is caused by bogus input packs, and we ideally would warn the user about it. However, since those cycles show up after picking reusable deltas, they look the same as (2b) to us; our new code will break the cycles early and the last-ditch check will never see them. We could do analysis on any cycles that we find to distinguish the two cases (i.e., it is a bogus pack if and only if every delta in the cycle is in the same pack), but we don't need to. If there is a cycle inside a pack, we'll run into problems not only reusing the delta, but accessing the object data at all. So when we try to dig up the actual size of the object, we'll hit that same cycle and kick in our usual complain-and-try-another-source code. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 'builtin')
-rw-r--r--builtin/pack-objects.c84
1 files changed, 84 insertions, 0 deletions
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index c4c2a3c79..ed77355c6 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -1494,6 +1494,83 @@ static int pack_offset_sort(const void *_a, const void *_b)
(a->in_pack_offset > b->in_pack_offset);
}
+/*
+ * Drop an on-disk delta we were planning to reuse. Naively, this would
+ * just involve blanking out the "delta" field, but we have to deal
+ * with some extra book-keeping:
+ *
+ * 1. Removing ourselves from the delta_sibling linked list.
+ *
+ * 2. Updating our size/type to the non-delta representation. These were
+ * either not recorded initially (size) or overwritten with the delta type
+ * (type) when check_object() decided to reuse the delta.
+ */
+static void drop_reused_delta(struct object_entry *entry)
+{
+ struct object_entry **p = &entry->delta->delta_child;
+ struct object_info oi = OBJECT_INFO_INIT;
+
+ while (*p) {
+ if (*p == entry)
+ *p = (*p)->delta_sibling;
+ else
+ p = &(*p)->delta_sibling;
+ }
+ entry->delta = NULL;
+
+ oi.sizep = &entry->size;
+ oi.typep = &entry->type;
+ if (packed_object_info(entry->in_pack, entry->in_pack_offset, &oi) < 0) {
+ /*
+ * We failed to get the info from this pack for some reason;
+ * fall back to sha1_object_info, which may find another copy.
+ * And if that fails, the error will be recorded in entry->type
+ * and dealt with in prepare_pack().
+ */
+ entry->type = sha1_object_info(entry->idx.sha1, &entry->size);
+ }
+}
+
+/*
+ * Follow the chain of deltas from this entry onward, throwing away any links
+ * that cause us to hit a cycle (as determined by the DFS state flags in
+ * the entries).
+ */
+static void break_delta_chains(struct object_entry *entry)
+{
+ /* If it's not a delta, it can't be part of a cycle. */
+ if (!entry->delta) {
+ entry->dfs_state = DFS_DONE;
+ return;
+ }
+
+ switch (entry->dfs_state) {
+ case DFS_NONE:
+ /*
+ * This is the first time we've seen the object. We mark it as
+ * part of the active potential cycle and recurse.
+ */
+ entry->dfs_state = DFS_ACTIVE;
+ break_delta_chains(entry->delta);
+ entry->dfs_state = DFS_DONE;
+ break;
+
+ case DFS_DONE:
+ /* object already examined, and not part of a cycle */
+ break;
+
+ case DFS_ACTIVE:
+ /*
+ * We found a cycle that needs broken. It would be correct to
+ * break any link in the chain, but it's convenient to
+ * break this one.
+ */
+ drop_reused_delta(entry);
+ entry->dfs_state = DFS_DONE;
+ break;
+ }
+}
+
static void get_object_details(void)
{
uint32_t i;
@@ -1511,6 +1588,13 @@ static void get_object_details(void)
entry->no_try_delta = 1;
}
+ /*
+ * This must happen in a second pass, since we rely on the delta
+ * information for the whole list being completed.
+ */
+ for (i = 0; i < to_pack.nr_objects; i++)
+ break_delta_chains(&to_pack.objects[i]);
+
free(sorted_by_offset);
}