From 4e8da1958111796d55ad63b229ebd3ae6c54bf87 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Mon, 15 May 2006 11:40:05 -0400 Subject: simple euristic for further free packing improvements Given that the early eviction of objects with maximum delta depth may exhibit bad packing on its own, why not considering a bias against deep base objects in try_delta() to mitigate that bad behavior. This patch adjust the MAX_size allowed for a delta based on the depth of the base object as well as enabling the early eviction of max depth objects from the object window. When used separately, those two things produce slightly better and much worse results respectively. But their combined effect is a surprising significant packing improvement. With this really simple patch the GIT repo gets nearly 15% smaller, and the Linux kernel repo about 5% smaller, with no significantly measurable CPU usage difference. Signed-off-by: Nicolas Pitre Signed-off-by: Junio C Hamano --- pack-objects.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'pack-objects.c') diff --git a/pack-objects.c b/pack-objects.c index 5466b1516..526c090c6 100644 --- a/pack-objects.c +++ b/pack-objects.c @@ -1039,8 +1039,8 @@ static int try_delta(struct unpacked *trg, struct unpacked *src, /* Now some size filtering euristics. */ size = trg_entry->size; - max_size = size / 2 - 20; - if (trg_entry->delta) + max_size = (size/2 - 20) / (src_entry->depth + 1); + if (trg_entry->delta && trg_entry->delta_size <= max_size) max_size = trg_entry->delta_size-1; src_size = src_entry->size; sizediff = src_size < size ? size - src_size : 0; @@ -1129,15 +1129,12 @@ static void find_deltas(struct object_entry **list, int window, int depth) if (try_delta(n, m, m->index, depth) < 0) break; } -#if 0 /* if we made n a delta, and if n is already at max * depth, leaving it in the window is pointless. we * should evict it first. - * ... in theory only; somehow this makes things worse. */ if (entry->delta && depth <= entry->depth) continue; -#endif idx++; if (idx >= window) idx = 0; -- cgit v1.2.1 From ff45715ce50b80ab16ee0d0dc7fff0c47a51959a Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Mon, 15 May 2006 13:47:16 -0400 Subject: pack-object: slightly more efficient Avoid creating a delta index for objects with maximum depth since they are not going to be used as delta base anyway. This also reduce peak memory usage slightly as the current object's delta index is not useful until the next object in the loop is considered for deltification. This saves a bit more than 1% on CPU usage. Signed-off-by: Nicolas Pitre Signed-off-by: Junio C Hamano --- pack-objects.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'pack-objects.c') diff --git a/pack-objects.c b/pack-objects.c index 526c090c6..b430b02cf 100644 --- a/pack-objects.c +++ b/pack-objects.c @@ -1105,17 +1105,14 @@ static void find_deltas(struct object_entry **list, int window, int depth) if (entry->size < 50) continue; - if (n->index) - free_delta_index(n->index); + free_delta_index(n->index); + n->index = NULL; free(n->data); n->entry = entry; n->data = read_sha1_file(entry->sha1, type, &size); if (size != entry->size) die("object %s inconsistent object length (%lu vs %lu)", sha1_to_hex(entry->sha1), size, entry->size); - n->index = create_delta_index(n->data, size); - if (!n->index) - die("out of memory"); j = window; while (--j > 0) { @@ -1135,6 +1132,11 @@ static void find_deltas(struct object_entry **list, int window, int depth) */ if (entry->delta && depth <= entry->depth) continue; + + n->index = create_delta_index(n->data, size); + if (!n->index) + die("out of memory"); + idx++; if (idx >= window) idx = 0; @@ -1144,8 +1146,7 @@ static void find_deltas(struct object_entry **list, int window, int depth) fputc('\n', stderr); for (i = 0; i < window; ++i) { - if (array[i].index) - free_delta_index(array[i].index); + free_delta_index(array[i].index); free(array[i].data); } free(array); -- cgit v1.2.1 From c3b06a69ffc41b3ac3600628593dd0fdd3988607 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Tue, 16 May 2006 16:29:14 -0400 Subject: improve depth heuristic for maximum delta size This provides a linear decrement on the penalty related to delta depth instead of being an 1/x function. With this another 5% reduction is observed on packs for both the GIT repo and the Linux kernel repo, as well as fixing a pack size regression in another sample repo I have. Signed-off-by: Nicolas Pitre Signed-off-by: Junio C Hamano --- pack-objects.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'pack-objects.c') diff --git a/pack-objects.c b/pack-objects.c index b430b02cf..33751797f 100644 --- a/pack-objects.c +++ b/pack-objects.c @@ -1037,9 +1037,12 @@ static int try_delta(struct unpacked *trg, struct unpacked *src, if (src_entry->depth >= max_depth) return 0; - /* Now some size filtering euristics. */ + /* Now some size filtering heuristics. */ size = trg_entry->size; - max_size = (size/2 - 20) / (src_entry->depth + 1); + max_size = size/2 - 20; + max_size = max_size * (max_depth - src_entry->depth) / max_depth; + if (max_size == 0) + return 0; if (trg_entry->delta && trg_entry->delta_size <= max_size) max_size = trg_entry->delta_size-1; src_size = src_entry->size; -- cgit v1.2.1