From 2834bc27c16e4ae103768dd6dda530f0e46116af Mon Sep 17 00:00:00 2001 From: Vicent Marti Date: Thu, 24 Oct 2013 14:01:06 -0400 Subject: pack-objects: refactor the packing list The hash table that stores the packing list for a given `pack-objects` run was tightly coupled to the pack-objects code. In this commit, we refactor the hash table and the underlying storage array into a `packing_data` struct. The functionality for accessing and adding entries to the packing list is hence accessible from other parts of Git besides the `pack-objects` builtin. This refactoring is a requirement for further patches in this series that will require accessing the commit packing list from outside of `pack-objects`. The hash table implementation has been minimally altered: we now use table sizes which are always a power of two, to ensure a uniform index distribution in the array. Signed-off-by: Vicent Marti Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- pack-objects.c | 111 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 111 insertions(+) create mode 100644 pack-objects.c (limited to 'pack-objects.c') diff --git a/pack-objects.c b/pack-objects.c new file mode 100644 index 000000000..d01d851ce --- /dev/null +++ b/pack-objects.c @@ -0,0 +1,111 @@ +#include "cache.h" +#include "object.h" +#include "pack.h" +#include "pack-objects.h" + +static uint32_t locate_object_entry_hash(struct packing_data *pdata, + const unsigned char *sha1, + int *found) +{ + uint32_t i, hash, mask = (pdata->index_size - 1); + + memcpy(&hash, sha1, sizeof(uint32_t)); + i = hash & mask; + + while (pdata->index[i] > 0) { + uint32_t pos = pdata->index[i] - 1; + + if (!hashcmp(sha1, pdata->objects[pos].idx.sha1)) { + *found = 1; + return i; + } + + i = (i + 1) & mask; + } + + *found = 0; + return i; +} + +static inline uint32_t closest_pow2(uint32_t v) +{ + v = v - 1; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + return v + 1; +} + +static void rehash_objects(struct packing_data *pdata) +{ + uint32_t i; + struct object_entry *entry; + + pdata->index_size = closest_pow2(pdata->nr_objects * 3); + if (pdata->index_size < 1024) + pdata->index_size = 1024; + + pdata->index = xrealloc(pdata->index, sizeof(uint32_t) * pdata->index_size); + memset(pdata->index, 0, sizeof(int) * pdata->index_size); + + entry = pdata->objects; + + for (i = 0; i < pdata->nr_objects; i++) { + int found; + uint32_t ix = locate_object_entry_hash(pdata, entry->idx.sha1, &found); + + if (found) + die("BUG: Duplicate object in hash"); + + pdata->index[ix] = i + 1; + entry++; + } +} + +struct object_entry *packlist_find(struct packing_data *pdata, + const unsigned char *sha1, + uint32_t *index_pos) +{ + uint32_t i; + int found; + + if (!pdata->index_size) + return NULL; + + i = locate_object_entry_hash(pdata, sha1, &found); + + if (index_pos) + *index_pos = i; + + if (!found) + return NULL; + + return &pdata->objects[pdata->index[i] - 1]; +} + +struct object_entry *packlist_alloc(struct packing_data *pdata, + const unsigned char *sha1, + uint32_t index_pos) +{ + struct object_entry *new_entry; + + if (pdata->nr_objects >= pdata->nr_alloc) { + pdata->nr_alloc = (pdata->nr_alloc + 1024) * 3 / 2; + pdata->objects = xrealloc(pdata->objects, + pdata->nr_alloc * sizeof(*new_entry)); + } + + new_entry = pdata->objects + pdata->nr_objects++; + + memset(new_entry, 0, sizeof(*new_entry)); + hashcpy(new_entry->idx.sha1, sha1); + + if (pdata->index_size * 3 <= pdata->nr_objects * 4) + rehash_objects(pdata); + else + pdata->index[index_pos] = pdata->nr_objects; + + return new_entry; +} -- cgit v1.2.1