aboutsummaryrefslogtreecommitdiff
path: root/sha1_name.c
diff options
context:
space:
mode:
authorJunio C Hamano <gitster@pobox.com>2017-11-06 14:24:25 +0900
committerJunio C Hamano <gitster@pobox.com>2017-11-06 14:24:25 +0900
commitcb52b49db526488bd39a6fbfb8f9bdfa70d24802 (patch)
tree96f9f867d6b5eb1cc90cba2cf4c7aecfc042f327 /sha1_name.c
parentfb4cd88ad47da361e5b1a93dde71c61b9dfa2cd2 (diff)
parent0e87b85683df145007862d23b5f0773368d66464 (diff)
downloadgit-cb52b49db526488bd39a6fbfb8f9bdfa70d24802.tar.gz
git-cb52b49db526488bd39a6fbfb8f9bdfa70d24802.tar.xz
Merge branch 'ds/find-unique-abbrev-optim'
Optimize the code to find shortest unique prefix of object names. * ds/find-unique-abbrev-optim: sha1_name: minimize OID comparisons during disambiguation sha1_name: parse less while finding common prefix sha1_name: unroll len loop in find_unique_abbrev_r() p4211-line-log.sh: add log --online --raw --parents perf test
Diffstat (limited to 'sha1_name.c')
-rw-r--r--sha1_name.c135
1 files changed, 119 insertions, 16 deletions
diff --git a/sha1_name.c b/sha1_name.c
index c7c5ab376..2d6b48047 100644
--- a/sha1_name.c
+++ b/sha1_name.c
@@ -153,7 +153,9 @@ static void unique_in_pack(struct packed_git *p,
uint32_t num, last, i, first = 0;
const struct object_id *current = NULL;
- open_pack_index(p);
+ if (open_pack_index(p) || !p->num_objects)
+ return;
+
num = p->num_objects;
last = num;
while (first < last) {
@@ -474,10 +476,104 @@ static unsigned msb(unsigned long val)
return r;
}
-int find_unique_abbrev_r(char *hex, const unsigned char *sha1, int len)
+struct min_abbrev_data {
+ unsigned int init_len;
+ unsigned int cur_len;
+ char *hex;
+ const unsigned char *hash;
+};
+
+static inline char get_hex_char_from_oid(const struct object_id *oid,
+ unsigned int pos)
{
- int status, exists;
+ static const char hex[] = "0123456789abcdef";
+ if ((pos & 1) == 0)
+ return hex[oid->hash[pos >> 1] >> 4];
+ else
+ return hex[oid->hash[pos >> 1] & 0xf];
+}
+
+static int extend_abbrev_len(const struct object_id *oid, void *cb_data)
+{
+ struct min_abbrev_data *mad = cb_data;
+
+ unsigned int i = mad->init_len;
+ while (mad->hex[i] && mad->hex[i] == get_hex_char_from_oid(oid, i))
+ i++;
+
+ if (i < GIT_MAX_RAWSZ && i >= mad->cur_len)
+ mad->cur_len = i + 1;
+
+ return 0;
+}
+
+static void find_abbrev_len_for_pack(struct packed_git *p,
+ struct min_abbrev_data *mad)
+{
+ int match = 0;
+ uint32_t num, last, first = 0;
+ struct object_id oid;
+
+ if (open_pack_index(p) || !p->num_objects)
+ return;
+
+ num = p->num_objects;
+ last = num;
+ while (first < last) {
+ uint32_t mid = first + (last - first) / 2;
+ const unsigned char *current;
+ int cmp;
+
+ current = nth_packed_object_sha1(p, mid);
+ cmp = hashcmp(mad->hash, current);
+ if (!cmp) {
+ match = 1;
+ first = mid;
+ break;
+ }
+ if (cmp > 0) {
+ first = mid + 1;
+ continue;
+ }
+ last = mid;
+ }
+
+ /*
+ * first is now the position in the packfile where we would insert
+ * mad->hash if it does not exist (or the position of mad->hash if
+ * it does exist). Hence, we consider a maximum of three objects
+ * nearby for the abbreviation length.
+ */
+ mad->init_len = 0;
+ if (!match) {
+ nth_packed_object_oid(&oid, p, first);
+ extend_abbrev_len(&oid, mad);
+ } else if (first < num - 1) {
+ nth_packed_object_oid(&oid, p, first + 1);
+ extend_abbrev_len(&oid, mad);
+ }
+ if (first > 0) {
+ nth_packed_object_oid(&oid, p, first - 1);
+ extend_abbrev_len(&oid, mad);
+ }
+ mad->init_len = mad->cur_len;
+}
+
+static void find_abbrev_len_packed(struct min_abbrev_data *mad)
+{
+ struct packed_git *p;
+
+ prepare_packed_git();
+ for (p = packed_git; p; p = p->next)
+ find_abbrev_len_for_pack(p, mad);
+}
+
+int find_unique_abbrev_r(char *hex, const unsigned char *sha1, int len)
+{
+ struct disambiguate_state ds;
+ struct min_abbrev_data mad;
+ struct object_id oid_ret;
if (len < 0) {
unsigned long count = approximate_object_count();
/*
@@ -503,19 +599,26 @@ int find_unique_abbrev_r(char *hex, const unsigned char *sha1, int len)
sha1_to_hex_r(hex, sha1);
if (len == GIT_SHA1_HEXSZ || !len)
return GIT_SHA1_HEXSZ;
- exists = has_sha1_file(sha1);
- while (len < GIT_SHA1_HEXSZ) {
- struct object_id oid_ret;
- status = get_short_oid(hex, len, &oid_ret, GET_OID_QUIETLY);
- if (exists
- ? !status
- : status == SHORT_NAME_NOT_FOUND) {
- hex[len] = 0;
- return len;
- }
- len++;
- }
- return len;
+
+ mad.init_len = len;
+ mad.cur_len = len;
+ mad.hex = hex;
+ mad.hash = sha1;
+
+ find_abbrev_len_packed(&mad);
+
+ if (init_object_disambiguation(hex, mad.cur_len, &ds) < 0)
+ return -1;
+
+ ds.fn = extend_abbrev_len;
+ ds.always_call_fn = 1;
+ ds.cb_data = (void *)&mad;
+
+ find_short_object_filename(&ds);
+ (void)finish_object_disambiguation(&ds, &oid_ret);
+
+ hex[mad.cur_len] = 0;
+ return mad.cur_len;
}
const char *find_unique_abbrev(const unsigned char *sha1, int len)