diff options
author | Michael Haggerty <mhagger@alum.mit.edu> | 2017-05-22 16:17:55 +0200 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2017-05-24 21:21:21 +0900 |
commit | f23092f19e73f5d8c3480ef02104af627a90361f (patch) | |
tree | ece864208e7a78aea9707bd928153c1cba23533f | |
parent | cfe004a5a9e5116d003332a64142b0f73f8f9cdf (diff) | |
download | git-f23092f19e73f5d8c3480ef02104af627a90361f.tar.gz git-f23092f19e73f5d8c3480ef02104af627a90361f.tar.xz |
cache_ref_iterator_begin(): avoid priming unneeded directories
When iterating over references, reference priming is used to make sure
that loose references are read into the ref-cache before packed
references, to avoid races. It used to be that the prefix passed to
reference iterators almost always ended in `/`, for example
`refs/heads/`. In that case, the priming code would read all loose
references under `find_containing_dir("refs/heads/")`, which is
"refs/heads/". That's just what we want.
But now that `ref-filter` knows how to pass refname prefixes to
`for_each_fullref_in()`, the prefix might come from user input; for
example,
    git for-each-ref refs/heads
Since the argument doesn't include a trailing slash, the reference
iteration code would prime all of the loose references under
`find_containing_dir("refs/heads")`, which is "refs/". Thus we would
unnecessarily read tags, remote-tracking references, etc., when the
user is only interested in branches.
It is a bit awkward to get around this problem. We can't just append a
slash to the argument, because we don't know ab initio whether an
argument like `refs/tags/release` corresponds to a single tag or to a
directory containing tags.
Moreover, until now a `prefix_ref_iterator` was used to make the final
decision about which references fall within the prefix (the
`cache_ref_iterator` only did a rough cut). This is also inefficient,
because the `prefix_ref_iterator` can't know, for example, that while
you are in a subdirectory that is completely within the prefix, you
don't have to do the prefix check.
So:
* Move the responsibility for doing the prefix check directly to
  `cache_ref_iterator`. This means that `cache_ref_iterator_begin()`
  never has to wrap its return value in a `prefix_ref_iterator`.
* Teach `cache_ref_iterator_begin()` (and `prime_ref_dir()`) to be
  stricter about what they iterate over and what directories they
  prime.
* Teach `cache_ref_iterator` to keep track of whether the current
  `cache_ref_iterator_level` is fully within the prefix. If so, skip
  the prefix checks entirely.
The main benefit of these optimizations is for loose references, since
packed references are always read all at once.
Note that after this change, `prefix_ref_iterator` is only ever used
for its trimming feature and not for its "prefix" feature. But I'm not
ripping out the latter yet, because it might be useful for another
patch series that I'm working on.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
-rw-r--r-- | refs/ref-cache.c | 95 |
1 files changed, 85 insertions, 10 deletions
diff --git a/refs/ref-cache.c b/refs/ref-cache.c index ec97f3a38..af2fcb2c1 100644 --- a/refs/ref-cache.c +++ b/refs/ref-cache.c @@ -312,11 +312,42 @@ static void sort_ref_dir(struct ref_dir *dir) dir->sorted = dir->nr = i; } +enum prefix_state { + /* All refs within the directory would match prefix: */ + PREFIX_CONTAINS_DIR, + + /* Some, but not all, refs within the directory might match prefix: */ + PREFIX_WITHIN_DIR, + + /* No refs within the directory could possibly match prefix: */ + PREFIX_EXCLUDES_DIR +}; + /* - * Load all of the refs from `dir` (recursively) into our in-memory - * cache. + * Return a `prefix_state` constant describing the relationship + * between the directory with the specified `dirname` and `prefix`. */ -static void prime_ref_dir(struct ref_dir *dir) +static enum prefix_state overlaps_prefix(const char *dirname, + const char *prefix) +{ + while (*prefix && *dirname == *prefix) { + dirname++; + prefix++; + } + if (!*prefix) + return PREFIX_CONTAINS_DIR; + else if (!*dirname) + return PREFIX_WITHIN_DIR; + else + return PREFIX_EXCLUDES_DIR; +} + +/* + * Load all of the refs from `dir` (recursively) that could possibly + * contain references matching `prefix` into our in-memory cache. If + * `prefix` is NULL, prime unconditionally. + */ +static void prime_ref_dir(struct ref_dir *dir, const char *prefix) { /* * The hard work of loading loose refs is done by get_ref_dir(), so we @@ -327,8 +358,29 @@ static void prime_ref_dir(struct ref_dir *dir) int i; for (i = 0; i < dir->nr; i++) { struct ref_entry *entry = dir->entries[i]; - if (entry->flag & REF_DIR) - prime_ref_dir(get_ref_dir(entry)); + if (!(entry->flag & REF_DIR)) { + /* Not a directory; no need to recurse. 
*/ + } else if (!prefix) { + /* Recurse in any case: */ + prime_ref_dir(get_ref_dir(entry), NULL); + } else { + switch (overlaps_prefix(entry->name, prefix)) { + case PREFIX_CONTAINS_DIR: + /* + * Recurse, and from here down we + * don't have to check the prefix + * anymore: + */ + prime_ref_dir(get_ref_dir(entry), NULL); + break; + case PREFIX_WITHIN_DIR: + prime_ref_dir(get_ref_dir(entry), prefix); + break; + case PREFIX_EXCLUDES_DIR: + /* No need to prime this directory. */ + break; + } + } } } @@ -343,6 +395,8 @@ struct cache_ref_iterator_level { */ struct ref_dir *dir; + enum prefix_state prefix_state; + /* * The index of the current entry within dir (which might * itself be a directory). If index == -1, then the iteration @@ -370,6 +424,13 @@ struct cache_ref_iterator { size_t levels_alloc; /* + * Only include references with this prefix in the iteration. + * The prefix is matched textually, without regard for path + * component boundaries. + */ + const char *prefix; + + /* * A stack of levels. levels[0] is the uppermost level that is * being iterated over in this iteration. (This is not * necessary the top level in the references hierarchy. 
If we @@ -390,6 +451,7 @@ static int cache_ref_iterator_advance(struct ref_iterator *ref_iterator) &iter->levels[iter->levels_nr - 1]; struct ref_dir *dir = level->dir; struct ref_entry *entry; + enum prefix_state entry_prefix_state; if (level->index == -1) sort_ref_dir(dir); @@ -404,6 +466,14 @@ static int cache_ref_iterator_advance(struct ref_iterator *ref_iterator) entry = dir->entries[level->index]; + if (level->prefix_state == PREFIX_WITHIN_DIR) { + entry_prefix_state = overlaps_prefix(entry->name, iter->prefix); + if (entry_prefix_state == PREFIX_EXCLUDES_DIR) + continue; + } else { + entry_prefix_state = level->prefix_state; + } + if (entry->flag & REF_DIR) { /* push down a level */ ALLOC_GROW(iter->levels, iter->levels_nr + 1, @@ -411,6 +481,7 @@ static int cache_ref_iterator_advance(struct ref_iterator *ref_iterator) level = &iter->levels[iter->levels_nr++]; level->dir = get_ref_dir(entry); + level->prefix_state = entry_prefix_state; level->index = -1; } else { iter->base.refname = entry->name; @@ -471,6 +542,7 @@ static int cache_ref_iterator_abort(struct ref_iterator *ref_iterator) struct cache_ref_iterator *iter = (struct cache_ref_iterator *)ref_iterator; + free((char *)iter->prefix); free(iter->levels); base_ref_iterator_free(ref_iterator); return ITER_DONE; @@ -496,10 +568,10 @@ struct ref_iterator *cache_ref_iterator_begin(struct ref_cache *cache, dir = find_containing_dir(dir, prefix, 0); if (!dir) /* There's nothing to iterate over. 
*/ - return empty_ref_iterator_begin(); + return empty_ref_iterator_begin(); if (prime_dir) - prime_ref_dir(dir); + prime_ref_dir(dir, prefix); iter = xcalloc(1, sizeof(*iter)); ref_iterator = &iter->base; @@ -511,9 +583,12 @@ struct ref_iterator *cache_ref_iterator_begin(struct ref_cache *cache, level->index = -1; level->dir = dir; - if (prefix && *prefix) - ref_iterator = prefix_ref_iterator_begin(ref_iterator, - prefix, 0); + if (prefix && *prefix) { + iter->prefix = xstrdup(prefix); + level->prefix_state = PREFIX_WITHIN_DIR; + } else { + level->prefix_state = PREFIX_CONTAINS_DIR; + } return ref_iterator; } |