From f3badaed5106a16499d0fae31a382f9047b272d7 Mon Sep 17 00:00:00 2001
From: Jeff King
Date: Thu, 11 Feb 2016 17:26:18 -0500
Subject: list-objects: convert name_path to a strbuf

The "struct name_path" data is examined in only two places: we generate it in process_tree(), and we convert it to a single string in path_name(). Everyone else just passes it through to those functions.

We can further note that process_tree() already keeps a single strbuf with the leading tree path, for use with tree_entry_interesting().

Instead of building a separate name_path linked list, let's just use the one we already build in "base". This reduces the amount of code (especially tricky code in path_name() which did not check for integer overflows caused by deep or large pathnames).

It is also more efficient in some instances. Any time we were using tree_entry_interesting(), we were building up the strbuf anyway, so this is an immediate and obvious win there. In cases where we were not, we now store "pathname/" in a strbuf on the heap for each level of the path, instead of keeping two pointers and an int on the stack (with one pointer into the tree object). On a 64-bit system, the latter is 20 bytes; so if path components are shorter than that on average, this has lower peak memory usage. In practice it probably doesn't matter either way; we are already holding in memory all of the tree objects leading up to each pathname, and for normal-depth pathnames, we are only talking about hundreds of bytes.

This patch leaves "struct name_path" as a thin wrapper around the strbuf, to avoid disrupting callbacks. We should fix them, but leaving that change out makes this diff easier to view.

Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- list-objects.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) (limited to 'list-objects.c') diff --git a/list-objects.c b/list-objects.c index 41736d237..dc46b9a2b 100644 --- a/list-objects.c +++ b/list-objects.c @@ -62,7 +62,6 @@ static void process_gitlink(struct rev_info *revs, static void process_tree(struct rev_info *revs, struct tree *tree, show_object_fn show, - struct name_path *path, struct strbuf *base, const char *name, void *cb_data) @@ -86,17 +85,14 @@ static void process_tree(struct rev_info *revs, return; die("bad tree object %s", sha1_to_hex(obj->sha1)); } + obj->flags |= SEEN; - show(obj, path, name, cb_data); - me.up = path; - me.elem = name; - me.elem_len = strlen(name); - - if (!match) { - strbuf_addstr(base, name); - if (base->len) - strbuf_addch(base, '/'); - } + me.base = base; + show(obj, &me, name, cb_data); + + strbuf_addstr(base, name); + if (base->len) + strbuf_addch(base, '/'); init_tree_desc(&desc, tree->buffer, tree->size); @@ -113,7 +109,7 @@ static void process_tree(struct rev_info *revs, if (S_ISDIR(entry.mode)) process_tree(revs, lookup_tree(entry.sha1), - show, &me, base, entry.path, + show, base, entry.path, cb_data); else if (S_ISGITLINK(entry.mode)) process_gitlink(revs, entry.sha1, @@ -220,7 +216,7 @@ void traverse_commit_list(struct rev_info *revs, path = ""; if (obj->type == OBJ_TREE) { process_tree(revs, (struct tree *)obj, show_object, - NULL, &base, path, data); + &base, path, data); continue; } if (obj->type == OBJ_BLOB) { -- cgit v1.2.1 From dc06dc880013d48f2b09c6b4295419382f3b8230 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 11 Feb 2016 17:26:44 -0500 Subject: list-objects: drop name_path entirely In the previous commit, we left name_path as a thin wrapper around a strbuf. This patch drops it entirely. As a result, every show_object_fn callback needs to be adjusted. 
However, none of their code needs to be changed at all, because the only use was to pass it to path_name(), which now handles the bare strbuf. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- list-objects.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'list-objects.c') diff --git a/list-objects.c b/list-objects.c index dc46b9a2b..0d56b50a1 100644 --- a/list-objects.c +++ b/list-objects.c @@ -11,7 +11,7 @@ static void process_blob(struct rev_info *revs, struct blob *blob, show_object_fn show, - struct name_path *path, + struct strbuf *path, const char *name, void *cb_data) { @@ -52,7 +52,7 @@ static void process_blob(struct rev_info *revs, static void process_gitlink(struct rev_info *revs, const unsigned char *sha1, show_object_fn show, - struct name_path *path, + struct strbuf *path, const char *name, void *cb_data) { @@ -69,7 +69,6 @@ static void process_tree(struct rev_info *revs, struct object *obj = &tree->object; struct tree_desc desc; struct name_entry entry; - struct name_path me; enum interesting match = revs->diffopt.pathspec.nr == 0 ? 
all_entries_interesting: entry_not_interesting; int baselen = base->len; @@ -87,8 +86,7 @@ static void process_tree(struct rev_info *revs, } obj->flags |= SEEN; - me.base = base; - show(obj, &me, name, cb_data); + show(obj, base, name, cb_data); strbuf_addstr(base, name); if (base->len) @@ -113,12 +111,12 @@ static void process_tree(struct rev_info *revs, cb_data); else if (S_ISGITLINK(entry.mode)) process_gitlink(revs, entry.sha1, - show, &me, entry.path, + show, base, entry.path, cb_data); else process_blob(revs, lookup_blob(entry.sha1), - show, &me, entry.path, + show, base, entry.path, cb_data); } strbuf_setlen(base, baselen); -- cgit v1.2.1 From 2824e1841b99393d2469c495253d547c643bd8f1 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 11 Feb 2016 17:28:36 -0500 Subject: list-objects: pass full pathname to callbacks When we find a blob at "a/b/c", we currently pass this to our show_object_fn callbacks as two components: "a/b/" and "c". Callbacks which want the full value then call path_name(), which concatenates the two. But this is an inefficient interface; the path is a strbuf, and we could simply append "c" to it temporarily, then roll back the length, without creating a new copy. So we could improve this by teaching the callsites of path_name() this trick (and there are only 3). But we can also notice that no callback actually cares about the broken-down representation, and simply pass each callback the full path "a/b/c" as a string. The callback code becomes even simpler, then, as we do not have to worry about freeing an allocated buffer, nor rolling back our modification to the strbuf. This is theoretically less efficient, as some callbacks would not bother to format the final path component. But in practice this is not measurable. Since we use the same strbuf over and over, our work to grow it is amortized, and we really only pay to memcpy a few bytes. 
Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- list-objects.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'list-objects.c') diff --git a/list-objects.c b/list-objects.c index 0d56b50a1..37d0d10ab 100644 --- a/list-objects.c +++ b/list-objects.c @@ -16,6 +16,7 @@ static void process_blob(struct rev_info *revs, void *cb_data) { struct object *obj = &blob->object; + size_t pathlen; if (!revs->blob_objects) return; @@ -24,7 +25,11 @@ static void process_blob(struct rev_info *revs, if (obj->flags & (UNINTERESTING | SEEN)) return; obj->flags |= SEEN; - show(obj, path, name, cb_data); + + pathlen = path->len; + strbuf_addstr(path, name); + show(obj, path->buf, cb_data); + strbuf_setlen(path, pathlen); } /* @@ -86,9 +91,8 @@ static void process_tree(struct rev_info *revs, } obj->flags |= SEEN; - show(obj, base, name, cb_data); - strbuf_addstr(base, name); + show(obj, base->buf, cb_data); if (base->len) strbuf_addch(base, '/'); @@ -207,7 +211,7 @@ void traverse_commit_list(struct rev_info *revs, continue; if (obj->type == OBJ_TAG) { obj->flags |= SEEN; - show_object(obj, NULL, name, data); + show_object(obj, name, data); continue; } if (!path) @@ -219,7 +223,7 @@ void traverse_commit_list(struct rev_info *revs, } if (obj->type == OBJ_BLOB) { process_blob(revs, (struct blob *)obj, show_object, - NULL, path, data); + &base, path, data); continue; } die("unknown pending object %s (%s)", -- cgit v1.2.1