From 96c4f4a370591b4796628abe18f27f0133b21954 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 9 Apr 2013 02:52:56 -0400 Subject: commit: allow associating auxiliary info on-demand The "indegree" field in the commit object is only used while sorting a list of commits in topological order, and wasting memory otherwise. We would prefer to shrink the size of individual commit objects, which we may have to hold thousands of in-core. We could eject "indegree" field out from the commit object and represent it as a dynamic table based on the decoration infrastructure, but the decoration is meant for sparse annotation and is not a good match. Instead, let's try a different approach. - Assign an integer (commit->index) to each commit we keep in-core (reuse the space of "indegree" field for it); - When running the topological sort, allocate an array of integers in bulk (called "slab"), use the commit->index as an index into this array, and store the "indegree" information there. This does _not_ reduce the memory footprint of a commit object, but the commit->index can be used as the index to dynamically associate commits with other kinds of information as needed. Signed-off-by: Junio C Hamano --- commit.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 50 insertions(+), 9 deletions(-) (limited to 'commit.c') diff --git a/commit.c b/commit.c index 1a41757ee..9365e3b34 100644 --- a/commit.c +++ b/commit.c @@ -14,6 +14,7 @@ static struct commit_extra_header *read_commit_extra_header_lines(const char *bu int save_commit_buffer = 1; const char *commit_type = "commit"; +static int commit_count; static struct commit *check_commit(struct object *obj, const unsigned char *sha1, @@ -58,8 +59,11 @@ struct commit *lookup_commit_or_die(const unsigned char *sha1, const char *ref_n struct commit *lookup_commit(const unsigned char *sha1) { struct object *obj = lookup_object(sha1); - if (!obj) - return create_object(sha1, OBJ_COMMIT, alloc_commit_node()); + if (!obj) { + struct commit *c = alloc_commit_node(); + c->index = commit_count++; + return create_object(sha1, OBJ_COMMIT, c); + } if (!obj->type) obj->type = OBJ_COMMIT; return check_commit(obj, sha1, 0); @@ -497,6 +501,36 @@ struct commit *pop_commit(struct commit_list **stack) return item; } +struct commit_slab { + int *buf; + int alloc; +}; + +static void slab_init(struct commit_slab *s) +{ + memset(s, 0, sizeof(*s)); +} + +static void slab_clear(struct commit_slab *s) +{ + free(s->buf); + slab_init(s); +} + +static inline int *slab_at(struct commit_slab *s, const struct commit *c) +{ + if (s->alloc <= c->index) { + int new_alloc = alloc_nr(s->alloc); + if (new_alloc <= c->index) + new_alloc = c->index + 1; + + s->buf = xrealloc(s->buf, new_alloc * sizeof(*s->buf)); + memset(s->buf + s->alloc, 0, new_alloc - s->alloc); + s->alloc = new_alloc; + } + return s->buf + c->index; +} + /* * Performs an in-place topological sort on the list supplied. */ @@ -505,15 +539,18 @@ void sort_in_topological_order(struct commit_list ** list, int lifo) struct commit_list *next, *orig = *list; struct commit_list *work, **insert; struct commit_list **pptr; + struct commit_slab indegree; if (!orig) return; *list = NULL; + slab_init(&indegree); + /* Mark them and clear the indegree */ for (next = orig; next; next = next->next) { struct commit *commit = next->item; - commit->indegree = 1; + *slab_at(&indegree, commit) = 1; } /* update the indegree */ @@ -521,9 +558,10 @@ void sort_in_topological_order(struct commit_list ** list, int lifo) struct commit_list * parents = next->item->parents; while (parents) { struct commit *parent = parents->item; + int *pi = slab_at(&indegree, parent); - if (parent->indegree) - parent->indegree++; + if (*pi) + (*pi)++; parents = parents->next; } } @@ -540,7 +578,7 @@ void sort_in_topological_order(struct commit_list ** list, int lifo) for (next = orig; next; next = next->next) { struct commit *commit = next->item; - if (commit->indegree == 1) + if (*slab_at(&indegree, commit) == 1) insert = &commit_list_insert(commit, insert)->next; } @@ -561,8 +599,9 @@ void sort_in_topological_order(struct commit_list ** list, int lifo) commit = work_item->item; for (parents = commit->parents; parents ; parents = parents->next) { struct commit *parent = parents->item; + int *pi = slab_at(&indegree, parent); - if (!parent->indegree) + if (!*pi) continue; /* @@ -570,7 +609,7 @@ void sort_in_topological_order(struct commit_list ** list, int lifo) * when all their children have been emitted thereby * guaranteeing topological order. */ - if (--parent->indegree == 1) { + if (--(*pi) == 1) { if (!lifo) commit_list_insert_by_date(parent, &work); else @@ -581,10 +620,12 @@ void sort_in_topological_order(struct commit_list ** list, int lifo) * work_item is a commit all of whose children * have already been emitted. we can emit it now. */ - commit->indegree = 0; + *slab_at(&indegree, commit) = 0; *pptr = work_item; pptr = &work_item->next; } + + slab_clear(&indegree); } /* merge-base stuff */ -- cgit v1.2.1 From 66eb375d3d334efa3f467775a5f2a647c131c4b1 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 12 Apr 2013 23:25:49 -0700 Subject: commit-slab: avoid large realloc Instead of using a single "slab" and keep reallocating it as we find that we need to deal with commits with larger values of commit->index, make a "slab" an array of many "slab_piece"s. Each access may need two levels of indirections, but we only need to reallocate the first level array of pointers when we have to grow the table this way. Signed-off-by: Junio C Hamano --- commit.c | 62 ++++++++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 42 insertions(+), 20 deletions(-) (limited to 'commit.c') diff --git a/commit.c b/commit.c index 9365e3b34..4c05b3966 100644 --- a/commit.c +++ b/commit.c @@ -501,34 +501,56 @@ struct commit *pop_commit(struct commit_list **stack) return item; } +struct commit_slab_piece { + int buf; +}; + struct commit_slab { - int *buf; - int alloc; + int piece_size; + int piece_count; + struct commit_slab_piece **piece; }; static void slab_init(struct commit_slab *s) { - memset(s, 0, sizeof(*s)); + /* allocate ~512kB at once, allowing for malloc overhead */ + int size = (512*1024-32) / sizeof(struct commit_slab_piece); + + s->piece_size = size; + s->piece_count = 0; + s->piece = NULL; } static void slab_clear(struct commit_slab *s) { - free(s->buf); - slab_init(s); + int i; + + for (i = 0; i < s->piece_count; i++) + free(s->piece[i]); + s->piece_count = 0; + free(s->piece); + s->piece = NULL; } -static inline int *slab_at(struct commit_slab *s, const struct commit *c) +static inline struct commit_slab_piece *slab_at(struct commit_slab *s, + const struct commit *c) { - if (s->alloc <= c->index) { - int new_alloc = alloc_nr(s->alloc); - if (new_alloc <= c->index) - new_alloc = c->index + 1; - - s->buf = xrealloc(s->buf, new_alloc * sizeof(*s->buf)); - memset(s->buf + s->alloc, 0, new_alloc - s->alloc); - s->alloc = new_alloc; + int nth_piece, nth_slot; + + nth_piece = c->index / s->piece_size; + nth_slot = c->index % s->piece_size; + + if (s->piece_count <= nth_piece) { + int i; + + s->piece = xrealloc(s->piece, (nth_piece + 1) * sizeof(s->piece)); + for (i = s->piece_count; i <= nth_piece; i++) + s->piece[i] = NULL; + s->piece_count = nth_piece + 1; } - return s->buf + c->index; + if (!s->piece[nth_piece]) + s->piece[nth_piece] = xcalloc(s->piece_size, sizeof(**s->piece)); + return &s->piece[nth_piece][nth_slot]; } /* @@ -550,7 +572,7 @@ void sort_in_topological_order(struct commit_list ** list, int lifo) /* Mark them and clear the indegree */ for (next = orig; next; next = next->next) { struct commit *commit = next->item; - *slab_at(&indegree, commit) = 1; + slab_at(&indegree, commit)->buf = 1; } /* update the indegree */ @@ -558,7 +580,7 @@ void sort_in_topological_order(struct commit_list ** list, int lifo) struct commit_list * parents = next->item->parents; while (parents) { struct commit *parent = parents->item; - int *pi = slab_at(&indegree, parent); + int *pi = &slab_at(&indegree, parent)->buf; if (*pi) (*pi)++; @@ -578,7 +600,7 @@ void sort_in_topological_order(struct commit_list ** list, int lifo) for (next = orig; next; next = next->next) { struct commit *commit = next->item; - if (*slab_at(&indegree, commit) == 1) + if (slab_at(&indegree, commit)->buf == 1) insert = &commit_list_insert(commit, insert)->next; } @@ -599,7 +621,7 @@ void sort_in_topological_order(struct commit_list ** list, int lifo) commit = work_item->item; for (parents = commit->parents; parents ; parents = parents->next) { struct commit *parent = parents->item; - int *pi = slab_at(&indegree, parent); + int *pi = &slab_at(&indegree, parent)->buf; if (!*pi) continue; @@ -620,7 +642,7 @@ void sort_in_topological_order(struct commit_list ** list, int lifo) * work_item is a commit all of whose children * have already been emitted. we can emit it now. */ - *slab_at(&indegree, commit) = 0; + slab_at(&indegree, commit)->buf = 0; *pptr = work_item; pptr = &work_item->next; } -- cgit v1.2.1 From a84b794ad0622103cae98639d7176b2451dc6f92 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sat, 13 Apr 2013 11:56:41 -0700 Subject: commit-slab: introduce a macro to define a slab for new type Introduce a header file to define a macro that can define the struct type, initializer, accessor and cleanup functions to manage a commit slab. Update the "indegree" topological sort facility using it. To associate 32 flag bits with each commit, you can write: define_commit_slab(flag32, uint32); to declare "struct flag32" type, define an instance of it with struct flag32 flags; and initialize it by calling init_flag32(&flags); After that, a call to flag32_at() function uint32 *fp = flag32_at(&flags, commit); will return a pointer pointing at a uint32 for that commit. Once you are done with these flags, clean them up with clear_flag32(&flags); Callers that cannot hard-code how wide the data to be associated with the commit be at compile time can use the "_with_stride" variant to initialize the slab. Suppose you want to give one bit per existing ref, and paint commits down to find which refs are descendants of each commit. Saying typedef uint32 bits320[5]; define_commit_slab(flagbits, bits320); at compile time will still limit your code with hard-coded limit, because you may find that you have more than 320 refs at runtime. The code can declare a commit slab "struct flagbits" like this instead: define_commit_slab(flagbits, unsigned char); struct flagbits flags; and initialize it by: nrefs = ... count number of refs ... init_flagbits_with_stride(&flags, (nrefs + 7) / 8); so that unsigned char *fp = flagbits_at(&flags, commit); will return a pointer pointing at an array of 40 "unsigned char"s associated with the commit, once you figure out nrefs is 320 at runtime. Signed-off-by: Junio C Hamano --- commit.c | 72 +++++++++++++--------------------------------------------------- 1 file changed, 14 insertions(+), 58 deletions(-) (limited to 'commit.c') diff --git a/commit.c b/commit.c index 4c05b3966..f97456ddf 100644 --- a/commit.c +++ b/commit.c @@ -8,6 +8,7 @@ #include "notes.h" #include "gpg-interface.h" #include "mergesort.h" +#include "commit-slab.h" static struct commit_extra_header *read_commit_extra_header_lines(const char *buf, size_t len, const char **); @@ -501,57 +502,12 @@ struct commit *pop_commit(struct commit_list **stack) return item; } -struct commit_slab_piece { - int buf; -}; - -struct commit_slab { - int piece_size; - int piece_count; - struct commit_slab_piece **piece; -}; - -static void slab_init(struct commit_slab *s) -{ - /* allocate ~512kB at once, allowing for malloc overhead */ - int size = (512*1024-32) / sizeof(struct commit_slab_piece); - - s->piece_size = size; - s->piece_count = 0; - s->piece = NULL; -} - -static void slab_clear(struct commit_slab *s) -{ - int i; - - for (i = 0; i < s->piece_count; i++) - free(s->piece[i]); - s->piece_count = 0; - free(s->piece); - s->piece = NULL; -} - -static inline struct commit_slab_piece *slab_at(struct commit_slab *s, - const struct commit *c) -{ - int nth_piece, nth_slot; - - nth_piece = c->index / s->piece_size; - nth_slot = c->index % s->piece_size; - - if (s->piece_count <= nth_piece) { - int i; +/* + * Topological sort support + */ - s->piece = xrealloc(s->piece, (nth_piece + 1) * sizeof(s->piece)); - for (i = s->piece_count; i <= nth_piece; i++) - s->piece[i] = NULL; - s->piece_count = nth_piece + 1; - } - if (!s->piece[nth_piece]) - s->piece[nth_piece] = xcalloc(s->piece_size, sizeof(**s->piece)); - return &s->piece[nth_piece][nth_slot]; -} +/* count number of children that have not been emitted */ +define_commit_slab(indegree_slab, int); /* * Performs an in-place topological sort on the list supplied. @@ -561,18 +517,18 @@ void sort_in_topological_order(struct commit_list ** list, int lifo) struct commit_list *next, *orig = *list; struct commit_list *work, **insert; struct commit_list **pptr; - struct commit_slab indegree; + struct indegree_slab indegree; if (!orig) return; *list = NULL; - slab_init(&indegree); + init_indegree_slab(&indegree); /* Mark them and clear the indegree */ for (next = orig; next; next = next->next) { struct commit *commit = next->item; - slab_at(&indegree, commit)->buf = 1; + *(indegree_slab_at(&indegree, commit)) = 1; } /* update the indegree */ @@ -580,7 +536,7 @@ void sort_in_topological_order(struct commit_list ** list, int lifo) struct commit_list * parents = next->item->parents; while (parents) { struct commit *parent = parents->item; - int *pi = &slab_at(&indegree, parent)->buf; + int *pi = indegree_slab_at(&indegree, parent); if (*pi) (*pi)++; @@ -600,7 +556,7 @@ void sort_in_topological_order(struct commit_list ** list, int lifo) for (next = orig; next; next = next->next) { struct commit *commit = next->item; - if (slab_at(&indegree, commit)->buf == 1) + if (*(indegree_slab_at(&indegree, commit)) == 1) insert = &commit_list_insert(commit, insert)->next; } @@ -621,7 +577,7 @@ void sort_in_topological_order(struct commit_list ** list, int lifo) commit = work_item->item; for (parents = commit->parents; parents ; parents = parents->next) { struct commit *parent = parents->item; - int *pi = &slab_at(&indegree, parent)->buf; + int *pi = indegree_slab_at(&indegree, parent); if (!*pi) continue; @@ -642,12 +598,12 @@ void sort_in_topological_order(struct commit_list ** list, int lifo) * work_item is a commit all of whose children * have already been emitted. we can emit it now. */ - slab_at(&indegree, commit)->buf = 0; + *(indegree_slab_at(&indegree, commit)) = 0; *pptr = work_item; pptr = &work_item->next; } - slab_clear(&indegree); + clear_indegree_slab(&indegree); } /* merge-base stuff */ -- cgit v1.2.1