From 5c3894c39d4095e6875376a6c05c6390b9a50754 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 18 Jan 2016 15:02:40 -0500 Subject: shortlog: match both "Author:" and "author" on stdin The original git-shortlog could read both the normal "git log" output as well as "git log --format=raw". However, when it was converted to C by b8ec592 (Build in shortlog, 2006-10-22), the trailing colon became mandatory, and we no longer matched the raw output. Given the amount of intervening time without any bug reports, it's probable that nobody cares. But it's relatively easy to fix, and the end result is hopefully more readable than the original. Note that this no longer matches "author: ", which we did before, but that has never been a format generated by git. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/shortlog.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'builtin/shortlog.c') diff --git a/builtin/shortlog.c b/builtin/shortlog.c index 35ebd17f8..ab25b443d 100644 --- a/builtin/shortlog.c +++ b/builtin/shortlog.c @@ -94,8 +94,9 @@ static void read_from_stdin(struct shortlog *log) char author[1024], oneline[1024]; while (fgets(author, sizeof(author), stdin) != NULL) { - if (!(author[0] == 'A' || author[0] == 'a') || - !starts_with(author + 1, "uthor: ")) + const char *v; + if (!skip_prefix(author, "Author: ", &v) && + !skip_prefix(author, "author ", &v)) continue; while (fgets(oneline, sizeof(oneline), stdin) && oneline[0] != '\n') @@ -103,7 +104,7 @@ static void read_from_stdin(struct shortlog *log) while (fgets(oneline, sizeof(oneline), stdin) && oneline[0] == '\n') ; /* discard blanks */ - insert_one_record(log, author + 8, oneline); + insert_one_record(log, v, oneline); } } -- cgit v1.2.1 From 50250491bded3190e16978e836a4dbe129c632cf Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 18 Jan 2016 15:02:44 -0500 Subject: shortlog: use strbufs to read from stdin We currently use fixed-size buffers with fgets(), which could lead to incorrect results in the unlikely event that a line had something like "Author:" at exactly its 1024th character. But it's easy to convert this to a strbuf, and because we can reuse the same buffer through the loop, we don't even pay the extra allocation cost. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/shortlog.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) (limited to 'builtin/shortlog.c') diff --git a/builtin/shortlog.c b/builtin/shortlog.c index ab25b443d..6c0a72ede 100644 --- a/builtin/shortlog.c +++ b/builtin/shortlog.c @@ -91,21 +91,24 @@ static void insert_one_record(struct shortlog *log, static void read_from_stdin(struct shortlog *log) { - char author[1024], oneline[1024]; + struct strbuf author = STRBUF_INIT; + struct strbuf oneline = STRBUF_INIT; - while (fgets(author, sizeof(author), stdin) != NULL) { + while (strbuf_getline(&author, stdin, '\n') != EOF) { const char *v; - if (!skip_prefix(author, "Author: ", &v) && - !skip_prefix(author, "author ", &v)) + if (!skip_prefix(author.buf, "Author: ", &v) && + !skip_prefix(author.buf, "author ", &v)) continue; - while (fgets(oneline, sizeof(oneline), stdin) && - oneline[0] != '\n') + while (strbuf_getline(&oneline, stdin, '\n') != EOF && + oneline.len) ; /* discard headers */ - while (fgets(oneline, sizeof(oneline), stdin) && - oneline[0] == '\n') + while (strbuf_getline(&oneline, stdin, '\n') != EOF && + !oneline.len) ; /* discard blanks */ - insert_one_record(log, v, oneline); + insert_one_record(log, v, oneline.buf); } + strbuf_release(&author); + strbuf_release(&oneline); } void shortlog_add_commit(struct shortlog *log, struct commit *commit) -- cgit v1.2.1 From 2db6b83d189bb82d1d45805fa6c85a9c8b507920 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 18 Jan 2016 15:02:48 -0500 Subject: shortlog: replace hand-parsing of author with pretty-printer When gathering the author and oneline subject for each commit, we hand-parse the commit headers to find the "author" line, and then continue past to the blank line at the end of the header. We can replace this tricky hand-parsing by simply asking the pretty-printer for the relevant items. This also decouples the author and oneline parsing, opening up some new optimizations in further commits. One reason to avoid the pretty-printer is that it might be less efficient than hand-parsing. However, I measured no slowdown at all running "git shortlog -ns HEAD" on linux.git. As a bonus, we also fix a memory leak in the (uncommon) case that the author field is blank. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/shortlog.c | 62 +++++++++++++++++++++++------------------------------- 1 file changed, 26 insertions(+), 36 deletions(-) (limited to 'builtin/shortlog.c') diff --git a/builtin/shortlog.c b/builtin/shortlog.c index 6c0a72ede..1261ec4dd 100644 --- a/builtin/shortlog.c +++ b/builtin/shortlog.c @@ -113,45 +113,35 @@ static void read_from_stdin(struct shortlog *log) void shortlog_add_commit(struct shortlog *log, struct commit *commit) { - const char *author = NULL, *buffer; - struct strbuf buf = STRBUF_INIT; - struct strbuf ufbuf = STRBUF_INIT; - - pp_commit_easy(CMIT_FMT_RAW, commit, &buf); - buffer = buf.buf; - while (*buffer && *buffer != '\n') { - const char *eol = strchr(buffer, '\n'); - - if (eol == NULL) - eol = buffer + strlen(buffer); - else - eol++; - - if (starts_with(buffer, "author ")) - author = buffer + 7; - buffer = eol; - } - if (!author) { + struct strbuf author = STRBUF_INIT; + struct strbuf oneline = STRBUF_INIT; + struct pretty_print_context ctx = {0}; + + ctx.fmt = CMIT_FMT_USERFORMAT; + ctx.abbrev = log->abbrev; + ctx.subject = ""; + ctx.after_subject = ""; + ctx.date_mode.type = DATE_NORMAL; + ctx.output_encoding = get_log_output_encoding(); + + format_commit_message(commit, "%an <%ae>", &author, &ctx); + /* we can detect a total failure only by seeing " <>" in the output */ + if (author.len <= 3) { warning(_("Missing author: %s"), oid_to_hex(&commit->object.oid)); - return; - } - if (log->user_format) { - struct pretty_print_context ctx = {0}; - ctx.fmt = CMIT_FMT_USERFORMAT; - ctx.abbrev = log->abbrev; - ctx.subject = ""; - ctx.after_subject = ""; - ctx.date_mode.type = DATE_NORMAL; - ctx.output_encoding = get_log_output_encoding(); - pretty_print_commit(&ctx, commit, &ufbuf); - buffer = ufbuf.buf; - } else if (*buffer) { - buffer++; + goto out; } - insert_one_record(log, author, !*buffer ? "" : buffer); - strbuf_release(&ufbuf); - strbuf_release(&buf); + + if (log->user_format) + pretty_print_commit(&ctx, commit, &oneline); + else + format_commit_message(commit, "%s", &oneline, &ctx); + + insert_one_record(log, author.buf, oneline.len ? oneline.buf : ""); + +out: + strbuf_release(&author); + strbuf_release(&oneline); } static void get_from_rev(struct rev_info *rev, struct shortlog *log) -- cgit v1.2.1 From 4e1d1a2eea25878a2128e376bff8b4a1b2216b15 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 18 Jan 2016 15:02:52 -0500 Subject: shortlog: optimize "--summary" mode If the user asked us only to show counts for each author, rather than the individual summary lines, then there is no point in us generating the summaries only to throw them away. With this patch, I measured the following speedup for "git shortlog -ns HEAD" on linux.git (best-of-five): [before] real 0m5.644s user 0m5.472s sys 0m0.176s [after] real 0m5.257s user 0m5.104s sys 0m0.156s That's only ~7%, but it's so easy to do, there's no good reason not to. We don't have to touch any downstream code, since we already fill in the magic string "" to handle commits without a message. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/shortlog.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'builtin/shortlog.c') diff --git a/builtin/shortlog.c b/builtin/shortlog.c index 1261ec4dd..973b50d41 100644 --- a/builtin/shortlog.c +++ b/builtin/shortlog.c @@ -132,10 +132,12 @@ void shortlog_add_commit(struct shortlog *log, struct commit *commit) goto out; } - if (log->user_format) - pretty_print_commit(&ctx, commit, &oneline); - else - format_commit_message(commit, "%s", &oneline, &ctx); + if (!log->summary) { + if (log->user_format) + pretty_print_commit(&ctx, commit, &oneline); + else + format_commit_message(commit, "%s", &oneline, &ctx); + } insert_one_record(log, author.buf, oneline.len ? oneline.buf : ""); -- cgit v1.2.1 From ed7eba902202be030aae28aba29fe7b294cbee5d Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 18 Jan 2016 15:02:56 -0500 Subject: shortlog: optimize out useless "" normalization If we are in --summary mode, we will always pass to insert_one_record, which will then do some normalization (e.g., cutting out "[PATCH]"). There's no point in doing so if we aren't going to use the result anyway. This drops my best-of-five for "git shortlog -ns HEAD" on linux.git from: real 0m5.257s user 0m5.104s sys 0m0.156s to: real 0m5.194s user 0m5.028s sys 0m0.168s That's only 1%, but arguably the result is clearer to read, as we're able to group our variable declarations inside the conditional block. It also opens up further optimization possibilities for future patches. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/shortlog.c | 63 +++++++++++++++++++++++++++++------------------------- 1 file changed, 34 insertions(+), 29 deletions(-) (limited to 'builtin/shortlog.c') diff --git a/builtin/shortlog.c b/builtin/shortlog.c index 973b50d41..a7708c3a3 100644 --- a/builtin/shortlog.c +++ b/builtin/shortlog.c @@ -31,13 +31,9 @@ static void insert_one_record(struct shortlog *log, const char *author, const char *oneline) { - const char *dot3 = log->common_repo_prefix; - char *buffer, *p; struct string_list_item *item; const char *mailbuf, *namebuf; size_t namelen, maillen; - const char *eol; - struct strbuf subject = STRBUF_INIT; struct strbuf namemailbuf = STRBUF_INIT; struct ident_split ident; @@ -59,34 +55,43 @@ static void insert_one_record(struct shortlog *log, if (item->util == NULL) item->util = xcalloc(1, sizeof(struct string_list)); - /* Skip any leading whitespace, including any blank lines. */ - while (*oneline && isspace(*oneline)) - oneline++; - eol = strchr(oneline, '\n'); - if (!eol) - eol = oneline + strlen(oneline); - if (starts_with(oneline, "[PATCH")) { - char *eob = strchr(oneline, ']'); - if (eob && (!eol || eob < eol)) - oneline = eob + 1; - } - while (*oneline && isspace(*oneline) && *oneline != '\n') - oneline++; - format_subject(&subject, oneline, " "); - buffer = strbuf_detach(&subject, NULL); - - if (dot3) { - int dot3len = strlen(dot3); - if (dot3len > 5) { - while ((p = strstr(buffer, dot3)) != NULL) { - int taillen = strlen(p) - dot3len; - memcpy(p, "/.../", 5); - memmove(p + 5, p + dot3len, taillen + 1); + if (log->summary) + string_list_append(item->util, xstrdup("")); + else { + const char *dot3 = log->common_repo_prefix; + char *buffer, *p; + struct strbuf subject = STRBUF_INIT; + const char *eol; + + /* Skip any leading whitespace, including any blank lines. */ + while (*oneline && isspace(*oneline)) + oneline++; + eol = strchr(oneline, '\n'); + if (!eol) + eol = oneline + strlen(oneline); + if (starts_with(oneline, "[PATCH")) { + char *eob = strchr(oneline, ']'); + if (eob && (!eol || eob < eol)) + oneline = eob + 1; + } + while (*oneline && isspace(*oneline) && *oneline != '\n') + oneline++; + format_subject(&subject, oneline, " "); + buffer = strbuf_detach(&subject, NULL); + + if (dot3) { + int dot3len = strlen(dot3); + if (dot3len > 5) { + while ((p = strstr(buffer, dot3)) != NULL) { + int taillen = strlen(p) - dot3len; + memcpy(p, "/.../", 5); + memmove(p + 5, p + dot3len, taillen + 1); + } } } - } - string_list_append(item->util, buffer); + string_list_append(item->util, buffer); + } } static void read_from_stdin(struct shortlog *log) -- cgit v1.2.1 From 9b21a34a968080873519a927afd9c2570f464785 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 18 Jan 2016 15:02:59 -0500 Subject: shortlog: optimize out useless string list If we are in "--summary" mode, then we do not care about the actual list of subject onelines associated with each author. We care only about the number. So rather than store a string-list for each author full of "", let's just keep a count. This drops my best-of-five for "git shortlog -ns HEAD" on linux.git from: real 0m5.194s user 0m5.028s sys 0m0.168s to: real 0m5.057s user 0m4.916s sys 0m0.144s That's about 2.5%. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/shortlog.c | 43 +++++++++++++++++++++++++++++++------------ 1 file changed, 31 insertions(+), 12 deletions(-) (limited to 'builtin/shortlog.c') diff --git a/builtin/shortlog.c b/builtin/shortlog.c index a7708c3a3..adbf1fd99 100644 --- a/builtin/shortlog.c +++ b/builtin/shortlog.c @@ -14,7 +14,26 @@ static char const * const shortlog_usage[] = { NULL }; -static int compare_by_number(const void *a1, const void *a2) +/* + * The util field of our string_list_items will contain one of two things: + * + * - if --summary is not in use, it will point to a string list of the + * oneline subjects assigned to this author + * + * - if --summary is in use, we don't need that list; we only need to know + * its size. So we abuse the pointer slot to store our integer counter. + * + * This macro accesses the latter. + */ +#define UTIL_TO_INT(x) ((intptr_t)(x)->util) + +static int compare_by_counter(const void *a1, const void *a2) +{ + const struct string_list_item *i1 = a1, *i2 = a2; + return UTIL_TO_INT(i2) - UTIL_TO_INT(i1); +} + +static int compare_by_list(const void *a1, const void *a2) { const struct string_list_item *i1 = a1, *i2 = a2; const struct string_list *l1 = i1->util, *l2 = i2->util; @@ -52,11 +71,9 @@ static void insert_one_record(struct shortlog *log, strbuf_addf(&namemailbuf, " <%.*s>", (int)maillen, mailbuf); item = string_list_insert(&log->list, namemailbuf.buf); - if (item->util == NULL) - item->util = xcalloc(1, sizeof(struct string_list)); if (log->summary) - string_list_append(item->util, xstrdup("")); + item->util = (void *)(UTIL_TO_INT(item) + 1); else { const char *dot3 = log->common_repo_prefix; char *buffer, *p; @@ -90,6 +107,8 @@ static void insert_one_record(struct shortlog *log, } } + if (item->util == NULL) + item->util = xcalloc(1, sizeof(struct string_list)); string_list_append(item->util, buffer); } } @@ -295,14 +314,14 @@ void shortlog_output(struct shortlog *log) if (log->sort_by_number) qsort(log->list.items, log->list.nr, sizeof(struct string_list_item), - compare_by_number); + log->summary ? compare_by_counter : compare_by_list); for (i = 0; i < log->list.nr; i++) { - struct string_list *onelines = log->list.items[i].util; - + const struct string_list_item *item = &log->list.items[i]; if (log->summary) { - printf("%6d\t%s\n", onelines->nr, log->list.items[i].string); + printf("%6d\t%s\n", (int)UTIL_TO_INT(item), item->string); } else { - printf("%s (%d):\n", log->list.items[i].string, onelines->nr); + struct string_list *onelines = item->util; + printf("%s (%d):\n", item->string, onelines->nr); for (j = onelines->nr - 1; j >= 0; j--) { const char *msg = onelines->items[j].string; @@ -315,11 +334,11 @@ void shortlog_output(struct shortlog *log) printf(" %s\n", msg); } putchar('\n'); + onelines->strdup_strings = 1; + string_list_clear(onelines, 0); + free(onelines); } - onelines->strdup_strings = 1; - string_list_clear(onelines, 0); - free(onelines); log->list.items[i].util = NULL; } -- cgit v1.2.1 From d6b16ce9147e787cce3551fc79d52b3e30c3ad3a Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 18 Jan 2016 18:04:35 -0500 Subject: shortlog: don't warn on empty author Git tries to avoid creating a commit with an empty author name or email. However, commits created by older, less strict versions of git may still be in the history. There's not much point in issuing a warning to stderr for an empty author. The user can't do anything about it now, and we are better off to simply include it in the shortlog output as an empty name/email, and let the caller process it however they see fit. Older versions of shortlog differentiated between "author header not present" (which complained) and "author name/email are blank" (which included the empty ident in the output). But since switching to format_commit_message, we complain to stderr about either case (linux.git has a blank author deep in its history which triggers this). We could try to restore the older behavior (complaining only about the missing header), but in retrospect, there's not much point in differentiating these cases. A missing author header is bogus, but as for the "blank" case, the only useful behavior is to add it to the "empty name" collection. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/shortlog.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'builtin/shortlog.c') diff --git a/builtin/shortlog.c b/builtin/shortlog.c index adbf1fd99..e32be3993 100644 --- a/builtin/shortlog.c +++ b/builtin/shortlog.c @@ -149,13 +149,6 @@ void shortlog_add_commit(struct shortlog *log, struct commit *commit) ctx.output_encoding = get_log_output_encoding(); format_commit_message(commit, "%an <%ae>", &author, &ctx); - /* we can detect a total failure only by seeing " <>" in the output */ - if (author.len <= 3) { - warning(_("Missing author: %s"), - oid_to_hex(&commit->object.oid)); - goto out; - } - if (!log->summary) { if (log->user_format) pretty_print_commit(&ctx, commit, &oneline); @@ -165,7 +158,6 @@ void shortlog_add_commit(struct shortlog *log, struct commit *commit) insert_one_record(log, author.buf, oneline.len ? oneline.buf : ""); -out: strbuf_release(&author); strbuf_release(&oneline); } -- cgit v1.2.1