From 91229834c293302c4456e732ddef7ace0df6e471 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 5 Jan 2017 23:17:40 -0500 Subject: blame: fix alignment with --abbrev=40 The blame command internally adds 1 to any requested sha1 abbreviation length, and then subtracts it when outputting a boundary commit. This lets regular and boundary sha1s line up visually, but it misses one corner case. When the requested length is 40, we bump the value to 41. But since we only have 40 characters, that's all we can show (fortunately the truncation is done by a printf precision field, so it never tries to read past the end of the buffer). So a normal sha1 shows 40 hex characters, and a boundary sha1 shows "^" plus 40 hex characters. The result is misaligned. The "-l" option to show long sha1s gets around this by skipping the "abbrev" variable entirely and just always using GIT_SHA1_HEXSZ. This avoids the "+1" issue, but it does mean that boundary commits only have 39 characters printed. This is somewhat odd, but it does look good visually: the results are aligned and left-justified. The alternative would be to allocate an extra column that would contain either an extra space or the "^" boundary marker. As this is by definition the human-readable view, it's probably not that big a deal either way (and of course --porcelain, etc, correctly produce correct 40-hex sha1s). But for consistency, this patch teaches --abbrev=40 to produce the same output as "-l" (always left-aligned, with 40-hex for normal sha1s, and "^" plus 39-hex for boundaries). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/blame.c | 2 +- t/t8002-blame.sh | 28 ++++++++++++++++++++++++++++ 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/builtin/blame.c b/builtin/blame.c index f618392e5..cbb7dc2ad 100644 --- a/builtin/blame.c +++ b/builtin/blame.c @@ -2606,7 +2606,7 @@ parse_done: } else if (show_progress < 0) show_progress = isatty(2); - if (0 < abbrev) + if (0 < abbrev && abbrev < GIT_SHA1_HEXSZ) /* one more abbrev length is needed for the boundary commit */ abbrev++; diff --git a/t/t8002-blame.sh b/t/t8002-blame.sh index ab79de954..c6347ad8f 100755 --- a/t/t8002-blame.sh +++ b/t/t8002-blame.sh @@ -86,4 +86,32 @@ test_expect_success 'blame with showEmail config true' ' test_cmp expected_n result ' +test_expect_success 'set up abbrev tests' ' + test_commit abbrev && + sha1=$(git rev-parse --verify HEAD) && + check_abbrev () { + expect=$1; shift + echo $sha1 | cut -c 1-$expect >expect && + git blame "$@" abbrev.t >actual && + perl -lne "/[0-9a-f]+/ and print \$&" actual.sha && + test_cmp expect actual.sha + } +' + +test_expect_success 'blame --abbrev= works' ' + # non-boundary commits get +1 for alignment + check_abbrev 31 --abbrev=30 HEAD && + check_abbrev 30 --abbrev=30 ^HEAD +' + +test_expect_success 'blame -l aligns regular and boundary commits' ' + check_abbrev 40 -l HEAD && + check_abbrev 39 -l ^HEAD +' + +test_expect_success 'blame --abbrev=40 behaves like -l' ' + check_abbrev 40 --abbrev=40 HEAD && + check_abbrev 39 --abbrev=40 ^HEAD +' + test_done -- cgit v1.2.1 From ed58d8088b570e7629bfc94b87e433f05229ef3c Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 5 Jan 2017 23:18:08 -0500 Subject: blame: handle --no-abbrev You can already ask blame for full sha1s with "-l" or with "--abbrev=40". But for consistency with other parts of Git, we should support "--no-abbrev". Worse, blame already accepts --no-abbrev, but it's totally broken. When we see --no-abbrev, the abbrev variable is set to 0, which is then used as a printf precision. For regular sha1s, that means we print nothing at all (which is very wrong). For boundary commits we decrement it to "-1", which printf interprets as "no limit" (which is almost correct, except it misses the 39-length magic explained in the previous commit). Let's detect --no-abbrev and behave as if --abbrev=40 was given. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/blame.c | 2 ++ t/t8002-blame.sh | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/builtin/blame.c b/builtin/blame.c index cbb7dc2ad..1fccbe6bf 100644 --- a/builtin/blame.c +++ b/builtin/blame.c @@ -2609,6 +2609,8 @@ parse_done: if (0 < abbrev && abbrev < GIT_SHA1_HEXSZ) /* one more abbrev length is needed for the boundary commit */ abbrev++; + else if (!abbrev) + abbrev = GIT_SHA1_HEXSZ; if (revs_file && read_ancestry(revs_file)) die_errno("reading graft file '%s' failed", revs_file); diff --git a/t/t8002-blame.sh b/t/t8002-blame.sh index c6347ad8f..380e1c105 100755 --- a/t/t8002-blame.sh +++ b/t/t8002-blame.sh @@ -114,4 +114,8 @@ test_expect_success 'blame --abbrev=40 behaves like -l' ' check_abbrev 39 --abbrev=40 ^HEAD ' +test_expect_success '--no-abbrev works like --abbrev=40' ' + check_abbrev 40 --no-abbrev +' + test_done -- cgit v1.2.1 From 4e768329847c8226a230406041fe249adea245cf Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 5 Jan 2017 23:20:51 -0500 Subject: blame: output porcelain "previous" header for each file It's possible for content currently found in one file to have originated in two separate files, each of which may have been modified in some single older commit. The --porcelain output generates an incorrect "previous" header in this case, whereas --line-porcelain gets it right. The problem is that the porcelain output tries to omit repeated details of commits, and treats "previous" as a property of the commit, when it is really a property of the blamed block of lines. Let's look at an example. In a case like this, you might see this output from --line-porcelain: SOME_SHA1 1 1 1 author ... committer ... previous SOME_SHA1^ file_one filename file_one ...some line content... SOME_SHA1 2 1 1 author ... committer ... previous SOME_SHA1^ file_two filename file_two ...some different content.... The "filename" fields tell us that the two lines are from two different files. But notice that the filename also appears in the "previous" field, which tells us where to start a re-blame. The second content line never appeared in file_one at all, so we would obviously need to re-blame from file_two (or possibly even some other file, if had just been renamed to file_two in SOME_SHA1). So far so good. Now here's what --porcelain looks like: SOME_SHA1 1 1 1 author ... committer ... previous SOME_SHA1^ file_one filename file_one ...some line content... SOME_SHA1 2 1 1 filename file_two ...some different content.... We've dropped the author and committer fields from the second line, as they would just be repeats. But we can't omit "filename", because it depends on the actual block of blamed lines, not just the commit. This is handled by emit_porcelain_details(), which will show the filename either if it is the first mention of the commit _or_ if the commit has multiple paths in it. But we don't give "previous" the same handling. It's written inside emit_one_suspect_detail(), which bails early if we've already seen that commit. And so the output above is wrong; a reader would assume that the correct place to re-blame line two is from file_one, but that's obviously nonsense. Let's treat "previous" the same as "filename", and show it fresh whenever we know we are in a confusing case like this. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/blame.c | 23 +++++---- t/t8011-blame-split-file.sh | 117 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 131 insertions(+), 9 deletions(-) create mode 100755 t/t8011-blame-split-file.sh diff --git a/builtin/blame.c b/builtin/blame.c index 1fccbe6bf..d4f3ce96a 100644 --- a/builtin/blame.c +++ b/builtin/blame.c @@ -1700,13 +1700,23 @@ static void get_commit_info(struct commit *commit, } /* + * Write out any suspect information which depends on the path. This must be + * handled separately from emit_one_suspect_detail(), because a given commit + * may have changes in multiple paths. So this needs to appear each time + * we mention a new group. + * * To allow LF and other nonportable characters in pathnames, * they are c-style quoted as needed. */ -static void write_filename_info(const char *path) +static void write_filename_info(struct origin *suspect) { + if (suspect->previous) { + struct origin *prev = suspect->previous; + printf("previous %s ", oid_to_hex(&prev->commit->object.oid)); + write_name_quoted(prev->path, stdout, '\n'); + } printf("filename "); - write_name_quoted(path, stdout, '\n'); + write_name_quoted(suspect->path, stdout, '\n'); } /* @@ -1735,11 +1745,6 @@ static int emit_one_suspect_detail(struct origin *suspect, int repeat) printf("summary %s\n", ci.summary.buf); if (suspect->commit->object.flags & UNINTERESTING) printf("boundary\n"); - if (suspect->previous) { - struct origin *prev = suspect->previous; - printf("previous %s ", oid_to_hex(&prev->commit->object.oid)); - write_name_quoted(prev->path, stdout, '\n'); - } commit_info_destroy(&ci); @@ -1760,7 +1765,7 @@ static void found_guilty_entry(struct blame_entry *ent, oid_to_hex(&suspect->commit->object.oid), ent->s_lno + 1, ent->lno + 1, ent->num_lines); emit_one_suspect_detail(suspect, 0); - write_filename_info(suspect->path); + write_filename_info(suspect); maybe_flush_or_die(stdout, "stdout"); } pi->blamed_lines += ent->num_lines; @@ -1884,7 +1889,7 @@ static void emit_porcelain_details(struct origin *suspect, int repeat) { if (emit_one_suspect_detail(suspect, repeat) || (suspect->commit->object.flags & MORE_THAN_ONE_PATH)) - write_filename_info(suspect->path); + write_filename_info(suspect); } static void emit_porcelain(struct scoreboard *sb, struct blame_entry *ent, diff --git a/t/t8011-blame-split-file.sh b/t/t8011-blame-split-file.sh new file mode 100755 index 000000000..831125047 --- /dev/null +++ b/t/t8011-blame-split-file.sh @@ -0,0 +1,117 @@ +#!/bin/sh + +test_description=' +The general idea is that we have a single file whose lines come from +multiple other files, and those individual files were modified in the same +commits. That means that we will see the same commit in multiple contexts, +and each one should be attributed to the correct file. + +Note that we need to use "blame -C" to find the commit for all lines. We will +not bother testing that the non-C case fails to find it. That is how blame +behaves now, but it is not a property we want to make sure is retained. +' +. ./test-lib.sh + +# help avoid typing and reading long strings of similar lines +# in the tests below +generate_expect () { + while read nr data + do + i=0 + while test $i -lt $nr + do + echo $data + i=$((i + 1)) + done + done +} + +test_expect_success 'setup split file case' ' + # use lines long enough to trigger content detection + test_seq 1000 1010 >one && + test_seq 2000 2010 >two && + git add one two && + test_commit base && + + sed "6s/^/modified /" one.tmp && + mv one.tmp one && + sed "6s/^/modified /" two.tmp && + mv two.tmp two && + git add -u && + test_commit modified && + + cat one two >combined && + git add combined && + git rm one two && + test_commit combined +' + +test_expect_success 'setup simulated porcelain' ' + # This just reads porcelain-ish output and tries + # to output the value of a given field for each line (either by + # reading the field that accompanies this line, or referencing + # the information found last time the commit was mentioned). + cat >read-porcelain.pl <<-\EOF + my $field = shift; + while (<>) { + if (/^[0-9a-f]{40} /) { + flush(); + $hash = $&; + } elsif (/^$field (.*)/) { + $cache{$hash} = $1; + } + } + flush(); + + sub flush { + return unless defined $hash; + if (defined $cache{$hash}) { + print "$cache{$hash}\n"; + } else { + print "NONE\n"; + } + } + EOF +' + +for output in porcelain line-porcelain +do + test_expect_success "generate --$output output" ' + git blame --root -C --$output combined >output + ' + + test_expect_success "$output output finds correct commits" ' + generate_expect >expect <<-\EOF && + 5 base + 1 modified + 10 base + 1 modified + 5 base + EOF + perl read-porcelain.pl summary actual && + test_cmp expect actual + ' + + test_expect_success "$output output shows correct filenames" ' + generate_expect >expect <<-\EOF && + 11 one + 11 two + EOF + perl read-porcelain.pl filename actual && + test_cmp expect actual + ' + + test_expect_success "$output output shows correct previous pointer" ' + generate_expect >expect <<-EOF && + 5 NONE + 1 $(git rev-parse modified^) one + 10 NONE + 1 $(git rev-parse modified^) two + 5 NONE + EOF + perl read-porcelain.pl previous actual && + test_cmp expect actual + ' +done + +test_done -- cgit v1.2.1