From 5af297185ee189b3d09464badf55f855cf94c493 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 14 Apr 2016 10:18:11 -0700 Subject: fsck_commit_buffer(): do not special case the last validation The pattern taken by all the validations in this function is: if (notice a violation exists) { err = report(... VIOLATION_KIND ...); if (err) return err; } where report() returns zero if specified kind of violation is set to be ignored, and otherwise shows an error message and returns non-zero. The last validation in the function immediately before the function returns 0 to declare "all good" can cheat and directly return the return value from report(), and the current code does so, i.e. if (notice a violation exists) return report(... VIOLATION_KIND ...); return 0; But that is a selfish code that declares it is the ultimate and final form of the function, never to be enhanced later. To allow and invite future enhancements, make the last test follow the same pattern. Signed-off-by: Junio C Hamano --- fsck.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fsck.c b/fsck.c index ca4c68537..21dfa5f98 100644 --- a/fsck.c +++ b/fsck.c @@ -666,9 +666,11 @@ static int fsck_commit_buffer(struct commit *commit, const char *buffer, err = fsck_ident(&buffer, &commit->object, options); if (err) return err; - if (!commit->tree) - return report(options, &commit->object, FSCK_MSG_BAD_TREE, "could not load commit's tree %s", sha1_to_hex(tree_sha1)); - + if (!commit->tree) { + err = report(options, &commit->object, FSCK_MSG_BAD_TREE, "could not load commit's tree %s", sha1_to_hex(tree_sha1)); + if (err) + return err; + } return 0; } -- cgit v1.2.1 From 6d2d780f6359df424a625a51f09da80ab6dc1ef8 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 14 Apr 2016 10:58:22 -0700 Subject: fsck: detect and warn a commit with embedded NUL Even though a Git commit object is designed to be capable of storing any binary data as its payload, in practice people use it to 
describe the changes in textual form, and tools like "git log" are designed to treat the payload as text. Detect and warn when we see any commit object with a NUL byte in it. Note that a NUL byte in the header part is already detected as a grave error. This change is purely about the message part. Signed-off-by: Junio C Hamano --- fsck.c | 8 ++++++++ t/t1450-fsck.sh | 18 ++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/fsck.c b/fsck.c index 21dfa5f98..3366b3fb6 100644 --- a/fsck.c +++ b/fsck.c @@ -59,6 +59,7 @@ FUNC(HAS_DOTGIT, WARN) \ FUNC(NULL_SHA1, WARN) \ FUNC(ZERO_PADDED_FILEMODE, WARN) \ + FUNC(NUL_IN_COMMIT, WARN) \ /* infos (reported as warnings, but ignored by default) */ \ FUNC(BAD_TAG_NAME, INFO) \ FUNC(MISSING_TAGGER_ENTRY, INFO) @@ -610,6 +611,7 @@ static int fsck_commit_buffer(struct commit *commit, const char *buffer, struct commit_graft *graft; unsigned parent_count, parent_line_count = 0, author_count; int err; + const char *buffer_begin = buffer; if (verify_headers(buffer, size, &commit->object, options)) return -1; @@ -671,6 +673,12 @@ static int fsck_commit_buffer(struct commit *commit, const char *buffer, if (err) return err; } + if (memchr(buffer_begin, '\0', size)) { + err = report(options, &commit->object, FSCK_MSG_NUL_IN_COMMIT, + "NUL byte in the commit object body"); + if (err) + return err; + } return 0; } diff --git a/t/t1450-fsck.sh b/t/t1450-fsck.sh index e66b7cb69..7ee8ea004 100755 --- a/t/t1450-fsck.sh +++ b/t/t1450-fsck.sh @@ -427,6 +427,24 @@ test_expect_success 'fsck allows .Ňit' ' ) ' +test_expect_success 'NUL in commit' ' + rm -fr nul-in-commit && + git init nul-in-commit && + ( + cd nul-in-commit && + git commit --allow-empty -m "initial commitQNUL after message" && + git cat-file commit HEAD >original && + q_to_nul <original >munged && + git hash-object -w -t commit --stdin <munged >name && + git branch bad $(cat name) && + + test_must_fail git -c fsck.nulInCommit=error fsck 2>warn.1 && + grep nulInCommit warn.1 && + git fsck 
2>warn.2 && + grep nulInCommit warn.2 + ) +' + # create a static test repo which is broken by omitting # one particular object ($1, which is looked up via rev-parse # in the new repository). -- cgit v1.2.1