From daae19224a05be9efb9a39c2a2c1c9a60fe906f1 Mon Sep 17 00:00:00 2001
From: Jonathan Nieder <jrnieder@gmail.com>
Date: Sat, 24 Apr 2010 11:06:08 -0500
Subject: fsck: check ident lines in commit objects

Check that email addresses do not contain <, >, or newline so they can
be quickly scanned without trouble.  The copy() function in ident.c
already ensures that ordinary git commands will not write email
addresses without this property.

Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 fsck.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)

(limited to 'fsck.c')

diff --git a/fsck.c b/fsck.c
index 89278c145..ae9ae1abe 100644
--- a/fsck.c
+++ b/fsck.c
@@ -222,12 +222,47 @@ static int fsck_tree(struct tree *item, int strict, fsck_error error_func)
 	return retval;
 }
 
+/*
+ * Check "name <email> date zone\n" at *ident and step *ident past it.
+ * Returns 0 if well-formed (date non-empty), else error_func()'s result.
+ */
+static int fsck_ident(char **ident, struct object *obj, fsck_error error_func)
+{
+	if (**ident == '<' || **ident == '\n')
+		return error_func(obj, FSCK_ERROR, "invalid author/committer line - missing space before email");
+	*ident += strcspn(*ident, "<\n");
+	if ((*ident)[-1] != ' ')
+		return error_func(obj, FSCK_ERROR, "invalid author/committer line - missing space before email");
+	if (**ident != '<')
+		return error_func(obj, FSCK_ERROR, "invalid author/committer line - missing email");
+	*ident += 1 + strcspn(*ident + 1, "<>\n");
+	if (**ident != '>')
+		return error_func(obj, FSCK_ERROR, "invalid author/committer line - bad email");
+	(*ident)++;
+	if (**ident != ' ')
+		return error_func(obj, FSCK_ERROR, "invalid author/committer line - missing space before date");
+	(*ident)++;
+	if (**ident == '0' && (*ident)[1] != ' ')
+		return error_func(obj, FSCK_ERROR, "invalid author/committer line - zero-padded date");
+	*ident += strspn(*ident, "0123456789");
+	if (!isdigit((*ident)[-1]) || **ident != ' ')
+		return error_func(obj, FSCK_ERROR, "invalid author/committer line - bad date");
+	(*ident)++;
+	if ((**ident != '+' && **ident != '-') || !isdigit((*ident)[1]) ||
+	    !isdigit((*ident)[2]) || !isdigit((*ident)[3]) ||
+	    !isdigit((*ident)[4]) || (*ident)[5] != '\n')
+		return error_func(obj, FSCK_ERROR, "invalid author/committer line - bad time zone");
+	(*ident) += 6;
+	return 0;
+}
+
 static int fsck_commit(struct commit *commit, fsck_error error_func)
 {
 	char *buffer = commit->buffer;
 	unsigned char tree_sha1[20], sha1[20];
 	struct commit_graft *graft;
 	int parents = 0;
+	int err;
 
 	if (commit->date == ULONG_MAX)
 		return error_func(&commit->object, FSCK_ERROR, "invalid author/committer line");
@@ -266,6 +301,18 @@ static int fsck_commit(struct commit *commit, fsck_error error_func)
 	}
 	if (memcmp(buffer, "author ", 7))
 		return error_func(&commit->object, FSCK_ERROR, "invalid format - expected 'author' line");
+	buffer += 7;
+	err = fsck_ident(&buffer, &commit->object, error_func);
+	if (err)
+		return err;
+	if (memcmp(buffer, "committer ", strlen("committer ")))
+		return error_func(&commit->object, FSCK_ERROR, "invalid format - expected 'committer' line");
+	buffer += strlen("committer ");
+	err = fsck_ident(&buffer, &commit->object, error_func);
+	if (err)
+		return err;
+	if (*buffer != '\n')
+		return error_func(&commit->object, FSCK_ERROR, "invalid format - expected blank line");
 	if (!commit->tree)
 		return error_func(&commit->object, FSCK_ERROR, "could not load commit's tree %s", sha1_to_hex(tree_sha1));
 
-- 
cgit v1.2.1


From 0adc6a3d49a46436780b2dd636918c9840d82236 Mon Sep 17 00:00:00 2001
From: Jonathan Nieder <jrnieder@gmail.com>
Date: Wed, 26 May 2010 16:50:34 -0500
Subject: fsck: fix bogus commit header check
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

daae1922 (fsck: check ident lines in commit objects, 2010-04-24)
taught fsck to expect commit objects to have the form

  tree <object name>
  <parents>
  author <valid ident string>
  committer <valid ident string>

  log message

The check is overly strict: for example, it errors out with the
message “expected blank line” for perfectly valid commits with an
"encoding ISO-8859-1" line.

Later it might make sense to teach fsck about the rest of the header
and warn about unrecognized header lines, but for simplicity, let’s
accept arbitrary trailing lines for now.

Reported-by: Tuncer Ayaz <tuncer.ayaz@gmail.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 fsck.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'fsck.c')

diff --git a/fsck.c b/fsck.c
index ae9ae1abe..3d05d4a79 100644
--- a/fsck.c
+++ b/fsck.c
@@ -311,8 +311,6 @@ static int fsck_commit(struct commit *commit, fsck_error error_func)
 	err = fsck_ident(&buffer, &commit->object, error_func);
 	if (err)
 		return err;
-	if (*buffer != '\n')
-		return error_func(&commit->object, FSCK_ERROR, "invalid format - expected blank line");
 	if (!commit->tree)
 		return error_func(&commit->object, FSCK_ERROR, "could not load commit's tree %s", sha1_to_hex(tree_sha1));
 
-- 
cgit v1.2.1