git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Jonathan Nieder <jrnieder@gmail.com>
To: Mark Lodato <lodatom@gmail.com>
Cc: git@vger.kernel.org, "Shawn O. Pearce" <spearce@spearce.org>,
	Nicolas Pitre <nico@fluxnic.net>
Subject: [PATCH] fsck: check ident lines in commit objects
Date: Sat, 24 Apr 2010 11:06:08 -0500	[thread overview]
Message-ID: <20100424160608.GA14690@progeny.tock> (raw)
In-Reply-To: <1272069944-20626-1-git-send-email-lodatom@gmail.com>

Check that email addresses do not contain <, >, or newline so they can
be quickly scanned without trouble.  The copy() function in ident.c
already ensures that ordinary git commands will not write email
addresses without this property.

Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
---
Thoughts?  Should some of these errors be warnings?

git fast-import is capable of producing commits with some of these
problems: for example, it is fine with

	committer C O Mitter <foo@b>ar.net> 005 -    +5

 fsck.c          |   47 +++++++++++++++++++++++++++++++++++++++++++++++
 t/t1450-fsck.sh |   25 +++++++++++++++++++++++++
 2 files changed, 72 insertions(+), 0 deletions(-)

diff --git a/fsck.c b/fsck.c
index 89278c1..ae9ae1a 100644
--- a/fsck.c
+++ b/fsck.c
@@ -222,12 +222,47 @@ static int fsck_tree(struct tree *item, int strict, fsck_error error_func)
 	return retval;
 }
 
+static int fsck_ident(char **ident, struct object *obj, fsck_error error_func)
+{
+	if (**ident == '<' || **ident == '\n')
+		return error_func(obj, FSCK_ERROR, "invalid author/committer line - missing space before email");
+	*ident += strcspn(*ident, "<\n");
+	if ((*ident)[-1] != ' ')
+		return error_func(obj, FSCK_ERROR, "invalid author/committer line - missing space before email");
+	if (**ident != '<')
+		return error_func(obj, FSCK_ERROR, "invalid author/committer line - missing email");
+	(*ident)++;
+	*ident += strcspn(*ident, "<>\n");
+	if (**ident != '>')
+		return error_func(obj, FSCK_ERROR, "invalid author/committer line - bad email");
+	(*ident)++;
+	if (**ident != ' ')
+		return error_func(obj, FSCK_ERROR, "invalid author/committer line - missing space before date");
+	(*ident)++;
+	if (**ident == '0' && (*ident)[1] != ' ')
+		return error_func(obj, FSCK_ERROR, "invalid author/committer line - zero-padded date");
+	*ident += strspn(*ident, "0123456789");
+	if (**ident != ' ')
+		return error_func(obj, FSCK_ERROR, "invalid author/committer line - bad date");
+	(*ident)++;
+	if ((**ident != '+' && **ident != '-') ||
+	    !isdigit((*ident)[1]) ||
+	    !isdigit((*ident)[2]) ||
+	    !isdigit((*ident)[3]) ||
+	    !isdigit((*ident)[4]) ||
+	    ((*ident)[5] != '\n'))
+		return error_func(obj, FSCK_ERROR, "invalid author/committer line - bad time zone");
+	(*ident) += 6;
+	return 0;
+}
+
 static int fsck_commit(struct commit *commit, fsck_error error_func)
 {
 	char *buffer = commit->buffer;
 	unsigned char tree_sha1[20], sha1[20];
 	struct commit_graft *graft;
 	int parents = 0;
+	int err;
 
 	if (commit->date == ULONG_MAX)
 		return error_func(&commit->object, FSCK_ERROR, "invalid author/committer line");
@@ -266,6 +301,18 @@ static int fsck_commit(struct commit *commit, fsck_error error_func)
 	}
 	if (memcmp(buffer, "author ", 7))
 		return error_func(&commit->object, FSCK_ERROR, "invalid format - expected 'author' line");
+	buffer += 7;
+	err = fsck_ident(&buffer, &commit->object, error_func);
+	if (err)
+		return err;
+	if (memcmp(buffer, "committer ", strlen("committer ")))
+		return error_func(&commit->object, FSCK_ERROR, "invalid format - expected 'committer' line");
+	buffer += strlen("committer ");
+	err = fsck_ident(&buffer, &commit->object, error_func);
+	if (err)
+		return err;
+	if (*buffer != '\n')
+		return error_func(&commit->object, FSCK_ERROR, "invalid format - expected blank line");
 	if (!commit->tree)
 		return error_func(&commit->object, FSCK_ERROR, "could not load commit's tree %s", sha1_to_hex(tree_sha1));
 
diff --git a/t/t1450-fsck.sh b/t/t1450-fsck.sh
index 49cae3e..d8eed9b 100755
--- a/t/t1450-fsck.sh
+++ b/t/t1450-fsck.sh
@@ -57,6 +57,31 @@ test_expect_success 'branch pointing to non-commit' '
 	git update-ref -d refs/heads/invalid
 '
 
+new=nothing
+test_expect_success 'email without @ is okay' '
+	git cat-file commit HEAD >basis &&
+	sed "s/@/AT/" basis >okay &&
+	new=$(git hash-object -t commit -w --stdin <okay) &&
+	echo "$new" &&
+	git update-ref refs/heads/bogus "$new" &&
+	git fsck
+'
+git update-ref -d refs/heads/bogus
+rm -f ".git/objects/$new"
+
+new=nothing
+test_expect_success 'email with embedded > is not okay' '
+	git cat-file commit HEAD >basis &&
+	sed "s/@[a-z]/&>/" basis >bad-email &&
+	new=$(git hash-object -t commit -w --stdin <bad-email) &&
+	echo "$new" &&
+	git update-ref refs/heads/bogus "$new" &&
+	git fsck 2>out &&
+	grep "error in commit $new" out
+'
+git update-ref -d refs/heads/bogus
+rm -f ".git/objects/$new"
+
 cat > invalid-tag <<EOF
 object ffffffffffffffffffffffffffffffffffffffff
 type commit
-- 
1.7.0.6.2.g02f3f0.dirty

  reply	other threads:[~2010-04-24 16:05 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-04-24  0:45 [PATCH] fast-import docs: LT is valid in email, GT is not Mark Lodato
2010-04-24 16:06 ` Jonathan Nieder [this message]
2010-04-24 16:59   ` [PATCH] fsck: check ident lines in commit objects Jonathan Nieder
2010-04-24 19:04   ` Shawn O. Pearce
2010-04-24 20:38     ` [PATCH 0/2] fast-import: tighten up parsing ident line Jonathan Nieder
2010-04-24 20:50       ` [PATCH 1/2] fast-import: be strict about formatting of raw dates Jonathan Nieder
2010-04-24 21:10       ` [PATCH 2/2] fast-import: validate entire ident string Jonathan Nieder
2010-04-26 16:02         ` Shawn O. Pearce
2010-04-26 16:24           ` Jonathan Nieder
2010-04-26 16:30             ` Jonathan Nieder
2010-05-04 17:11           ` Junio C Hamano
2010-04-24 16:12 ` [PATCH] fast-import docs: LT is valid in email, GT is not Jonathan Nieder
2010-04-24 16:59   ` Mark Lodato

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20100424160608.GA14690@progeny.tock \
    --to=jrnieder@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=lodatom@gmail.com \
    --cc=nico@fluxnic.net \
    --cc=spearce@spearce.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).