All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jonathan Nieder <jrnieder@gmail.com>
To: "Shawn O. Pearce" <spearce@spearce.org>
Cc: Mark Lodato <lodatom@gmail.com>,
	git@vger.kernel.org, Nicolas Pitre <nico@fluxnic.net>
Subject: [PATCH 2/2] fast-import: validate entire ident string
Date: Sat, 24 Apr 2010 16:10:42 -0500	[thread overview]
Message-ID: <20100424211042.GC24948@progeny.tock> (raw)
In-Reply-To: <20100424203827.GA24948@progeny.tock>

The author, committer, and tagger name and email should not include
any embedded <, >, or newline characters.  The format of the
identification string is

  ('author'|'committer'|'tagger') sp name sp < email > sp date

If an object has no name attached, then git expects to find two spaces
in a row.

Helped-by: Mark Lodato <lodatom@gmail.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
---
For malformed input, the parser in pretty.c and ‘git commit --amend’
tend to end up with different ideas of who the author is.  A lot of
the time, commit --amend gives up with "fatal: invalid commit".

 Documentation/git-fast-import.txt |    9 ++--
 fast-import.c                     |   54 ++++++++++++++++----------
 t/t9300-fast-import.sh            |   75 +++++++++++++++++++++++++++++++++++++
 3 files changed, 113 insertions(+), 25 deletions(-)

diff --git a/Documentation/git-fast-import.txt b/Documentation/git-fast-import.txt
index 19082b0..ee725c6 100644
--- a/Documentation/git-fast-import.txt
+++ b/Documentation/git-fast-import.txt
@@ -337,8 +337,8 @@ change to the project.
 ....
 	'commit' SP <ref> LF
 	mark?
-	('author' (SP <name>)? SP LT <email> GT SP <when> LF)?
-	'committer' (SP <name>)? SP LT <email> GT SP <when> LF
+	('author' SP <name>? SP LT <email> GT SP <when> LF)?
+	'committer' SP <name>? SP LT <email> GT SP <when> LF
 	data
 	('from' SP <committish> LF)?
 	('merge' SP <committish> LF)?
@@ -393,8 +393,9 @@ Here `<name>` is the person's display name (for example
 (``cm@example.com'').  `LT` and `GT` are the literal less-than (\x3c)
 and greater-than (\x3e) symbols.  These are required to delimit
 the email address from the other fields in the line.  Note that
-`<name>` is free-form and may contain any sequence of bytes, except
-`LT` and `LF`.  It is typically UTF-8 encoded.
+`<name>` and `<email>` are free-form and may contain any sequence
+of bytes that are not `LT`, `GT`, or `LF`.  Both are typically UTF-8
+encoded.
 
 The time of the change is specified by `<when>` using the date format
 that was selected by the \--date-format=<fmt> command line option.
diff --git a/fast-import.c b/fast-import.c
index 1701cf1..d919168 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -19,8 +19,8 @@ Format of STDIN stream:
 
   new_commit ::= 'commit' sp ref_str lf
     mark?
-    ('author' (sp name)? sp '<' email '>' sp when lf)?
-    'committer' (sp name)? sp '<' email '>' sp when lf
+    ('author' sp name? sp '<' email '>' sp when lf)?
+    'committer' sp name? sp '<' email '>' sp when lf
     commit_msg
     ('from' sp committish lf)?
     ('merge' sp committish lf)*
@@ -47,7 +47,7 @@ Format of STDIN stream:
 
   new_tag ::= 'tag' sp tag_str lf
     'from' sp committish lf
-    ('tagger' (sp name)? sp '<' email '>' sp when lf)?
+    ('tagger' sp name? sp '<' email '>' sp when lf)?
     tag_msg;
   tag_msg ::= data;
 
@@ -123,9 +123,8 @@ Format of STDIN stream:
   sha1exp ::= # Any valid GIT SHA1 expression;
   hexsha1 ::= # SHA1 in hexadecimal format;
 
-     # note: name and email are UTF8 strings, however name must not
-     # contain '<' or lf and email must not contain any of the
-     # following: '<', '>', lf.
+     # note: name and email are UTF8 strings, however name and email
+     # must not contain any of the following: '<', '>', lf.
      #
   name  ::= # valid GIT author/committer name;
   email ::= # valid GIT author/committer email;
@@ -1929,34 +1928,47 @@ static int validate_raw_date(const char *src, char *result, int maxlen)
 	return 0;
 }
 
-static char *parse_ident(const char *buf)
+static size_t parse_name_and_email(const char *src, char **result, size_t extra)
 {
-	const char *gt;
+	const char *lt, *gt;
 	size_t name_len;
-	char *ident;
 
-	gt = strrchr(buf, '>');
-	if (!gt)
-		die("Missing > in ident string: %s", buf);
+	lt = src + strcspn(src, "<>\n");
+	if (lt == src || lt[-1] != ' ' || *lt != '<')
+		die("Invalid name in ident string: %s", src);
+	gt = lt + 1 + strcspn(lt + 1, "<>\n");
+	if (*gt != '>')
+		die("Invalid email in ident string: %s", src);
 	gt++;
 	if (*gt != ' ')
-		die("Missing space after > in ident string: %s", buf);
+		die("Missing space after > in ident string: %s", src);
 	gt++;
-	name_len = gt - buf;
-	ident = xmalloc(name_len + 24);
-	strncpy(ident, buf, name_len);
+	name_len = gt - src;
+	*result = xmalloc(name_len + extra);
+	memcpy(*result, src, name_len);
+	return name_len;
+}
+
+static char *parse_ident(const char *buf)
+{
+	const char *date;
+	size_t name_len;
+	char *ident;
+
+	name_len = parse_name_and_email(buf, &ident, 24);
+	date = buf + name_len;
 
 	switch (whenspec) {
 	case WHENSPEC_RAW:
-		if (validate_raw_date(gt, ident + name_len, 24) < 0)
-			die("Invalid raw date \"%s\" in ident: %s", gt, buf);
+		if (validate_raw_date(date, ident + name_len, 24) < 0)
+			die("Invalid raw date \"%s\" in ident: %s", date, buf);
 		break;
 	case WHENSPEC_RFC2822:
-		if (parse_date(gt, ident + name_len, 24) < 0)
-			die("Invalid rfc2822 date \"%s\" in ident: %s", gt, buf);
+		if (parse_date(date, ident + name_len, 24) < 0)
+			die("Invalid rfc2822 date \"%s\" in ident: %s", date, buf);
 		break;
 	case WHENSPEC_NOW:
-		if (strcmp("now", gt))
+		if (strcmp("now", date))
 			die("Date in ident must be 'now': %s", buf);
 		datestamp(ident + name_len, 24);
 		break;
diff --git a/t/t9300-fast-import.sh b/t/t9300-fast-import.sh
index ed653a7..a7e379f 100755
--- a/t/t9300-fast-import.sh
+++ b/t/t9300-fast-import.sh
@@ -348,6 +348,81 @@ test_expect_success \
 
 cat >input <<INPUT_END
 commit refs/heads/branch
+author <$GIT_AUTHOR_EMAIL> Sat, 24 Apr 2010 14:49:52 -0500
+committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> Tue Feb 6 12:35:01 2007 -0500
+data <<COMMIT
+Nameless author, first attempt
+COMMIT
+
+from refs/heads/branch^0
+
+INPUT_END
+test_expect_success 'E: require space after author name' '
+    test_must_fail git fast-import --date-format=rfc2822 <input
+'
+
+cat >input <<INPUT_END
+commit refs/heads/branch
+author  <$GIT_AUTHOR_EMAIL> Sat, 24 Apr 2010 14:49:52 -0500
+committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> Tue Feb 6 12:35:01 2007 -0500
+data <<COMMIT
+Nameless author
+COMMIT
+
+from refs/heads/branch^0
+
+INPUT_END
+test_expect_success 'E: do not require author name, though' '
+    git fast-import --date-format=rfc2822 <input
+'
+
+cat >input <<INPUT_END
+commit refs/heads/branch
+author $GIT_AUTHOR_NAME <$GIT_AUTHOR_EMAIL> Sat, 24 Apr 2010 14:49:52 -0500
+committer C O >Mitter <$GIT_COMMITTER_EMAIL> Tue Feb 6 12:35:01 2007 -0500
+data <<COMMIT
+Odd committer
+COMMIT
+
+from refs/heads/branch^0
+
+INPUT_END
+test_expect_success 'E: unparsable committer' '
+    test_must_fail git fast-import --date-format=rfc2822 <input
+'
+
+cat >input <<INPUT_END
+commit refs/heads/branch
+author $GIT_AUTHOR_NAME <$GIT_AUTHOR_EMAIL> Sat, 24 Apr 2010 15:05:27 -0500
+committer $GIT_COMMITTER_NAME <aggh@<example.com> Tue Feb 6 12:35:01 2007 -0500
+data <<COMMIT
+Odd email
+COMMIT
+
+from refs/heads/branch^0
+
+INPUT_END
+test_expect_success 'E: unparsable email' '
+    test_must_fail git fast-import --date-format=rfc2822 <input
+'
+
+cat >input <<INPUT_END
+commit refs/heads/branch
+author $GIT_AUTHOR_NAME <$GIT_AUTHOR_EMAIL> Sat, 24 Apr 2010 15:05:27 -0500
+committer $GIT_COMMITTER_NAME <äggh!some!other!machine!example> Tue Feb 6 12:35:01 2007 -0500
+data <<COMMIT
+Bang path
+COMMIT
+
+from refs/heads/branch^0
+
+INPUT_END
+test_expect_success 'E: okay email' '
+    git fast-import --date-format=rfc2822 <input
+'
+
+cat >input <<INPUT_END
+commit refs/heads/branch
 author $GIT_AUTHOR_NAME <$GIT_AUTHOR_EMAIL> 1170783301 -  0500
 committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> 117078330 -0500
 data <<COMMIT
-- 
1.7.1.rc1

  parent reply	other threads:[~2010-04-24 21:10 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-04-24  0:45 [PATCH] fast-import docs: LT is valid in email, GT is not Mark Lodato
2010-04-24 16:06 ` [PATCH] fsck: check ident lines in commit objects Jonathan Nieder
2010-04-24 16:59   ` Jonathan Nieder
2010-04-24 19:04   ` Shawn O. Pearce
2010-04-24 20:38     ` [PATCH 0/2] fast-import: tighten up parsing ident line Jonathan Nieder
2010-04-24 20:50       ` [PATCH 1/2] fast-import: be strict about formatting of raw dates Jonathan Nieder
2010-04-24 21:10       ` Jonathan Nieder [this message]
2010-04-26 16:02         ` [PATCH 2/2] fast-import: validate entire ident string Shawn O. Pearce
2010-04-26 16:24           ` Jonathan Nieder
2010-04-26 16:30             ` Jonathan Nieder
2010-05-04 17:11           ` Junio C Hamano
2010-04-24 16:12 ` [PATCH] fast-import docs: LT is valid in email, GT is not Jonathan Nieder
2010-04-24 16:59   ` Mark Lodato

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20100424211042.GC24948@progeny.tock \
    --to=jrnieder@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=lodatom@gmail.com \
    --cc=nico@fluxnic.net \
    --cc=spearce@spearce.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.