From: Jonathan Nieder <jrnieder@gmail.com>
To: "Shawn O. Pearce" <spearce@spearce.org>
Cc: Mark Lodato <lodatom@gmail.com>,
git@vger.kernel.org, Nicolas Pitre <nico@fluxnic.net>
Subject: [PATCH 2/2] fast-import: validate entire ident string
Date: Sat, 24 Apr 2010 16:10:42 -0500 [thread overview]
Message-ID: <20100424211042.GC24948@progeny.tock> (raw)
In-Reply-To: <20100424203827.GA24948@progeny.tock>
The author, committer, and tagger name and email should not include
any embedded <, >, or newline characters. The format of the
identification string is
('author'|'committer'|'tagger') sp name sp < email > sp date
If an object has no name attached, then git expects to find two spaces
in a row.
Helped-by: Mark Lodato <lodatom@gmail.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
---
For malformed input, the parser in pretty.c and ‘git commit --amend’
tend to end up with different ideas of who the author is. A lot of
the time, commit --amend gives up with "fatal: invalid commit".
Documentation/git-fast-import.txt | 9 ++--
fast-import.c | 54 ++++++++++++++++----------
t/t9300-fast-import.sh | 75 +++++++++++++++++++++++++++++++++++++
3 files changed, 113 insertions(+), 25 deletions(-)
diff --git a/Documentation/git-fast-import.txt b/Documentation/git-fast-import.txt
index 19082b0..ee725c6 100644
--- a/Documentation/git-fast-import.txt
+++ b/Documentation/git-fast-import.txt
@@ -337,8 +337,8 @@ change to the project.
....
'commit' SP <ref> LF
mark?
- ('author' (SP <name>)? SP LT <email> GT SP <when> LF)?
- 'committer' (SP <name>)? SP LT <email> GT SP <when> LF
+ ('author' SP <name>? SP LT <email> GT SP <when> LF)?
+ 'committer' SP <name>? SP LT <email> GT SP <when> LF
data
('from' SP <committish> LF)?
('merge' SP <committish> LF)?
@@ -393,8 +393,9 @@ Here `<name>` is the person's display name (for example
(``cm@example.com''). `LT` and `GT` are the literal less-than (\x3c)
and greater-than (\x3e) symbols. These are required to delimit
the email address from the other fields in the line. Note that
-`<name>` is free-form and may contain any sequence of bytes, except
-`LT` and `LF`. It is typically UTF-8 encoded.
+`<name>` and `<email>` are free-form and may contain any sequence
+of bytes that are not `LT`, `GT`, or `LF`. Both are typically UTF-8
+encoded.
The time of the change is specified by `<when>` using the date format
that was selected by the \--date-format=<fmt> command line option.
diff --git a/fast-import.c b/fast-import.c
index 1701cf1..d919168 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -19,8 +19,8 @@ Format of STDIN stream:
new_commit ::= 'commit' sp ref_str lf
mark?
- ('author' (sp name)? sp '<' email '>' sp when lf)?
- 'committer' (sp name)? sp '<' email '>' sp when lf
+ ('author' sp name? sp '<' email '>' sp when lf)?
+ 'committer' sp name? sp '<' email '>' sp when lf
commit_msg
('from' sp committish lf)?
('merge' sp committish lf)*
@@ -47,7 +47,7 @@ Format of STDIN stream:
new_tag ::= 'tag' sp tag_str lf
'from' sp committish lf
- ('tagger' (sp name)? sp '<' email '>' sp when lf)?
+ ('tagger' sp name? sp '<' email '>' sp when lf)?
tag_msg;
tag_msg ::= data;
@@ -123,9 +123,8 @@ Format of STDIN stream:
sha1exp ::= # Any valid GIT SHA1 expression;
hexsha1 ::= # SHA1 in hexadecimal format;
- # note: name and email are UTF8 strings, however name must not
- # contain '<' or lf and email must not contain any of the
- # following: '<', '>', lf.
+ # note: name and email are UTF8 strings, however name and email
+ # must not contain any of the following: '<', '>', lf.
#
name ::= # valid GIT author/committer name;
email ::= # valid GIT author/committer email;
@@ -1929,34 +1928,47 @@ static int validate_raw_date(const char *src, char *result, int maxlen)
return 0;
}
-static char *parse_ident(const char *buf)
+static size_t parse_name_and_email(const char *src, char **result, size_t extra)
{
- const char *gt;
+ const char *lt, *gt;
size_t name_len;
- char *ident;
- gt = strrchr(buf, '>');
- if (!gt)
- die("Missing > in ident string: %s", buf);
+ lt = src + strcspn(src, "<>\n");
+ if (lt == src || lt[-1] != ' ' || *lt != '<')
+ die("Invalid name in ident string: %s", src);
+ gt = lt + 1 + strcspn(lt + 1, "<>\n");
+ if (*gt != '>')
+ die("Invalid email in ident string: %s", src);
gt++;
if (*gt != ' ')
- die("Missing space after > in ident string: %s", buf);
+ die("Missing space after > in ident string: %s", src);
gt++;
- name_len = gt - buf;
- ident = xmalloc(name_len + 24);
- strncpy(ident, buf, name_len);
+ name_len = gt - src;
+ *result = xmalloc(name_len + extra);
+ memcpy(*result, src, name_len);
+ return name_len;
+}
+
+static char *parse_ident(const char *buf)
+{
+ const char *date;
+ size_t name_len;
+ char *ident;
+
+ name_len = parse_name_and_email(buf, &ident, 24);
+ date = buf + name_len;
switch (whenspec) {
case WHENSPEC_RAW:
- if (validate_raw_date(gt, ident + name_len, 24) < 0)
- die("Invalid raw date \"%s\" in ident: %s", gt, buf);
+ if (validate_raw_date(date, ident + name_len, 24) < 0)
+ die("Invalid raw date \"%s\" in ident: %s", date, buf);
break;
case WHENSPEC_RFC2822:
- if (parse_date(gt, ident + name_len, 24) < 0)
- die("Invalid rfc2822 date \"%s\" in ident: %s", gt, buf);
+ if (parse_date(date, ident + name_len, 24) < 0)
+ die("Invalid rfc2822 date \"%s\" in ident: %s", date, buf);
break;
case WHENSPEC_NOW:
- if (strcmp("now", gt))
+ if (strcmp("now", date))
die("Date in ident must be 'now': %s", buf);
datestamp(ident + name_len, 24);
break;
diff --git a/t/t9300-fast-import.sh b/t/t9300-fast-import.sh
index ed653a7..a7e379f 100755
--- a/t/t9300-fast-import.sh
+++ b/t/t9300-fast-import.sh
@@ -348,6 +348,81 @@ test_expect_success \
cat >input <<INPUT_END
commit refs/heads/branch
+author <$GIT_AUTHOR_EMAIL> Sat, 24 Apr 2010 14:49:52 -0500
+committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> Tue Feb 6 12:35:01 2007 -0500
+data <<COMMIT
+Nameless author, first attempt
+COMMIT
+
+from refs/heads/branch^0
+
+INPUT_END
+test_expect_success 'E: require space after author name' '
+ test_must_fail git fast-import --date-format=rfc2822 <input
+'
+
+cat >input <<INPUT_END
+commit refs/heads/branch
+author <$GIT_AUTHOR_EMAIL> Sat, 24 Apr 2010 14:49:52 -0500
+committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> Tue Feb 6 12:35:01 2007 -0500
+data <<COMMIT
+Nameless author
+COMMIT
+
+from refs/heads/branch^0
+
+INPUT_END
+test_expect_success 'E: do not require author name, though' '
+ git fast-import --date-format=rfc2822 <input
+'
+
+cat >input <<INPUT_END
+commit refs/heads/branch
+author $GIT_AUTHOR_NAME <$GIT_AUTHOR_EMAIL> Sat, 24 Apr 2010 14:49:52 -0500
+committer C O >Mitter <$GIT_COMMITTER_EMAIL> Tue Feb 6 12:35:01 2007 -0500
+data <<COMMIT
+Odd committer
+COMMIT
+
+from refs/heads/branch^0
+
+INPUT_END
+test_expect_success 'E: unparsable committer' '
+ test_must_fail git fast-import --date-format=rfc2822 <input
+'
+
+cat >input <<INPUT_END
+commit refs/heads/branch
+author $GIT_AUTHOR_NAME <$GIT_AUTHOR_EMAIL> Sat, 24 Apr 2010 15:05:27 -0500
+committer $GIT_COMMITTER_NAME <aggh@<example.com> Tue Feb 6 12:35:01 2007 -0500
+data <<COMMIT
+Odd email
+COMMIT
+
+from refs/heads/branch^0
+
+INPUT_END
+test_expect_success 'E: unparsable email' '
+ test_must_fail git fast-import --date-format=rfc2822 <input
+'
+
+cat >input <<INPUT_END
+commit refs/heads/branch
+author $GIT_AUTHOR_NAME <$GIT_AUTHOR_EMAIL> Sat, 24 Apr 2010 15:05:27 -0500
+committer $GIT_COMMITTER_NAME <äggh!some!other!machine!example> Tue Feb 6 12:35:01 2007 -0500
+data <<COMMIT
+Bang path
+COMMIT
+
+from refs/heads/branch^0
+
+INPUT_END
+test_expect_success 'E: okay email' '
+ git fast-import --date-format=rfc2822 <input
+'
+
+cat >input <<INPUT_END
+commit refs/heads/branch
author $GIT_AUTHOR_NAME <$GIT_AUTHOR_EMAIL> 1170783301 - 0500
committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> 117078330 -0500
data <<COMMIT
--
1.7.1.rc1
next prev parent reply other threads:[~2010-04-24 21:10 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-04-24 0:45 [PATCH] fast-import docs: LT is valid in email, GT is not Mark Lodato
2010-04-24 16:06 ` [PATCH] fsck: check ident lines in commit objects Jonathan Nieder
2010-04-24 16:59 ` Jonathan Nieder
2010-04-24 19:04 ` Shawn O. Pearce
2010-04-24 20:38 ` [PATCH 0/2] fast-import: tighten up parsing ident line Jonathan Nieder
2010-04-24 20:50 ` [PATCH 1/2] fast-import: be strict about formatting of raw dates Jonathan Nieder
2010-04-24 21:10 ` Jonathan Nieder [this message]
2010-04-26 16:02 ` [PATCH 2/2] fast-import: validate entire ident string Shawn O. Pearce
2010-04-26 16:24 ` Jonathan Nieder
2010-04-26 16:30 ` Jonathan Nieder
2010-05-04 17:11 ` Junio C Hamano
2010-04-24 16:12 ` [PATCH] fast-import docs: LT is valid in email, GT is not Jonathan Nieder
2010-04-24 16:59 ` Mark Lodato
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20100424211042.GC24948@progeny.tock \
--to=jrnieder@gmail.com \
--cc=git@vger.kernel.org \
--cc=lodatom@gmail.com \
--cc=nico@fluxnic.net \
--cc=spearce@spearce.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).