All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jonathan Nieder <jrnieder@gmail.com>
To: git@vger.kernel.org
Cc: "Junio C Hamano" <gitster@pobox.com>,
	"Guido Günther" <agx@sigxcpu.org>,
	"Giuseppe Iuculano" <iuculano@debian.org>
Subject: [PATCH 3/3] apply: Handle traditional patches with space in filename
Date: Fri, 23 Jul 2010 20:20:58 -0500	[thread overview]
Message-ID: <20100724012058.GD13670@burratino> (raw)
In-Reply-To: <20100724010618.GA13670@burratino>

To discover filenames from the --- and +++ lines in a traditional
unified diff, currently ‘git apply’ scans forward for a whitespace
character on each line and stops there.  It can’t use the whole line
because ‘diff -u’ likes to include timestamps, like so:

 --- foo	2000-07-12 16:56:50.020000414 -0500
 +++ bar	2010-07-12 16:56:50.020000414 -0500

The whitespace-seeking heuristic works great, even when the tab
has been converted to spaces by some email + copy-and-paste
related corruption.

Except for one problem: if the filename itself contains whitespace,
the inferred filename will be too short.

When Giuseppe ran into this problem, it was for a file creation
patch (filename ‘debian/licenses/LICENSE.global BSD-style Chromium’).
So one can’t use the list of files present in the index to deduce an
appropriate filename (not to mention that way lies madness; see
v0.99~402, 2005-05-31).

Instead, look for a timestamp and use that if present to mark the end
of the filename.  If no timestamp is present, the old heuristic is
used, with one exception: the space character \040 is not considered
terminating whitespace any more unless it is followed by a timestamp.

Reported-by: Giuseppe Iuculano <iuculano@debian.org>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Acked-by: Guido Günther <agx@sigxcpu.org>
---
Guido, I have carried over your ack from <http://bugs.debian.org/578752>.
I hope that is okay.  And to both Guido and Giuseppe, sorry to have
taken so long on this.

That is the end of the series.  I hope it was not too unpleasant a read.
As always, thoughts, improvements, bugs welcome.

Regards,
Jonathan

 builtin/apply.c                  |  193 +++++++++++++++++++++++++++++++++++---
 t/t4135-apply-weird-filenames.sh |    4 +-
 2 files changed, 181 insertions(+), 16 deletions(-)

diff --git a/builtin/apply.c b/builtin/apply.c
index efc109e..b975c99 100644
--- a/builtin/apply.c
+++ b/builtin/apply.c
@@ -449,23 +449,157 @@ static char *find_name_gnu(const char *line, char *def, int p_value)
 	return squash_slash(strbuf_detach(&name, NULL));
 }
 
-static char *find_name(const char *line, char *def, int p_value, int terminate)
+static size_t tz_len(const char *line, size_t len)
+{
+	const char *tz, *p;
+
+	if (len < strlen(" +0500") || line[len-strlen(" +0500")] != ' ')
+		return 0;
+	tz = line + len - strlen(" +0500");
+
+	if (tz[1] != '+' && tz[1] != '-')
+		return 0;
+
+	for (p = tz + 2; p != line + len; p++)
+		if (!isdigit(*p))
+			return 0;
+
+	return line + len - tz;
+}
+
+static size_t date_len(const char *line, size_t len)
+{
+	const char *date, *p;
+
+	if (len < strlen("72-02-05") || line[len-strlen("-05")] != '-')
+		return 0;
+	p = date = line + len - strlen("72-02-05");
+
+	if (!isdigit(*p++) || !isdigit(*p++) || *p++ != '-' ||
+	    !isdigit(*p++) || !isdigit(*p++) || *p++ != '-' ||
+	    !isdigit(*p++) || !isdigit(*p++))	/* Not a date. */
+		return 0;
+
+	if (date - line >= strlen("19") &&
+	    isdigit(date[-1]) && isdigit(date[-2]))	/* 4-digit year */
+		date -= strlen("19");
+
+	return line + len - date;
+}
+
+static size_t short_time_len(const char *line, size_t len)
+{
+	const char *time, *p;
+
+	if (len < strlen(" 07:01:32") || line[len-strlen(":32")] != ':')
+		return 0;
+	p = time = line + len - strlen(" 07:01:32");
+
+	/* Permit 1-digit hours? */
+	if (*p++ != ' ' ||
+	    !isdigit(*p++) || !isdigit(*p++) || *p++ != ':' ||
+	    !isdigit(*p++) || !isdigit(*p++) || *p++ != ':' ||
+	    !isdigit(*p++) || !isdigit(*p++))	/* Not a time. */
+		return 0;
+
+	return line + len - time;
+}
+
+static size_t fractional_time_len(const char *line, size_t len)
+{
+	const char *p;
+	size_t n;
+
+	/* Expected format: 19:41:17.620000023 */
+	if (!len || !isdigit(line[len - 1]))
+		return 0;
+	p = line + len - 1;
+
+	/* Fractional seconds. */
+	while (p > line && isdigit(*p))
+		p--;
+	if (*p != '.')
+		return 0;
+
+	/* Hours, minutes, and whole seconds. */
+	n = short_time_len(line, p - line);
+	if (!n)
+		return 0;
+
+	return line + len - p + n;
+}
+
+static size_t trailing_spaces_len(const char *line, size_t len)
+{
+	const char *p;
+
+	/* Expected format: ' ' x (1 or more)  */
+	if (!len || line[len - 1] != ' ')
+		return 0;
+
+	p = line + len;
+	while (p != line) {
+		p--;
+		if (*p != ' ')
+			return line + len - (p + 1);
+	}
+
+	/* All spaces! */
+	return len;
+}
+
+static size_t diff_timestamp_len(const char *line, size_t len)
+{
+	const char *end = line + len;
+	size_t n;
+
+	/*
+	 * Posix: 2010-07-05 19:41:17
+	 * GNU: 2010-07-05 19:41:17.620000023 -0500
+	 */
+
+	if (!isdigit(end[-1]))
+		return 0;
+
+	n = tz_len(line, end - line);
+	end -= n;
+
+	n = short_time_len(line, end - line);
+	if (!n)
+		n = fractional_time_len(line, end - line);
+	end -= n;
+
+	n = date_len(line, end - line);
+	if (!n)	/* No date.  Too bad. */
+		return 0;
+	end -= n;
+
+	if (end == line)	/* No space before date. */
+		return 0;
+	if (end[-1] == '\t') {	/* Success! */
+		end--;
+		return line + len - end;
+	}
+	if (end[-1] != ' ')	/* No space before date. */
+		return 0;
+
+	/* Whitespace damage. */
+	end -= trailing_spaces_len(line, end - line);
+	return line + len - end;
+}
+
+static char *find_name_common(const char *line, char *def, int p_value,
+				const char *end, int terminate)
 {
 	int len;
 	const char *start = NULL;
 
-	if (*line == '"') {
-		char *name = find_name_gnu(line, def, p_value);
-		if (name)
-			return name;
-	}
-
 	if (p_value == 0)
 		start = line;
-	for (;;) {
+	while (line != end) {
 		char c = *line;
 
-		if (isspace(c)) {
+		if (!end && isspace(c)) {
 			if (c == '\n')
 				break;
 			if (name_terminate(start, line-start, c, terminate))
@@ -505,6 +639,37 @@ static char *find_name(const char *line, char *def, int p_value, int terminate)
 	return squash_slash(xmemdupz(start, len));
 }
 
+static char *find_name(const char *line, char *def, int p_value, int terminate)
+{
+	if (*line == '"') {
+		char *name = find_name_gnu(line, def, p_value);
+		if (name)
+			return name;
+	}
+
+	return find_name_common(line, def, p_value, NULL, terminate);
+}
+
+static char *find_name_traditional(const char *line, char *def, int p_value)
+{
+	size_t len = strlen(line);
+	size_t date_len;
+
+	if (*line == '"') {
+		char *name = find_name_gnu(line, def, p_value);
+		if (name)
+			return name;
+	}
+
+	len = strchrnul(line, '\n') - line;
+	date_len = diff_timestamp_len(line, len);
+	if (!date_len)
+		return find_name(line, def, p_value, TERM_TAB);
+	len -= date_len;
+
+	return find_name_common(line, def, p_value, line + len, 0);
+}
+
 static int count_slashes(const char *cp)
 {
 	int cnt = 0;
@@ -527,7 +692,7 @@ static int guess_p_value(const char *nameline)
 
 	if (is_dev_null(nameline))
 		return -1;
-	name = find_name(nameline, NULL, 0, TERM_SPACE | TERM_TAB);
+	name = find_name_traditional(nameline, NULL, 0);
 	if (!name)
 		return -1;
 	cp = strchr(name, '/');
@@ -646,16 +811,16 @@ static void parse_traditional_patch(const char *first, const char *second, struc
 	if (is_dev_null(first)) {
 		patch->is_new = 1;
 		patch->is_delete = 0;
-		name = find_name(second, NULL, p_value, TERM_SPACE | TERM_TAB);
+		name = find_name_traditional(second, NULL, p_value);
 		patch->new_name = name;
 	} else if (is_dev_null(second)) {
 		patch->is_new = 0;
 		patch->is_delete = 1;
-		name = find_name(first, NULL, p_value, TERM_SPACE | TERM_TAB);
+		name = find_name_traditional(first, NULL, p_value);
 		patch->old_name = name;
 	} else {
-		name = find_name(first, NULL, p_value, TERM_SPACE | TERM_TAB);
-		name = find_name(second, name, p_value, TERM_SPACE | TERM_TAB);
+		name = find_name_traditional(first, NULL, p_value);
+		name = find_name_traditional(second, name, p_value);
 		if (has_epoch_timestamp(first)) {
 			patch->is_new = 1;
 			patch->is_delete = 0;
diff --git a/t/t4135-apply-weird-filenames.sh b/t/t4135-apply-weird-filenames.sh
index 2dcb040..52f9f5b 100755
--- a/t/t4135-apply-weird-filenames.sh
+++ b/t/t4135-apply-weird-filenames.sh
@@ -94,8 +94,8 @@ try_filename() {
 }
 
 try_filename 'plain'            'postimage.txt'
-try_filename 'with spaces'      'post image.txt' success failure failure
-try_filename 'with tab'         'post	image.txt' success failure failure
+try_filename 'with spaces'      'post image.txt'
+try_filename 'with tab'         'post	image.txt'
 try_filename 'with backslash'   'post\image.txt'
 try_filename 'with quote'       '"postimage".txt' success failure success
 
-- 
1.7.2.rc3

  parent reply	other threads:[~2010-07-24  1:22 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-07-24  1:06 [PATCH 0/3] apply: handle traditional patches with spaces in filename Jonathan Nieder
2010-07-24  1:09 ` [PATCH 1/3] apply: Split quoted filename handling into new function Jonathan Nieder
2010-07-24  1:11 ` [PATCH 2/3] tests: Test how well "git apply" copes with weird filenames Jonathan Nieder
2010-07-24  8:03   ` Andreas Schwab
2010-07-24  8:48     ` Jonathan Nieder
2010-07-24  1:20 ` Jonathan Nieder [this message]
  -- strict thread matches above, loose matches on Subject: below --
2010-08-11 23:35 What's cooking in git.git (Aug 2010, #02; Wed, 11) Junio C Hamano
2010-08-13 21:44 ` jn/apply-filename-with-sp (Re: What's cooking in git.git (Aug 2010, #02; Wed, 11)) Johannes Sixt
2010-08-14  2:27   ` Jonathan Nieder
2010-08-14 18:37     ` Johannes Sixt
2010-08-19  1:45       ` [PATCH v2 0/3] apply: handle traditional patches with space in filename Jonathan Nieder
2010-08-19  1:50         ` [PATCH 3/3] " Jonathan Nieder

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20100724012058.GD13670@burratino \
    --to=jrnieder@gmail.com \
    --cc=agx@sigxcpu.org \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=iuculano@debian.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.