git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/2] git apply: cope with whitespace differences
@ 2009-07-02 12:52 Giuseppe Bilotta
  2009-07-02 12:52 ` [PATCH 1/2] git apply: option to ignore " Giuseppe Bilotta
  0 siblings, 1 reply; 6+ messages in thread
From: Giuseppe Bilotta @ 2009-07-02 12:52 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Robert Fitzsimons, Giuseppe Bilotta

A small patchset that allows git-apply (and thus git-am and git-rebase)
to cope with whitespace differences that would otherwise generate a conflict.

The first patch is basically an update of Rober Fitzsimons' previous work
http://permalink.gmane.org/gmane.comp.version-control.git/7876
whereas the second one is entirely mine and makes the context lines
match the original instead of the patched whitespace.

Giuseppe Bilotta (2):
  git apply: option to ignore whitespace differences
  git apply: preserve original whitespace with --ignore-whitespace

 builtin-apply.c                        |   83 ++++++++++++++++++++++++++++++--
 contrib/completion/git-completion.bash |    2 +
 git-am.sh                              |    3 +-
 git-rebase.sh                          |    3 +
 t/t4107-apply-ignore-whitespace.sh     |   74 ++++++++++++++++++++++++++++
 5 files changed, 159 insertions(+), 6 deletions(-)
 create mode 100755 t/t4107-apply-ignore-whitespace.sh

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH 1/2] git apply: option to ignore whitespace differences
  2009-07-02 12:52 [PATCH 0/2] git apply: cope with whitespace differences Giuseppe Bilotta
@ 2009-07-02 12:52 ` Giuseppe Bilotta
  2009-07-02 12:52   ` [PATCH 2/2] git apply: preserve original whitespace with --ignore-whitespace Giuseppe Bilotta
  2009-07-02 16:46   ` [PATCH 1/2] git apply: option to ignore whitespace differences Junio C Hamano
  0 siblings, 2 replies; 6+ messages in thread
From: Giuseppe Bilotta @ 2009-07-02 12:52 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Robert Fitzsimons, Giuseppe Bilotta

Introduce --ignore-whitespace option to ignore whitespace differences
while applying the patch. The patch is an adaptatin of Robert
Fitzsimons' previous work, available at
http://permalink.gmane.org/gmane.comp.version-control.git/7876
including his test case.

'git am' and 'git rebase' are made aware of this option and pass it
through to 'git apply', and so is the bash git completion.
---
 builtin-apply.c                        |   53 +++++++++++++++++++++-
 contrib/completion/git-completion.bash |    2 +
 git-am.sh                              |    3 +-
 git-rebase.sh                          |    3 +
 t/t4107-apply-ignore-whitespace.sh     |   74 ++++++++++++++++++++++++++++++++
 5 files changed, 131 insertions(+), 4 deletions(-)
 create mode 100755 t/t4107-apply-ignore-whitespace.sh

diff --git a/builtin-apply.c b/builtin-apply.c
index dc0ff5e..86860d6 100644
--- a/builtin-apply.c
+++ b/builtin-apply.c
@@ -39,6 +39,7 @@ static int diffstat;
 static int numstat;
 static int summary;
 static int check;
+static int ignore_whitespace = 0;
 static int apply = 1;
 static int apply_in_reverse;
 static int apply_with_reject;
@@ -214,6 +215,45 @@ static uint32_t hash_line(const char *cp, size_t len)
 	return h;
 }
 
+/*
+ * Compare two memory areas ignoring whitespace differences
+ */
+static int memcmp_ignore_whitespace(const char *s1, const char *s2, size_t n)
+{
+	const char *stop1 = s1 + n;
+	const char *stop2 = s2 + n;
+	int result;
+
+	if (!n)
+		return 0;
+
+	do {
+		if (isspace(*s1) && isspace(*s2)) {
+			while (isspace(*s1)) {
+				s1++;
+			}
+			while (isspace(*s2))
+				s2++;
+		}
+		/* Check here instead of in the while because
+		   the whitespace discarding might have moved us
+		   past the end */
+		if ((s1 >= stop1) || (s2 >= stop2))
+			break;
+		result = *s1++ - *s2++;
+	} while (!result);
+
+	return result;
+}
+
+static int memcmp_switch(const char *s1, const char *s2, size_t n)
+{
+	if (ignore_whitespace)
+		return memcmp_ignore_whitespace(s1, s2, n);
+	else
+		return memcmp(s1, s2, n);
+}
+
 static void add_line_info(struct image *img, const char *bol, size_t len, unsigned flag)
 {
 	ALLOC_GROW(img->line_allocated, img->nr + 1, img->alloc);
@@ -1690,10 +1730,10 @@ static int match_fragment(struct image *img,
 	if ((match_end
 	     ? (try + preimage->len == img->len)
 	     : (try + preimage->len <= img->len)) &&
-	    !memcmp(img->buf + try, preimage->buf, preimage->len))
+	    !memcmp_switch(img->buf + try, preimage->buf, preimage->len))
 		return 1;
 
-	if (ws_error_action != correct_ws_error)
+	if (!ignore_whitespace && (ws_error_action != correct_ws_error))
 		return 0;
 
 	/*
@@ -1731,8 +1771,13 @@ static int match_fragment(struct image *img,
 		 * In either case, we are fixing the whitespace breakages
 		 * so we might as well take the fix together with their
 		 * real change.
+		 * If we are ignoring whitespace differences, don't check
+		 * for length equality.
 		 */
-		match = (tgtfixlen == fixlen && !memcmp(tgtfix, buf, fixlen));
+		if (ignore_whitespace)
+			match = !memcmp_ignore_whitespace(buf, tgtfix, fixlen);
+		else
+			match = (tgtfixlen == fixlen && !memcmp(tgtfix, buf, fixlen));
 
 		if (tgtfix != tgtfixbuf)
 			free(tgtfix);
@@ -3304,6 +3349,8 @@ int cmd_apply(int argc, const char **argv, const char *unused_prefix)
 		{ OPTION_CALLBACK, 0, "whitespace", &whitespace_option, "action",
 			"detect new or modified lines that have whitespace errors",
 			0, option_parse_whitespace },
+		OPT_BOOLEAN(0, "ignore-whitespace", &ignore_whitespace,
+			"ignore whitespace differences when finding context"),
 		OPT_BOOLEAN('R', "reverse", &apply_in_reverse,
 			"apply the patch in reverse"),
 		OPT_BOOLEAN(0, "unidiff-zero", &unidiff_zero,
diff --git a/contrib/completion/git-completion.bash b/contrib/completion/git-completion.bash
index ddb71e2..d3415b5 100755
--- a/contrib/completion/git-completion.bash
+++ b/contrib/completion/git-completion.bash
@@ -663,6 +663,7 @@ _git_am ()
 	--*)
 		__gitcomp "
 			--3way --committer-date-is-author-date --ignore-date
+			--ignore-whitespace
 			--interactive --keep --no-utf8 --signoff --utf8
 			--whitespace=
 			"
@@ -684,6 +685,7 @@ _git_apply ()
 			--stat --numstat --summary --check --index
 			--cached --index-info --reverse --reject --unidiff-zero
 			--apply --no-add --exclude=
+			--ignore-whitespace
 			--whitespace= --inaccurate-eof --verbose
 			"
 		return
diff --git a/git-am.sh b/git-am.sh
index d64d997..fe024b1 100755
--- a/git-am.sh
+++ b/git-am.sh
@@ -16,6 +16,7 @@ s,signoff       add a Signed-off-by line to the commit message
 u,utf8          recode into utf8 (default)
 k,keep          pass -k flag to git-mailinfo
 whitespace=     pass it through git-apply
+ignore-whitespace pass it through git-apply
 directory=      pass it through git-apply
 C=              pass it through git-apply
 p=              pass it through git-apply
@@ -303,7 +304,7 @@ do
 		git_apply_opt="$git_apply_opt $(sq "$1$2")"; shift ;;
 	--patch-format)
 		shift ; patch_format="$1" ;;
-	--reject)
+	--reject|--ignore-whitespace)
 		git_apply_opt="$git_apply_opt $1" ;;
 	--committer-date-is-author-date)
 		committer_date_is_author_date=t ;;
diff --git a/git-rebase.sh b/git-rebase.sh
index 18bc694..d741752 100755
--- a/git-rebase.sh
+++ b/git-rebase.sh
@@ -333,6 +333,9 @@ do
 			;;
 		esac
 		;;
+	--ignore-whitespace)
+		git_am_opt="$git_am_opt $1"
+		;;
 	--committer-date-is-author-date|--ignore-date)
 		git_am_opt="$git_am_opt $1"
 		force_rebase=t
diff --git a/t/t4107-apply-ignore-whitespace.sh b/t/t4107-apply-ignore-whitespace.sh
new file mode 100755
index 0000000..d31e0f3
--- /dev/null
+++ b/t/t4107-apply-ignore-whitespace.sh
@@ -0,0 +1,74 @@
+#!/bin/sh
+#
+# Copyright (c) 2005 Junio C Hamano
+# Copyright (c) 2005 Robert Fitzsimons
+#
+
+test_description='git-apply --ignore-whitespace.
+
+'
+. ./test-lib.sh
+
+# setup
+
+cat > patch1.patch <<\EOF
+diff --git a/main.c b/main.c
+new file mode 100644
+--- /dev/null
++++ b/main.c
+@@ -0,0 +1,23 @@
++#include <stdio.h>
++
++void print_int(int num);
++int func(int num);
++
++int main() {
++       int i;
++
++       for (i = 0; i < 10; i++) {
++               print_int(func(i));
++       }
++
++       return 0;
++}
++
++int func(int num) {
++       return num * num;
++}
++
++void print_int(int num) {
++       printf("%d", num);
++}
++
+EOF
+cat > patch2.patch <<\EOF
+diff --git a/main.c b/main.c
+--- a/main.c
++++ b/main.c
+@@ -10,6 +10,8 @@
+ 		print_int(func(i));
+ 	}
+ 
++	printf("\n");
++
+ 	return 0;
+ }
+
+EOF
+
+test_expect_success "S = patch1" \
+    'git-apply patch1.patch'
+
+test_expect_failure "F = patch2" \
+    'git-apply patch2.patch'
+
+test_expect_success "S = patch2 (--ignore-whitespace)" \
+    'git-apply --ignore-whitespace patch2.patch'
+
+rm -f main.c
+test_expect_success "S = patch1 (--ignore-whitespace)" \
+    'git-apply --ignore-whitespace patch1.patch'
+
+test_done
+
+
-- 
1.6.3.3.512.g1f6a.dirty

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 2/2] git apply: preserve original whitespace with --ignore-whitespace
  2009-07-02 12:52 ` [PATCH 1/2] git apply: option to ignore " Giuseppe Bilotta
@ 2009-07-02 12:52   ` Giuseppe Bilotta
  2009-07-02 16:46   ` [PATCH 1/2] git apply: option to ignore whitespace differences Junio C Hamano
  1 sibling, 0 replies; 6+ messages in thread
From: Giuseppe Bilotta @ 2009-07-02 12:52 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Robert Fitzsimons, Giuseppe Bilotta

---
 builtin-apply.c |   32 +++++++++++++++++++++++++++++---
 1 files changed, 29 insertions(+), 3 deletions(-)

diff --git a/builtin-apply.c b/builtin-apply.c
index 86860d6..d213cce 100644
--- a/builtin-apply.c
+++ b/builtin-apply.c
@@ -1640,6 +1640,10 @@ static void update_pre_post_images(struct image *preimage,
 	int i, ctx;
 	char *new, *old, *fixed;
 	struct image fixed_preimage;
+	/* Do we need more space for the postimage? */
+	size_t newlen = postimage->len;
+	if (len > preimage->len)
+		newlen += len - preimage->len;
 
 	/*
 	 * Update the preimage with whitespace fixes.  Note that we
@@ -1654,11 +1658,16 @@ static void update_pre_post_images(struct image *preimage,
 	*preimage = fixed_preimage;
 
 	/*
-	 * Adjust the common context lines in postimage, in place.
+	 * Adjust the common context lines in postimage, in place
+	 * if we are not ignoring whitespace differences.
 	 * This is possible because whitespace fixing does not make
 	 * the string grow.
 	 */
-	new = old = postimage->buf;
+	old = postimage->buf;
+	if (ignore_whitespace)
+		new = postimage->buf = xmalloc(newlen);
+	else
+		new = old;
 	fixed = preimage->buf;
 	for (i = ctx = 0; i < postimage->nr; i++) {
 		size_t len = postimage->line[i].len;
@@ -1730,8 +1739,25 @@ static int match_fragment(struct image *img,
 	if ((match_end
 	     ? (try + preimage->len == img->len)
 	     : (try + preimage->len <= img->len)) &&
-	    !memcmp_switch(img->buf + try, preimage->buf, preimage->len))
+	    !memcmp_switch(img->buf + try, preimage->buf, preimage->len)) {
+		if (ignore_whitespace) {
+			/*
+			 * Replace the preimage whitespace with the original one
+			 */
+			size_t newlen = 0;
+			for (i = 0; i < preimage->nr; i++) {
+				newlen += preimage->line[i].len =
+				          img->line[try_lno + i].len;
+			}
+			fixed_buf = xmalloc(newlen);
+			memcpy(fixed_buf, img->buf + try, newlen);
+
+			update_pre_post_images(
+				preimage, postimage,
+				fixed_buf, newlen);
+		}
 		return 1;
+	}
 
 	if (!ignore_whitespace && (ws_error_action != correct_ws_error))
 		return 0;
-- 
1.6.3.3.512.g1f6a.dirty

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH 1/2] git apply: option to ignore whitespace differences
  2009-07-02 12:52 ` [PATCH 1/2] git apply: option to ignore " Giuseppe Bilotta
  2009-07-02 12:52   ` [PATCH 2/2] git apply: preserve original whitespace with --ignore-whitespace Giuseppe Bilotta
@ 2009-07-02 16:46   ` Junio C Hamano
  2009-07-02 17:46     ` Giuseppe Bilotta
  1 sibling, 1 reply; 6+ messages in thread
From: Junio C Hamano @ 2009-07-02 16:46 UTC (permalink / raw)
  To: Giuseppe Bilotta; +Cc: git, Robert Fitzsimons

> diff --git a/builtin-apply.c b/builtin-apply.c
> index dc0ff5e..86860d6 100644
> --- a/builtin-apply.c
> +++ b/builtin-apply.c
> @@ -39,6 +39,7 @@ static int diffstat;
>  static int numstat;
>  static int summary;
>  static int check;
> +static int ignore_whitespace = 0;

s/ = 0//;

> +/*
> + * Compare two memory areas ignoring whitespace differences
> + */
> +static int memcmp_ignore_whitespace(const char *s1, const char *s2, size_t n)
> +{

Don't you think this function signature is bogus?

You are going to use this function to compare a line that is in the target
buffer (i.e. the contents of the current blob) with a line read from the
patch context when the user says they might match if you ignore whitespace
differences.

How on earth can you do that with only a single length parameter "n"?
Length of which side are you talking about?

Remember that git-apply is designed to be able to handle a patch that has
NUL in it (generated with "diff --text"), so strlen() is not an acceptable
answer.

> +static int memcmp_switch(const char *s1, const char *s2, size_t n)
> +{
> +	if (ignore_whitespace)
> +		return memcmp_ignore_whitespace(s1, s2, n);
> +	else
> +		return memcmp(s1, s2, n);
> +}
> +

This function signature shares the same bogosity with the previous one.

In addition, it's name and semantics is bogus.

The original implementation had encapsulated the notion that it is
comparing two lines entirely in match_fragment().  Its use of memcmp() was
an implementation detail of a half of the open-coded logic to compare two
lines (the other half being the comparison of lengths), and direct use of
memcmp() made perfect sense.  It checked the length matched.  Now it
wanted to make sure they matched byte-for-byte.

If you are separating that "compare two lines" logic into a helper
function, I would expect its name actually reflect its purpose, whose
behaviour may change depending on --ignore-whitespace.  The traditional
codepath would say "do they have the same length and match byte-for-byte?"
while the new loosened codepath would say "we do not care about the
whitespaces; do they match if we disregard ws differences?"

I also suspect that you might be able to optimize the existing "allow
whitespace-fixed match" a bit by "fix"ing the target buffer only once,
instead of doing so line-by-line for every offset that find_pos() checks
by calling match_fragment().  It is an independent issue, but it appears
to me that the change this patch wants to do to match_fragment() might
become cleaner if we did that conversion first, as match_fragment() itself
won't have to have two cases (early return for exact match, and match with
whitespace-fixed target).

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 1/2] git apply: option to ignore whitespace differences
  2009-07-02 16:46   ` [PATCH 1/2] git apply: option to ignore whitespace differences Junio C Hamano
@ 2009-07-02 17:46     ` Giuseppe Bilotta
  2009-07-02 18:29       ` Junio C Hamano
  0 siblings, 1 reply; 6+ messages in thread
From: Giuseppe Bilotta @ 2009-07-02 17:46 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Robert Fitzsimons

On Thu, Jul 2, 2009 at 6:46 PM, Junio C Hamano<gitster@pobox.com> wrote:
>> diff --git a/builtin-apply.c b/builtin-apply.c
>> index dc0ff5e..86860d6 100644
>> --- a/builtin-apply.c
>> +++ b/builtin-apply.c
>> @@ -39,6 +39,7 @@ static int diffstat;
>>  static int numstat;
>>  static int summary;
>>  static int check;
>> +static int ignore_whitespace = 0;
>
> s/ = 0//;

Ah, I wondered about that. I assume this leaves no possibility for
uninitialized values because of the way option parsing is done?

>> +/*
>> + * Compare two memory areas ignoring whitespace differences
>> + */
>> +static int memcmp_ignore_whitespace(const char *s1, const char *s2, size_t n)
>> +{
>
> Don't you think this function signature is bogus?

I did have a few doubts on it. I think I worked out a proper solution,
per your suggestion.

>> +static int memcmp_switch(const char *s1, const char *s2, size_t n)
>
> This function signature shares the same bogosity with the previous one.
>
> In addition, it's name and semantics is bogus.

> If you are separating that "compare two lines" logic into a helper
> function, I would expect its name actually reflect its purpose, whose
> behaviour may change depending on --ignore-whitespace.  The traditional
> codepath would say "do they have the same length and match byte-for-byte?"
> while the new loosened codepath would say "we do not care about the
> whitespaces; do they match if we disregard ws differences?"

Indeed. See upcoming patch for a better name 8-)

> I also suspect that you might be able to optimize the existing "allow
> whitespace-fixed match" a bit by "fix"ing the target buffer only once,
> instead of doing so line-by-line for every offset that find_pos() checks
> by calling match_fragment().  It is an independent issue, but it appears
> to me that the change this patch wants to do to match_fragment() might
> become cleaner if we did that conversion first, as match_fragment() itself
> won't have to have two cases (early return for exact match, and match with
> whitespace-fixed target).

But OTOH that would 'waste time' fixing whitespace when it might not
be needed ...

-- 
Giuseppe "Oblomov" Bilotta

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 1/2] git apply: option to ignore whitespace differences
  2009-07-02 17:46     ` Giuseppe Bilotta
@ 2009-07-02 18:29       ` Junio C Hamano
  0 siblings, 0 replies; 6+ messages in thread
From: Junio C Hamano @ 2009-07-02 18:29 UTC (permalink / raw)
  To: Giuseppe Bilotta; +Cc: git, Robert Fitzsimons

Giuseppe Bilotta <giuseppe.bilotta@gmail.com> writes:

> On Thu, Jul 2, 2009 at 6:46 PM, Junio C Hamano<gitster@pobox.com> wrote:
>>> diff --git a/builtin-apply.c b/builtin-apply.c
>>> index dc0ff5e..86860d6 100644
>>> --- a/builtin-apply.c
>>> +++ b/builtin-apply.c
>>> @@ -39,6 +39,7 @@ static int diffstat;
>>>  static int numstat;
>>>  static int summary;
>>>  static int check;
>>> +static int ignore_whitespace = 0;
>>
>> s/ = 0//;
>
> Ah, I wondered about that. I assume this leaves no possibility for
> uninitialized values because of the way option parsing is done?

It is not "uninitialized" to begin with.

C initializes static variables without explicit initial values to 0.

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2009-07-02 18:29 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-07-02 12:52 [PATCH 0/2] git apply: cope with whitespace differences Giuseppe Bilotta
2009-07-02 12:52 ` [PATCH 1/2] git apply: option to ignore " Giuseppe Bilotta
2009-07-02 12:52   ` [PATCH 2/2] git apply: preserve original whitespace with --ignore-whitespace Giuseppe Bilotta
2009-07-02 16:46   ` [PATCH 1/2] git apply: option to ignore whitespace differences Junio C Hamano
2009-07-02 17:46     ` Giuseppe Bilotta
2009-07-02 18:29       ` Junio C Hamano

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).