git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v3 2/4] convert: fix normalization of foreign idents
  2010-09-18 14:30 [PATCH v3 0/4] fix normalization of foreign idents Marcus Comstedt
@ 2010-08-23  7:05 ` Marcus Comstedt
  2010-09-18 21:31   ` Junio C Hamano
  2010-09-07 19:16 ` [PATCH v3 3/4] t0021: test checkout and commit " Marcus Comstedt
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 7+ messages in thread
From: Marcus Comstedt @ 2010-08-23  7:05 UTC (permalink / raw)
  To: git; +Cc: gitster

Since ident_to_worktree() does not touch $Id$ tags which contain
foreign expansions in the repository, make sure that ident_to_git()
does not either.  This fixes the problem that such files show
spurious modification upon checkout.

There is however one case where we want ident_to_git() to normalize
the tag to $Id$ despite the asymmetry:  When committing a modification
to a file which has a foreign ident, the foreign ident should be
replaced with a regular git ident.  Thus, add a new parameter to
convert_to_git() that indicates if we want the foreign idents
normalized after all.

Signed-off-by: Marcus Comstedt <marcus@mc.pp.se>
---
 builtin/apply.c |    2 +-
 builtin/blame.c |    2 +-
 cache.h         |    8 +++++++-
 combine-diff.c  |    2 +-
 convert.c       |   23 ++++++++++++++++++-----
 diff.c          |    2 +-
 sha1_file.c     |    3 ++-
 7 files changed, 31 insertions(+), 11 deletions(-)

diff --git a/builtin/apply.c b/builtin/apply.c
index 638e7be..fe8d638 100644
--- a/builtin/apply.c
+++ b/builtin/apply.c
@@ -1932,7 +1932,7 @@ static int read_old_data(struct stat *st, const char *path, struct strbuf *buf)
 	case S_IFREG:
 		if (strbuf_read_file(buf, path, st->st_size) != st->st_size)
 			return error("unable to open or read %s", path);
-		convert_to_git(path, buf->buf, buf->len, buf, CHECKS_DISALLOWED);
+	        convert_to_git(path, buf->buf, buf->len, buf, CHECKS_DISALLOWED, NORMALIZE_FOR_COMPARE);
 		return 0;
 	default:
 		return -1;
diff --git a/builtin/blame.c b/builtin/blame.c
index 850e165..8d8cbf3 100644
--- a/builtin/blame.c
+++ b/builtin/blame.c
@@ -2095,7 +2095,7 @@ static struct commit *fake_working_tree_commit(struct diff_options *opt,
 		if (strbuf_read(&buf, 0, 0) < 0)
 			die_errno("failed to read from stdin");
 	}
-	convert_to_git(path, buf.buf, buf.len, &buf, CHECKS_DISALLOWED);
+	convert_to_git(path, buf.buf, buf.len, &buf, CHECKS_DISALLOWED, NORMALIZE_FOR_COMPARE);
 	origin->file.ptr = buf.buf;
 	origin->file.size = buf.len;
 	pretend_sha1_file(buf.buf, buf.len, OBJ_BLOB, origin->blob_sha1);
diff --git a/cache.h b/cache.h
index 250abc1..3010e20 100644
--- a/cache.h
+++ b/cache.h
@@ -586,6 +586,11 @@ enum allow_checks {
 	CHECKS_ALLOWED = 1,
 };
 
+enum normalize_mode {
+        NORMALIZE_FOR_COMPARE = 0,
+	NORMALIZE_FOR_COMMIT = 1,
+};
+
 enum branch_track {
 	BRANCH_TRACK_UNSPECIFIED = -1,
 	BRANCH_TRACK_NEVER = 0,
@@ -1064,7 +1069,8 @@ extern void trace_argv_printf(const char **argv, const char *format, ...);
 /* convert.c */
 /* returns 1 if *dst was used */
 extern int convert_to_git(const char *path, const char *src, size_t len,
-                          struct strbuf *dst, enum allow_checks checksallowed);
+                          struct strbuf *dst, enum allow_checks checksallowed,
+			  enum normalize_mode mode);
 extern int convert_to_working_tree(const char *path, const char *src, size_t len, struct strbuf *dst);
 extern int renormalize_buffer(const char *path, const char *src, size_t len, struct strbuf *dst);
 
diff --git a/combine-diff.c b/combine-diff.c
index c7f132d..e36bf61 100644
--- a/combine-diff.c
+++ b/combine-diff.c
@@ -758,7 +758,7 @@ static void show_patch_diff(struct combine_diff_path *elem, int num_parent,
 			if (is_file) {
 				struct strbuf buf = STRBUF_INIT;
 
-				if (convert_to_git(elem->path, result, len, &buf, CHECKS_ALLOWED)) {
+				if (convert_to_git(elem->path, result, len, &buf, CHECKS_ALLOWED, NORMALIZE_FOR_COMPARE)) {
 					free(result);
 					result = strbuf_detach(&buf, &len);
 					result_size = len;
diff --git a/convert.c b/convert.c
index 8050c24..4eb28d8 100644
--- a/convert.c
+++ b/convert.c
@@ -520,9 +520,10 @@ static int count_ident(const char *cp, unsigned long size)
 }
 
 static int ident_to_git(const char *path, const char *src, size_t len,
-                        struct strbuf *buf, int ident)
+			struct strbuf *buf, int ident,
+			int normalize_foreign_ident)
 {
-	char *dst, *dollar;
+	char *dst, *dollar, *spc;
 
 	if (!ident || !count_ident(src, len))
 		return 0;
@@ -549,6 +550,16 @@ static int ident_to_git(const char *path, const char *src, size_t len,
 				continue;
 			}
 
+			spc = memchr(src + 4, ' ', dollar - src - 4);
+			if (spc && spc < dollar-1 &&
+			    !normalize_foreign_ident) {
+				/* There are spaces in unexpected places.
+				 * This is probably an id from some other
+				 * versioning system. Keep it for now.
+				 */
+				continue;
+			}
+
 			memcpy(dst, "Id$", 3);
 			dst += 3;
 			len -= dollar + 1 - src;
@@ -706,7 +717,8 @@ static enum action determine_action(enum action text_attr, enum eol eol_attr)
 }
 
 int convert_to_git(const char *path, const char *src, size_t len,
-                   struct strbuf *dst, enum allow_checks checksallowed)
+                   struct strbuf *dst, enum allow_checks checksallowed,
+		   enum normalize_mode mode)
 {
 	struct git_attr_check check[5];
 	enum action action = CRLF_GUESS;
@@ -739,7 +751,8 @@ int convert_to_git(const char *path, const char *src, size_t len,
 		src = dst->buf;
 		len = dst->len;
 	}
-	return ret | ident_to_git(path, src, len, dst, ident);
+	return ret | ident_to_git(path, src, len, dst, ident,
+				  mode == NORMALIZE_FOR_COMMIT);
 }
 
 static int convert_to_working_tree_internal(const char *path, const char *src,
@@ -797,5 +810,5 @@ int renormalize_buffer(const char *path, const char *src, size_t len, struct str
 		src = dst->buf;
 		len = dst->len;
 	}
-	return ret | convert_to_git(path, src, len, dst, CHECKS_DISALLOWED);
+	return ret | convert_to_git(path, src, len, dst, CHECKS_DISALLOWED, NORMALIZE_FOR_COMPARE);
 }
diff --git a/diff.c b/diff.c
index ed74f6b..eebe3dd 100644
--- a/diff.c
+++ b/diff.c
@@ -2375,7 +2375,7 @@ int diff_populate_filespec(struct diff_filespec *s, int size_only)
 		/*
 		 * Convert from working tree format to canonical git format
 		 */
-		if (convert_to_git(s->path, s->data, s->size, &buf, CHECKS_ALLOWED)) {
+		if (convert_to_git(s->path, s->data, s->size, &buf, CHECKS_ALLOWED, NORMALIZE_FOR_COMPARE)) {
 			size_t size = 0;
 			munmap(s->data, s->size);
 			s->should_munmap = 0;
diff --git a/sha1_file.c b/sha1_file.c
index 13624a6..cbebb75 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -2434,7 +2434,8 @@ static int index_mem(unsigned char *sha1, void *buf, size_t size,
 	if ((type == OBJ_BLOB) && path) {
 		struct strbuf nbuf = STRBUF_INIT;
 		if (convert_to_git(path, buf, size, &nbuf,
-		                   write_object ? CHECKS_ALLOWED : CHECKS_DISALLOWED)) {
+		                   write_object ? CHECKS_ALLOWED : CHECKS_DISALLOWED,
+				   write_object ? NORMALIZE_FOR_COMMIT : NORMALIZE_FOR_COMPARE)) {
 			buf = strbuf_detach(&nbuf, &size);
 			re_allocated = 1;
 		}
-- 
1.7.2

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH v3 3/4] t0021: test checkout and commit of foreign idents
  2010-09-18 14:30 [PATCH v3 0/4] fix normalization of foreign idents Marcus Comstedt
  2010-08-23  7:05 ` [PATCH v3 2/4] convert: " Marcus Comstedt
@ 2010-09-07 19:16 ` Marcus Comstedt
  2010-09-13 22:00 ` [PATCH v3 1/4] convert: generalize checksafe parameter Marcus Comstedt
  2010-09-18 13:53 ` [PATCH v3 4/4] Documentation: document foreign idents Marcus Comstedt
  3 siblings, 0 replies; 7+ messages in thread
From: Marcus Comstedt @ 2010-09-07 19:16 UTC (permalink / raw)
  To: git; +Cc: gitster

Add test cases for the following behaviors:

  * Checking out a file with a foreign ident should not flag
    the file as modified.  This is to prevent a mess when checking
    out old versions, and to allow a migration model where files
    are allowed to keep their foreign ident as long as their
    content is also "foreign", i.e. not modified since the migration
    to git.

  * Committing to a file with a foreign ident should replace the
    foreign ident with a native ident.  This is simply to get
    the normal behavior of ident:  When the contents of the file is
    updated, so is the ident.

Signed-off-by: Marcus Comstedt <marcus@mc.pp.se>
---
 t/t0021-conversion.sh |   58 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 58 insertions(+), 0 deletions(-)

diff --git a/t/t0021-conversion.sh b/t/t0021-conversion.sh
index 828e35b..cf83c02 100755
--- a/t/t0021-conversion.sh
+++ b/t/t0021-conversion.sh
@@ -93,4 +93,62 @@ test_expect_success expanded_in_repo '
 	cmp expanded-keywords expected-output
 '
 
+# Check that a file containing idents (native or foreign) is not
+# spuriously flagged as modified on checkout
+test_expect_success 'ident pristine after checkout' '
+	{
+		echo "File with foreign ident"
+		echo "\$Id\$"
+		echo "\$Id: Foreign Commit With Spaces \$"
+	} > native-and-foreign-idents &&
+
+	{
+		echo "File with foreign ident"
+		echo "\$Id: c389f8936d7baa13f463254d55b72e00e5496e3f \$"
+		echo "\$Id: Foreign Commit With Spaces \$"
+	} > expected-output &&
+
+	git add native-and-foreign-idents &&
+	git commit -m "File with native and foreign idents" &&
+
+	echo "native-and-foreign-idents ident" >> .gitattributes &&
+
+	rm -f native-and-foreign-idents &&
+	git checkout -- native-and-foreign-idents &&
+	cat native-and-foreign-idents &&
+	cmp native-and-foreign-idents expected-output &&
+	touch native-and-foreign-idents &&
+	git status --porcelain native-and-foreign-idents > output &&
+	test ! -s output &&
+	git diff -- native-and-foreign-idents > output &&
+	test ! -s output
+'
+
+# Check that actually modifying the file and committing it produces a
+# new ident on checkout
+test_expect_success 'foreign ident replaced on commit' '
+	{
+		echo "File with foreign ident"
+		echo "\$Id: cc874844b7868ce341059e6a87c50b6f37b75807 \$"
+		echo "\$Id: cc874844b7868ce341059e6a87c50b6f37b75807 \$"
+		echo "Some new content"
+	} > expected-output &&
+
+	echo "1	0	native-and-foreign-idents" > expected-stat1 &&
+	echo "2	1	native-and-foreign-idents" > expected-stat2 &&
+
+	echo "Some new content" >> native-and-foreign-idents &&
+	git diff --numstat -- native-and-foreign-idents > output &&
+	cmp output expected-stat1 &&
+	git add native-and-foreign-idents &&
+	git commit -m "Modified file" &&
+	git diff --numstat HEAD^ HEAD -- native-and-foreign-idents > output &&
+	cmp output expected-stat2 &&
+	rm -f native-and-foreign-idents &&
+	git checkout -- native-and-foreign-idents &&
+	cat native-and-foreign-idents &&
+	cmp native-and-foreign-idents expected-output
+'
+
+
 test_done
-- 
1.7.2

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH v3 1/4] convert: generalize checksafe parameter
  2010-09-18 14:30 [PATCH v3 0/4] fix normalization of foreign idents Marcus Comstedt
  2010-08-23  7:05 ` [PATCH v3 2/4] convert: " Marcus Comstedt
  2010-09-07 19:16 ` [PATCH v3 3/4] t0021: test checkout and commit " Marcus Comstedt
@ 2010-09-13 22:00 ` Marcus Comstedt
  2010-09-18 13:53 ` [PATCH v3 4/4] Documentation: document foreign idents Marcus Comstedt
  3 siblings, 0 replies; 7+ messages in thread
From: Marcus Comstedt @ 2010-09-13 22:00 UTC (permalink / raw)
  To: git; +Cc: gitster

The convert_to_git() function used to have a checksafe parameter,
which could be used to prevent safe_crlf checks by passing 0
instead of the value of the global variable safe_crlf.

Since preventing checks is a wider concept than just disabling
safe_crlf, generalize the parameter so that it can be used for other
types of checks as well.

Signed-off-by: Marcus Comstedt <marcus@mc.pp.se>
---
 builtin/apply.c |    2 +-
 builtin/blame.c |    2 +-
 cache.h         |    7 ++++++-
 combine-diff.c  |    2 +-
 convert.c       |    7 ++++---
 diff.c          |    2 +-
 sha1_file.c     |    2 +-
 7 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/builtin/apply.c b/builtin/apply.c
index 23c18c5..638e7be 100644
--- a/builtin/apply.c
+++ b/builtin/apply.c
@@ -1932,7 +1932,7 @@ static int read_old_data(struct stat *st, const char *path, struct strbuf *buf)
 	case S_IFREG:
 		if (strbuf_read_file(buf, path, st->st_size) != st->st_size)
 			return error("unable to open or read %s", path);
-		convert_to_git(path, buf->buf, buf->len, buf, 0);
+		convert_to_git(path, buf->buf, buf->len, buf, CHECKS_DISALLOWED);
 		return 0;
 	default:
 		return -1;
diff --git a/builtin/blame.c b/builtin/blame.c
index 1015354..850e165 100644
--- a/builtin/blame.c
+++ b/builtin/blame.c
@@ -2095,7 +2095,7 @@ static struct commit *fake_working_tree_commit(struct diff_options *opt,
 		if (strbuf_read(&buf, 0, 0) < 0)
 			die_errno("failed to read from stdin");
 	}
-	convert_to_git(path, buf.buf, buf.len, &buf, 0);
+	convert_to_git(path, buf.buf, buf.len, &buf, CHECKS_DISALLOWED);
 	origin->file.ptr = buf.buf;
 	origin->file.size = buf.len;
 	pretend_sha1_file(buf.buf, buf.len, OBJ_BLOB, origin->blob_sha1);
diff --git a/cache.h b/cache.h
index 2ef2fa3..250abc1 100644
--- a/cache.h
+++ b/cache.h
@@ -581,6 +581,11 @@ enum eol {
 
 extern enum eol eol;
 
+enum allow_checks {
+	CHECKS_DISALLOWED = 0,
+	CHECKS_ALLOWED = 1,
+};
+
 enum branch_track {
 	BRANCH_TRACK_UNSPECIFIED = -1,
 	BRANCH_TRACK_NEVER = 0,
@@ -1059,7 +1064,7 @@ extern void trace_argv_printf(const char **argv, const char *format, ...);
 /* convert.c */
 /* returns 1 if *dst was used */
 extern int convert_to_git(const char *path, const char *src, size_t len,
-                          struct strbuf *dst, enum safe_crlf checksafe);
+                          struct strbuf *dst, enum allow_checks checksallowed);
 extern int convert_to_working_tree(const char *path, const char *src, size_t len, struct strbuf *dst);
 extern int renormalize_buffer(const char *path, const char *src, size_t len, struct strbuf *dst);
 
diff --git a/combine-diff.c b/combine-diff.c
index 655fa89..c7f132d 100644
--- a/combine-diff.c
+++ b/combine-diff.c
@@ -758,7 +758,7 @@ static void show_patch_diff(struct combine_diff_path *elem, int num_parent,
 			if (is_file) {
 				struct strbuf buf = STRBUF_INIT;
 
-				if (convert_to_git(elem->path, result, len, &buf, safe_crlf)) {
+				if (convert_to_git(elem->path, result, len, &buf, CHECKS_ALLOWED)) {
 					free(result);
 					result = strbuf_detach(&buf, &len);
 					result_size = len;
diff --git a/convert.c b/convert.c
index 01de9a8..8050c24 100644
--- a/convert.c
+++ b/convert.c
@@ -706,7 +706,7 @@ static enum action determine_action(enum action text_attr, enum eol eol_attr)
 }
 
 int convert_to_git(const char *path, const char *src, size_t len,
-                   struct strbuf *dst, enum safe_crlf checksafe)
+                   struct strbuf *dst, enum allow_checks checksallowed)
 {
 	struct git_attr_check check[5];
 	enum action action = CRLF_GUESS;
@@ -733,7 +733,8 @@ int convert_to_git(const char *path, const char *src, size_t len,
 		len = dst->len;
 	}
 	action = determine_action(action, eol_attr);
-	ret |= crlf_to_git(path, src, len, dst, action, checksafe);
+	ret |= crlf_to_git(path, src, len, dst, action,
+			   (checksallowed? safe_crlf : 0));
 	if (ret) {
 		src = dst->buf;
 		len = dst->len;
@@ -796,5 +797,5 @@ int renormalize_buffer(const char *path, const char *src, size_t len, struct str
 		src = dst->buf;
 		len = dst->len;
 	}
-	return ret | convert_to_git(path, src, len, dst, 0);
+	return ret | convert_to_git(path, src, len, dst, CHECKS_DISALLOWED);
 }
diff --git a/diff.c b/diff.c
index 9a5c77c..ed74f6b 100644
--- a/diff.c
+++ b/diff.c
@@ -2375,7 +2375,7 @@ int diff_populate_filespec(struct diff_filespec *s, int size_only)
 		/*
 		 * Convert from working tree format to canonical git format
 		 */
-		if (convert_to_git(s->path, s->data, s->size, &buf, safe_crlf)) {
+		if (convert_to_git(s->path, s->data, s->size, &buf, CHECKS_ALLOWED)) {
 			size_t size = 0;
 			munmap(s->data, s->size);
 			s->should_munmap = 0;
diff --git a/sha1_file.c b/sha1_file.c
index 0cd9435..13624a6 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -2434,7 +2434,7 @@ static int index_mem(unsigned char *sha1, void *buf, size_t size,
 	if ((type == OBJ_BLOB) && path) {
 		struct strbuf nbuf = STRBUF_INIT;
 		if (convert_to_git(path, buf, size, &nbuf,
-		                   write_object ? safe_crlf : 0)) {
+		                   write_object ? CHECKS_ALLOWED : CHECKS_DISALLOWED)) {
 			buf = strbuf_detach(&nbuf, &size);
 			re_allocated = 1;
 		}
-- 
1.7.2

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH v3 4/4] Documentation: document foreign idents
  2010-09-18 14:30 [PATCH v3 0/4] fix normalization of foreign idents Marcus Comstedt
                   ` (2 preceding siblings ...)
  2010-09-13 22:00 ` [PATCH v3 1/4] convert: generalize checksafe parameter Marcus Comstedt
@ 2010-09-18 13:53 ` Marcus Comstedt
  3 siblings, 0 replies; 7+ messages in thread
From: Marcus Comstedt @ 2010-09-18 13:53 UTC (permalink / raw)
  To: git; +Cc: gitster

Add a short paragraph to the "ident" documentation about the
semantics and intended use of foreign idents.

Signed-off-by: Marcus Comstedt <marcus@mc.pp.se>
Signed-off-by: Henrik Grubbström <grubba@grubba.org>
---
 Documentation/gitattributes.txt |    7 +++++++
 1 files changed, 7 insertions(+), 0 deletions(-)

diff --git a/Documentation/gitattributes.txt b/Documentation/gitattributes.txt
index e5a27d8..9af1d6f 100644
--- a/Documentation/gitattributes.txt
+++ b/Documentation/gitattributes.txt
@@ -272,6 +272,13 @@ sign `$` upon checkout.  Any byte sequence that begins with
 `$Id:` and ends with `$` in the worktree file is replaced
 with `$Id$` upon check-in.
 
+When converting a repository from a different version control
+system, it can be useful to create blobs which contain expanded
+`$Id$` tags.  Git will recognize such "foreign idents" if they
+contain at least one space within the payload.  A foreign ident
+will be replaced by `$Id$` upon check-in, but is left unaltered
+upon checkout.
+
 
 `filter`
 ^^^^^^^^
-- 
1.7.2

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH v3 0/4] fix normalization of foreign idents
@ 2010-09-18 14:30 Marcus Comstedt
  2010-08-23  7:05 ` [PATCH v3 2/4] convert: " Marcus Comstedt
                   ` (3 more replies)
  0 siblings, 4 replies; 7+ messages in thread
From: Marcus Comstedt @ 2010-09-18 14:30 UTC (permalink / raw)
  To: git; +Cc: gitster

New in this reroll:

  * Added documentation of foreign idents

  * Raised the level of abstraction for the arguments to
    convert_to_git which control the conversion process (both
    old and new), as suggested by Junio

Marcus Comstedt (4):
  convert: generalize checksafe parameter
  convert: fix normalization of foreign idents
  t0021: test checkout and commit of foreign idents
  Documentation: document foreign idents

 Documentation/gitattributes.txt |    7 +++++
 builtin/apply.c                 |    2 +-
 builtin/blame.c                 |    2 +-
 cache.h                         |   13 ++++++++-
 combine-diff.c                  |    2 +-
 convert.c                       |   26 +++++++++++++----
 diff.c                          |    2 +-
 sha1_file.c                     |    3 +-
 t/t0021-conversion.sh           |   58 +++++++++++++++++++++++++++++++++++++++
 9 files changed, 103 insertions(+), 12 deletions(-)

-- 
1.7.2

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v3 2/4] convert: fix normalization of foreign idents
  2010-08-23  7:05 ` [PATCH v3 2/4] convert: " Marcus Comstedt
@ 2010-09-18 21:31   ` Junio C Hamano
  2010-09-18 22:38     ` Marcus Comstedt
  0 siblings, 1 reply; 7+ messages in thread
From: Junio C Hamano @ 2010-09-18 21:31 UTC (permalink / raw)
  To: Marcus Comstedt; +Cc: git

Marcus Comstedt <marcus@mc.pp.se> writes:

> Since ident_to_worktree() does not touch $Id$ tags which contain
> foreign expansions in the repository, make sure that ident_to_git()
> does not either.

Thanks.

> There is however one case where we want ident_to_git() to normalize
> the tag to $Id$ despite the asymmetry...

I'd actually think that is a bad idea.  The user _can_ choose to do so by
removing the stale part from '$Id: obsolate garbage$", of course.  Or we
can always normalize, which _might_ turn out to be a better solution.  In
either case, it would be better to be consistent.

> diff --git a/builtin/apply.c b/builtin/apply.c
> index 638e7be..fe8d638 100644
> --- a/builtin/apply.c
> +++ b/builtin/apply.c
> @@ -1932,7 +1932,7 @@ static int read_old_data(struct stat *st, const char *path, struct strbuf *buf)
>  	case S_IFREG:
>  		if (strbuf_read_file(buf, path, st->st_size) != st->st_size)
>  			return error("unable to open or read %s", path);
> -		convert_to_git(path, buf->buf, buf->len, buf, CHECKS_DISALLOWED);
> +	        convert_to_git(path, buf->buf, buf->len, buf, CHECKS_DISALLOWED, NORMALIZE_FOR_COMPARE);

In order to apply a patch to a file in the working tree, we first need to
read it, and this codepath is the place to do so.  The file in the working
tree may be "smudged" (in the sense of smudge/clean filter) and we "clean"
it so that we can compare and update with what we read from the patch (the
patch text is expected to be always "clean").

We don't check "safe-crlf", as it can "die" if it is allowed to when
check_safe_clrf() finds things it does not like.

We expect to write the result out to the working tree, and at that point
we will run convert_to_working_tree().  The result can also be added to
the index after we are done.  The resulting index may be used to create
the next commit.  Why "for compare" and not "for commit"?

If you get a patch to a file that contains a line with an ident, either as
a context or old line.  Both your working tree file and your index would
have "$Id: stale garbage$".

A patch may have

 - "$Id: stale garbage$" (made against the identical foreign source),
 - "$Id: updated garbage$" (made against an updated foreign source), or
 - "$Id$": (made against a conversion to git done elsewhere).

By telling git not to normalize "$Id: stale garbage$" you have, aren't you
making the patch made not to apply 2 out of 3 above cases, especially if
it came from your git friends (the last one)?

This patch doesn't touch "apply --cached" at all, which introduces yet
more unnecessary inconsistency.  If you made changes to the index through
that codepath, shouldn't the resulting object lose the $Id: stale garbage$
somewhere before it is made into the next commit?

My gut feeling is that this kind of complications aren't worth it.  If we
were to address the $Id$ issue, I wonder if we can fix it the other way
around, by making both directions always convert (i.e. ident_to_worktree()
and ident_to_git()); the end result of such a change feels much simpler to
explain to the users.

Anyway, let's continue reading your patch.

> diff --git a/builtin/blame.c b/builtin/blame.c
> index 850e165..8d8cbf3 100644
> --- a/builtin/blame.c
> +++ b/builtin/blame.c
> @@ -2095,7 +2095,7 @@ static struct commit *fake_working_tree_commit(struct diff_options *opt,
>  		if (strbuf_read(&buf, 0, 0) < 0)
>  			die_errno("failed to read from stdin");
>  	}
> -	convert_to_git(path, buf.buf, buf.len, &buf, CHECKS_DISALLOWED);
> +	convert_to_git(path, buf.buf, buf.len, &buf, CHECKS_DISALLOWED, NORMALIZE_FOR_COMPARE);

Not questionable.  We are "read-only".

> diff --git a/cache.h b/cache.h
> index 250abc1..3010e20 100644
> --- a/cache.h
> +++ b/cache.h
> @@ -586,6 +586,11 @@ enum allow_checks {
>  	CHECKS_ALLOWED = 1,
>  };
>  
> +enum normalize_mode {
> +        NORMALIZE_FOR_COMPARE = 0,
> +	NORMALIZE_FOR_COMMIT = 1,
> +};

Funny use of whitespaces.

> diff --git a/combine-diff.c b/combine-diff.c
> index c7f132d..e36bf61 100644
> --- a/combine-diff.c
> +++ b/combine-diff.c
> @@ -758,7 +758,7 @@ static void show_patch_diff(struct combine_diff_path *elem, int num_parent,
>  			if (is_file) {
>  				struct strbuf buf = STRBUF_INIT;
>  
> -				if (convert_to_git(elem->path, result, len, &buf, CHECKS_ALLOWED)) {
> +				if (convert_to_git(elem->path, result, len, &buf, CHECKS_ALLOWED, NORMALIZE_FOR_COMPARE)) {

Not questionable.  We are "read-only".

> @@ -797,5 +810,5 @@ int renormalize_buffer(const char *path, const char *src, size_t len, struct str
>  		src = dst->buf;
>  		len = dst->len;
>  	}
> -	return ret | convert_to_git(path, src, len, dst, CHECKS_DISALLOWED);
> +	return ret | convert_to_git(path, src, len, dst, CHECKS_DISALLOWED, NORMALIZE_FOR_COMPARE);

This is called from merge operation to read from the object store, and
apply double-conversion (first from git to working tree and then back to
git).  But the result is written out as an object and hopefully be
recorded as a merge commit.  Why "for compare"?  We would want a normalized
result, no?

> diff --git a/diff.c b/diff.c
> index ed74f6b..eebe3dd 100644
> --- a/diff.c
> +++ b/diff.c
> @@ -2375,7 +2375,7 @@ int diff_populate_filespec(struct diff_filespec *s, int size_only)
>  		/*
>  		 * Convert from working tree format to canonical git format
>  		 */
> -		if (convert_to_git(s->path, s->data, s->size, &buf, CHECKS_ALLOWED)) {
> +		if (convert_to_git(s->path, s->data, s->size, &buf, CHECKS_ALLOWED, NORMALIZE_FOR_COMPARE)) {

This feels wrong.  This is a "read-only" access just like combine-diff and
blame one.  I think CHECKS_ALLOWED is probably a bug in the original.
There is no reason to complain against what you haven't added.

> diff --git a/sha1_file.c b/sha1_file.c
> index 13624a6..cbebb75 100644
> --- a/sha1_file.c
> +++ b/sha1_file.c
> @@ -2434,7 +2434,8 @@ static int index_mem(unsigned char *sha1, void *buf, size_t size,
>  	if ((type == OBJ_BLOB) && path) {
>  		struct strbuf nbuf = STRBUF_INIT;
>  		if (convert_to_git(path, buf, size, &nbuf,
> -		                   write_object ? CHECKS_ALLOWED : CHECKS_DISALLOWED)) {
> +		                   write_object ? CHECKS_ALLOWED : CHECKS_DISALLOWED,
> +				   write_object ? NORMALIZE_FOR_COMMIT : NORMALIZE_FOR_COMPARE)) {

Although I can sort-of see why Steffen wanted to allow the former less
strict in these two commands:

    git hash-object <foo
    git hash-object -w <foo

the code now says that they will produce different results, which feels
utterly wrong.

I was hoping that from this patch a simple pattern, a correlation between
the two conversion tweak flags you now have, would emerge[*1*].  And
indeed it looks like it does.

If we fix the potential bug in diff_populate_filespec(), we see that
checkcrlf==0 goes hand in hand with "for compare".  The meaning of both
option is "Are we in read-only codepath?"[*2*] and both conversion filters
change their behaviour based on that single bit.

But the "for compare"/"for commit" we looked at during this review may
probably have to change, and in the end it may not be a simple matter of
"if we are writing we convert this way, but if we are reading we convert
that other way".  My gut feeling is still that if we want consistency, we
should just always convert, not the other way around, though.


[footnote]

*1* By the way, as I already said, I do not like adding more and more
conversion tweak flags (it makes it even more distasteful that this patch
does so especially to support a check-box "feature" like ident).

*2* That is the kind of option that specifies what it _means_, not what or
how it does, I suggested you to think about in the first round of review.
"Do we allow checks?" wasn't the kind of a meaningful option I was hoping
to see; we would want the code to clarify _why_ we allow or forego checks
at individual callsites.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v3 2/4] convert: fix normalization of foreign idents
  2010-09-18 21:31   ` Junio C Hamano
@ 2010-09-18 22:38     ` Marcus Comstedt
  0 siblings, 0 replies; 7+ messages in thread
From: Marcus Comstedt @ 2010-09-18 22:38 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git


Hi Junio.

Junio C Hamano <gitster@pobox.com> writes:

>> There is however one case where we want ident_to_git() to normalize
>> the tag to $Id$ despite the asymmetry...
>
> I'd actually think that is a bad idea.  The user _can_ choose to do so by
> removing the stale part from '$Id: obsolate garbage$", of course.

Yes, but it's easy to miss doing so.  And having two versions with the
same ident would be bad.  Furthermore, I couldn't think of a use case
where you want to use idents (as indicated by setting the "ident"
attribute), and do _not_ want stale data removed.


> Or we can always normalize, which _might_ turn out to be a better
> solution.

I'm not quite sure I understand what you mean by "always normalize" here.
Does this mean reverting the original "foreign ident" change, which is
what's preserving these idents on checkout?


[...]
> In order to apply a patch to a file in the working tree, we first need to
> read it, and this codepath is the place to do so.  The file in the working
> tree may be "smudged" (in the sense of smudge/clean filter) and we "clean"
> it so that we can compare and update with what we read from the patch (the
                    ^^^^^^^
> patch text is expected to be always "clean").
>
> We don't check "safe-crlf", as it can "die" if it is allowed to when
> check_safe_clrf() finds things it does not like.
>
> We expect to write the result out to the working tree, and at that point
> we will run convert_to_working_tree().  The result can also be added to
> the index after we are done.  The resulting index may be used to create
> the next commit.  Why "for compare" and not "for commit"?

I believe you answered that question yourself above.  ;-)

The file will be converted for commit once you add and commit the
result that was written to the working tree.  That happens in
index_mem(), not here.


> If you get a patch to a file that contains a line with an ident, either as
> a context or old line.  Both your working tree file and your index would
> have "$Id: stale garbage$".
>
> A patch may have
>
>  - "$Id: stale garbage$" (made against the identical foreign source),
>  - "$Id: updated garbage$" (made against an updated foreign source), or
>  - "$Id$": (made against a conversion to git done elsewhere).
>
> By telling git not to normalize "$Id: stale garbage$" you have, aren't you
> making the patch made not to apply 2 out of 3 above cases, especially if
> it came from your git friends (the last one)?

But by normalizing it, it would just be two of the other cases where
the patch does not apply, no?

By not normalizing it, a patch made against the head of a clone of the
same repository would apply at least.

Or am I missing something here?


> This patch doesn't touch "apply --cached" at all, which introduces yet
> more unnecessary inconsistency.  If you made changes to the index through
> that codepath, shouldn't the resulting object lose the $Id: stale garbage$
> somewhere before it is made into the next commit?

Foreign idents should always be removed by a commit which alters the
file in any other way, so there might be additional handling needed
for this case, I'll have to take a closer look at it.


> My gut feeling is that this kind of complications aren't worth it.  If we
> were to address the $Id$ issue, I wonder if we can fix it the other way
> around, by making both directions always convert (i.e. ident_to_worktree()
> and ident_to_git()); the end result of such a change feels much simpler to
> explain to the users.

Well, that's how it worked before 07814d9.


>> +enum normalize_mode {
>> +        NORMALIZE_FOR_COMPARE = 0,
>> +	NORMALIZE_FOR_COMMIT = 1,
>> +};
>
> Funny use of whitespaces.

Yes, it seems one of the lines got 8 spaces and the other one a tab.
Not intentional.


>> @@ -797,5 +810,5 @@ int renormalize_buffer(const char *path, const char *src, size_t len, struct str
>>  		src = dst->buf;
>>  		len = dst->len;
>>  	}
>> -	return ret | convert_to_git(path, src, len, dst, CHECKS_DISALLOWED);
>> +	return ret | convert_to_git(path, src, len, dst, CHECKS_DISALLOWED, NORMALIZE_FOR_COMPARE);
>
> This is called from merge operation to read from the object store, and
> apply double-conversion (first from git to working tree and then back to
> git).  But the result is written out as an object and hopefully be
> recorded as a merge commit.  Why "for compare"?  We would want a normalized
> result, no?

Ok, this was a new function and I didn't fully understand its purpose.
If the resulting buffer is written as an object, then "for commit"
should be used, yes.


>> @@ -2375,7 +2375,7 @@ int diff_populate_filespec(struct diff_filespec *s, int size_only)
>>  		/*
>>  		 * Convert from working tree format to canonical git format
>>  		 */
>> -		if (convert_to_git(s->path, s->data, s->size, &buf, CHECKS_ALLOWED)) {
>> +		if (convert_to_git(s->path, s->data, s->size, &buf, CHECKS_ALLOWED, NORMALIZE_FOR_COMPARE)) {
>
> This feels wrong.  This is a "read-only" access just like combine-diff and
> blame one.  I think CHECKS_ALLOWED is probably a bug in the original.
> There is no reason to complain against what you haven't added.

Yes, I was also a bit curious about this one.  If it is indeed a bug,
it probably ought to be fixed independently of this patch series.


> *2* That is the kind of option that specifies what it _means_, not what or
> how it does, I suggested you to think about in the first round of review.
> "Do we allow checks?" wasn't the kind of a meaningful option I was hoping
> to see; we would want the code to clarify _why_ we allow or forego checks
> at individual callsites.

Well, maybe it was a little unrealistic to hope that someone new to
the codebase should be able to discern exactly what you guys were
thinking when you wrote a particular piece of code...  ;-)


  // Marcus

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2010-09-18 22:38 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-09-18 14:30 [PATCH v3 0/4] fix normalization of foreign idents Marcus Comstedt
2010-08-23  7:05 ` [PATCH v3 2/4] convert: " Marcus Comstedt
2010-09-18 21:31   ` Junio C Hamano
2010-09-18 22:38     ` Marcus Comstedt
2010-09-07 19:16 ` [PATCH v3 3/4] t0021: test checkout and commit " Marcus Comstedt
2010-09-13 22:00 ` [PATCH v3 1/4] convert: generalize checksafe parameter Marcus Comstedt
2010-09-18 13:53 ` [PATCH v3 4/4] Documentation: document foreign idents Marcus Comstedt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).