git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2 1/3] tree-walk: learn get_tree_entry_follow_symlinks
@ 2015-05-08 22:38 dturner
  2015-05-08 22:38 ` [PATCH v2 2/3] sha1_name: get_sha1_with_context learns to follow symlinks dturner
                   ` (2 more replies)
  0 siblings, 3 replies; 8+ messages in thread
From: dturner @ 2015-05-08 22:38 UTC (permalink / raw)
  To: git; +Cc: David Turner

From: David Turner <dturner@twitter.com>

Add a new function, get_tree_entry_follow_symlinks, to tree-walk.[ch].
The function is not yet used.  It will be used to implement git
cat-file --batch --follow-symlinks.

The function locates an object by path, following symlinks in the
repository.  If the symlinks lead outside the repository, the function
reports this to the caller.

Signed-off-by: David Turner <dturner@twitter.com>
---
 tree-walk.c | 193 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 tree-walk.h |   2 +
 2 files changed, 195 insertions(+)

diff --git a/tree-walk.c b/tree-walk.c
index 5dd9a71..2df31a2 100644
--- a/tree-walk.c
+++ b/tree-walk.c
@@ -415,6 +415,12 @@ int traverse_trees(int n, struct tree_desc *t, struct traverse_info *info)
 	return error;
 }
 
+struct dir_state {
+	void *tree;
+	unsigned long size;
+	unsigned char sha1[20];
+};
+
 static int find_tree_entry(struct tree_desc *t, const char *name, unsigned char *result, unsigned *mode)
 {
 	int namelen = strlen(name);
@@ -478,6 +484,193 @@ int get_tree_entry(const unsigned char *tree_sha1, const char *name, unsigned ch
 	return retval;
 }
 
+/* This is Linux's built-in max for the number of symlinks to follow.
+ * That limit, of course, does not affect git, but it's a reasonable
+ * choice.
+ */
+#define GET_TREE_ENTRY_FOLLOW_SYMLINKS_MAX_LINKS 40
+
+/**
+ * Find a tree entry by following symlinks in tree_sha (which is
+ * assumed to be the root of the repository).  In the event that a
+ * symlink points outside the repository (e.g. a link to /foo or a
+ * root-level link to ../foo), the portion of the link which is
+ * outside the repository will be copied into result_path, and *mode
+ * will be set to 0.  Otherwise, result will be filled in with the
+ * sha1 of the found object, and *mode will hold the mode of the
+ * object.
+ */
+int get_tree_entry_follow_symlinks(unsigned char *tree_sha1, const char *name, unsigned char *result, unsigned char *result_path, unsigned *mode)
+{
+	int retval = -1;
+	void *tree;
+	struct dir_state *parents = NULL;
+	size_t parents_alloc = 0;
+	ssize_t parents_nr = 0;
+	unsigned long size;
+	unsigned char root[20];
+	unsigned char current_tree_sha1[20];
+	struct strbuf namebuf = STRBUF_INIT;
+	enum object_type type;
+	int already_have_tree = 0;
+	struct tree_desc t = {0};
+	int follows_remaining = GET_TREE_ENTRY_FOLLOW_SYMLINKS_MAX_LINKS;
+	int i;
+
+	strbuf_addstr(&namebuf, name);
+	hashcpy(current_tree_sha1, tree_sha1);
+
+	while (1) {
+		char *first_slash;
+		char *remainder = NULL;
+		int find_result;
+
+		if (!t.buffer) {
+			tree = read_object_with_reference(current_tree_sha1,
+							  tree_type, &size,
+							  root);
+			if (!tree)
+				goto done;
+
+			ALLOC_GROW(parents, parents_nr + 1, parents_alloc);
+			parents[parents_nr].tree = tree;
+			parents[parents_nr].size = size;
+			hashcpy(parents[parents_nr].sha1, root);
+
+			parents_nr++;
+
+			if (namebuf.buf[0] == '\0') {
+				hashcpy(result, root);
+				retval = 0;
+				goto done;
+			}
+
+			if (!size)
+				goto done;
+
+			/* descend */
+			init_tree_desc(&t, tree, size);
+		}
+
+		/* Handle symlinks to e.g. a//b by removing leading slashes */
+		while (namebuf.buf[0] == '/') {
+			strbuf_remove(&namebuf, 0, 1);
+		}
+
+		/* Split namebuf into a first component and a
+		 * remainder */
+		if ((first_slash = strchr(namebuf.buf, '/'))) {
+			*first_slash = 0;
+			remainder = first_slash + 1;
+		}
+
+		if (!strcmp(namebuf.buf, "..")) {
+			struct dir_state *parent;
+			/* We could end up with .. in the namebuf if
+			 * it appears in a symlink. */
+
+			if (parents_nr == 1) {
+				if (remainder)
+					*first_slash = '/';
+				if (strlcpy(result_path, namebuf.buf,
+					    PATH_MAX) < PATH_MAX) {
+					*mode = 0;
+					retval = 0;
+				}
+				goto done;
+			}
+			parent = &parents[parents_nr - 1];
+			free(parent->tree);
+			parents_nr--;
+			parent = &parents[parents_nr - 1];
+			init_tree_desc(&t, parent->tree, parent->size);
+			strbuf_remove(&namebuf, 0, remainder ? 3 : 2);
+			continue;
+		}
+
+		/* We could end up here via a symlink to dir/.. */
+		if (namebuf.buf[0] == '\0') {
+			hashcpy(result, parents[parents_nr - 1].sha1);
+			retval = 0;
+			goto done;
+		}
+
+		/* Look up the first (or only) path component
+		 * in the tree. */
+		find_result = find_tree_entry(&t, namebuf.buf,
+					      current_tree_sha1, mode);
+		if (find_result) {
+			retval = find_result;
+			goto done;
+		}
+
+		if (S_ISDIR(*mode)) {
+			if (!remainder) {
+				hashcpy(result, current_tree_sha1);
+				retval = 0;
+				goto done;
+			}
+			/* Descend the tree */
+			t.buffer = NULL;
+			strbuf_remove(&namebuf, 0,
+				      1 + first_slash - namebuf.buf);
+		} else if (S_ISREG(*mode)) {
+			if (!remainder) {
+				hashcpy(result, current_tree_sha1);
+				retval = 0;
+			}
+			goto done;
+		} else if (S_ISLNK(*mode)) {
+			/* Follow a symlink */
+			size_t link_len, len;
+			char *contents, *contents_start;
+			struct dir_state *parent;
+
+			if (follows_remaining-- == 0)
+				/* Too many symlinks followed */
+				goto done;
+
+			contents = read_sha1_file(current_tree_sha1, &type,
+						  &link_len);
+
+			if (!contents)
+				goto done;
+
+			if (contents[0] == '/') {
+				if (strlcpy(result_path,
+					    contents, PATH_MAX) < PATH_MAX) {
+					*mode = 0;
+					retval = 0;
+				}
+				goto done;
+			}
+
+			if (remainder)
+				len = first_slash - namebuf.buf;
+			else
+				len = namebuf.len;
+
+			contents_start = contents;
+
+			parent = &parents[parents_nr - 1];
+			init_tree_desc(&t, parent->tree, parent->size);
+			strbuf_splice(&namebuf, 0, len,
+				      contents_start, link_len);
+			if (remainder)
+				namebuf.buf[link_len] = '/';
+			free(contents);
+		}
+	}
+done:
+	for (i = 0; i < parents_nr; ++i) {
+		free(parents[i].tree);
+	}
+	free(parents);
+
+	strbuf_release(&namebuf);
+	return retval;
+}
+
 static int match_entry(const struct pathspec_item *item,
 		       const struct name_entry *entry, int pathlen,
 		       const char *match, int matchlen,
diff --git a/tree-walk.h b/tree-walk.h
index ae7fb3a..d9ad768 100644
--- a/tree-walk.h
+++ b/tree-walk.h
@@ -40,6 +40,8 @@ struct traverse_info;
 typedef int (*traverse_callback_t)(int n, unsigned long mask, unsigned long dirmask, struct name_entry *entry, struct traverse_info *);
 int traverse_trees(int n, struct tree_desc *t, struct traverse_info *info);
 
+int get_tree_entry_follow_symlinks(unsigned char *tree_sha1, const char *name, unsigned char *result, unsigned char *result_path, unsigned *mode);
+
 struct traverse_info {
 	struct traverse_info *prev;
 	struct name_entry name;
-- 
2.0.4.315.gad8727a-twtrsrc

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v2 2/3] sha1_name: get_sha1_with_context learns to follow symlinks
  2015-05-08 22:38 [PATCH v2 1/3] tree-walk: learn get_tree_entry_follow_symlinks dturner
@ 2015-05-08 22:38 ` dturner
  2015-05-09  3:45   ` Junio C Hamano
  2015-05-08 22:38 ` [PATCH v2 3/3] cat-file: add --follow-symlinks to --batch dturner
  2015-05-09  5:00 ` [PATCH v2 1/3] tree-walk: learn get_tree_entry_follow_symlinks Junio C Hamano
  2 siblings, 1 reply; 8+ messages in thread
From: dturner @ 2015-05-08 22:38 UTC (permalink / raw)
  To: git; +Cc: David Turner

From: David Turner <dturner@twitter.com>

Wire up get_sha1_with_context to call get_tree_entry_follow_symlinks
when GET_SHA1_FOLLOW_SYMLINKS is passed in flags. G_S_FOLLOW_SYMLINKS
is incompatible with G_S_ONLY_TO_DIE because the diagnosis that
ONLY_TO_DIE triggers does not consider symlinks.

Signed-off-by: David Turner <dturner@twitter.com>
---
 cache.h     | 15 ++++++++-------
 sha1_name.c | 24 ++++++++++++++++--------
 2 files changed, 24 insertions(+), 15 deletions(-)

diff --git a/cache.h b/cache.h
index 3d3244b..16743de 100644
--- a/cache.h
+++ b/cache.h
@@ -924,13 +924,14 @@ struct object_context {
 	unsigned mode;
 };
 
-#define GET_SHA1_QUIETLY        01
-#define GET_SHA1_COMMIT         02
-#define GET_SHA1_COMMITTISH     04
-#define GET_SHA1_TREE          010
-#define GET_SHA1_TREEISH       020
-#define GET_SHA1_BLOB	       040
-#define GET_SHA1_ONLY_TO_DIE 04000
+#define GET_SHA1_QUIETLY           01
+#define GET_SHA1_COMMIT            02
+#define GET_SHA1_COMMITTISH        04
+#define GET_SHA1_TREE             010
+#define GET_SHA1_TREEISH          020
+#define GET_SHA1_BLOB             040
+#define GET_SHA1_FOLLOW_SYMLINKS 0100
+#define GET_SHA1_ONLY_TO_DIE    04000
 
 extern int get_sha1(const char *str, unsigned char *sha1);
 extern int get_sha1_commit(const char *str, unsigned char *sha1);
diff --git a/sha1_name.c b/sha1_name.c
index 6d10f05..cbe679e 100644
--- a/sha1_name.c
+++ b/sha1_name.c
@@ -1434,15 +1434,21 @@ static int get_sha1_with_context_1(const char *name,
 			new_filename = resolve_relative_path(filename);
 			if (new_filename)
 				filename = new_filename;
-			ret = get_tree_entry(tree_sha1, filename, sha1, &oc->mode);
-			if (ret && only_to_die) {
-				diagnose_invalid_sha1_path(prefix, filename,
-							   tree_sha1,
-							   name, len);
+			if (flags & GET_SHA1_FOLLOW_SYMLINKS) {
+				ret = get_tree_entry_follow_symlinks(tree_sha1,
+					filename, sha1, oc->path, &oc->mode);
+			} else {
+				ret = get_tree_entry(tree_sha1, filename,
+						     sha1, &oc->mode);
+				if (ret && only_to_die) {
+					diagnose_invalid_sha1_path(prefix,
+								   filename,
+								   tree_sha1,
+								   name, len);
+				}
+				hashcpy(oc->tree, tree_sha1);
+				strlcpy(oc->path, filename, sizeof(oc->path));
 			}
-			hashcpy(oc->tree, tree_sha1);
-			strlcpy(oc->path, filename, sizeof(oc->path));
-
 			free(new_filename);
 			return ret;
 		} else {
@@ -1469,5 +1475,7 @@ void maybe_die_on_misspelt_object_name(const char *name, const char *prefix)
 
 int get_sha1_with_context(const char *str, unsigned flags, unsigned char *sha1, struct object_context *orc)
 {
+	if (flags & GET_SHA1_FOLLOW_SYMLINKS && flags & GET_SHA1_ONLY_TO_DIE)
+		die("BUG: incompatible flags for get_sha1_with_context");
 	return get_sha1_with_context_1(str, flags, NULL, sha1, orc);
 }
-- 
2.0.4.315.gad8727a-twtrsrc

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v2 3/3] cat-file: add --follow-symlinks to --batch
  2015-05-08 22:38 [PATCH v2 1/3] tree-walk: learn get_tree_entry_follow_symlinks dturner
  2015-05-08 22:38 ` [PATCH v2 2/3] sha1_name: get_sha1_with_context learns to follow symlinks dturner
@ 2015-05-08 22:38 ` dturner
  2015-05-09  5:00 ` [PATCH v2 1/3] tree-walk: learn get_tree_entry_follow_symlinks Junio C Hamano
  2 siblings, 0 replies; 8+ messages in thread
From: dturner @ 2015-05-08 22:38 UTC (permalink / raw)
  To: git; +Cc: David Turner

From: David Turner <dturner@twitter.com>

This wires the in-repo-symlink following code through to the cat-file
builtin.  In the event of an out-of-repo link, cat-file will print
the link in a new format.

Signed-off-by: David Turner <dturner@twitter.com>
---
 Documentation/git-cat-file.txt |  28 ++++++-
 builtin/cat-file.c             |  23 +++++-
 t/t1006-cat-file.sh            | 184 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 230 insertions(+), 5 deletions(-)

diff --git a/Documentation/git-cat-file.txt b/Documentation/git-cat-file.txt
index f6a16f4..18b67a3 100644
--- a/Documentation/git-cat-file.txt
+++ b/Documentation/git-cat-file.txt
@@ -10,7 +10,7 @@ SYNOPSIS
 --------
 [verse]
 'git cat-file' (-t | -s | -e | -p | <type> | --textconv ) <object>
-'git cat-file' (--batch | --batch-check) < <list-of-objects>
+'git cat-file' (--batch | --batch-check) [--follow-symlinks] < <list-of-objects>
 
 DESCRIPTION
 -----------
@@ -69,6 +69,19 @@ OPTIONS
 	not be combined with any other options or arguments.  See the
 	section `BATCH OUTPUT` below for details.
 
+--follow-symlinks::
+	Follow symlinks inside the repository.  Instead of providing
+	output about the link itself, provide output about the linked-to
+	object.  This option requires --batch or --batch-check.  In the
+	event of a symlink loop (or more than 40 symlinks in a symlink
+	resolution chain), the file will be treated as missing.  If a
+	symlink points outside the repository (e.g. a link to /foo or a
+	root-level link to ../foo), the portion of the link which is
+	outside the repository will be printed.  Follow-symlinks will
+	be silently turned off if <object> specifies an object in the
+	index rather than one in the object database.
+
+
 OUTPUT
 ------
 If '-t' is specified, one of the <type>.
@@ -148,6 +161,19 @@ the repository, then `cat-file` will ignore any custom format and print:
 <object> SP missing LF
 ------------
 
+If --follow-symlinks is used, and a symlink in the repository points
+outside the repository, then `cat-file` will ignore any custom format
+and print:
+
+------------
+symlink SP <size> LF <symlink> LF
+------------
+
+The symlink will either be absolute (beginning with a /), or relative
+to the repository root.  For instance, if dir/link points to ../../foo,
+then <symlink> will be ../foo.  <size> is the size of the symlink in
+bytes.
+
 
 CAVEATS
 -------
diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index df99df4..8e96dac 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -224,6 +224,7 @@ static void print_object_or_die(int fd, struct expand_data *data)
 
 struct batch_options {
 	int enabled;
+	int follow_symlinks;
 	int print_contents;
 	const char *format;
 };
@@ -232,16 +233,24 @@ static int batch_one_object(const char *obj_name, struct batch_options *opt,
 			    struct expand_data *data)
 {
 	struct strbuf buf = STRBUF_INIT;
+	struct object_context ctx;
+	int flags = opt->follow_symlinks ? GET_SHA1_FOLLOW_SYMLINKS : 0;
 
 	if (!obj_name)
 	   return 1;
 
-	if (get_sha1(obj_name, data->sha1)) {
+	if (get_sha1_with_context(obj_name, flags, data->sha1, &ctx)) {
 		printf("%s missing\n", obj_name);
 		fflush(stdout);
 		return 0;
 	}
 
+	if (ctx.mode == 0) {
+		printf("symlink %"PRIuMAX"\n%s\n", (uintmax_t)strlen(ctx.path),
+		       ctx.path);
+		return 0;
+	}
+
 	if (sha1_object_info_extended(data->sha1, &data->info, LOOKUP_REPLACE_OBJECT) < 0) {
 		printf("%s missing\n", obj_name);
 		fflush(stdout);
@@ -342,9 +351,8 @@ static int batch_option_callback(const struct option *opt,
 {
 	struct batch_options *bo = opt->value;
 
-	if (unset) {
-		memset(bo, 0, sizeof(*bo));
-		return 0;
+	if (bo->enabled) {
+		return 1;
 	}
 
 	bo->enabled = 1;
@@ -369,6 +377,9 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
 		OPT_SET_INT('p', NULL, &opt, N_("pretty-print object's content"), 'p'),
 		OPT_SET_INT(0, "textconv", &opt,
 			    N_("for blob objects, run textconv on object's content"), 'c'),
+		OPT_SET_INT(0, "follow-symlinks", &batch.follow_symlinks,
+			N_("follow in-repo symlinks; report out-of-repo symlinks (requires --batch or --batch-check)"),
+			    1),
 		{ OPTION_CALLBACK, 0, "batch", &batch, "format",
 			N_("show info and content of objects fed from the standard input"),
 			PARSE_OPT_OPTARG, batch_option_callback },
@@ -402,6 +413,10 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
 		usage_with_options(cat_file_usage, options);
 	}
 
+	if (batch.follow_symlinks && !batch.enabled) {
+		usage_with_options(cat_file_usage, options);
+	}
+
 	if (batch.enabled)
 		return batch_objects(&batch);
 
diff --git a/t/t1006-cat-file.sh b/t/t1006-cat-file.sh
index ab36b1e..a9ef3a6 100755
--- a/t/t1006-cat-file.sh
+++ b/t/t1006-cat-file.sh
@@ -189,6 +189,13 @@ do
     '
 done
 
+for opt in t s e p
+do
+    test_expect_success "Passing -$opt with --follow-symlinks fails" '
+	    test_must_fail git cat-file --follow-symlinks -$opt $hello_sha1
+	'
+done
+
 test_expect_success "--batch-check for a non-existent named object" '
     test "foobar42 missing
 foobar84 missing" = \
@@ -296,4 +303,181 @@ test_expect_success '%(deltabase) reports packed delta bases' '
 	}
 '
 
+# Tests for git cat-file --follow-symlinks
+test_expect_success 'prep for symlink tests' '
+	echo_without_newline "$hello_content" > morx &&
+	ln -s morx same-dir-link &&
+	ln -s ../fleem out-of-repo-link &&
+	ln -s .. out-of-repo-link-dir &&
+	ln -s same-dir-link link-to-link &&
+	ln -s nope broken-same-dir-link &&
+	mkdir dir &&
+	ln -s ../morx dir/parent-dir-link &&
+	ln -s .. dir/link-dir &&
+	ln -s ../../escape dir/out-of-repo-link &&
+	ln -s ../.. dir/out-of-repo-link-dir &&
+	ln -s nope dir/broken-link-in-dir &&
+	mkdir dir/subdir &&
+	ln -s ../../morx dir/subdir/grandparent-dir-link &&
+	ln -s ../../../great-escape dir/subdir/out-of-repo-link &&
+	ln -s ../../.. dir/subdir/out-of-repo-link-dir &&
+	ln -s ../../../ dir/subdir/out-of-repo-link-dir-trailing &&
+	ln -s ../parent-dir-link dir/subdir/parent-dir-link-to-link &&
+	echo_without_newline "$hello_content" >dir/subdir/ind2 &&
+	echo_without_newline "$hello_content" >dir/ind1 &&
+	ln -s dir dirlink &&
+	ln -s dir/subdir subdirlink &&
+	ln -s subdir/ind2 dir/link-to-child &&
+	ln -s dir/link-to-child link-to-down-link &&
+	ln -s dir/.. up-down &&
+	ln -s dir/../ up-down-trailing &&
+	ln -s dir/../morx up-down-file &&
+	ln -s dir/../../morx up-up-down-file &&
+	ln -s subdirlink/../../morx up-two-down-file &&
+	ln -s loop1 loop2 &&
+	ln -s loop2 loop1 &&
+	git add . &&
+	git commit -am "test"
+'
+
+echo $hello_sha1 blob $hello_size > found
+
+test_expect_success 'git cat-file --batch-check --follow-symlinks works for non-links' '
+	echo HEAD:morx | git cat-file --batch-check --follow-symlinks > actual &&
+	test_cmp found actual &&
+	echo HEAD:nope missing > expect &&
+	echo HEAD:nope | git cat-file --batch-check --follow-symlinks > actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'git cat-file --batch-check --follow-symlinks works for in-repo, same-dir links' '
+	echo HEAD:same-dir-link | git cat-file --batch-check --follow-symlinks > actual &&
+	test_cmp found actual
+'
+
+test_expect_success 'git cat-file --batch-check --follow-symlinks works for broken in-repo, same-dir links' '
+	echo HEAD:broken-same-dir-link missing > expect &&
+	echo HEAD:broken-same-dir-link | git cat-file --batch-check --follow-symlinks > actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'git cat-file --batch-check --follow-symlinks works for same-dir links-to-links' '
+	echo HEAD:link-to-link | git cat-file --batch-check --follow-symlinks > actual &&
+	test_cmp found actual
+'
+
+test_expect_success 'git cat-file --batch-check --follow-symlinks works for parent-dir links' '
+	echo HEAD:dir/parent-dir-link | git cat-file --batch-check --follow-symlinks > actual &&
+	test_cmp found actual &&
+	echo HEAD:dir/parent-dir-link/nope missing > expect &&
+	echo HEAD:dir/parent-dir-link/nope | git cat-file --batch-check --follow-symlinks > actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'git cat-file --batch-check --follow-symlinks works for .. links' '
+	echo HEAD:dir/link-dir/nope missing > expect &&
+	echo HEAD:dir/link-dir/nope | git cat-file --batch-check --follow-symlinks > actual &&
+	test_cmp expect actual &&
+	echo HEAD:dir/link-dir/morx | git cat-file --batch-check --follow-symlinks > actual &&
+	test_cmp found actual &&
+	echo HEAD:dir/broken-link-in-dir missing > expect &&
+	echo HEAD:dir/broken-link-in-dir | git cat-file --batch-check --follow-symlinks > actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'git cat-file --batch-check --follow-symlinks works for ../.. links' '
+	echo HEAD:dir/subdir/grandparent-dir-link/nope missing > expect &&
+	echo HEAD:dir/subdir/grandparent-dir-link/nope | git cat-file --batch-check --follow-symlinks > actual &&
+	test_cmp expect actual &&
+	echo HEAD:dir/subdir/grandparent-dir-link | git cat-file --batch-check --follow-symlinks > actual &&
+	test_cmp found actual &&
+	echo HEAD:dir/subdir/parent-dir-link-to-link | git cat-file --batch-check --follow-symlinks > actual &&
+	test_cmp found actual
+'
+
+test_expect_success 'git cat-file --batch-check --follow-symlinks works for dir/ links' '
+	echo HEAD:dirlink/morx missing > expect &&
+	echo HEAD:dirlink/morx | git cat-file --batch-check --follow-symlinks > actual &&
+	test_cmp expect actual &&
+	echo $hello_sha1 blob $hello_size > expect &&
+	echo HEAD:dirlink/ind1 | git cat-file --batch-check --follow-symlinks > actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'git cat-file --batch-check --follow-symlinks works for dir/subdir links' '
+	echo HEAD:subdirlink/morx missing > expect &&
+	echo HEAD:subdirlink/morx | git cat-file --batch-check --follow-symlinks > actual &&
+	test_cmp expect actual &&
+	echo HEAD:subdirlink/ind2 | git cat-file --batch-check --follow-symlinks > actual &&
+	test_cmp found actual
+'
+
+test_expect_success 'git cat-file --batch-check --follow-symlinks works for dir -> subdir links' '
+	echo HEAD:dir/link-to-child/morx missing > expect &&
+	echo HEAD:dir/link-to-child/morx | git cat-file --batch-check --follow-symlinks > actual &&
+	test_cmp expect actual &&
+	echo HEAD:dir/link-to-child | git cat-file --batch-check --follow-symlinks > actual &&
+	test_cmp found actual &&
+	echo HEAD:link-to-down-link | git cat-file --batch-check --follow-symlinks > actual &&
+	test_cmp found actual
+'
+
+test_expect_success 'git cat-file --batch-check --follow-symlinks works for out-of-repo symlinks' '
+	echo symlink 8 > expect &&
+	echo ../fleem >> expect &&
+	echo HEAD:out-of-repo-link | git cat-file --batch-check --follow-symlinks > actual &&
+	test_cmp expect actual &&
+	echo symlink 2 > expect &&
+	echo .. >> expect &&
+	echo HEAD:out-of-repo-link-dir | git cat-file --batch-check --follow-symlinks > actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'git cat-file --batch-check --follow-symlinks works for out-of-repo symlinks in dirs' '
+	echo symlink 9 > expect &&
+	echo ../escape >> expect &&
+	echo HEAD:dir/out-of-repo-link | git cat-file --batch-check --follow-symlinks > actual &&
+	test_cmp expect actual &&
+	echo symlink 2 > expect &&
+	echo .. >> expect &&
+	echo HEAD:dir/out-of-repo-link-dir | git cat-file --batch-check --follow-symlinks > actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'git cat-file --batch-check --follow-symlinks works for out-of-repo symlinks in subdirs' '
+	echo symlink 15 > expect &&
+	echo ../great-escape >> expect &&
+	echo HEAD:dir/subdir/out-of-repo-link | git cat-file --batch-check --follow-symlinks > actual &&
+	test_cmp expect actual &&
+	echo symlink 2 > expect &&
+	echo .. >> expect &&
+	echo HEAD:dir/subdir/out-of-repo-link-dir | git cat-file --batch-check --follow-symlinks > actual &&
+	test_cmp expect actual &&
+	echo symlink 3 > expect &&
+	echo ../ >> expect &&
+	echo HEAD:dir/subdir/out-of-repo-link-dir-trailing | git cat-file --batch-check --follow-symlinks > actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'git cat-file --batch-check --follow-symlinks works for symlinks with internal ..' '
+	echo HEAD: | git cat-file --batch-check > expect &&
+	echo HEAD:up-down | git cat-file --batch-check --follow-symlinks > actual &&
+	test_cmp expect actual &&
+	echo HEAD:up-down-trailing | git cat-file --batch-check --follow-symlinks > actual &&
+	test_cmp expect actual &&
+	echo HEAD:up-down-file | git cat-file --batch-check --follow-symlinks > actual &&
+	test_cmp found actual &&
+	echo symlink 7 > expect &&
+	echo ../morx >> expect &&
+	echo HEAD:up-up-down-file | git cat-file --batch-check --follow-symlinks > actual &&
+	test_cmp expect actual &&
+	echo HEAD:up-two-down-file | git cat-file --batch-check --follow-symlinks > actual &&
+	test_cmp found actual
+'
+
+test_expect_success 'git cat-file --batch-check --follow-symlink breaks loops' '
+	echo HEAD:loop1 missing > expect &&
+	echo HEAD:loop1 | git cat-file --batch-check --follow-symlinks > actual &&
+	test_cmp expect actual
+'
 test_done
-- 
2.0.4.315.gad8727a-twtrsrc

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH v2 2/3] sha1_name: get_sha1_with_context learns to follow symlinks
  2015-05-08 22:38 ` [PATCH v2 2/3] sha1_name: get_sha1_with_context learns to follow symlinks dturner
@ 2015-05-09  3:45   ` Junio C Hamano
  2015-05-09 17:39     ` Junio C Hamano
  2015-05-09 20:02     ` David Turner
  0 siblings, 2 replies; 8+ messages in thread
From: Junio C Hamano @ 2015-05-09  3:45 UTC (permalink / raw)
  To: dturner; +Cc: git, David Turner

dturner@twopensource.com writes:

> From: David Turner <dturner@twitter.com>
>
> Wire up get_sha1_with_context to call get_tree_entry_follow_symlinks
> when GET_SHA1_FOLLOW_SYMLINKS is passed in flags. G_S_FOLLOW_SYMLINKS
> is incompatible with G_S_ONLY_TO_DIE because the diagnosis that
> ONLY_TO_DIE triggers does not consider symlinks.

Is "does not consider" something fundamental, or does it just happen
to be that way right now?

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2 1/3] tree-walk: learn get_tree_entry_follow_symlinks
  2015-05-08 22:38 [PATCH v2 1/3] tree-walk: learn get_tree_entry_follow_symlinks dturner
  2015-05-08 22:38 ` [PATCH v2 2/3] sha1_name: get_sha1_with_context learns to follow symlinks dturner
  2015-05-08 22:38 ` [PATCH v2 3/3] cat-file: add --follow-symlinks to --batch dturner
@ 2015-05-09  5:00 ` Junio C Hamano
  2 siblings, 0 replies; 8+ messages in thread
From: Junio C Hamano @ 2015-05-09  5:00 UTC (permalink / raw)
  To: dturner; +Cc: git, David Turner

dturner@twopensource.com writes:

> From: David Turner <dturner@twitter.com>
>
> Add a new function, get_tree_entry_follow_symlinks, to tree-walk.[ch].
> The function is not yet used.  It will be used to implement git
> cat-file --batch --follow-symlinks.
>
> The function locates an object by path, following symlinks in the
> repository.  If the symlinks lead outside the repository, the function
> reports this to the caller.
>
> Signed-off-by: David Turner <dturner@twitter.com>
> ---
>  tree-walk.c | 193 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  tree-walk.h |   2 +
>  2 files changed, 195 insertions(+)

Overall this looks fairly straightforward and much cleaner than I
feared it would end up being.

	Side Note: Earlier I stupidly thought that it may have to
	involve ugly code to avoid penalizing common cases by first
	attempting to resolve the whole thing (e.g. a/b/c) and fall
	back to "is there any symbolic link?" codepath only after
	seeing it fail (i.e. if we can find "a/b/c" in the tree, by
	definition "a/" and "a/b/" cannot be a symbolic link).  But
	of course that is a silly thought. As this implementation
	makes it clear, for us to resolve "a/b/c" in the common case
	without symbolic link, we need to incrementally find "a" in
	the root, then find "b" in that "a", and then find "c" in
	"a/b" _anyway_.



I'll leave it to others (or maybe to myself tomorrow ;-) to nitpick
on style, code layout and possibly logic errors (if any), but from
a cursory read, I like the general structure of this patch a lot.

Good job.

Thanks.

> diff --git a/tree-walk.c b/tree-walk.c
> index 5dd9a71..2df31a2 100644
> --- a/tree-walk.c
> +++ b/tree-walk.c
> @@ -415,6 +415,12 @@ int traverse_trees(int n, struct tree_desc *t, struct traverse_info *info)
>  	return error;
>  }
>  
> +struct dir_state {
> +	void *tree;
> +	unsigned long size;
> +	unsigned char sha1[20];
> +};
> +
>  static int find_tree_entry(struct tree_desc *t, const char *name, unsigned char *result, unsigned *mode)
>  {
>  	int namelen = strlen(name);
> @@ -478,6 +484,193 @@ int get_tree_entry(const unsigned char *tree_sha1, const char *name, unsigned ch
>  	return retval;
>  }
>  
> +/* This is Linux's built-in max for the number of symlinks to follow.
> + * That limit, of course, does not affect git, but it's a reasonable
> + * choice.
> + */
> +#define GET_TREE_ENTRY_FOLLOW_SYMLINKS_MAX_LINKS 40
> +
> +/**
> + * Find a tree entry by following symlinks in tree_sha (which is
> + * assumed to be the root of the repository).  In the event that a
> + * symlink points outside the repository (e.g. a link to /foo or a
> + * root-level link to ../foo), the portion of the link which is
> + * outside the repository will be copied into result_path, and *mode
> + * will be set to 0.  Otherwise, result will be filled in with the
> + * sha1 of the found object, and *mode will hold the mode of the
> + * object.
> + */
> +int get_tree_entry_follow_symlinks(unsigned char *tree_sha1, const char *name, unsigned char *result, unsigned char *result_path, unsigned *mode)
> +{
> +	int retval = -1;
> +	void *tree;
> +	struct dir_state *parents = NULL;
> +	size_t parents_alloc = 0;
> +	ssize_t parents_nr = 0;
> +	unsigned long size;
> +	unsigned char root[20];
> +	unsigned char current_tree_sha1[20];
> +	struct strbuf namebuf = STRBUF_INIT;
> +	enum object_type type;
> +	int already_have_tree = 0;
> +	struct tree_desc t = {0};
> +	int follows_remaining = GET_TREE_ENTRY_FOLLOW_SYMLINKS_MAX_LINKS;
> +	int i;
> +
> +	strbuf_addstr(&namebuf, name);
> +	hashcpy(current_tree_sha1, tree_sha1);
> +
> +	while (1) {
> +		char *first_slash;
> +		char *remainder = NULL;
> +		int find_result;
> +
> +		if (!t.buffer) {
> +			tree = read_object_with_reference(current_tree_sha1,
> +							  tree_type, &size,
> +							  root);
> +			if (!tree)
> +				goto done;
> +
> +			ALLOC_GROW(parents, parents_nr + 1, parents_alloc);
> +			parents[parents_nr].tree = tree;
> +			parents[parents_nr].size = size;
> +			hashcpy(parents[parents_nr].sha1, root);
> +
> +			parents_nr++;
> +
> +			if (namebuf.buf[0] == '\0') {
> +				hashcpy(result, root);
> +				retval = 0;
> +				goto done;
> +			}
> +
> +			if (!size)
> +				goto done;
> +
> +			/* descend */
> +			init_tree_desc(&t, tree, size);
> +		}
> +
> +		/* Handle symlinks to e.g. a//b by removing leading slashes */
> +		while (namebuf.buf[0] == '/') {
> +			strbuf_remove(&namebuf, 0, 1);
> +		}
> +
> +		/* Split namebuf into a first component and a
> +		 * remainder */
> +		if ((first_slash = strchr(namebuf.buf, '/'))) {
> +			*first_slash = 0;
> +			remainder = first_slash + 1;
> +		}
> +
> +		if (!strcmp(namebuf.buf, "..")) {
> +			struct dir_state *parent;
> +			/* We could end up with .. in the namebuf if
> +			 * it appears in a symlink. */
> +
> +			if (parents_nr == 1) {
> +				if (remainder)
> +					*first_slash = '/';
> +				if (strlcpy(result_path, namebuf.buf,
> +					    PATH_MAX) < PATH_MAX) {
> +					*mode = 0;
> +					retval = 0;
> +				}
> +				goto done;
> +			}
> +			parent = &parents[parents_nr - 1];
> +			free(parent->tree);
> +			parents_nr--;
> +			parent = &parents[parents_nr - 1];
> +			init_tree_desc(&t, parent->tree, parent->size);
> +			strbuf_remove(&namebuf, 0, remainder ? 3 : 2);
> +			continue;
> +		}
> +
> +		/* We could end up here via a symlink to dir/.. */
> +		if (namebuf.buf[0] == '\0') {
> +			hashcpy(result, parents[parents_nr - 1].sha1);
> +			retval = 0;
> +			goto done;
> +		}
> +
> +		/* Look up the first (or only) path component
> +		 * in the tree. */
> +		find_result = find_tree_entry(&t, namebuf.buf,
> +					      current_tree_sha1, mode);
> +		if (find_result) {
> +			retval = find_result;
> +			goto done;
> +		}
> +
> +		if (S_ISDIR(*mode)) {
> +			if (!remainder) {
> +				hashcpy(result, current_tree_sha1);
> +				retval = 0;
> +				goto done;
> +			}
> +			/* Descend the tree */
> +			t.buffer = NULL;
> +			strbuf_remove(&namebuf, 0,
> +				      1 + first_slash - namebuf.buf);
> +		} else if (S_ISREG(*mode)) {
> +			if (!remainder) {
> +				hashcpy(result, current_tree_sha1);
> +				retval = 0;
> +			}
> +			goto done;
> +		} else if (S_ISLNK(*mode)) {
> +			/* Follow a symlink */
> +			size_t link_len, len;
> +			char *contents, *contents_start;
> +			struct dir_state *parent;
> +
> +			if (follows_remaining-- == 0)
> +				/* Too many symlinks followed */
> +				goto done;
> +
> +			contents = read_sha1_file(current_tree_sha1, &type,
> +						  &link_len);
> +
> +			if (!contents)
> +				goto done;
> +
> +			if (contents[0] == '/') {
> +				if (strlcpy(result_path,
> +					    contents, PATH_MAX) < PATH_MAX) {
> +					*mode = 0;
> +					retval = 0;
> +				}
> +				goto done;
> +			}
> +
> +			if (remainder)
> +				len = first_slash - namebuf.buf;
> +			else
> +				len = namebuf.len;
> +
> +			contents_start = contents;
> +
> +			parent = &parents[parents_nr - 1];
> +			init_tree_desc(&t, parent->tree, parent->size);
> +			strbuf_splice(&namebuf, 0, len,
> +				      contents_start, link_len);
> +			if (remainder)
> +				namebuf.buf[link_len] = '/';
> +			free(contents);
> +		}
> +	}
> +done:
> +	for (i = 0; i < parents_nr; ++i) {
> +		free(parents[i].tree);
> +	}
> +	free(parents);
> +
> +	strbuf_release(&namebuf);
> +	return retval;
> +}
> +
>  static int match_entry(const struct pathspec_item *item,
>  		       const struct name_entry *entry, int pathlen,
>  		       const char *match, int matchlen,
> diff --git a/tree-walk.h b/tree-walk.h
> index ae7fb3a..d9ad768 100644
> --- a/tree-walk.h
> +++ b/tree-walk.h
> @@ -40,6 +40,8 @@ struct traverse_info;
>  typedef int (*traverse_callback_t)(int n, unsigned long mask, unsigned long dirmask, struct name_entry *entry, struct traverse_info *);
>  int traverse_trees(int n, struct tree_desc *t, struct traverse_info *info);
>  
> +int get_tree_entry_follow_symlinks(unsigned char *tree_sha1, const char *name, unsigned char *result, unsigned char *result_path, unsigned *mode);
> +
>  struct traverse_info {
>  	struct traverse_info *prev;
>  	struct name_entry name;

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2 2/3] sha1_name: get_sha1_with_context learns to follow symlinks
  2015-05-09  3:45   ` Junio C Hamano
@ 2015-05-09 17:39     ` Junio C Hamano
  2015-05-09 20:34       ` David Turner
  2015-05-09 20:02     ` David Turner
  1 sibling, 1 reply; 8+ messages in thread
From: Junio C Hamano @ 2015-05-09 17:39 UTC (permalink / raw)
  To: dturner; +Cc: git, David Turner

Junio C Hamano <gitster@pobox.com> writes:

> dturner@twopensource.com writes:
>
>> From: David Turner <dturner@twitter.com>
>>
>> Wire up get_sha1_with_context to call get_tree_entry_follow_symlinks
>> when GET_SHA1_FOLLOW_SYMLINKS is passed in flags. G_S_FOLLOW_SYMLINKS
>> is incompatible with G_S_ONLY_TO_DIE because the diagnosis that
>> ONLY_TO_DIE triggers does not consider symlinks.
>
> Is "does not consider" something fundamental, or it just happens to
> be that way right now?

Regardless of the answer to this question, I find the last part of
this hunk puzzling.


> +			if (flags & GET_SHA1_FOLLOW_SYMLINKS) {
> +				ret = get_tree_entry_follow_symlinks(tree_sha1,
> +					filename, sha1, oc->path, &oc->mode);
> +			} else {
> +				ret = get_tree_entry(tree_sha1, filename,
> +						     sha1, &oc->mode);
> +				if (ret && only_to_die) {
> +					diagnose_invalid_sha1_path(prefix,
> +								   filename,
> +								   tree_sha1,
> +								   name, len);
> +				}
> +				hashcpy(oc->tree, tree_sha1);
> +				strlcpy(oc->path, filename, sizeof(oc->path));
>  			}
> -			hashcpy(oc->tree, tree_sha1);
> -			strlcpy(oc->path, filename, sizeof(oc->path));
> -

Both variants of get_tree_entry() receive tree_sha1 and &oc->mode as
places to store the discovered results and that is why we do hashcpy
and strlcpy in the original codepath.

 - With your patch, the new codepath discards tree_sha1[] because it
   lost the copy back to oc->tree[]; is this change intended?  As we
   are not passing oc itself to the function, there is no way for it
   to return the object name directly to oc->tree[], no?

 - In the new codepath, oc->path[] is also not copied but I can
   sort-of guess why (you want to return something other than
   "filename" from get-tree-entry-follow-symlinks in it, or
   something).  But then the caller is losing the result of parsing
   the extended SHA-1.

You explain why "if (ret && only_to_die)" part is skipped, but these
two differences are equally, if not more, important differences
between the two codepaths.  I do not think I saw it explained.

In any case, I would think that get_sha1_with_context() should have
an external interface that is as close as possible to the original,
with enhancements (i.e. without modifying what existing fields
mean) [*1*].

That is, if oc->path[] is meant to store filename parsed from the
end-user input, it should keep doing so with or without
follow-symlinks.  And if follow-symlinks feature needs to return
extra information to the caller, it should add a new field to return
that information.

And my gut feeling is that such a correction to the way the
updated get_sha1_with_context() behaves would mean you can (and need
to) keep hashcpy() and strlcpy() common to both codepaths in this
patch.

Thanks.


[Footnote]

*1* The reason is simple.  On a user input without any symbolic
link, a caller (not just the caller you are adding in patch 3/3)
should be able to expect to get the identical outcome from
get_sha1_with_context(), with or without GET_SHA1_FOLLOW_SYMLINKS.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2 2/3] sha1_name: get_sha1_with_context learns to follow symlinks
  2015-05-09  3:45   ` Junio C Hamano
  2015-05-09 17:39     ` Junio C Hamano
@ 2015-05-09 20:02     ` David Turner
  1 sibling, 0 replies; 8+ messages in thread
From: David Turner @ 2015-05-09 20:02 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, David Turner

On Fri, 2015-05-08 at 20:45 -0700, Junio C Hamano wrote:
> dturner@twopensource.com writes:
> 
> > From: David Turner <dturner@twitter.com>
> >
> > Wire up get_sha1_with_context to call get_tree_entry_follow_symlinks
> > when GET_SHA1_FOLLOW_SYMLINKS is passed in flags. G_S_FOLLOW_SYMLINKS
> > is incompatible with G_S_ONLY_TO_DIE because the diagnosis that
> > ONLY_TO_DIE triggers does not consider symlinks.
> 
> Is "does not consider" something fundamental, or it just happens to
> be that way right now?

It just happens to be that way right now.  It would require work
approximately equivalent to this patch series to fix.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2 2/3] sha1_name: get_sha1_with_context learns to follow symlinks
  2015-05-09 17:39     ` Junio C Hamano
@ 2015-05-09 20:34       ` David Turner
  0 siblings, 0 replies; 8+ messages in thread
From: David Turner @ 2015-05-09 20:34 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, David Turner

On Sat, 2015-05-09 at 10:39 -0700, Junio C Hamano wrote:
> Junio C Hamano <gitster@pobox.com> writes:
> 
> > dturner@twopensource.com writes:
> >
> >> From: David Turner <dturner@twitter.com>
> >>
> >> Wire up get_sha1_with_context to call get_tree_entry_follow_symlinks
> >> when GET_SHA1_FOLLOW_SYMLINKS is passed in flags. G_S_FOLLOW_SYMLINKS
> >> is incompatible with G_S_ONLY_TO_DIE because the diagnosis that
> >> ONLY_TO_DIE triggers does not consider symlinks.
> >
> > Is "does not consider" something fundamental, or it just happens to
> > be that way right now?
> 
> Regardless of the answer to this question, I find the last part of
> this hunk puzzling.

You're right -- this code is wrong, for the reasons you explain.  I'm
going to follow your earlier suggestion and use a strbuf for the new
field.

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2015-05-09 20:34 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-05-08 22:38 [PATCH v2 1/3] tree-walk: learn get_tree_entry_follow_symlinks dturner
2015-05-08 22:38 ` [PATCH v2 2/3] sha1_name: get_sha1_with_context learns to follow symlinks dturner
2015-05-09  3:45   ` Junio C Hamano
2015-05-09 17:39     ` Junio C Hamano
2015-05-09 20:34       ` David Turner
2015-05-09 20:02     ` David Turner
2015-05-08 22:38 ` [PATCH v2 3/3] cat-file: add --follow-symlinks to --batch dturner
2015-05-09  5:00 ` [PATCH v2 1/3] tree-walk: learn get_tree_entry_follow_symlinks Junio C Hamano

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).