Git development

Git development
 help / color / mirror / Atom feed

* [PATCH v5 1/5] real_path: resolve symlinks by hand
From: Brandon Williams @ 2017-01-04 22:01 UTC (permalink / raw)
  To: git
  Cc: Brandon Williams, sbeller, peff, jacob.keller, gitster, ramsay,
	tboegi, j6t, pclouds, larsxschneider
In-Reply-To: <20170104220124.145808-1-bmwill@google.com>

The current implementation of real_path uses chdir() in order to resolve
symlinks.  Unfortunately this isn't thread-safe as chdir() affects a
process as a whole and not just an individual thread.  Instead perform
the symlink resolution by hand so that the calls to chdir() can be
removed, making real_path one step closer to being reentrant.

Signed-off-by: Brandon Williams <bmwill@google.com>
---
 abspath.c | 194 ++++++++++++++++++++++++++++++++++++++++++--------------------
 1 file changed, 133 insertions(+), 61 deletions(-)

diff --git a/abspath.c b/abspath.c
index 2825de859..629201e48 100644
--- a/abspath.c
+++ b/abspath.c
@@ -11,8 +11,47 @@ int is_directory(const char *path)
 	return (!stat(path, &st) && S_ISDIR(st.st_mode));
 }
 
+/* removes the last path component from 'path' except if 'path' is root */
+static void strip_last_component(struct strbuf *path)
+{
+	size_t offset = offset_1st_component(path->buf);
+	size_t len = path->len;
+
+	/* Find start of the last component */
+	while (offset < len && !is_dir_sep(path->buf[len - 1]))
+		len--;
+	/* Skip sequences of multiple path-separators */
+	while (offset < len && is_dir_sep(path->buf[len - 1]))
+		len--;
+
+	strbuf_setlen(path, len);
+}
+
+/* get (and remove) the next component in 'remaining' and place it in 'next' */
+static void get_next_component(struct strbuf *next, struct strbuf *remaining)
+{
+	char *start = NULL;
+	char *end = NULL;
+
+	strbuf_reset(next);
+
+	/* look for the next component */
+	/* Skip sequences of multiple path-separators */
+	for (start = remaining->buf; is_dir_sep(*start); start++)
+		; /* nothing */
+	/* Find end of the path component */
+	for (end = start; *end && !is_dir_sep(*end); end++)
+		; /* nothing */
+
+	strbuf_add(next, start, end - start);
+	/* remove the component from 'remaining' */
+	strbuf_remove(remaining, 0, end - remaining->buf);
+}
+
 /* We allow "recursive" symbolic links. Only within reason, though. */
-#define MAXDEPTH 5
+#ifndef MAXSYMLINKS
+#define MAXSYMLINKS 32
+#endif
 
 /*
  * Return the real path (i.e., absolute path, with symlinks resolved
@@ -21,7 +60,6 @@ int is_directory(const char *path)
  * absolute_path().)  The return value is a pointer to a static
  * buffer.
  *
- * The input and all intermediate paths must be shorter than MAX_PATH.
  * The directory part of path (i.e., everything up to the last
  * dir_sep) must denote a valid, existing directory, but the last
  * component need not exist.  If die_on_error is set, then die with an
@@ -33,22 +71,16 @@ int is_directory(const char *path)
  */
 static const char *real_path_internal(const char *path, int die_on_error)
 {
-	static struct strbuf sb = STRBUF_INIT;
+	static struct strbuf resolved = STRBUF_INIT;
+	struct strbuf remaining = STRBUF_INIT;
+	struct strbuf next = STRBUF_INIT;
+	struct strbuf symlink = STRBUF_INIT;
 	char *retval = NULL;
-
-	/*
-	 * If we have to temporarily chdir(), store the original CWD
-	 * here so that we can chdir() back to it at the end of the
-	 * function:
-	 */
-	struct strbuf cwd = STRBUF_INIT;
-
-	int depth = MAXDEPTH;
-	char *last_elem = NULL;
+	int num_symlinks = 0;
 	struct stat st;
 
 	/* We've already done it */
-	if (path == sb.buf)
+	if (path == resolved.buf)
 		return path;
 
 	if (!*path) {
@@ -58,74 +90,114 @@ static const char *real_path_internal(const char *path, int die_on_error)
 			goto error_out;
 	}
 
-	strbuf_reset(&sb);
-	strbuf_addstr(&sb, path);
-
-	while (depth--) {
-		if (!is_directory(sb.buf)) {
-			char *last_slash = find_last_dir_sep(sb.buf);
-			if (last_slash) {
-				last_elem = xstrdup(last_slash + 1);
-				strbuf_setlen(&sb, last_slash - sb.buf + 1);
-			} else {
-				last_elem = xmemdupz(sb.buf, sb.len);
-				strbuf_reset(&sb);
-			}
+	strbuf_reset(&resolved);
+
+	if (is_absolute_path(path)) {
+		/* absolute path; start with only root as being resolved */
+		int offset = offset_1st_component(path);
+		strbuf_add(&resolved, path, offset);
+		strbuf_addstr(&remaining, path + offset);
+	} else {
+		/* relative path; can use CWD as the initial resolved path */
+		if (strbuf_getcwd(&resolved)) {
+			if (die_on_error)
+				die_errno("unable to get current working directory");
+			else
+				goto error_out;
 		}
+		strbuf_addstr(&remaining, path);
+	}
 
-		if (sb.len) {
-			if (!cwd.len && strbuf_getcwd(&cwd)) {
+	/* Iterate over the remaining path components */
+	while (remaining.len > 0) {
+		get_next_component(&next, &remaining);
+
+		if (next.len == 0) {
+			continue; /* empty component */
+		} else if (next.len == 1 && !strcmp(next.buf, ".")) {
+			continue; /* '.' component */
+		} else if (next.len == 2 && !strcmp(next.buf, "..")) {
+			/* '..' component; strip the last path component */
+			strip_last_component(&resolved);
+			continue;
+		}
+
+		/* append the next component and resolve resultant path */
+		if (!is_dir_sep(resolved.buf[resolved.len - 1]))
+			strbuf_addch(&resolved, '/');
+		strbuf_addbuf(&resolved, &next);
+
+		if (lstat(resolved.buf, &st)) {
+			/* error out unless this was the last component */
+			if (errno != ENOENT || remaining.len) {
 				if (die_on_error)
-					die_errno("Could not get current working directory");
+					die_errno("Invalid path '%s'",
+						  resolved.buf);
 				else
 					goto error_out;
 			}
+		} else if (S_ISLNK(st.st_mode)) {
+			ssize_t len;
+			strbuf_reset(&symlink);
+
+			if (num_symlinks++ > MAXSYMLINKS) {
+				errno = ELOOP;
 
-			if (chdir(sb.buf)) {
 				if (die_on_error)
-					die_errno("Could not switch to '%s'",
-						  sb.buf);
+					die("More than %d nested symlinks "
+					    "on path '%s'", MAXSYMLINKS, path);
 				else
 					goto error_out;
 			}
-		}
-		if (strbuf_getcwd(&sb)) {
-			if (die_on_error)
-				die_errno("Could not get current working directory");
-			else
-				goto error_out;
-		}
 
-		if (last_elem) {
-			if (sb.len && !is_dir_sep(sb.buf[sb.len - 1]))
-				strbuf_addch(&sb, '/');
-			strbuf_addstr(&sb, last_elem);
-			free(last_elem);
-			last_elem = NULL;
-		}
-
-		if (!lstat(sb.buf, &st) && S_ISLNK(st.st_mode)) {
-			struct strbuf next_sb = STRBUF_INIT;
-			ssize_t len = strbuf_readlink(&next_sb, sb.buf, 0);
+			len = strbuf_readlink(&symlink, resolved.buf,
+					      st.st_size);
 			if (len < 0) {
 				if (die_on_error)
 					die_errno("Invalid symlink '%s'",
-						  sb.buf);
+						  resolved.buf);
 				else
 					goto error_out;
 			}
-			strbuf_swap(&sb, &next_sb);
-			strbuf_release(&next_sb);
-		} else
-			break;
+
+			if (is_absolute_path(symlink.buf)) {
+				/* absolute symlink; set resolved to root */
+				int offset = offset_1st_component(symlink.buf);
+				strbuf_reset(&resolved);
+				strbuf_add(&resolved, symlink.buf, offset);
+				strbuf_remove(&symlink, 0, offset);
+			} else {
+				/*
+				 * relative symlink
+				 * strip off the last component since it will
+				 * be replaced with the contents of the symlink
+				 */
+				strip_last_component(&resolved);
+			}
+
+			/*
+			 * if there are still remaining components to resolve
+			 * then append them to symlink
+			 */
+			if (remaining.len) {
+				strbuf_addch(&symlink, '/');
+				strbuf_addbuf(&symlink, &remaining);
+			}
+
+			/*
+			 * use the symlink as the remaining components that
+			 * need to be resloved
+			 */
+			strbuf_swap(&symlink, &remaining);
+		}
 	}
 
-	retval = sb.buf;
+	retval = resolved.buf;
+
 error_out:
-	free(last_elem);
-	if (cwd.len && chdir(cwd.buf))
-		die_errno("Could not change back to '%s'", cwd.buf);
-	strbuf_release(&cwd);
+	strbuf_release(&remaining);
+	strbuf_release(&next);
+	strbuf_release(&symlink);
 
 	return retval;
 }
-- 
2.11.0.390.gc69c2f50cf-goog


^ permalink raw reply related

* Re: [PATCH v4 0/5] road to reentrant real_path
From: Brandon Williams @ 2017-01-04 21:55 UTC (permalink / raw)
  To: Jacob Keller
  Cc: Stefan Beller, Jeff King, Torsten Bögershausen,
	git@vger.kernel.org, Junio C Hamano, Ramsay Jones, Johannes Sixt,
	Duy Nguyen, Lars Schneider
In-Reply-To: <CA+P7+xp+j1ajPLjE-RukSmp33_bRkD7J65X-++frkYd9LWLSkQ@mail.gmail.com>

On 01/04, Jacob Keller wrote:
> On Wed, Jan 4, 2017 at 10:22 AM, Stefan Beller <sbeller@google.com> wrote:
> > On Wed, Jan 4, 2017 at 10:13 AM, Brandon Williams <bmwill@google.com> wrote:
> >> On 01/04, Jeff King wrote:
> >>> On Wed, Jan 04, 2017 at 07:56:02AM +0100, Torsten Bögershausen wrote:
> >>>
> >>> > On 04.01.17 01:48, Jeff King wrote:
> >>> > > On Tue, Jan 03, 2017 at 11:09:18AM -0800, Brandon Williams wrote:
> >>> > >
> >>> > >> Only change with v4 is in [1/5] renaming the #define MAXSYMLINKS back to
> >>> > >> MAXDEPTH due to a naming conflict brought up by Lars Schneider.
> >>> > >
> >>> > > Hmm. Isn't MAXSYMLINKS basically what you want here, though? It what's
> >>> > > what all other similar functions will be using.
> >>> > >
> >>> > > The only problem was that we were redefining the macro. So maybe:
> >>> > >
> >>> > >   #ifndef MAXSYMLINKS
> >>> > >   #define MAXSYMLINKS 5
> >>> > >   #endif
> >>> > >
> >>> > > would be a good solution?
> >>> > Why 5  ? (looking at the  20..30 below)
> >>> > And why 5 on one system and e.g. on my Mac OS
> >>> > #define MAXSYMLINKS     32
> >>>
> >>> I mentioned "5" because that is the current value of MAXDEPTH. I do
> >>> think it would be reasonable to bump it to something higher.
> >>>
> >>> > Would the same value value for all Git installations on all platforms make sense?
> >>> > #define GITMAXSYMLINKS 20
> >>>
> >>> I think it's probably more important to match the rest of the OS, so
> >>> that open("foo") and realpath("foo") behave similarly on the same
> >>> system. Though I think even that isn't always possible, as the limit is
> >>> dynamic on some systems.
> >>>
> >>> I think the idea of the 20-30 range is that it's small enough to catch
> >>> an infinite loop quickly, and large enough that nobody will ever hit it
> >>> in practice. :)
> >>
> >> I agree that we should have similar guarantees as the OS provides,
> >> especially if the OS already has MAXSYMLINKS defined.  What then, should
> >> the fall back value be if the OS doesn't have this defined?  5 like we
> >> have done historically, or something around the 20-30 range as Torsten
> >> suggests?
> >
> > As a fallback I'd rather go for a larger number than too small.
> > The reason for the existence is just to break an infinite loop
> > (and report an error? which the current code doesn't quite do,
> > but your series actually does).
> >
> > If the number is too large, then it takes a bit longer to generate the error
> > message, but the error path is no big deal w.r.t. performance, so it's fine
> > for it taking a bit longer.
> >
> > If the number is too low, then we may hinder people from getting actual
> > work done, (i.e. they have to figure out what the problem is and recompile
> > git), so I'd think a larger number is not harmful. So 32?
> >
> 
> I think I agree as well.
> 
> Thanks,
> Jake
> 
> >>
> >> --
> >> Brandon Williams

That's two people for 32.  I'll use that as the fallback and resend the
series.

-- 
Brandon Williams

^ permalink raw reply

* [PATCHv5] pathspec: give better message for submodule related pathspec error
From: Stefan Beller @ 2017-01-04 23:10 UTC (permalink / raw)
  To: peff; +Cc: git, bmwill, gitster, Stefan Beller
In-Reply-To: <20170104205346.GE69227@google.com>

Every once in a while someone complains to the mailing list to have
run into this weird assertion[1]. The usual response from the mailing
list is link to old discussions[2], and acknowledging the problem
stating it is known.

This patch accomplishes two things:

  1. Switch assert() to die("BUG") to give a more readable message.

  2. Take one of the cases where we hit a BUG and turn it into a normal
     "there was something wrong with the input" message.

     This assertion triggered for cases where there wasn't a programming
     bug, but just bogus input. In particular, if the user asks for a
     pathspec that is inside a submodule, we shouldn't assert() or
     die("BUG"); we should tell the user their request is bogus.

     The only reason we did not check for it, is the expensive nature
     of such a check, so callers avoid setting the flag
     PATHSPEC_STRIP_SUBMODULE_SLASH_EXPENSIVE. However when we die due
     to bogus input, the expense of cpu cycles spent outweighs the user
     wondering what went wrong, so run that check unconditionally before
     dying with a more generic error message.

     In case we found out that the path points inside a submodule, but the
     caller did not ask for PATHSPEC_STRIP_SUBMODULE_SLASH_EXPENSIVE, we
     should not silently fix the pathspec to just point at the submodule,
     as that would confuse callers.

To make this happen, specifically the second part, move the check for
being inside a submodule into a function and call it either when
PATHSPEC_STRIP_SUBMODULE_SLASH_EXPENSIVE is set or when we are in the
buggy case to give a better error message.

Note: There is this one special case ("git -C submodule add .") in which
we call check_inside_submodule_expensive two times, once for checking
PATHSPEC_STRIP_SUBMODULE_SLASH_EXPENSIVE and once in the code path
handling the buggy user input. For this to work correctly we need to adapt
the conditions in the check for being inside the submodule to account for
the prior run to have taken effect.

[1] https://www.google.com/search?q=item-%3Enowildcard_len
[2] http://git.661346.n2.nabble.com/assert-failed-in-submodule-edge-case-td7628687.html
    https://www.spinics.net/lists/git/msg249473.html

Helped-by: Jeff King <peff@peff.net>
Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Stefan Beller <sbeller@google.com>
---

This is just rerolling the second patch of that "make the assert go away",
asking for opinions again.

I agree with Brandon that pathspec code is not the ideal place to
check for issues with submodules. However we should give the best error
message possible for the user, so running this diagnosis is fine by me.

Thanks,
Stefan

 pathspec.c                       | 63 +++++++++++++++++++++++++++-------------
 t/t6134-pathspec-in-submodule.sh | 33 +++++++++++++++++++++
 2 files changed, 76 insertions(+), 20 deletions(-)
 create mode 100755 t/t6134-pathspec-in-submodule.sh

diff --git a/pathspec.c b/pathspec.c
index 22ca74a126..41e0dac1df 100644
--- a/pathspec.c
+++ b/pathspec.c
@@ -88,6 +88,34 @@ static void prefix_short_magic(struct strbuf *sb, int prefixlen,
 	strbuf_addf(sb, ",prefix:%d)", prefixlen);
 }
 
+static void check_inside_submodule_expensive(struct pathspec_item *item,
+					     char *match,
+					     const char *elt, int die_inside)
+{
+	int i;
+	for (i = 0; i < active_nr; i++) {
+		struct cache_entry *ce = active_cache[i];
+		int ce_len = ce_namelen(ce);
+
+		if (!S_ISGITLINK(ce->ce_mode))
+			continue;
+
+		if (item->len < ce_len ||
+		    !(match[ce_len] == '/' || match[ce_len] == '\0') ||
+		    memcmp(ce->name, match, ce_len))
+			continue;
+
+		if (item->len != ce_len + 1 || die_inside)
+			die (_("Pathspec '%s' is in submodule '%.*s'"),
+			     elt, ce_len, ce->name);
+
+		/* strip trailing slash */
+		item->len--;
+		match[item->len] = '\0';
+		break;
+	}
+}
+
 /*
  * Take an element of a pathspec and check for magic signatures.
  * Append the result to the prefix. Return the magic bitmap.
@@ -273,24 +301,7 @@ static unsigned prefix_pathspec(struct pathspec_item *item,
 	}
 
 	if (flags & PATHSPEC_STRIP_SUBMODULE_SLASH_EXPENSIVE)
-		for (i = 0; i < active_nr; i++) {
-			struct cache_entry *ce = active_cache[i];
-			int ce_len = ce_namelen(ce);
-
-			if (!S_ISGITLINK(ce->ce_mode))
-				continue;
-
-			if (item->len <= ce_len || match[ce_len] != '/' ||
-			    memcmp(ce->name, match, ce_len))
-				continue;
-			if (item->len == ce_len + 1) {
-				/* strip trailing slash */
-				item->len--;
-				match[item->len] = '\0';
-			} else
-				die (_("Pathspec '%s' is in submodule '%.*s'"),
-				     elt, ce_len, ce->name);
-		}
+		check_inside_submodule_expensive(item, match, elt, 0);
 
 	if (magic & PATHSPEC_LITERAL)
 		item->nowildcard_len = item->len;
@@ -313,8 +324,20 @@ static unsigned prefix_pathspec(struct pathspec_item *item,
 	}
 
 	/* sanity checks, pathspec matchers assume these are sane */
-	assert(item->nowildcard_len <= item->len &&
-	       item->prefix         <= item->len);
+	if (item->nowildcard_len > item->len ||
+	    item->prefix         > item->len) {
+		/*
+		 * We know something is fishy and we're going to die
+		 * anyway, so we don't care about following operation
+		 * to be expensive, despite the caller not asking for
+		 * an expensive submodule check. The potential expensive
+		 * operation here reduces the users head scratching
+		 * greatly, though.
+		 */
+		check_inside_submodule_expensive(item, match, elt, 1);
+		die ("BUG: item->nowildcard_len > item->len || item->prefix > item->len)");
+	}
+
 	return magic;
 }
 
diff --git a/t/t6134-pathspec-in-submodule.sh b/t/t6134-pathspec-in-submodule.sh
new file mode 100755
index 0000000000..2900d8d06e
--- /dev/null
+++ b/t/t6134-pathspec-in-submodule.sh
@@ -0,0 +1,33 @@
+#!/bin/sh
+
+test_description='test case exclude pathspec'
+
+TEST_CREATE_SUBMODULE=yes
+. ./test-lib.sh
+
+test_expect_success 'setup a submodule' '
+	git submodule add ./pretzel.bare sub &&
+	git commit -a -m "add submodule" &&
+	git submodule deinit --all
+'
+
+cat <<EOF >expect
+fatal: Pathspec 'sub/a' is in submodule 'sub'
+EOF
+
+test_expect_success 'error message for path inside submodule' '
+	echo a >sub/a &&
+	test_must_fail git add sub/a 2>actual &&
+	test_cmp expect actual
+'
+
+cat <<EOF >expect
+fatal: Pathspec '.' is in submodule 'sub'
+EOF
+
+test_expect_success 'error message for path inside submodule from within submodule' '
+	test_must_fail git -C sub add . 2>actual &&
+	test_cmp expect actual
+'
+
+test_done
-- 
2.11.0.rc2.32.gdde9519.dirty


^ permalink raw reply related

* Re: [PATCH 2/2] pathspec: give better message for submodule related pathspec error
From: Stefan Beller @ 2017-01-04 23:10 UTC (permalink / raw)
  To: Jeff King; +Cc: Junio C Hamano, git@vger.kernel.org, Brandon Williams
In-Reply-To: <20170104075506.sa5oa5bheykswkwn@sigill.intra.peff.net>

On Tue, Jan 3, 2017 at 11:55 PM, Jeff King <peff@peff.net> wrote:

> As this last bit is quoted from me, I won't deny that it's brilliant as
> usual.

obviously. :)

>
> But as this commit message needs to stand on its own, rather than as part of a
> larger discussion thread, it might be worth expanding "one of the cases"
> here. And talking about what's happening to the other cases.
>
> Like:
>
>   This assertion triggered for cases where there wasn't a programming
>   bug, but just bogus input. In particular, if the user asks for a
>   pathspec that is inside a submodule, we shouldn't assert() or
>   die("BUG"); we should tell the user their request is bogus.

alt. cont'd:

We already would do that if PATHSPEC_STRIP_SUBMODULE_SLASH_EXPENSIVE
is set, but we had to ask for this examination via a flag, because
it is expensive. At this point in code we know there is bogus input,
so all we would do is error out. For that case we can assume that the cost
of the expensive search is negligible compared to the users head scratching
that follows.

(This will appear in the patch I am about to send out)

^ permalink raw reply

* Re: [PATCHv5] pathspec: give better message for submodule related pathspec error
From: Brandon Williams @ 2017-01-04 23:16 UTC (permalink / raw)
  To: Stefan Beller; +Cc: peff, git, gitster
In-Reply-To: <20170104231027.7065-1-sbeller@google.com>

On 01/04, Stefan Beller wrote:
> Every once in a while someone complains to the mailing list to have
> run into this weird assertion[1]. The usual response from the mailing
> list is link to old discussions[2], and acknowledging the problem
> stating it is known.
> 
> This patch accomplishes two things:
> 
>   1. Switch assert() to die("BUG") to give a more readable message.
> 
>   2. Take one of the cases where we hit a BUG and turn it into a normal
>      "there was something wrong with the input" message.
> 
>      This assertion triggered for cases where there wasn't a programming
>      bug, but just bogus input. In particular, if the user asks for a
>      pathspec that is inside a submodule, we shouldn't assert() or
>      die("BUG"); we should tell the user their request is bogus.
> 
>      The only reason we did not check for it, is the expensive nature
>      of such a check, so callers avoid setting the flag
>      PATHSPEC_STRIP_SUBMODULE_SLASH_EXPENSIVE. However when we die due
>      to bogus input, the expense of cpu cycles spent outweighs the user
>      wondering what went wrong, so run that check unconditionally before
>      dying with a more generic error message.
> 
>      In case we found out that the path points inside a submodule, but the
>      caller did not ask for PATHSPEC_STRIP_SUBMODULE_SLASH_EXPENSIVE, we
>      should not silently fix the pathspec to just point at the submodule,
>      as that would confuse callers.
> 
> To make this happen, specifically the second part, move the check for
> being inside a submodule into a function and call it either when
> PATHSPEC_STRIP_SUBMODULE_SLASH_EXPENSIVE is set or when we are in the
> buggy case to give a better error message.
> 
> Note: There is this one special case ("git -C submodule add .") in which
> we call check_inside_submodule_expensive two times, once for checking
> PATHSPEC_STRIP_SUBMODULE_SLASH_EXPENSIVE and once in the code path
> handling the buggy user input. For this to work correctly we need to adapt
> the conditions in the check for being inside the submodule to account for
> the prior run to have taken effect.
> 
> [1] https://www.google.com/search?q=item-%3Enowildcard_len
> [2] http://git.661346.n2.nabble.com/assert-failed-in-submodule-edge-case-td7628687.html
>     https://www.spinics.net/lists/git/msg249473.html
> 
> Helped-by: Jeff King <peff@peff.net>
> Helped-by: Junio C Hamano <gitster@pobox.com>
> Signed-off-by: Stefan Beller <sbeller@google.com>
> ---
> 
> This is just rerolling the second patch of that "make the assert go away",
> asking for opinions again.
> 
> I agree with Brandon that pathspec code is not the ideal place to
> check for issues with submodules. However we should give the best error
> message possible for the user, so running this diagnosis is fine by me.
> 
> Thanks,
> Stefan
> 
>  pathspec.c                       | 63 +++++++++++++++++++++++++++-------------
>  t/t6134-pathspec-in-submodule.sh | 33 +++++++++++++++++++++
>  2 files changed, 76 insertions(+), 20 deletions(-)
>  create mode 100755 t/t6134-pathspec-in-submodule.sh
> 
> diff --git a/pathspec.c b/pathspec.c
> index 22ca74a126..41e0dac1df 100644
> --- a/pathspec.c
> +++ b/pathspec.c
> @@ -88,6 +88,34 @@ static void prefix_short_magic(struct strbuf *sb, int prefixlen,
>  	strbuf_addf(sb, ",prefix:%d)", prefixlen);
>  }
>  
> +static void check_inside_submodule_expensive(struct pathspec_item *item,
> +					     char *match,
> +					     const char *elt, int die_inside)
> +{
> +	int i;
> +	for (i = 0; i < active_nr; i++) {
> +		struct cache_entry *ce = active_cache[i];
> +		int ce_len = ce_namelen(ce);
> +
> +		if (!S_ISGITLINK(ce->ce_mode))
> +			continue;
> +
> +		if (item->len < ce_len ||
> +		    !(match[ce_len] == '/' || match[ce_len] == '\0') ||
> +		    memcmp(ce->name, match, ce_len))
> +			continue;
> +
> +		if (item->len != ce_len + 1 || die_inside)
> +			die (_("Pathspec '%s' is in submodule '%.*s'"),
> +			     elt, ce_len, ce->name);
> +
> +		/* strip trailing slash */
> +		item->len--;
> +		match[item->len] = '\0';
> +		break;
> +	}
> +}
> +
>  /*
>   * Take an element of a pathspec and check for magic signatures.
>   * Append the result to the prefix. Return the magic bitmap.
> @@ -273,24 +301,7 @@ static unsigned prefix_pathspec(struct pathspec_item *item,
>  	}
>  
>  	if (flags & PATHSPEC_STRIP_SUBMODULE_SLASH_EXPENSIVE)
> -		for (i = 0; i < active_nr; i++) {
> -			struct cache_entry *ce = active_cache[i];
> -			int ce_len = ce_namelen(ce);
> -
> -			if (!S_ISGITLINK(ce->ce_mode))
> -				continue;
> -
> -			if (item->len <= ce_len || match[ce_len] != '/' ||
> -			    memcmp(ce->name, match, ce_len))
> -				continue;
> -			if (item->len == ce_len + 1) {
> -				/* strip trailing slash */
> -				item->len--;
> -				match[item->len] = '\0';
> -			} else
> -				die (_("Pathspec '%s' is in submodule '%.*s'"),
> -				     elt, ce_len, ce->name);
> -		}
> +		check_inside_submodule_expensive(item, match, elt, 0);
>  
>  	if (magic & PATHSPEC_LITERAL)
>  		item->nowildcard_len = item->len;
> @@ -313,8 +324,20 @@ static unsigned prefix_pathspec(struct pathspec_item *item,
>  	}
>  
>  	/* sanity checks, pathspec matchers assume these are sane */
> -	assert(item->nowildcard_len <= item->len &&
> -	       item->prefix         <= item->len);
> +	if (item->nowildcard_len > item->len ||
> +	    item->prefix         > item->len) {
> +		/*
> +		 * We know something is fishy and we're going to die
> +		 * anyway, so we don't care about following operation
> +		 * to be expensive, despite the caller not asking for
> +		 * an expensive submodule check. The potential expensive
> +		 * operation here reduces the users head scratching
> +		 * greatly, though.
> +		 */
> +		check_inside_submodule_expensive(item, match, elt, 1);
> +		die ("BUG: item->nowildcard_len > item->len || item->prefix > item->len)");
> +	}
> +
>  	return magic;
>  }
>  
> diff --git a/t/t6134-pathspec-in-submodule.sh b/t/t6134-pathspec-in-submodule.sh
> new file mode 100755
> index 0000000000..2900d8d06e
> --- /dev/null
> +++ b/t/t6134-pathspec-in-submodule.sh
> @@ -0,0 +1,33 @@
> +#!/bin/sh
> +
> +test_description='test case exclude pathspec'
> +
> +TEST_CREATE_SUBMODULE=yes
> +. ./test-lib.sh
> +
> +test_expect_success 'setup a submodule' '
> +	git submodule add ./pretzel.bare sub &&
> +	git commit -a -m "add submodule" &&
> +	git submodule deinit --all
> +'
> +
> +cat <<EOF >expect
> +fatal: Pathspec 'sub/a' is in submodule 'sub'
> +EOF
> +
> +test_expect_success 'error message for path inside submodule' '
> +	echo a >sub/a &&
> +	test_must_fail git add sub/a 2>actual &&
> +	test_cmp expect actual
> +'
> +
> +cat <<EOF >expect
> +fatal: Pathspec '.' is in submodule 'sub'
> +EOF
> +
> +test_expect_success 'error message for path inside submodule from within submodule' '
> +	test_must_fail git -C sub add . 2>actual &&
> +	test_cmp expect actual
> +'
> +
> +test_done

I haven't taken a through look at this patch but I think you may want to
base it off of 'origin/bw/pathspec-cleanup' series as the changes made in this
patch now conflict with that series.

Also I still don't really think this solves the problem of telling the
user what is wrong, which is that the submodule's gitdir is gone.

-- 
Brandon Williams

^ permalink raw reply

* Re: [PATCHv5] pathspec: give better message for submodule related pathspec error
From: Stefan Beller @ 2017-01-04 23:28 UTC (permalink / raw)
  To: Brandon Williams; +Cc: Jeff King, git@vger.kernel.org, Junio C Hamano
In-Reply-To: <20170104231605.GB68241@google.com>

> I haven't taken a through look at this patch but I think you may want to
> base it off of 'origin/bw/pathspec-cleanup' series as the changes made in this
> patch now conflict with that series.

eh right, I forgot to mention this in the notes, it requires
sb/submodule-embed-gitdir as well, so I'll have to figure that out.

>
> Also I still don't really think this solves the problem of telling the
> user what is wrong, which is that the submodule's gitdir is gone.
>

The "git dir gone" is not a big deal IMHO as a deinitialized submodule
is perfectly fine (e.g. not initialized). The errors as I tested in Gerrit,
a superproject that contains submodules in plugins/* :

    : gerrit/plugins/cookbook-plugin$ git add .
    fatal: Pathspec '.' is in submodule 'plugins/cookbook-plugin'
    : gerrit/plugins/cookbook-plugin$ cd ..
    : gerrit/plugins$ git add cookbook-plugin/a
    fatal: Pathspec 'cookbook-plugin/a' is in submodule
'plugins/cookbook-plugin'
    : gerrit/plugins$ git add cookbook-plugin/.
    : gerrit/plugins$ git add cookbook-plugin/./.
    : gerrit/plugins$

I think that is perfect behavior for now, as it reliably detects
(a) the submodule being there and (b) if you are in there, no
matter if there is a .git dir or not.

The same error coming up if the submodule is initialized and valid, e.g.

    : gerrit/plugins$ git submodule update --init cookbook-plugin
    : gerrit/plugins$ git add cookbook-plugin/a/.
    fatal: Pathspec 'cookbook-plugin/a/.' is in submodule
'plugins/cookbook-plugin'

So I think this is pretty much exactly what we want for now:
* if PATHSPEC_STRIP_SUBMODULE_SLASH_EXPENSIVE is set
   we keep the behavior as is and do the expensive thing
* if the caller wants to use path inside of a submodule no matter
  the git dir of the submodule, then set the CHEAP flag instead
* in case of the assert (that I originally wanted to fix), we fall back to the
  EXPENSIVE thing reporting the error message that we already reported
  in such cases.

TL;DR: I was rather asking about the code being a viable;
by now I am convinced this is the correct behavior. ;)

^ permalink raw reply

* RFC: --force-with-lease default behaviour
From: G. Sylvie Davies @ 2017-01-05  2:34 UTC (permalink / raw)
  To: git

Right now the default variant does this:

> --force-with-lease alone, without specifying the details, will protect all remote refs that are going to be updated by requiring their current value to be the same as the remote-tracking branch we have for them.

The problem is people sometimes run "git fetch".   And so "git push
--force-with-lease" is going to do the push even if the local version
is stale.

Instead I think the default behavior should require that the remote
ref's current value be equal to the merge-base of the local-branch and
remote-tracking-branch.

Here's an example (password is "test" for the push):

git clone http://test@vm.bit-booster.com/bitbucket/scm/bb/a.git
cd a
git checkout bugfix/TKT-123
git reset --hard HEAD~1   (to simulate situation where local is stale,
but remote is up to date)

At this point "git push --force-with-lease" is going to work.   But I
think it shouldn't.   (Note: I use push.default = simple).

Here's how I think it should work:

git push --force-with-lease=bugfix/TKT-123:$(git merge-base HEAD
origin/bugfix/TKT-123)
To http://vm.bit-booster.com/bitbucket/scm/bb/a.git
 ! [rejected]        bugfix/TKT-123 -> bugfix/TKT-123 (stale info)

For now I'm happy with this alias:

git config --global alias.please '!sh -c "git push
--force-with-lease=$(git rev-parse --abbrev-ref HEAD):$(git merge-base
HEAD @{u})"'

But I'd like to put together a patch if people are interested in a
tweak like this to the --force-with-lease default behaviour.  I
haven't written much C in my life, but thought this might make a good
force-myself-to-learn-C exercise.

- Sylvie Davies

ps.  I never thought about the fetch problem with --force-with-lease
until reading https://developer.atlassian.com/blog/2015/04/force-with-lease/
and https://buddyreno.me/git-please-a182f28efeb5#.s291gh5jn , so
thanks to them!

^ permalink raw reply

* Re: RFC: --force-with-lease default behaviour
From: G. Sylvie Davies @ 2017-01-05  6:52 UTC (permalink / raw)
  To: git
In-Reply-To: <CAAj3zPz-jMVoxNTRZ0iR1ZPTFh873gEo33QjynBE1vaHsMmg3A@mail.gmail.com>

On Wed, Jan 4, 2017 at 6:34 PM, G. Sylvie Davies <sylvie@bit-booster.com> wrote:
> Right now the default variant does this:
>
>> --force-with-lease alone, without specifying the details, will protect all remote refs that are going to be updated by requiring their current value to be the same as the remote-tracking branch we have for them.
>
> The problem is people sometimes run "git fetch".   And so "git push
> --force-with-lease" is going to do the push even if the local version
> is stale.
>
> Instead I think the default behavior should require that the remote
> ref's current value be equal to the merge-base of the local-branch and
> remote-tracking-branch.
>
> Here's an example (password is "test" for the push):
>
> git clone http://test@vm.bit-booster.com/bitbucket/scm/bb/a.git
> cd a
> git checkout bugfix/TKT-123
> git reset --hard HEAD~1   (to simulate situation where local is stale,
> but remote is up to date)
>
> At this point "git push --force-with-lease" is going to work.   But I
> think it shouldn't.   (Note: I use push.default = simple).
>
> Here's how I think it should work:
>
> git push --force-with-lease=bugfix/TKT-123:$(git merge-base HEAD
> origin/bugfix/TKT-123)
> To http://vm.bit-booster.com/bitbucket/scm/bb/a.git
>  ! [rejected]        bugfix/TKT-123 -> bugfix/TKT-123 (stale info)
>
>
> For now I'm happy with this alias:
>
> git config --global alias.please '!sh -c "git push
> --force-with-lease=$(git rev-parse --abbrev-ref HEAD):$(git merge-base
> HEAD @{u})"'
>

Nevermind!   I realize this essentially removes the "--force" and
turns it into the original non-forced "fast-forwardable" only style
push.   [BLUSH!]

I wonder if there's anything one could do to help those who type "git
fetch" and still want to enjoy "--force-with-lease"...


> But I'd like to put together a patch if people are interested in a
> tweak like this to the --force-with-lease default behaviour.  I
> haven't written much C in my life, but thought this might make a good
> force-myself-to-learn-C exercise.
>
>
> - Sylvie Davies
>
> ps.  I never thought about the fetch problem with --force-with-lease
> until reading https://developer.atlassian.com/blog/2015/04/force-with-lease/
> and https://buddyreno.me/git-please-a182f28efeb5#.s291gh5jn , so
> thanks to them!

^ permalink raw reply

* Re: [PATCH 1/2] asciidoctor: fix user-manual to be built by `asciidoctor`
From: Lars Schneider @ 2017-01-05 10:05 UTC (permalink / raw)
  To: Jeff King
  Cc: Johannes Schindelin, git, 마누엘,
	Junio C Hamano
In-Reply-To: <20170104080852.bmlmtzxhjx4qt74f@sigill.intra.peff.net>


> On 04 Jan 2017, at 09:08, Jeff King <peff@peff.net> wrote:
> 
> On Mon, Jan 02, 2017 at 05:03:57PM +0100, Johannes Schindelin wrote:
> 
>> From: =?UTF-8?q?=EB=A7=88=EB=88=84=EC=97=98?= <nalla@hamal.uberspace.de>
>> 
>> The `user-manual.txt` is designed as a `book` but the `Makefile` wants
>> to build it as an `article`. This seems to be a problem when building
>> the documentation with `asciidoctor`. Furthermore the parts *Git
>> Glossary* and *Appendix B* had no subsections which is not allowed when
>> building with `asciidoctor`. So lets add a *dummy* section.
> 
> The git-scm.com site uses asciidoctor, too, and I think I have seen some
> oddness with the rendering though. So in general I am in favor of making
> things work under both asciidoc and asciidoctor.

I am not familiar with both tools but it sounds to me as if "asciidoctor"
is kind of the "lowest common denominator". Is this true? If yes, would it
make sense to switch TravisCI [1] to asciidocter if this change gets merged?

- Lars

[1] https://github.com/git/git/blob/master/.travis.yml#L48


^ permalink raw reply

* git branch -D doesn't work with deleted worktree
From: Roland Illig @ 2017-01-05 10:06 UTC (permalink / raw)
  To: git@vger.kernel.org

Git 2.11.0 gives a wrong error message after the following commands:

$ git init
$ echo hello >file
$ git add file
$ git commit -m "message"
$ git worktree add ../worktree
$ rm -rf ../worktree
$ git br -D worktree
error: Cannot delete branch 'worktree' checked out at '../worktree'

Since ../worktree has been deleted, there cannot be anything checked out at that location.

In my opinion, deleting the branch should just work. Especially since I used the -D option and the "git worktree" documentation says "When you are done with a linked working tree you can simply delete it."

Regards,
Roland

^ permalink raw reply

* core.sshCommand and url.*.insteadOf for submodules
From: Stefan Schindler @ 2017-01-05 10:09 UTC (permalink / raw)
  To: git

Hello mailing list,

it seems like that the `core.sshCommand` and `url.*.insteadOf`
configuration settings do not apply to `git submodule update --init`
(and probably related) calls.

Is this intentional?

My scenario is as follows: I use 2 SSH keys for GitHub, for private and
work-related repositories. My default key is my private key. So when I
clone a work repository and try getting the submodules, `git submodule
update --init` fails. This is also the case when setting
`core.sshCommand` and `url.*.insteadOf` (useful for substituting
"github.com" by some ~/.ssh/config'ured host).

Greetings,
Stefan Schindler

^ permalink raw reply

* Refreshing index timestamps without reading content
From: Quentin Casasnovas @ 2017-01-05 11:23 UTC (permalink / raw)
  To: git

[-- Attachment #1: Type: text/plain, Size: 2657 bytes --]

Hi guys,

Apologies if this is documented somewhere, I have fairly bad search vudu
skills.

I'm looking for a way to cause a full refresh of the index without causing
any read of the files, basically telling git "trust me, all worktree files
are matching the index, but their stat information have changed".  I have
read about the update-index --assume-unchanged and --skip-worktree flags in
the documentation, but these do not cause any index refresh - rather, they
fake that the respective worktree files are matching the index until you
remove those assume-unchanged/skip-worktree bits.

This might sound like a really weird thing to do, but I do have a use case
for it - we have some build farm setup where the resulting objects of a
compilation are stored on a shared server.  The source files are not stored
on the shared server, but locally on each of the build server (as to
decrease network load and make good use of local storage as caches).

We then use an onion filesystem to mount the compiled objects on top of the
local sources - and change the modification time of the source to be older
than the object files, so that on subsequent builds, make does not rebuild
the whole world.

This works fine except for one thing, after changing the mtime of the
source files, the first subsequent git command needing to compare the tree
with the index will take a LONG time since it will read all of the object
content:

  cd linux-2.6

  # Less than a second  when the index is up to date
  time git status > /dev/null
  git status 0.06s user 0.09s system 172% cpu 0.087 total
                                              ~~~~~~~~~~~

  # Change the mtime..
  git ls-tree -r --name-only HEAD | xargs -n 1024 touch

  # Now 30s..
  time git status > /dev/null
  git status  2.73s user 1.79s system 13% cpu 32.453 total
                                              ~~~~~~~~~~~~

The timing information above was captured on my laptop SSD and the penalty
is obviously much higher on spinning disks - especially when this operation
is done on *hundreds* of different work tree in parallel, all hosted on the
same filesystem (it can take tens of minutes!).

Is there any way to tell git, after the git ls-tree command above, to
refresh its stat cache information and trust us that the file content has
not changed, as to avoid any useless file read (though it will obviously
will have to stat all of them, but that's not something we can really
avoid)

If not, I am willing to implement a --assume-content-unchanged to the git
update-index if you guys don't see something fundamentally wrong with this
approach.

Thanks for any hints you can give! :)

Q

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 801 bytes --]

^ permalink raw reply

* Re: [PATCH 2/4] t7610: make tests more independent and debuggable
From: Simon Ruderich @ 2017-01-05 12:20 UTC (permalink / raw)
  To: Richard Hansen; +Cc: git, davvid, j6t
In-Reply-To: <20170104005042.51530-3-hansenr@google.com>

[-- Attachment #1: Type: text/plain, Size: 671 bytes --]

On Tue, Jan 03, 2017 at 07:50:40PM -0500, Richard Hansen wrote:
> [snip]
> @@ -145,8 +148,13 @@ test_expect_success 'mergetool in subdir' '
>  '
>
>  test_expect_success 'mergetool on file in parent dir' '
> +	git reset --hard &&
> +	git checkout -b test$test_count branch1 &&
> +	git submodule update -N &&
>  	(
>  		cd subdir &&
> +		test_must_fail git merge master >/dev/null 2>&1 &&
> +		( yes "" | git mergetool file3 >/dev/null 2>&1 ) &&

This change seems unrelated to the changes mentioned in the
commit message. Was it intended?

Regards
Simon
-- 
+ privacy is necessary
+ using gnupg http://gnupg.org
+ public key id: 0x92FEFDB7E44C32F9

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply

* Re: [PATCH 1/2] asciidoctor: fix user-manual to be built by `asciidoctor`
From: Johannes Schindelin @ 2017-01-05 13:49 UTC (permalink / raw)
  To: Lars Schneider; +Cc: Jeff King, git, 마누엘, Junio C Hamano
In-Reply-To: <1D7C66EA-E87A-4154-ACAC-8045D28477D2@gmail.com>

Hi Lars,

On Thu, 5 Jan 2017, Lars Schneider wrote:

> > On 04 Jan 2017, at 09:08, Jeff King <peff@peff.net> wrote:
> > 
> > On Mon, Jan 02, 2017 at 05:03:57PM +0100, Johannes Schindelin wrote:
> > 
> >> From: =?UTF-8?q?=EB=A7=88=EB=88=84=EC=97=98?= <nalla@hamal.uberspace.de>
> >> 
> >> The `user-manual.txt` is designed as a `book` but the `Makefile`
> >> wants to build it as an `article`. This seems to be a problem when
> >> building the documentation with `asciidoctor`. Furthermore the parts
> >> *Git Glossary* and *Appendix B* had no subsections which is not
> >> allowed when building with `asciidoctor`. So lets add a *dummy*
> >> section.
> > 
> > The git-scm.com site uses asciidoctor, too, and I think I have seen
> > some oddness with the rendering though. So in general I am in favor of
> > making things work under both asciidoc and asciidoctor.
> 
> I am not familiar with both tools but it sounds to me as if
> "asciidoctor" is kind of the "lowest common denominator". Is this true?
> If yes, would it make sense to switch TravisCI [1] to asciidocter if
> this change gets merged?

It is true that asciidoc typically parses whatever asciidoctor parses,
but not vice versa.

In that light, I would love to see our Travis runs to switch to
asciidoctor.

For the record, this is my local config.mak in the asciidoctor worktree:

-- snip --
ASCIIDOC=asciidoctor
ASCIIDOC_HTML=html5
ASCIIDOC_DOCBOOK=docbook45
ASCIIDOC_EXTRA="-alitdd=&\#45;&\#45;"
ASCIIDOC_CONF=-I"/mingw64/lib/asciidoctor-extensions" -rman-inline-macro
-- snap --

Please note that the extensions are required to build correctly (and we
require this patch, too, unfortunately:
https://github.com/git-for-windows/MINGW-packages/blob/master/mingw-w64-asciidoctor-extensions/0001-man-inline-macro-enable-linkgit-syntax.patch).

Ciao,
Dscho

^ permalink raw reply

* Re: core.sshCommand and url.*.insteadOf for submodules
From: Stefan Beller @ 2017-01-05 13:53 UTC (permalink / raw)
  To: Stefan Schindler, Jacob Keller, Jeff King; +Cc: git@vger.kernel.org
In-Reply-To: <17f2724d-7001-203e-f0b5-cf586703a41a@boxbox.org>

On Thu, Jan 5, 2017 at 2:09 AM, Stefan Schindler <stsch@boxbox.org> wrote:
> Hello mailing list,
>
> it seems like that the `core.sshCommand` and `url.*.insteadOf`
> configuration settings do not apply to `git submodule update --init`
> (and probably related) calls.
>
> Is this intentional?

The original design of submodules was to have a submodule to be a
standalone repository, such that e.g. its options are read from its own
config file. So the original vision was to decouple the init and clone of the
submodule to allow the user to change the settings:

    git submodule init
    # copies the submodule.<name>.URL from .gitmodules to .git/config
    # user realizes that the URL is not a good idea, such that
    git  config submodule.<name>.url http://${company-mirror}/submodule
    # now the url is fixed so
    git submodule update

I guess it could be a good idea to propagate some settings from the
superproject to the submodules when they are cloned.

>
> My scenario is as follows: I use 2 SSH keys for GitHub, for private and
> work-related repositories. My default key is my private key. So when I
> clone a work repository and try getting the submodules, `git submodule
> update --init` fails. This is also the case when setting
> `core.sshCommand` and `url.*.insteadOf` (useful for substituting
> "github.com" by some ~/.ssh/config'ured host).
>

which is why e.g.
git config --global url.https://github.com/.insteadOf git://github.com/
is not your preferred way here.

There was some discussion a couple of weeks ago, which settings
should be kept when recursing into submodules, Jacob and Jeff cc'd.

> Greetings,
> Stefan Schindler

^ permalink raw reply

* Re: git branch -D doesn't work with deleted worktree
From: Stefan Beller @ 2017-01-05 14:02 UTC (permalink / raw)
  To: Roland Illig, Duy Nguyen; +Cc: git@vger.kernel.org
In-Reply-To: <4D106F0FF3D29E4FA1D91C1A31CE4C3501B8DEF2E6@email.novomind.com>

On Thu, Jan 5, 2017 at 2:06 AM, Roland Illig <rillig@novomind.com> wrote:
> Git 2.11.0 gives a wrong error message after the following commands:
>
> $ git init
> $ echo hello >file
> $ git add file
> $ git commit -m "message"
> $ git worktree add ../worktree
> $ rm -rf ../worktree
> $ git br -D worktree
> error: Cannot delete branch 'worktree' checked out at '../worktree'
>
> Since ../worktree has been deleted, there cannot be anything checked out at that location.
>
> In my opinion, deleting the branch should just work. Especially since I used the -D option and the "git worktree" documentation says "When you are done with a linked working tree you can simply delete it."
>
> Regards,
> Roland
>

^ permalink raw reply

* Regression: Ctrl-c from the pager in an alias exits it
From: Trygve Aaberge @ 2017-01-05 14:25 UTC (permalink / raw)
  To: git

I'm experiencing an issue when using aliases for commands that open the pager.
When I press Ctrl-c from the pager, it exits. This does not happen when I
don't use an alias and did not happen before. It causes problems because
Ctrl-c is also used for other things, such as canceling a search that hasn't
completed.

To reproduce, create e.g. the alias `l = log` and run `git l`. Then press
Ctrl-c. The expected behavior is that nothing happens. The actual behavior is
that the pager exits.

I bisected the repo, and found that the commit 86d26f240 [0] introduced the
issue.

[0]: 86d26f240 (setup.c: re-fix d95138e (setup: set env $GIT_WORK_TREE
                when .. - 2015-12-20)

-- 
Trygve Aaberge

^ permalink raw reply

* Re: [PATCH 2/4] t7610: make tests more independent and debuggable
From: Richard Hansen @ 2017-01-05 15:46 UTC (permalink / raw)
  To: Stefan Beller; +Cc: git@vger.kernel.org, David Aguilar, Johannes Sixt
In-Reply-To: <CAGZ79kb9UvV6AaRLBP5OzyRtTTXarRZRZDMd_1k5n9CrgbVr5A@mail.gmail.com>

[-- Attachment #1: Type: text/plain, Size: 1530 bytes --]



On 2017-01-04 15:27, Stefan Beller wrote:
> On Tue, Jan 3, 2017 at 4:50 PM, Richard Hansen <hansenr@google.com> wrote:
>> If a test fails it might leave the repository in a strange state.  Add
>> 'git reset --hard' at the beginning of each test to increase the odds
>> of passing when an earlier test fails.
>
> So each test is cleaning up the previous test, which *may* confuse
> a reader ("how is the reset --hard relevant for this test? Oooh it's
> just a cleanup").
>
> We could put it another way by having each test itself make clean
> up after itself via
>
>   test_when_finished "git reset --hard" &&
>   ..
>
> at the beginning of each test.
> This would produce the same order of operations, i.e. a
> reset run between each test, but semantically tells the reader
> that the reset is part of the current test cleaning up after itself,
> as "reset" is operation for this particular test to cleanup.
> Does that make sense?

I like that idea; thanks for the suggestion.  I'll cook up a reroll.

>>
>> Also use test-specific branches to avoid interfering with later tests
>> and to make the tests easier to debug.
>
> That sounds great!
> Though in the code I only spot one occurrence for
>
> +       git checkout -b test$test_count branch1 &&
>
> so maybe that could be part of the first patch in the series?

There are two; the other is buried in the change for the 'mergetool on 
file in parent dir' test.  I'll improve the commit message to make it 
more clear.

Thanks for the review,
Richard


>
> Thanks,
> Stefan

[-- Attachment #2: S/MIME Cryptographic Signature --]
[-- Type: application/pkcs7-signature, Size: 4845 bytes --]

^ permalink raw reply

* Re: [PATCH 2/4] t7610: make tests more independent and debuggable
From: Richard Hansen @ 2017-01-05 15:48 UTC (permalink / raw)
  To: Simon Ruderich; +Cc: git, davvid, j6t
In-Reply-To: <20170105122037.t4k7cby6klqb4lci@ruderich.org>

[-- Attachment #1: Type: text/plain, Size: 793 bytes --]



On 2017-01-05 07:20, Simon Ruderich wrote:
> On Tue, Jan 03, 2017 at 07:50:40PM -0500, Richard Hansen wrote:
>> [snip]
>> @@ -145,8 +148,13 @@ test_expect_success 'mergetool in subdir' '
>>  '
>>
>>  test_expect_success 'mergetool on file in parent dir' '
>> +	git reset --hard &&
>> +	git checkout -b test$test_count branch1 &&
>> +	git submodule update -N &&
>>  	(
>>  		cd subdir &&
>> +		test_must_fail git merge master >/dev/null 2>&1 &&
>> +		( yes "" | git mergetool file3 >/dev/null 2>&1 ) &&
>
> This change seems unrelated to the changes mentioned in the
> commit message. Was it intended?

Yes, that is intentional; without this change, the test depends on the 
successful completion of the previous test.  I'll improve the commit 
message.

Thanks,
Richard

>
> Regards
> Simon

[-- Attachment #2: S/MIME Cryptographic Signature --]
[-- Type: application/pkcs7-signature, Size: 4845 bytes --]

^ permalink raw reply

* Re: [PATCH 2/4] t7610: make tests more independent and debuggable
From: Richard Hansen @ 2017-01-05 15:41 UTC (permalink / raw)
  To: Stefan Beller; +Cc: git@vger.kernel.org, David Aguilar, Johannes Sixt
In-Reply-To: <CAGZ79kb9UvV6AaRLBP5OzyRtTTXarRZRZDMd_1k5n9CrgbVr5A@mail.gmail.com>

On 2017-01-04 15:27, Stefan Beller wrote:
> On Tue, Jan 3, 2017 at 4:50 PM, Richard Hansen <hansenr@google.com> wrote:
>> If a test fails it might leave the repository in a strange state.  Add
>> 'git reset --hard' at the beginning of each test to increase the odds
>> of passing when an earlier test fails.
>
> So each test is cleaning up the previous test, which *may* confuse
> a reader ("how is the reset --hard relevant for this test? Oooh it's
> just a cleanup").
>
> We could put it another way by having each test itself make clean
> up after itself via
>
>   test_when_finished "git reset --hard" &&
>   ..
>
> at the beginning of each test.
> This would produce the same order of operations, i.e. a
> reset run between each test, but semantically tells the reader
> that the reset is part of the current test cleaning up after itself,
> as "reset" is operation for this particular test to cleanup.
> Does that make sense?

I like that idea; thanks for the suggestion.  I'll cook up a reroll.

>>
>> Also use test-specific branches to avoid interfering with later tests
>> and to make the tests easier to debug.
>
> That sounds great!
> Though in the code I only spot one occurrence for
>
> +       git checkout -b test$test_count branch1 &&
>
> so maybe that could be part of the first patch in the series?

There are two; the other is buried in the change for the 'mergetool on 
file in parent dir' test.

Thanks for the review,
Richard


>
> Thanks,
> Stefan


^ permalink raw reply

* Re: core.sshCommand and url.*.insteadOf for submodules
From: Jeff King @ 2017-01-05 17:02 UTC (permalink / raw)
  To: Stefan Beller; +Cc: Stefan Schindler, Jacob Keller, git@vger.kernel.org
In-Reply-To: <CAGZ79kb8TTaJBmVCWK3jnr4RvGjmfmsj3-ieT87wzyFLYi5frQ@mail.gmail.com>

On Thu, Jan 05, 2017 at 05:53:30AM -0800, Stefan Beller wrote:

> > My scenario is as follows: I use 2 SSH keys for GitHub, for private and
> > work-related repositories. My default key is my private key. So when I
> > clone a work repository and try getting the submodules, `git submodule
> > update --init` fails. This is also the case when setting
> > `core.sshCommand` and `url.*.insteadOf` (useful for substituting
> > "github.com" by some ~/.ssh/config'ured host).
> 
> which is why e.g.
> git config --global url.https://github.com/.insteadOf git://github.com/
> is not your preferred way here.
> 
> There was some discussion a couple of weeks ago, which settings
> should be kept when recursing into submodules, Jacob and Jeff cc'd.

The only discussion I recall was from last May. But that was about "-c"
config on the command-line, and the end decision was that we pass it all
down to submodules, per 89044baa8b (submodule: stop sanitizing config
options, 2016-05-04).

I think the problem here is more about propagating options from the
superproject's repo-level config into the submodules. AFAIK we do not do
that at all, but I may have missed some patches in that area.

Another approach would be conditional config includes based on the repo
path. With the patches discussed in [1], you could do something like:

  git config --global include./path/to/work/repos.path .gitconfig-work
  git config -f ~/.gitconfig-work url.foo.insteadOf bar

-Peff

[1] http://public-inbox.org/git/20160626070617.30211-1-pclouds@gmail.com/

^ permalink raw reply

* Re: core.sshCommand and url.*.insteadOf for submodules
From: Stefan Beller @ 2017-01-05 17:30 UTC (permalink / raw)
  To: Jeff King; +Cc: Stefan Schindler, Jacob Keller, git@vger.kernel.org
In-Reply-To: <20170105170214.6fmsbj6ltbmpvcfb@sigill.intra.peff.net>

On Thu, Jan 5, 2017 at 9:02 AM, Jeff King <peff@peff.net> wrote:
> On Thu, Jan 05, 2017 at 05:53:30AM -0800, Stefan Beller wrote:
>
>> > My scenario is as follows: I use 2 SSH keys for GitHub, for private and
>> > work-related repositories. My default key is my private key. So when I
>> > clone a work repository and try getting the submodules, `git submodule
>> > update --init` fails. This is also the case when setting
>> > `core.sshCommand` and `url.*.insteadOf` (useful for substituting
>> > "github.com" by some ~/.ssh/config'ured host).
>>
>> which is why e.g.
>> git config --global url.https://github.com/.insteadOf git://github.com/
>> is not your preferred way here.
>>
>> There was some discussion a couple of weeks ago, which settings
>> should be kept when recursing into submodules, Jacob and Jeff cc'd.
>
> The only discussion I recall was from last May. But that was about "-c"
> config on the command-line, and the end decision was that we pass it all
> down to submodules, per 89044baa8b (submodule: stop sanitizing config
> options, 2016-05-04).

Oh, yeah that was the difference.

>
> I think the problem here is more about propagating options from the
> superproject's repo-level config into the submodules. AFAIK we do not do
> that at all, but I may have missed some patches in that area.

AFAIK there were no such patches yet.

>
> Another approach would be conditional config includes based on the repo
> path. With the patches discussed in [1], you could do something like:
>
>   git config --global include./path/to/work/repos.path .gitconfig-work
>   git config -f ~/.gitconfig-work url.foo.insteadOf bar

Or maybe we could specialize these patches to allow
includes from specific other repos, i.e. superproject(s) or worktrees.

>
> -Peff
>
> [1] http://public-inbox.org/git/20160626070617.30211-1-pclouds@gmail.com/

^ permalink raw reply

* Re: [PATCH 1/2] asciidoctor: fix user-manual to be built by `asciidoctor`
From: Jeff King @ 2017-01-05 16:45 UTC (permalink / raw)
  To: Lars Schneider
  Cc: Johannes Schindelin, git, 마누엘,
	Junio C Hamano
In-Reply-To: <1D7C66EA-E87A-4154-ACAC-8045D28477D2@gmail.com>

On Thu, Jan 05, 2017 at 11:05:29AM +0100, Lars Schneider wrote:

> > The git-scm.com site uses asciidoctor, too, and I think I have seen some
> > oddness with the rendering though. So in general I am in favor of making
> > things work under both asciidoc and asciidoctor.
> 
> I am not familiar with both tools but it sounds to me as if "asciidoctor"
> is kind of the "lowest common denominator". Is this true? If yes, would it
> make sense to switch TravisCI [1] to asciidocter if this change gets merged?

I don't think that's quite true.

The two programs produce different output for certain inputs. We tend to
see the cases where asciidoc produces the desired output and asciidoctor
doesn't, because traditionally the documentation was written _for_
asciidoc. So whenever asciidoctor diverges, it looks like a bug.

If people start developing and checking their work using asciidoctor[1],
then we'll see bugs going in the other direction.

As far as CI goes, I am not altogether convinced of the usefulness of
building the documentation. It's very expensive, and the failure mode is
rarely "whoops, running `make doc` failed". It's almost always that the
output looks subtly wrong, but that's very hard to check automatically.

[1] I think we've also traditionally considered asciidoc to be the
    definitive toolchain, and people using asciidoctor are free to
    submit patches to make things work correctly in both places. I'm not
    opposed to changing that attitude, as it seems like asciidoctor is
    faster and more actively maintained these days. But I suspect our
    build chain would need some improvements. Last time I tried building
    with AsciiDoctor it involved a lot manual tweaking of Makefile
    variables. It sounds like Dscho is doing it regularly, though. It
    should probably work out of the box (with something like
    USE_ASCIIDOCTOR=Yes) if we expect people to actually rely on it.

^ permalink raw reply

* [PATCHv6 2/2] pathspec: give better message for submodule related pathspec error
From: Stefan Beller @ 2017-01-05 19:29 UTC (permalink / raw)
  To: bmwill, peff, gitster; +Cc: git, Stefan Beller
In-Reply-To: <20170105192904.1107-1-sbeller@google.com>

Every once in a while someone complains to the mailing list to have
run into this weird assertion[1]. The usual response from the mailing
list is link to old discussions[2], and acknowledging the problem
stating it is known.

This patch accomplishes two things:

  1. Switch assert() to die("BUG") to give a more readable message.

  2. Take one of the cases where we hit a BUG and turn it into a normal
     "there was something wrong with the input" message.

     This assertion triggered for cases where there wasn't a programming
     bug, but just bogus input. In particular, if the user asks for a
     pathspec that is inside a submodule, we shouldn't assert() or
     die("BUG"); we should tell the user their request is bogus.

     The only reason we did not check for it, is the expensive nature
     of such a check, so callers avoid setting the flag
     PATHSPEC_STRIP_SUBMODULE_SLASH_EXPENSIVE. However when we die due
     to bogus input, the expense of CPU cycles spent outweighs the user
     wondering what went wrong, so run that check unconditionally before
     dying with a more generic error message.

Note: There is a case (e.g. "git -C submodule add .") in which we call
strip_submodule_slash_expensive, as git-add requests it via the flag
PATHSPEC_STRIP_SUBMODULE_SLASH_EXPENSIVE, but the assert used to
trigger nevertheless, because the flag PATHSPEC_LITERAL was not set,
such that we executed

	if (item->nowildcard_len < prefixlen)
		item->nowildcard_len = prefixlen;

and prefixlen was not adapted (e.g. it was computed from "submodule/")
So in the die_inside_submodule_path function we also need handle paths,
that were stripped before, i.e. are the exact submodule path. This
is why the conditions in die_inside_submodule_path are slightly
different than in strip_submodule_slash_expensive.

[1] https://www.google.com/search?q=item-%3Enowildcard_len
[2] http://git.661346.n2.nabble.com/assert-failed-in-submodule-edge-case-td7628687.html
    https://www.spinics.net/lists/git/msg249473.html

Helped-by: Jeff King <peff@peff.net>
Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Stefan Beller <sbeller@google.com>
---
 pathspec.c                       | 35 +++++++++++++++++++++++++++++++++--
 t/t6134-pathspec-in-submodule.sh | 33 +++++++++++++++++++++++++++++++++
 2 files changed, 66 insertions(+), 2 deletions(-)
 create mode 100755 t/t6134-pathspec-in-submodule.sh

diff --git a/pathspec.c b/pathspec.c
index d4efcf6662..42cd83c235 100644
--- a/pathspec.c
+++ b/pathspec.c
@@ -296,6 +296,27 @@ static void strip_submodule_slash_expensive(struct pathspec_item *item)
 	}
 }
 
+static void die_inside_submodule_path(struct pathspec_item *item)
+{
+	int i;
+
+	for (i = 0; i < active_nr; i++) {
+		struct cache_entry *ce = active_cache[i];
+		int ce_len = ce_namelen(ce);
+
+		if (!S_ISGITLINK(ce->ce_mode))
+			continue;
+
+		if (item->len < ce_len ||
+		    !(item->match[ce_len] == '/' || item->match[ce_len] == '\0') ||
+		    memcmp(ce->name, item->match, ce_len))
+			continue;
+
+		die(_("Pathspec '%s' is in submodule '%.*s'"),
+		    item->original, ce_len, ce->name);
+	}
+}
+
 /*
  * Perform the initialization of a pathspec_item based on a pathspec element.
  */
@@ -391,8 +412,18 @@ static void init_pathspec_item(struct pathspec_item *item, unsigned flags,
 	}
 
 	/* sanity checks, pathspec matchers assume these are sane */
-	assert(item->nowildcard_len <= item->len &&
-	       item->prefix         <= item->len);
+	if (item->nowildcard_len > item->len ||
+	    item->prefix         > item->len) {
+		/*
+		 * This case can be triggered by the user pointing us to a
+		 * pathspec inside a submodule, which is an input error.
+		 * Detect that here and complain, but fallback in the
+		 * non-submodule case to a BUG, as we have no idea what
+		 * would trigger that.
+		 */
+		die_inside_submodule_path(item);
+		die ("BUG: item->nowildcard_len > item->len || item->prefix > item->len)");
+	}
 }
 
 static int pathspec_item_cmp(const void *a_, const void *b_)
diff --git a/t/t6134-pathspec-in-submodule.sh b/t/t6134-pathspec-in-submodule.sh
new file mode 100755
index 0000000000..2900d8d06e
--- /dev/null
+++ b/t/t6134-pathspec-in-submodule.sh
@@ -0,0 +1,33 @@
+#!/bin/sh
+
+test_description='test case exclude pathspec'
+
+TEST_CREATE_SUBMODULE=yes
+. ./test-lib.sh
+
+test_expect_success 'setup a submodule' '
+	git submodule add ./pretzel.bare sub &&
+	git commit -a -m "add submodule" &&
+	git submodule deinit --all
+'
+
+cat <<EOF >expect
+fatal: Pathspec 'sub/a' is in submodule 'sub'
+EOF
+
+test_expect_success 'error message for path inside submodule' '
+	echo a >sub/a &&
+	test_must_fail git add sub/a 2>actual &&
+	test_cmp expect actual
+'
+
+cat <<EOF >expect
+fatal: Pathspec '.' is in submodule 'sub'
+EOF
+
+test_expect_success 'error message for path inside submodule from within submodule' '
+	test_must_fail git -C sub add . 2>actual &&
+	test_cmp expect actual
+'
+
+test_done
-- 
2.11.0.31.g919a8d0.dirty


^ permalink raw reply related

* [PATCHv6 1/2] submodule tests: don't use itself as a submodule
From: Stefan Beller @ 2017-01-05 19:29 UTC (permalink / raw)
  To: bmwill, peff, gitster; +Cc: git, Stefan Beller
In-Reply-To: <20170105192904.1107-1-sbeller@google.com>

In reality nobody would run "git submodule add ./. <submodule-path>"
to add the repository to itself as a submodule as this comes with some
nasty surprises, such as infinite recursion when cloning that repository.
However we do this all the time in the test suite, because most of the
time this was the most convenient way to test a very specific thing
for submodule behavior.

This provides an easier way to have submodules in tests, by just setting
TEST_CREATE_SUBMODULE to a non empty string, similar to
TEST_NO_CREATE_REPO.

Make use of it in those tests that add a submodule from ./. except for
the occurrence in create_lib_submodule_repo as there it seems we craft
a repository deliberately for both inside as well as outside use.

The name "pretzel.[non]bare" was chosen deliberate to not introduce
more strings to the test suite containing "sub[module]" as searching for
"sub" already yields a lot of hits from different contexts. "pretzel"
doesn't occur in the test suite yet, so it is a good candidate for
a potential remote for a submodule.

Signed-off-by: Stefan Beller <sbeller@google.com>
---
 t/lib-submodule-update.sh |  2 ++
 t/t7001-mv.sh             |  5 +++--
 t/t7507-commit-verbose.sh |  4 +++-
 t/t7800-difftool.sh       |  4 +++-
 t/test-lib-functions.sh   | 16 ++++++++++++++++
 5 files changed, 27 insertions(+), 4 deletions(-)

diff --git a/t/lib-submodule-update.sh b/t/lib-submodule-update.sh
index 79cdd34a54..58d76d9df8 100755
--- a/t/lib-submodule-update.sh
+++ b/t/lib-submodule-update.sh
@@ -44,6 +44,8 @@ create_lib_submodule_repo () {
 		git branch "no_submodule" &&
 
 		git checkout -b "add_sub1" &&
+		# Adding the repo itself as a submodule is a hack.
+		# Do not imitate this.
 		git submodule add ./. sub1 &&
 		git config -f .gitmodules submodule.sub1.ignore all &&
 		git config submodule.sub1.ignore all &&
diff --git a/t/t7001-mv.sh b/t/t7001-mv.sh
index e365d1ff77..6cb32f3a3a 100755
--- a/t/t7001-mv.sh
+++ b/t/t7001-mv.sh
@@ -1,6 +1,7 @@
 #!/bin/sh
 
 test_description='git mv in subdirs'
+TEST_CREATE_SUBMODULE=yes
 . ./test-lib.sh
 
 test_expect_success \
@@ -288,12 +289,12 @@ rm -f moved symlink
 test_expect_success 'setup submodule' '
 	git commit -m initial &&
 	git reset --hard &&
-	git submodule add ./. sub &&
+	git submodule add ./pretzel.bare sub &&
 	echo content >file &&
 	git add file &&
 	git commit -m "added sub and file" &&
 	mkdir -p deep/directory/hierarchy &&
-	git submodule add ./. deep/directory/hierarchy/sub &&
+	git submodule add ./pretzel.bare deep/directory/hierarchy/sub &&
 	git commit -m "added another submodule" &&
 	git branch submodule
 '
diff --git a/t/t7507-commit-verbose.sh b/t/t7507-commit-verbose.sh
index ed2653d46f..d269900afa 100755
--- a/t/t7507-commit-verbose.sh
+++ b/t/t7507-commit-verbose.sh
@@ -1,6 +1,7 @@
 #!/bin/sh
 
 test_description='verbose commit template'
+TEST_CREATE_SUBMODULE=yes
 . ./test-lib.sh
 
 write_script "check-for-diff" <<\EOF &&
@@ -74,11 +75,12 @@ test_expect_success 'diff in message is retained with -v' '
 
 test_expect_success 'submodule log is stripped out too with -v' '
 	git config diff.submodule log &&
-	git submodule add ./. sub &&
+	git submodule add ./pretzel.bare sub &&
 	git commit -m "sub added" &&
 	(
 		cd sub &&
 		echo "more" >>file &&
+		git add file &&
 		git commit -a -m "submodule commit"
 	) &&
 	(
diff --git a/t/t7800-difftool.sh b/t/t7800-difftool.sh
index 70a2de461a..d13a5d0453 100755
--- a/t/t7800-difftool.sh
+++ b/t/t7800-difftool.sh
@@ -7,6 +7,7 @@ test_description='git-difftool
 
 Testing basic diff tool invocation
 '
+TEST_CREATE_SUBMODULE=Yes
 
 . ./test-lib.sh
 
@@ -534,7 +535,8 @@ test_expect_success PERL 'difftool --no-symlinks detects conflict ' '
 '
 
 test_expect_success PERL 'difftool properly honors gitlink and core.worktree' '
-	git submodule add ./. submod/ule &&
+	git submodule add ./pretzel.bare submod/ule &&
+	test_commit -C submod/ule second_commit &&
 	test_config -C submod/ule diff.tool checktrees &&
 	test_config -C submod/ule difftool.checktrees.cmd '\''
 		test -d "$LOCAL" && test -d "$REMOTE" && echo good
diff --git a/t/test-lib-functions.sh b/t/test-lib-functions.sh
index 579e812506..aa327a7dff 100644
--- a/t/test-lib-functions.sh
+++ b/t/test-lib-functions.sh
@@ -800,6 +800,22 @@ test_create_repo () {
 		error "cannot run git init -- have you built things yet?"
 		mv .git/hooks .git/hooks-disabled
 	) || exit
+	if test -n "$TEST_CREATE_SUBMODULE"
+	then
+		(
+			cd "$repo"
+			TEST_CREATE_SUBMODULE=
+			export TEST_CREATE_SUBMODULE
+			test_create_repo "pretzel.nonbare"
+			test_commit -C "pretzel.nonbare" \
+				"create submodule" "submodule-file" \
+				"submodule-content" "submodule-tag" >&3 2>&4 ||
+				error "cannot run test_commit"
+			git clone --bare "pretzel.nonbare" \
+				  "pretzel.bare" >&3 2>&4 ||
+				  error "cannot clone into bare"
+		)
+	fi
 }
 
 # This function helps on symlink challenged file systems when it is not
-- 
2.11.0.31.g919a8d0.dirty


^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox