Git development
 help / color / mirror / Atom feed
* [PATCH v5 15/16] pathspec: small readability changes
From: Brandon Williams @ 2017-01-04 18:04 UTC (permalink / raw)
  To: git; +Cc: Brandon Williams, sbeller, pclouds, gitster
In-Reply-To: <20170104180411.150000-1-bmwill@google.com>

A few small changes to improve readability.  This is done by grouping related
assignments, adding blank lines, ensuring lines are <80 characters, and
adding additional comments.

Signed-off-by: Brandon Williams <bmwill@google.com>
---
 pathspec.c | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/pathspec.c b/pathspec.c
index f3a7a1d33..e53530e7a 100644
--- a/pathspec.c
+++ b/pathspec.c
@@ -67,11 +67,11 @@ static struct pathspec_magic {
 	char mnemonic; /* this cannot be ':'! */
 	const char *name;
 } pathspec_magic[] = {
-	{ PATHSPEC_FROMTOP, '/', "top" },
-	{ PATHSPEC_LITERAL,   0, "literal" },
-	{ PATHSPEC_GLOB,   '\0', "glob" },
-	{ PATHSPEC_ICASE,  '\0', "icase" },
-	{ PATHSPEC_EXCLUDE, '!', "exclude" },
+	{ PATHSPEC_FROMTOP,  '/', "top" },
+	{ PATHSPEC_LITERAL, '\0', "literal" },
+	{ PATHSPEC_GLOB,    '\0', "glob" },
+	{ PATHSPEC_ICASE,   '\0', "icase" },
+	{ PATHSPEC_EXCLUDE,  '!', "exclude" },
 };
 
 static void prefix_magic(struct strbuf *sb, int prefixlen, unsigned magic)
@@ -336,6 +336,7 @@ static unsigned prefix_pathspec(struct pathspec_item *item, unsigned flags,
 	if ((magic & PATHSPEC_LITERAL) && (magic & PATHSPEC_GLOB))
 		die(_("%s: 'literal' and 'glob' are incompatible"), elt);
 
+	/* Create match string which will be used for pathspec matching */
 	if (pathspec_prefix >= 0) {
 		match = xstrdup(copyfrom);
 		prefixlen = pathspec_prefix;
@@ -343,11 +344,16 @@ static unsigned prefix_pathspec(struct pathspec_item *item, unsigned flags,
 		match = xstrdup(copyfrom);
 		prefixlen = 0;
 	} else {
-		match = prefix_path_gently(prefix, prefixlen, &prefixlen, copyfrom);
+		match = prefix_path_gently(prefix, prefixlen,
+					   &prefixlen, copyfrom);
 		if (!match)
 			die(_("%s: '%s' is outside repository"), elt, copyfrom);
 	}
+
 	item->match = match;
+	item->len = strlen(item->match);
+	item->prefix = prefixlen;
+
 	/*
 	 * Prefix the pathspec (keep all magic) and assign to
 	 * original. Useful for passing to another command.
@@ -364,8 +370,6 @@ static unsigned prefix_pathspec(struct pathspec_item *item, unsigned flags,
 	} else {
 		item->original = xstrdup(elt);
 	}
-	item->len = strlen(item->match);
-	item->prefix = prefixlen;
 
 	if (flags & PATHSPEC_STRIP_SUBMODULE_SLASH_CHEAP)
 		strip_submodule_slash_cheap(item);
@@ -373,13 +377,14 @@ static unsigned prefix_pathspec(struct pathspec_item *item, unsigned flags,
 	if (flags & PATHSPEC_STRIP_SUBMODULE_SLASH_EXPENSIVE)
 		strip_submodule_slash_expensive(item);
 
-	if (magic & PATHSPEC_LITERAL)
+	if (magic & PATHSPEC_LITERAL) {
 		item->nowildcard_len = item->len;
-	else {
+	} else {
 		item->nowildcard_len = simple_length(item->match);
 		if (item->nowildcard_len < prefixlen)
 			item->nowildcard_len = prefixlen;
 	}
+
 	item->flags = 0;
 	if (magic & PATHSPEC_GLOB) {
 		/*
-- 
2.11.0.390.gc69c2f50cf-goog


^ permalink raw reply related

* [PATCH v5 13/16] pathspec: create parse_element_magic helper
From: Brandon Williams @ 2017-01-04 18:04 UTC (permalink / raw)
  To: git; +Cc: Brandon Williams, sbeller, pclouds, gitster
In-Reply-To: <20170104180411.150000-1-bmwill@google.com>

Factor out the logic responsible for the magic in a pathspec element
into its own function.

Also avoid calling into the parsing functions when
`PATHSPEC_LITERAL_PATH` is specified since it causes magic to be
ignored and all paths to be treated as literals.

Signed-off-by: Brandon Williams <bmwill@google.com>
---
 pathspec.c | 37 ++++++++++++++++++++-----------------
 1 file changed, 20 insertions(+), 17 deletions(-)

diff --git a/pathspec.c b/pathspec.c
index f6356bde1..00fcae4e1 100644
--- a/pathspec.c
+++ b/pathspec.c
@@ -245,6 +245,19 @@ static const char *parse_short_magic(unsigned *magic, const char *elem)
 	return pos;
 }
 
+static const char *parse_element_magic(unsigned *magic, int *prefix_len,
+				       const char *elem)
+{
+	if (elem[0] != ':' || get_literal_global())
+		return elem; /* nothing to do */
+	else if (elem[1] == '(')
+		/* longhand */
+		return parse_long_magic(magic, prefix_len, elem);
+	else
+		/* shorthand */
+		return parse_short_magic(magic, elem);
+}
+
 /*
  * Take an element of a pathspec and check for magic signatures.
  * Append the result to the prefix. Return the magic bitmap.
@@ -267,26 +280,16 @@ static unsigned prefix_pathspec(struct pathspec_item *item, unsigned flags,
 	char *match;
 	int i, pathspec_prefix = -1;
 
-	if (elt[0] != ':' || get_literal_global() ||
-	    (flags & PATHSPEC_LITERAL_PATH)) {
-		; /* nothing to do */
-	} else if (elt[1] == '(') {
-		/* longhand */
-		copyfrom = parse_long_magic(&element_magic,
-					    &pathspec_prefix,
-					    elt);
-	} else {
-		/* shorthand */
-		copyfrom = parse_short_magic(&element_magic, elt);
-	}
-
-	magic |= element_magic;
-
 	/* PATHSPEC_LITERAL_PATH ignores magic */
-	if (flags & PATHSPEC_LITERAL_PATH)
+	if (flags & PATHSPEC_LITERAL_PATH) {
 		magic = PATHSPEC_LITERAL;
-	else
+	} else {
+		copyfrom = parse_element_magic(&element_magic,
+					       &pathspec_prefix,
+					       elt);
+		magic |= element_magic;
 		magic |= get_global_magic(element_magic);
+	}
 
 	if (pathspec_prefix >= 0 &&
 	    (prefixlen || (prefix && *prefix)))
-- 
2.11.0.390.gc69c2f50cf-goog


^ permalink raw reply related

* [PATCH v5 12/16] pathspec: create parse_long_magic function
From: Brandon Williams @ 2017-01-04 18:04 UTC (permalink / raw)
  To: git; +Cc: Brandon Williams, sbeller, pclouds, gitster
In-Reply-To: <20170104180411.150000-1-bmwill@google.com>

Factor out the logic responsible for parsing long magic into its own
function.  As well as hoist the prefix check logic outside of the inner
loop as there isn't anything that needs to be done after matching
"prefix:".

Signed-off-by: Brandon Williams <bmwill@google.com>
---
 pathspec.c | 92 ++++++++++++++++++++++++++++++++++++++------------------------
 1 file changed, 57 insertions(+), 35 deletions(-)

diff --git a/pathspec.c b/pathspec.c
index 1b0901848..f6356bde1 100644
--- a/pathspec.c
+++ b/pathspec.c
@@ -157,6 +157,60 @@ static int get_global_magic(int element_magic)
 }
 
 /*
+ * Parse the pathspec element looking for long magic
+ *
+ * saves all magic in 'magic'
+ * if prefix magic is used, save the prefix length in 'prefix_len'
+ * returns the position in 'elem' after all magic has been parsed
+ */
+static const char *parse_long_magic(unsigned *magic, int *prefix_len,
+				    const char *elem)
+{
+	const char *pos;
+	const char *nextat;
+
+	for (pos = elem + 2; *pos && *pos != ')'; pos = nextat) {
+		size_t len = strcspn(pos, ",)");
+		int i;
+
+		if (pos[len] == ',')
+			nextat = pos + len + 1; /* handle ',' */
+		else
+			nextat = pos + len; /* handle ')' and '\0' */
+
+		if (!len)
+			continue;
+
+		if (starts_with(pos, "prefix:")) {
+			char *endptr;
+			*prefix_len = strtol(pos + 7, &endptr, 10);
+			if (endptr - pos != len)
+				die(_("invalid parameter for pathspec magic 'prefix'"));
+			continue;
+		}
+
+		for (i = 0; i < ARRAY_SIZE(pathspec_magic); i++) {
+			if (strlen(pathspec_magic[i].name) == len &&
+			    !strncmp(pathspec_magic[i].name, pos, len)) {
+				*magic |= pathspec_magic[i].bit;
+				break;
+			}
+		}
+
+		if (ARRAY_SIZE(pathspec_magic) <= i)
+			die(_("Invalid pathspec magic '%.*s' in '%s'"),
+			    (int) len, pos, elem);
+	}
+
+	if (*pos != ')')
+		die(_("Missing ')' at the end of pathspec magic in '%s'"),
+		    elem);
+	pos++;
+
+	return pos;
+}
+
+/*
  * Parse the pathspec element looking for short magic
  *
  * saves all magic in 'magic'
@@ -218,41 +272,9 @@ static unsigned prefix_pathspec(struct pathspec_item *item, unsigned flags,
 		; /* nothing to do */
 	} else if (elt[1] == '(') {
 		/* longhand */
-		const char *nextat;
-		for (copyfrom = elt + 2;
-		     *copyfrom && *copyfrom != ')';
-		     copyfrom = nextat) {
-			size_t len = strcspn(copyfrom, ",)");
-			if (copyfrom[len] == ',')
-				nextat = copyfrom + len + 1;
-			else
-				/* handle ')' and '\0' */
-				nextat = copyfrom + len;
-			if (!len)
-				continue;
-			for (i = 0; i < ARRAY_SIZE(pathspec_magic); i++) {
-				if (strlen(pathspec_magic[i].name) == len &&
-				    !strncmp(pathspec_magic[i].name, copyfrom, len)) {
-					element_magic |= pathspec_magic[i].bit;
-					break;
-				}
-				if (starts_with(copyfrom, "prefix:")) {
-					char *endptr;
-					pathspec_prefix = strtol(copyfrom + 7,
-								 &endptr, 10);
-					if (endptr - copyfrom != len)
-						die(_("invalid parameter for pathspec magic 'prefix'"));
-					/* "i" would be wrong, but it does not matter */
-					break;
-				}
-			}
-			if (ARRAY_SIZE(pathspec_magic) <= i)
-				die(_("Invalid pathspec magic '%.*s' in '%s'"),
-				    (int) len, copyfrom, elt);
-		}
-		if (*copyfrom != ')')
-			die(_("Missing ')' at the end of pathspec magic in '%s'"), elt);
-		copyfrom++;
+		copyfrom = parse_long_magic(&element_magic,
+					    &pathspec_prefix,
+					    elt);
 	} else {
 		/* shorthand */
 		copyfrom = parse_short_magic(&element_magic, elt);
-- 
2.11.0.390.gc69c2f50cf-goog


^ permalink raw reply related

* [PATCH v5 10/16] pathspec: factor global magic into its own function
From: Brandon Williams @ 2017-01-04 18:04 UTC (permalink / raw)
  To: git; +Cc: Brandon Williams, sbeller, pclouds, gitster
In-Reply-To: <20170104180411.150000-1-bmwill@google.com>

Create helper functions to read the global magic environment variables
in additon to factoring out the global magic gathering logic into its
own function.

Signed-off-by: Brandon Williams <bmwill@google.com>
---
 pathspec.c | 127 +++++++++++++++++++++++++++++++++++++------------------------
 1 file changed, 78 insertions(+), 49 deletions(-)

diff --git a/pathspec.c b/pathspec.c
index af7f2d01d..77df55da6 100644
--- a/pathspec.c
+++ b/pathspec.c
@@ -87,6 +87,75 @@ static void prefix_magic(struct strbuf *sb, int prefixlen, unsigned magic)
 	strbuf_addf(sb, ",prefix:%d)", prefixlen);
 }
 
+static inline int get_literal_global(void)
+{
+	static int literal = -1;
+
+	if (literal < 0)
+		literal = git_env_bool(GIT_LITERAL_PATHSPECS_ENVIRONMENT, 0);
+
+	return literal;
+}
+
+static inline int get_glob_global(void)
+{
+	static int glob = -1;
+
+	if (glob < 0)
+		glob = git_env_bool(GIT_GLOB_PATHSPECS_ENVIRONMENT, 0);
+
+	return glob;
+}
+
+static inline int get_noglob_global(void)
+{
+	static int noglob = -1;
+
+	if (noglob < 0)
+		noglob = git_env_bool(GIT_NOGLOB_PATHSPECS_ENVIRONMENT, 0);
+
+	return noglob;
+}
+
+static inline int get_icase_global(void)
+{
+	static int icase = -1;
+
+	if (icase < 0)
+		icase = git_env_bool(GIT_ICASE_PATHSPECS_ENVIRONMENT, 0);
+
+	return icase;
+}
+
+static int get_global_magic(int element_magic)
+{
+	int global_magic = 0;
+
+	if (get_literal_global())
+		global_magic |= PATHSPEC_LITERAL;
+
+	/* --glob-pathspec is overridden by :(literal) */
+	if (get_glob_global() && !(element_magic & PATHSPEC_LITERAL))
+		global_magic |= PATHSPEC_GLOB;
+
+	if (get_glob_global() && get_noglob_global())
+		die(_("global 'glob' and 'noglob' pathspec settings are incompatible"));
+
+	if (get_icase_global())
+		global_magic |= PATHSPEC_ICASE;
+
+	if ((global_magic & PATHSPEC_LITERAL) &&
+	    (global_magic & ~PATHSPEC_LITERAL))
+		die(_("global 'literal' pathspec setting is incompatible "
+		      "with all other global pathspec settings"));
+
+	/* --noglob-pathspec adds :(literal) _unless_ :(glob) is specified */
+	if (get_noglob_global() && !(element_magic & PATHSPEC_GLOB))
+		global_magic |= PATHSPEC_LITERAL;
+
+	return global_magic;
+}
+
 /*
  * Take an element of a pathspec and check for magic signatures.
  * Append the result to the prefix. Return the magic bitmap.
@@ -104,46 +173,12 @@ static unsigned prefix_pathspec(struct pathspec_item *item, unsigned flags,
 				const char *prefix, int prefixlen,
 				const char *elt)
 {
-	static int literal_global = -1;
-	static int glob_global = -1;
-	static int noglob_global = -1;
-	static int icase_global = -1;
-	unsigned magic = 0, element_magic = 0, global_magic = 0;
+	unsigned magic = 0, element_magic = 0;
 	const char *copyfrom = elt;
 	char *match;
 	int i, pathspec_prefix = -1;
 
-	if (literal_global < 0)
-		literal_global = git_env_bool(GIT_LITERAL_PATHSPECS_ENVIRONMENT, 0);
-	if (literal_global)
-		global_magic |= PATHSPEC_LITERAL;
-
-	if (glob_global < 0)
-		glob_global = git_env_bool(GIT_GLOB_PATHSPECS_ENVIRONMENT, 0);
-	if (glob_global)
-		global_magic |= PATHSPEC_GLOB;
-
-	if (noglob_global < 0)
-		noglob_global = git_env_bool(GIT_NOGLOB_PATHSPECS_ENVIRONMENT, 0);
-
-	if (glob_global && noglob_global)
-		die(_("global 'glob' and 'noglob' pathspec settings are incompatible"));
-
-
-	if (icase_global < 0)
-		icase_global = git_env_bool(GIT_ICASE_PATHSPECS_ENVIRONMENT, 0);
-	if (icase_global)
-		global_magic |= PATHSPEC_ICASE;
-
-	if ((global_magic & PATHSPEC_LITERAL) &&
-	    (global_magic & ~PATHSPEC_LITERAL))
-		die(_("global 'literal' pathspec setting is incompatible "
-		      "with all other global pathspec settings"));
-
-	if (flags & PATHSPEC_LITERAL_PATH)
-		global_magic = 0;
-
-	if (elt[0] != ':' || literal_global ||
+	if (elt[0] != ':' || get_literal_global() ||
 	    (flags & PATHSPEC_LITERAL_PATH)) {
 		; /* nothing to do */
 	} else if (elt[1] == '(') {
@@ -207,15 +242,11 @@ static unsigned prefix_pathspec(struct pathspec_item *item, unsigned flags,
 
 	magic |= element_magic;
 
-	/* --noglob-pathspec adds :(literal) _unless_ :(glob) is specified */
-	if (noglob_global && !(magic & PATHSPEC_GLOB))
-		global_magic |= PATHSPEC_LITERAL;
-
-	/* --glob-pathspec is overridden by :(literal) */
-	if ((global_magic & PATHSPEC_GLOB) && (magic & PATHSPEC_LITERAL))
-		global_magic &= ~PATHSPEC_GLOB;
-
-	magic |= global_magic;
+	/* PATHSPEC_LITERAL_PATH ignores magic */
+	if (flags & PATHSPEC_LITERAL_PATH)
+		magic = PATHSPEC_LITERAL;
+	else
+		magic |= get_global_magic(element_magic);
 
 	if (pathspec_prefix >= 0 &&
 	    (prefixlen || (prefix && *prefix)))
@@ -241,7 +272,7 @@ static unsigned prefix_pathspec(struct pathspec_item *item, unsigned flags,
 	 * original. Useful for passing to another command.
 	 */
 	if ((flags & PATHSPEC_PREFIX_ORIGIN) &&
-	    prefixlen && !literal_global) {
+	    prefixlen && !get_literal_global()) {
 		struct strbuf sb = STRBUF_INIT;
 
 		/* Preserve the actual prefix length of each pattern */
@@ -408,9 +439,7 @@ void parse_pathspec(struct pathspec *pathspec,
 
 		item[i].magic = prefix_pathspec(item + i, flags,
 						prefix, prefixlen, entry);
-		if ((flags & PATHSPEC_LITERAL_PATH) &&
-		    !(magic_mask & PATHSPEC_LITERAL))
-			item[i].magic |= PATHSPEC_LITERAL;
+
 		if (item[i].magic & PATHSPEC_EXCLUDE)
 			nr_exclude++;
 		if (item[i].magic & magic_mask)
-- 
2.11.0.390.gc69c2f50cf-goog


^ permalink raw reply related

* [PATCH v5 08/16] pathspec: always show mnemonic and name in unsupported_magic
From: Brandon Williams @ 2017-01-04 18:04 UTC (permalink / raw)
  To: git; +Cc: Brandon Williams, sbeller, pclouds, gitster
In-Reply-To: <20170104180411.150000-1-bmwill@google.com>

For better clarity, always show the mnemonic and name of the unsupported
magic being used.  This lets users have a more clear understanding of
what magic feature isn't supported.  And if they supplied a mnemonic,
the user will be told what its corresponding name is which will allow
them to more easily search the man pages for that magic type.

This also avoids passing an extra parameter around the pathspec
initialization code.

Signed-off-by: Brandon Williams <bmwill@google.com>
---
 pathspec.c | 24 +++++++++---------------
 1 file changed, 9 insertions(+), 15 deletions(-)

diff --git a/pathspec.c b/pathspec.c
index b9a3819d6..5df364bc6 100644
--- a/pathspec.c
+++ b/pathspec.c
@@ -101,9 +101,7 @@ static void prefix_short_magic(struct strbuf *sb, int prefixlen,
  * the prefix part must always match literally, and a single stupid
  * string cannot express such a case.
  */
-static unsigned prefix_pathspec(struct pathspec_item *item,
-				unsigned *p_short_magic,
-				unsigned flags,
+static unsigned prefix_pathspec(struct pathspec_item *item, unsigned flags,
 				const char *prefix, int prefixlen,
 				const char *elt)
 {
@@ -210,7 +208,6 @@ static unsigned prefix_pathspec(struct pathspec_item *item,
 	}
 
 	magic |= short_magic;
-	*p_short_magic = short_magic;
 
 	/* --noglob-pathspec adds :(literal) _unless_ :(glob) is specified */
 	if (noglob_global && !(magic & PATHSPEC_GLOB))
@@ -329,8 +326,7 @@ static int pathspec_item_cmp(const void *a_, const void *b_)
 }
 
 static void NORETURN unsupported_magic(const char *pattern,
-				       unsigned magic,
-				       unsigned short_magic)
+				       unsigned magic)
 {
 	struct strbuf sb = STRBUF_INIT;
 	int i;
@@ -339,9 +335,11 @@ static void NORETURN unsupported_magic(const char *pattern,
 		if (!(magic & m->bit))
 			continue;
 		if (sb.len)
-			strbuf_addch(&sb, ' ');
-		if (short_magic & m->bit)
-			strbuf_addf(&sb, "'%c'", m->mnemonic);
+			strbuf_addstr(&sb, ", ");
+
+		if (m->mnemonic)
+			strbuf_addf(&sb, _("'%s' (mnemonic: '%c')"),
+				    m->name, m->mnemonic);
 		else
 			strbuf_addf(&sb, "'%s'", m->name);
 	}
@@ -413,11 +411,9 @@ void parse_pathspec(struct pathspec *pathspec,
 	prefixlen = prefix ? strlen(prefix) : 0;
 
 	for (i = 0; i < n; i++) {
-		unsigned short_magic;
 		entry = argv[i];
 
-		item[i].magic = prefix_pathspec(item + i, &short_magic,
-						flags,
+		item[i].magic = prefix_pathspec(item + i, flags,
 						prefix, prefixlen, entry);
 		if ((flags & PATHSPEC_LITERAL_PATH) &&
 		    !(magic_mask & PATHSPEC_LITERAL))
@@ -425,9 +421,7 @@ void parse_pathspec(struct pathspec *pathspec,
 		if (item[i].magic & PATHSPEC_EXCLUDE)
 			nr_exclude++;
 		if (item[i].magic & magic_mask)
-			unsupported_magic(entry,
-					  item[i].magic & magic_mask,
-					  short_magic);
+			unsupported_magic(entry, item[i].magic & magic_mask);
 
 		if ((flags & PATHSPEC_SYMLINK_LEADING_PATH) &&
 		    has_symlink_leading_path(item[i].match, item[i].len)) {
-- 
2.11.0.390.gc69c2f50cf-goog


^ permalink raw reply related

* [PATCH v5 09/16] pathspec: simpler logic to prefix original pathspec elements
From: Brandon Williams @ 2017-01-04 18:04 UTC (permalink / raw)
  To: git; +Cc: Brandon Williams, sbeller, pclouds, gitster
In-Reply-To: <20170104180411.150000-1-bmwill@google.com>

The logic used to prefix an original pathspec element with 'prefix'
magic is more general purpose and can be used for more than just short
magic.  Remove the extra code paths and rename 'prefix_short_magic' to
'prefix_magic' to better indicate that it can be used in more general
situations.

Also, slightly change the logic which decides when to prefix the
original element in order to prevent a pathspec of "." from getting
converted to "" (empty string).

Signed-off-by: Brandon Williams <bmwill@google.com>
---
 pathspec.c | 33 +++++++++++++--------------------
 1 file changed, 13 insertions(+), 20 deletions(-)

diff --git a/pathspec.c b/pathspec.c
index 5df364bc6..af7f2d01d 100644
--- a/pathspec.c
+++ b/pathspec.c
@@ -74,13 +74,12 @@ static struct pathspec_magic {
 	{ PATHSPEC_EXCLUDE, '!', "exclude" },
 };
 
-static void prefix_short_magic(struct strbuf *sb, int prefixlen,
-			       unsigned short_magic)
+static void prefix_magic(struct strbuf *sb, int prefixlen, unsigned magic)
 {
 	int i;
 	strbuf_addstr(sb, ":(");
 	for (i = 0; i < ARRAY_SIZE(pathspec_magic); i++)
-		if (short_magic & pathspec_magic[i].bit) {
+		if (magic & pathspec_magic[i].bit) {
 			if (sb->buf[sb->len - 1] != '(')
 				strbuf_addch(sb, ',');
 			strbuf_addstr(sb, pathspec_magic[i].name);
@@ -109,8 +108,8 @@ static unsigned prefix_pathspec(struct pathspec_item *item, unsigned flags,
 	static int glob_global = -1;
 	static int noglob_global = -1;
 	static int icase_global = -1;
-	unsigned magic = 0, short_magic = 0, global_magic = 0;
-	const char *copyfrom = elt, *long_magic_end = NULL;
+	unsigned magic = 0, element_magic = 0, global_magic = 0;
+	const char *copyfrom = elt;
 	char *match;
 	int i, pathspec_prefix = -1;
 
@@ -164,7 +163,7 @@ static unsigned prefix_pathspec(struct pathspec_item *item, unsigned flags,
 			for (i = 0; i < ARRAY_SIZE(pathspec_magic); i++) {
 				if (strlen(pathspec_magic[i].name) == len &&
 				    !strncmp(pathspec_magic[i].name, copyfrom, len)) {
-					magic |= pathspec_magic[i].bit;
+					element_magic |= pathspec_magic[i].bit;
 					break;
 				}
 				if (starts_with(copyfrom, "prefix:")) {
@@ -183,7 +182,6 @@ static unsigned prefix_pathspec(struct pathspec_item *item, unsigned flags,
 		}
 		if (*copyfrom != ')')
 			die(_("Missing ')' at the end of pathspec magic in '%s'"), elt);
-		long_magic_end = copyfrom;
 		copyfrom++;
 	} else {
 		/* shorthand */
@@ -196,7 +194,7 @@ static unsigned prefix_pathspec(struct pathspec_item *item, unsigned flags,
 				break;
 			for (i = 0; i < ARRAY_SIZE(pathspec_magic); i++)
 				if (pathspec_magic[i].mnemonic == ch) {
-					short_magic |= pathspec_magic[i].bit;
+					element_magic |= pathspec_magic[i].bit;
 					break;
 				}
 			if (ARRAY_SIZE(pathspec_magic) <= i)
@@ -207,7 +205,7 @@ static unsigned prefix_pathspec(struct pathspec_item *item, unsigned flags,
 			copyfrom++;
 	}
 
-	magic |= short_magic;
+	magic |= element_magic;
 
 	/* --noglob-pathspec adds :(literal) _unless_ :(glob) is specified */
 	if (noglob_global && !(magic & PATHSPEC_GLOB))
@@ -242,18 +240,13 @@ static unsigned prefix_pathspec(struct pathspec_item *item, unsigned flags,
 	 * Prefix the pathspec (keep all magic) and assign to
 	 * original. Useful for passing to another command.
 	 */
-	if (flags & PATHSPEC_PREFIX_ORIGIN) {
+	if ((flags & PATHSPEC_PREFIX_ORIGIN) &&
+	    prefixlen && !literal_global) {
 		struct strbuf sb = STRBUF_INIT;
-		if (prefixlen && !literal_global) {
-			/* Preserve the actual prefix length of each pattern */
-			if (short_magic)
-				prefix_short_magic(&sb, prefixlen, short_magic);
-			else if (long_magic_end) {
-				strbuf_add(&sb, elt, long_magic_end - elt);
-				strbuf_addf(&sb, ",prefix:%d)", prefixlen);
-			} else
-				strbuf_addf(&sb, ":(prefix:%d)", prefixlen);
-		}
+
+		/* Preserve the actual prefix length of each pattern */
+		prefix_magic(&sb, prefixlen, element_magic);
+
 		strbuf_addstr(&sb, match);
 		item->original = strbuf_detach(&sb, NULL);
 	} else {
-- 
2.11.0.390.gc69c2f50cf-goog


^ permalink raw reply related

* [PATCH v5 16/16] pathspec: rename prefix_pathspec to init_pathspec_item
From: Brandon Williams @ 2017-01-04 18:04 UTC (permalink / raw)
  To: git; +Cc: Brandon Williams, sbeller, pclouds, gitster
In-Reply-To: <20170104180411.150000-1-bmwill@google.com>

Give a more relevant name to the prefix_pathspec function as it does
more than just prefix a pathspec element.

Signed-off-by: Brandon Williams <bmwill@google.com>
---
 pathspec.c | 24 +++++++-----------------
 1 file changed, 7 insertions(+), 17 deletions(-)

diff --git a/pathspec.c b/pathspec.c
index e53530e7a..ff2509ddd 100644
--- a/pathspec.c
+++ b/pathspec.c
@@ -297,21 +297,11 @@ static void strip_submodule_slash_expensive(struct pathspec_item *item)
 }
 
 /*
- * Take an element of a pathspec and check for magic signatures.
- * Append the result to the prefix. Return the magic bitmap.
- *
- * For now, we only parse the syntax and throw out anything other than
- * "top" magic.
- *
- * NEEDSWORK: This needs to be rewritten when we start migrating
- * get_pathspec() users to use the "struct pathspec" interface.  For
- * example, a pathspec element may be marked as case-insensitive, but
- * the prefix part must always match literally, and a single stupid
- * string cannot express such a case.
+ * Perform the initialization of a pathspec_item based on a pathspec element.
  */
-static unsigned prefix_pathspec(struct pathspec_item *item, unsigned flags,
-				const char *prefix, int prefixlen,
-				const char *elt)
+static void init_pathspec_item(struct pathspec_item *item, unsigned flags,
+			       const char *prefix, int prefixlen,
+			       const char *elt)
 {
 	unsigned magic = 0, element_magic = 0;
 	const char *copyfrom = elt;
@@ -329,6 +319,8 @@ static unsigned prefix_pathspec(struct pathspec_item *item, unsigned flags,
 		magic |= get_global_magic(element_magic);
 	}
 
+	item->magic = magic;
+
 	if (pathspec_prefix >= 0 &&
 	    (prefixlen || (prefix && *prefix)))
 		die("BUG: 'prefix' magic is supposed to be used at worktree's root");
@@ -401,7 +393,6 @@ static unsigned prefix_pathspec(struct pathspec_item *item, unsigned flags,
 	/* sanity checks, pathspec matchers assume these are sane */
 	assert(item->nowildcard_len <= item->len &&
 	       item->prefix         <= item->len);
-	return magic;
 }
 
 static int pathspec_item_cmp(const void *a_, const void *b_)
@@ -501,8 +492,7 @@ void parse_pathspec(struct pathspec *pathspec,
 	for (i = 0; i < n; i++) {
 		entry = argv[i];
 
-		item[i].magic = prefix_pathspec(item + i, flags,
-						prefix, prefixlen, entry);
+		init_pathspec_item(item + i, flags, prefix, prefixlen, entry);
 
 		if (item[i].magic & PATHSPEC_EXCLUDE)
 			nr_exclude++;
-- 
2.11.0.390.gc69c2f50cf-goog


^ permalink raw reply related

* [PATCH v5 14/16] pathspec: create strip submodule slash helpers
From: Brandon Williams @ 2017-01-04 18:04 UTC (permalink / raw)
  To: git; +Cc: Brandon Williams, sbeller, pclouds, gitster
In-Reply-To: <20170104180411.150000-1-bmwill@google.com>

Factor out the logic responsible for stripping the trailing slash on
pathspecs referencing submodules into its own function.

Signed-off-by: Brandon Williams <bmwill@google.com>
---
 pathspec.c | 68 ++++++++++++++++++++++++++++++++++++++------------------------
 1 file changed, 42 insertions(+), 26 deletions(-)

diff --git a/pathspec.c b/pathspec.c
index 00fcae4e1..f3a7a1d33 100644
--- a/pathspec.c
+++ b/pathspec.c
@@ -258,6 +258,44 @@ static const char *parse_element_magic(unsigned *magic, int *prefix_len,
 		return parse_short_magic(magic, elem);
 }
 
+static void strip_submodule_slash_cheap(struct pathspec_item *item)
+{
+	if (item->len >= 1 && item->match[item->len - 1] == '/') {
+		int i = cache_name_pos(item->match, item->len - 1);
+
+		if (i >= 0 && S_ISGITLINK(active_cache[i]->ce_mode)) {
+			item->len--;
+			item->match[item->len] = '\0';
+		}
+	}
+}
+
+static void strip_submodule_slash_expensive(struct pathspec_item *item)
+{
+	int i;
+
+	for (i = 0; i < active_nr; i++) {
+		struct cache_entry *ce = active_cache[i];
+		int ce_len = ce_namelen(ce);
+
+		if (!S_ISGITLINK(ce->ce_mode))
+			continue;
+
+		if (item->len <= ce_len || item->match[ce_len] != '/' ||
+		    memcmp(ce->name, item->match, ce_len))
+			continue;
+
+		if (item->len == ce_len + 1) {
+			/* strip trailing slash */
+			item->len--;
+			item->match[item->len] = '\0';
+		} else {
+			die(_("Pathspec '%s' is in submodule '%.*s'"),
+			    item->original, ce_len, ce->name);
+		}
+	}
+}
+
 /*
  * Take an element of a pathspec and check for magic signatures.
  * Append the result to the prefix. Return the magic bitmap.
@@ -278,7 +316,7 @@ static unsigned prefix_pathspec(struct pathspec_item *item, unsigned flags,
 	unsigned magic = 0, element_magic = 0;
 	const char *copyfrom = elt;
 	char *match;
-	int i, pathspec_prefix = -1;
+	int pathspec_prefix = -1;
 
 	/* PATHSPEC_LITERAL_PATH ignores magic */
 	if (flags & PATHSPEC_LITERAL_PATH) {
@@ -329,33 +367,11 @@ static unsigned prefix_pathspec(struct pathspec_item *item, unsigned flags,
 	item->len = strlen(item->match);
 	item->prefix = prefixlen;
 
-	if ((flags & PATHSPEC_STRIP_SUBMODULE_SLASH_CHEAP) &&
-	    (item->len >= 1 && item->match[item->len - 1] == '/') &&
-	    (i = cache_name_pos(item->match, item->len - 1)) >= 0 &&
-	    S_ISGITLINK(active_cache[i]->ce_mode)) {
-		item->len--;
-		match[item->len] = '\0';
-	}
+	if (flags & PATHSPEC_STRIP_SUBMODULE_SLASH_CHEAP)
+		strip_submodule_slash_cheap(item);
 
 	if (flags & PATHSPEC_STRIP_SUBMODULE_SLASH_EXPENSIVE)
-		for (i = 0; i < active_nr; i++) {
-			struct cache_entry *ce = active_cache[i];
-			int ce_len = ce_namelen(ce);
-
-			if (!S_ISGITLINK(ce->ce_mode))
-				continue;
-
-			if (item->len <= ce_len || match[ce_len] != '/' ||
-			    memcmp(ce->name, match, ce_len))
-				continue;
-			if (item->len == ce_len + 1) {
-				/* strip trailing slash */
-				item->len--;
-				match[item->len] = '\0';
-			} else
-				die (_("Pathspec '%s' is in submodule '%.*s'"),
-				     elt, ce_len, ce->name);
-		}
+		strip_submodule_slash_expensive(item);
 
 	if (magic & PATHSPEC_LITERAL)
 		item->nowildcard_len = item->len;
-- 
2.11.0.390.gc69c2f50cf-goog


^ permalink raw reply related

* [PATCH v5 07/16] pathspec: remove unused variable from unsupported_magic
From: Brandon Williams @ 2017-01-04 18:04 UTC (permalink / raw)
  To: git; +Cc: Brandon Williams, sbeller, pclouds, gitster
In-Reply-To: <20170104180411.150000-1-bmwill@google.com>

Removed unused variable 'n' from the 'unsupported_magic()' function.

Signed-off-by: Brandon Williams <bmwill@google.com>
---
 pathspec.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/pathspec.c b/pathspec.c
index b8faa8f46..b9a3819d6 100644
--- a/pathspec.c
+++ b/pathspec.c
@@ -333,8 +333,8 @@ static void NORETURN unsupported_magic(const char *pattern,
 				       unsigned short_magic)
 {
 	struct strbuf sb = STRBUF_INIT;
-	int i, n;
-	for (n = i = 0; i < ARRAY_SIZE(pathspec_magic); i++) {
+	int i;
+	for (i = 0; i < ARRAY_SIZE(pathspec_magic); i++) {
 		const struct pathspec_magic *m = pathspec_magic + i;
 		if (!(magic & m->bit))
 			continue;
@@ -344,7 +344,6 @@ static void NORETURN unsupported_magic(const char *pattern,
 			strbuf_addf(&sb, "'%c'", m->mnemonic);
 		else
 			strbuf_addf(&sb, "'%s'", m->name);
-		n++;
 	}
 	/*
 	 * We may want to substitute "this command" with a command
-- 
2.11.0.390.gc69c2f50cf-goog


^ permalink raw reply related

* [PATCH v5 11/16] pathspec: create parse_short_magic function
From: Brandon Williams @ 2017-01-04 18:04 UTC (permalink / raw)
  To: git; +Cc: Brandon Williams, sbeller, pclouds, gitster
In-Reply-To: <20170104180411.150000-1-bmwill@google.com>

Factor out the logic responsible for parsing short magic into its own
function.

Signed-off-by: Brandon Williams <bmwill@google.com>
---
 pathspec.c | 54 ++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 36 insertions(+), 18 deletions(-)

diff --git a/pathspec.c b/pathspec.c
index 77df55da6..1b0901848 100644
--- a/pathspec.c
+++ b/pathspec.c
@@ -157,6 +157,41 @@ static int get_global_magic(int element_magic)
 }
 
 /*
+ * Parse the pathspec element looking for short magic
+ *
+ * saves all magic in 'magic'
+ * returns the position in 'elem' after all magic has been parsed
+ */
+static const char *parse_short_magic(unsigned *magic, const char *elem)
+{
+	const char *pos;
+
+	for (pos = elem + 1; *pos && *pos != ':'; pos++) {
+		char ch = *pos;
+		int i;
+
+		if (!is_pathspec_magic(ch))
+			break;
+
+		for (i = 0; i < ARRAY_SIZE(pathspec_magic); i++) {
+			if (pathspec_magic[i].mnemonic == ch) {
+				*magic |= pathspec_magic[i].bit;
+				break;
+			}
+		}
+
+		if (ARRAY_SIZE(pathspec_magic) <= i)
+			die(_("Unimplemented pathspec magic '%c' in '%s'"),
+			    ch, elem);
+	}
+
+	if (*pos == ':')
+		pos++;
+
+	return pos;
+}
+
+/*
  * Take an element of a pathspec and check for magic signatures.
  * Append the result to the prefix. Return the magic bitmap.
  *
@@ -220,24 +255,7 @@ static unsigned prefix_pathspec(struct pathspec_item *item, unsigned flags,
 		copyfrom++;
 	} else {
 		/* shorthand */
-		for (copyfrom = elt + 1;
-		     *copyfrom && *copyfrom != ':';
-		     copyfrom++) {
-			char ch = *copyfrom;
-
-			if (!is_pathspec_magic(ch))
-				break;
-			for (i = 0; i < ARRAY_SIZE(pathspec_magic); i++)
-				if (pathspec_magic[i].mnemonic == ch) {
-					element_magic |= pathspec_magic[i].bit;
-					break;
-				}
-			if (ARRAY_SIZE(pathspec_magic) <= i)
-				die(_("Unimplemented pathspec magic '%c' in '%s'"),
-				    ch, elt);
-		}
-		if (*copyfrom == ':')
-			copyfrom++;
+		copyfrom = parse_short_magic(&element_magic, elt);
 	}
 
 	magic |= element_magic;
-- 
2.11.0.390.gc69c2f50cf-goog


^ permalink raw reply related

* [PATCH v5 04/16] ls-tree: convert show_recursive to use the pathspec struct interface
From: Brandon Williams @ 2017-01-04 18:03 UTC (permalink / raw)
  To: git; +Cc: Brandon Williams, sbeller, pclouds, gitster
In-Reply-To: <20170104180411.150000-1-bmwill@google.com>

Convert 'show_recursive()' to use the pathspec struct interface from
using the '_raw' entry in the pathspec struct.

Signed-off-by: Brandon Williams <bmwill@google.com>
---
 builtin/ls-tree.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/builtin/ls-tree.c b/builtin/ls-tree.c
index 0e30d8623..d7ebeb4ce 100644
--- a/builtin/ls-tree.c
+++ b/builtin/ls-tree.c
@@ -31,21 +31,18 @@ static const  char * const ls_tree_usage[] = {
 
 static int show_recursive(const char *base, int baselen, const char *pathname)
 {
-	const char **s;
+	int i;
 
 	if (ls_options & LS_RECURSIVE)
 		return 1;
 
-	s = pathspec._raw;
-	if (!s)
+	if (!pathspec.nr)
 		return 0;
 
-	for (;;) {
-		const char *spec = *s++;
+	for (i = 0; i < pathspec.nr; i++) {
+		const char *spec = pathspec.items[i].match;
 		int len, speclen;
 
-		if (!spec)
-			return 0;
 		if (strncmp(base, spec, baselen))
 			continue;
 		len = strlen(pathname);
@@ -59,6 +56,7 @@ static int show_recursive(const char *base, int baselen, const char *pathname)
 			continue;
 		return 1;
 	}
+	return 0;
 }
 
 static int show_tree(const unsigned char *sha1, struct strbuf *base,
@@ -175,8 +173,8 @@ int cmd_ls_tree(int argc, const char **argv, const char *prefix)
 	 * cannot be lifted until it is converted to use
 	 * match_pathspec() or tree_entry_interesting()
 	 */
-	parse_pathspec(&pathspec, PATHSPEC_GLOB | PATHSPEC_ICASE |
-				  PATHSPEC_EXCLUDE,
+	parse_pathspec(&pathspec, PATHSPEC_ALL_MAGIC &
+				  ~(PATHSPEC_FROMTOP | PATHSPEC_LITERAL),
 		       PATHSPEC_PREFER_CWD,
 		       prefix, argv + 1);
 	for (i = 0; i < pathspec.nr; i++)
-- 
2.11.0.390.gc69c2f50cf-goog


^ permalink raw reply related

* [PATCH v5 03/16] dir: convert fill_directory to use the pathspec struct interface
From: Brandon Williams @ 2017-01-04 18:03 UTC (permalink / raw)
  To: git; +Cc: Brandon Williams, sbeller, pclouds, gitster
In-Reply-To: <20170104180411.150000-1-bmwill@google.com>

Convert 'fill_directory()' to use the pathspec struct interface from
using the '_raw' entry in the pathspec struct.

Signed-off-by: Brandon Williams <bmwill@google.com>
---
 dir.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/dir.c b/dir.c
index 9ae454dde..bc5ff7216 100644
--- a/dir.c
+++ b/dir.c
@@ -174,17 +174,21 @@ char *common_prefix(const struct pathspec *pathspec)
 
 int fill_directory(struct dir_struct *dir, const struct pathspec *pathspec)
 {
-	size_t len;
+	char *prefix;
+	size_t prefix_len;
 
 	/*
 	 * Calculate common prefix for the pathspec, and
 	 * use that to optimize the directory walk
 	 */
-	len = common_prefix_len(pathspec);
+	prefix = common_prefix(pathspec);
+	prefix_len = prefix ? strlen(prefix) : 0;
 
 	/* Read the directory and prune it */
-	read_directory(dir, pathspec->nr ? pathspec->_raw[0] : "", len, pathspec);
-	return len;
+	read_directory(dir, prefix, prefix_len, pathspec);
+
+	free(prefix);
+	return prefix_len;
 }
 
 int within_depth(const char *name, int namelen,
-- 
2.11.0.390.gc69c2f50cf-goog


^ permalink raw reply related

* [PATCH v5 02/16] dir: remove struct path_simplify
From: Brandon Williams @ 2017-01-04 18:03 UTC (permalink / raw)
  To: git; +Cc: Brandon Williams, sbeller, pclouds, gitster
In-Reply-To: <20170104180411.150000-1-bmwill@google.com>

Teach simplify_away() and exclude_matches_pathspec() to handle struct
pathspec directly, eliminating the need for the struct path_simplify.

Also renamed the len parameter to pathlen in exclude_matches_pathspec()
to match the parameter names used in simplify_away().

Signed-off-by: Brandon Williams <bmwill@google.com>
---
 dir.c | 179 ++++++++++++++++++++++++++++--------------------------------------
 1 file changed, 76 insertions(+), 103 deletions(-)

diff --git a/dir.c b/dir.c
index bfa8c8a9a..9ae454dde 100644
--- a/dir.c
+++ b/dir.c
@@ -16,11 +16,6 @@
 #include "varint.h"
 #include "ewah/ewok.h"
 
-struct path_simplify {
-	int len;
-	const char *path;
-};
-
 /*
  * Tells read_directory_recursive how a file or directory should be treated.
  * Values are ordered by significance, e.g. if a directory contains both
@@ -50,7 +45,7 @@ struct cached_dir {
 
 static enum path_treatment read_directory_recursive(struct dir_struct *dir,
 	const char *path, int len, struct untracked_cache_dir *untracked,
-	int check_only, const struct path_simplify *simplify);
+	int check_only, const struct pathspec *pathspec);
 static int get_dtype(struct dirent *de, const char *path, int len);
 
 int fspathcmp(const char *a, const char *b)
@@ -1312,7 +1307,7 @@ static enum exist_status directory_exists_in_index(const char *dirname, int len)
 static enum path_treatment treat_directory(struct dir_struct *dir,
 	struct untracked_cache_dir *untracked,
 	const char *dirname, int len, int baselen, int exclude,
-	const struct path_simplify *simplify)
+	const struct pathspec *pathspec)
 {
 	/* The "len-1" is to strip the final '/' */
 	switch (directory_exists_in_index(dirname, len-1)) {
@@ -1341,7 +1336,7 @@ static enum path_treatment treat_directory(struct dir_struct *dir,
 	untracked = lookup_untracked(dir->untracked, untracked,
 				     dirname + baselen, len - baselen);
 	return read_directory_recursive(dir, dirname, len,
-					untracked, 1, simplify);
+					untracked, 1, pathspec);
 }
 
 /*
@@ -1349,24 +1344,33 @@ static enum path_treatment treat_directory(struct dir_struct *dir,
  * reading - if the path cannot possibly be in the pathspec,
  * return true, and we'll skip it early.
  */
-static int simplify_away(const char *path, int pathlen, const struct path_simplify *simplify)
+static int simplify_away(const char *path, int pathlen,
+			 const struct pathspec *pathspec)
 {
-	if (simplify) {
-		for (;;) {
-			const char *match = simplify->path;
-			int len = simplify->len;
+	int i;
 
-			if (!match)
-				break;
-			if (len > pathlen)
-				len = pathlen;
-			if (!memcmp(path, match, len))
-				return 0;
-			simplify++;
-		}
-		return 1;
+	if (!pathspec || !pathspec->nr)
+		return 0;
+
+	GUARD_PATHSPEC(pathspec,
+		       PATHSPEC_FROMTOP |
+		       PATHSPEC_MAXDEPTH |
+		       PATHSPEC_LITERAL |
+		       PATHSPEC_GLOB |
+		       PATHSPEC_ICASE |
+		       PATHSPEC_EXCLUDE);
+
+	for (i = 0; i < pathspec->nr; i++) {
+		const struct pathspec_item *item = &pathspec->items[i];
+		int len = item->nowildcard_len;
+
+		if (len > pathlen)
+			len = pathlen;
+		if (!ps_strncmp(item, item->match, path, len))
+			return 0;
 	}
-	return 0;
+
+	return 1;
 }
 
 /*
@@ -1380,19 +1384,33 @@ static int simplify_away(const char *path, int pathlen, const struct path_simpli
  *   2. the path is a directory prefix of some element in the
  *      pathspec
  */
-static int exclude_matches_pathspec(const char *path, int len,
-		const struct path_simplify *simplify)
-{
-	if (simplify) {
-		for (; simplify->path; simplify++) {
-			if (len == simplify->len
-			    && !memcmp(path, simplify->path, len))
-				return 1;
-			if (len < simplify->len
-			    && simplify->path[len] == '/'
-			    && !memcmp(path, simplify->path, len))
-				return 1;
-		}
+static int exclude_matches_pathspec(const char *path, int pathlen,
+				    const struct pathspec *pathspec)
+{
+	int i;
+
+	if (!pathspec || !pathspec->nr)
+		return 0;
+
+	GUARD_PATHSPEC(pathspec,
+		       PATHSPEC_FROMTOP |
+		       PATHSPEC_MAXDEPTH |
+		       PATHSPEC_LITERAL |
+		       PATHSPEC_GLOB |
+		       PATHSPEC_ICASE |
+		       PATHSPEC_EXCLUDE);
+
+	for (i = 0; i < pathspec->nr; i++) {
+		const struct pathspec_item *item = &pathspec->items[i];
+		int len = item->nowildcard_len;
+
+		if (len == pathlen &&
+		    !ps_strncmp(item, item->match, path, pathlen))
+			return 1;
+		if (len > pathlen &&
+		    item->match[pathlen] == '/' &&
+		    !ps_strncmp(item, item->match, path, pathlen))
+			return 1;
 	}
 	return 0;
 }
@@ -1460,7 +1478,7 @@ static enum path_treatment treat_one_path(struct dir_struct *dir,
 					  struct untracked_cache_dir *untracked,
 					  struct strbuf *path,
 					  int baselen,
-					  const struct path_simplify *simplify,
+					  const struct pathspec *pathspec,
 					  int dtype, struct dirent *de)
 {
 	int exclude;
@@ -1512,7 +1530,7 @@ static enum path_treatment treat_one_path(struct dir_struct *dir,
 	case DT_DIR:
 		strbuf_addch(path, '/');
 		return treat_directory(dir, untracked, path->buf, path->len,
-				       baselen, exclude, simplify);
+				       baselen, exclude, pathspec);
 	case DT_REG:
 	case DT_LNK:
 		return exclude ? path_excluded : path_untracked;
@@ -1524,7 +1542,7 @@ static enum path_treatment treat_path_fast(struct dir_struct *dir,
 					   struct cached_dir *cdir,
 					   struct strbuf *path,
 					   int baselen,
-					   const struct path_simplify *simplify)
+					   const struct pathspec *pathspec)
 {
 	strbuf_setlen(path, baselen);
 	if (!cdir->ucd) {
@@ -1541,7 +1559,7 @@ static enum path_treatment treat_path_fast(struct dir_struct *dir,
 		 * with check_only set.
 		 */
 		return read_directory_recursive(dir, path->buf, path->len,
-						cdir->ucd, 1, simplify);
+						cdir->ucd, 1, pathspec);
 	/*
 	 * We get path_recurse in the first run when
 	 * directory_exists_in_index() returns index_nonexistent. We
@@ -1556,23 +1574,23 @@ static enum path_treatment treat_path(struct dir_struct *dir,
 				      struct cached_dir *cdir,
 				      struct strbuf *path,
 				      int baselen,
-				      const struct path_simplify *simplify)
+				      const struct pathspec *pathspec)
 {
 	int dtype;
 	struct dirent *de = cdir->de;
 
 	if (!de)
 		return treat_path_fast(dir, untracked, cdir, path,
-				       baselen, simplify);
+				       baselen, pathspec);
 	if (is_dot_or_dotdot(de->d_name) || !strcmp(de->d_name, ".git"))
 		return path_none;
 	strbuf_setlen(path, baselen);
 	strbuf_addstr(path, de->d_name);
-	if (simplify_away(path->buf, path->len, simplify))
+	if (simplify_away(path->buf, path->len, pathspec))
 		return path_none;
 
 	dtype = DTYPE(de);
-	return treat_one_path(dir, untracked, path, baselen, simplify, dtype, de);
+	return treat_one_path(dir, untracked, path, baselen, pathspec, dtype, de);
 }
 
 static void add_untracked(struct untracked_cache_dir *dir, const char *name)
@@ -1703,7 +1721,7 @@ static void close_cached_dir(struct cached_dir *cdir)
 static enum path_treatment read_directory_recursive(struct dir_struct *dir,
 				    const char *base, int baselen,
 				    struct untracked_cache_dir *untracked, int check_only,
-				    const struct path_simplify *simplify)
+				    const struct pathspec *pathspec)
 {
 	struct cached_dir cdir;
 	enum path_treatment state, subdir_state, dir_state = path_none;
@@ -1719,7 +1737,8 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir,
 
 	while (!read_cached_dir(&cdir)) {
 		/* check how the file or directory should be treated */
-		state = treat_path(dir, untracked, &cdir, &path, baselen, simplify);
+		state = treat_path(dir, untracked, &cdir, &path,
+				   baselen, pathspec);
 
 		if (state > dir_state)
 			dir_state = state;
@@ -1731,8 +1750,9 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir,
 					      path.buf + baselen,
 					      path.len - baselen);
 			subdir_state =
-				read_directory_recursive(dir, path.buf, path.len,
-							 ud, check_only, simplify);
+				read_directory_recursive(dir, path.buf,
+							 path.len, ud,
+							 check_only, pathspec);
 			if (subdir_state > dir_state)
 				dir_state = subdir_state;
 		}
@@ -1756,7 +1776,7 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir,
 			else if ((dir->flags & DIR_SHOW_IGNORED_TOO) ||
 				((dir->flags & DIR_COLLECT_IGNORED) &&
 				exclude_matches_pathspec(path.buf, path.len,
-					simplify)))
+							 pathspec)))
 				dir_add_ignored(dir, path.buf, path.len);
 			break;
 
@@ -1787,36 +1807,9 @@ static int cmp_name(const void *p1, const void *p2)
 	return name_compare(e1->name, e1->len, e2->name, e2->len);
 }
 
-static struct path_simplify *create_simplify(const char **pathspec)
-{
-	int nr, alloc = 0;
-	struct path_simplify *simplify = NULL;
-
-	if (!pathspec)
-		return NULL;
-
-	for (nr = 0 ; ; nr++) {
-		const char *match;
-		ALLOC_GROW(simplify, nr + 1, alloc);
-		match = *pathspec++;
-		if (!match)
-			break;
-		simplify[nr].path = match;
-		simplify[nr].len = simple_length(match);
-	}
-	simplify[nr].path = NULL;
-	simplify[nr].len = 0;
-	return simplify;
-}
-
-static void free_simplify(struct path_simplify *simplify)
-{
-	free(simplify);
-}
-
 static int treat_leading_path(struct dir_struct *dir,
 			      const char *path, int len,
-			      const struct path_simplify *simplify)
+			      const struct pathspec *pathspec)
 {
 	struct strbuf sb = STRBUF_INIT;
 	int baselen, rc = 0;
@@ -1840,9 +1833,9 @@ static int treat_leading_path(struct dir_struct *dir,
 		strbuf_add(&sb, path, baselen);
 		if (!is_directory(sb.buf))
 			break;
-		if (simplify_away(sb.buf, sb.len, simplify))
+		if (simplify_away(sb.buf, sb.len, pathspec))
 			break;
-		if (treat_one_path(dir, NULL, &sb, baselen, simplify,
+		if (treat_one_path(dir, NULL, &sb, baselen, pathspec,
 				   DT_DIR, NULL) == path_none)
 			break; /* do not recurse into it */
 		if (len <= baselen) {
@@ -2010,33 +2003,14 @@ static struct untracked_cache_dir *validate_untracked_cache(struct dir_struct *d
 	return root;
 }
 
-int read_directory(struct dir_struct *dir, const char *path, int len, const struct pathspec *pathspec)
+int read_directory(struct dir_struct *dir, const char *path,
+		   int len, const struct pathspec *pathspec)
 {
-	struct path_simplify *simplify;
 	struct untracked_cache_dir *untracked;
 
-	/*
-	 * Check out create_simplify()
-	 */
-	if (pathspec)
-		GUARD_PATHSPEC(pathspec,
-			       PATHSPEC_FROMTOP |
-			       PATHSPEC_MAXDEPTH |
-			       PATHSPEC_LITERAL |
-			       PATHSPEC_GLOB |
-			       PATHSPEC_ICASE |
-			       PATHSPEC_EXCLUDE);
-
 	if (has_symlink_leading_path(path, len))
 		return dir->nr;
 
-	/*
-	 * exclude patterns are treated like positive ones in
-	 * create_simplify. Usually exclude patterns should be a
-	 * subset of positive ones, which has no impacts on
-	 * create_simplify().
-	 */
-	simplify = create_simplify(pathspec ? pathspec->_raw : NULL);
 	untracked = validate_untracked_cache(dir, len, pathspec);
 	if (!untracked)
 		/*
@@ -2044,9 +2018,8 @@ int read_directory(struct dir_struct *dir, const char *path, int len, const stru
 		 * e.g. prep_exclude()
 		 */
 		dir->untracked = NULL;
-	if (!len || treat_leading_path(dir, path, len, simplify))
-		read_directory_recursive(dir, path, len, untracked, 0, simplify);
-	free_simplify(simplify);
+	if (!len || treat_leading_path(dir, path, len, pathspec))
+		read_directory_recursive(dir, path, len, untracked, 0, pathspec);
 	QSORT(dir->entries, dir->nr, cmp_name);
 	QSORT(dir->ignored, dir->ignored_nr, cmp_name);
 	if (dir->untracked) {
-- 
2.11.0.390.gc69c2f50cf-goog


^ permalink raw reply related

* [PATCH v5 00/16] pathspec cleanup
From: Brandon Williams @ 2017-01-04 18:03 UTC (permalink / raw)
  To: git; +Cc: Brandon Williams, sbeller, pclouds, gitster
In-Reply-To: <20170103184241.128409-1-bmwill@google.com>

Changes in v5:
* Move GUARD_PATHSPEC to prevent checking if pathspec is null twice.
* Mark a string containing 'mnemonic' for translation.

Brandon Williams (16):
  mv: remove use of deprecated 'get_pathspec()'
  dir: remove struct path_simplify
  dir: convert fill_directory to use the pathspec struct interface
  ls-tree: convert show_recursive to use the pathspec struct interface
  pathspec: remove the deprecated get_pathspec function
  pathspec: copy and free owned memory
  pathspec: remove unused variable from unsupported_magic
  pathspec: always show mnemonic and name in unsupported_magic
  pathspec: simpler logic to prefix original pathspec elements
  pathspec: factor global magic into its own function
  pathspec: create parse_short_magic function
  pathspec: create parse_long_magic function
  pathspec: create parse_element_magic helper
  pathspec: create strip submodule slash helpers
  pathspec: small readability changes
  pathspec: rename prefix_pathspec to init_pathspec_item

 Documentation/technical/api-setup.txt |   2 -
 builtin/ls-tree.c                     |  16 +-
 builtin/mv.c                          |  50 ++--
 cache.h                               |   1 -
 dir.c                                 | 191 ++++++--------
 pathspec.c                            | 480 +++++++++++++++++++---------------
 pathspec.h                            |   5 +-
 7 files changed, 388 insertions(+), 357 deletions(-)

--- interdiff between v4 and v5

diff --git a/dir.c b/dir.c
index e8ddd7f8a..bc5ff7216 100644
--- a/dir.c
+++ b/dir.c
@@ -1353,18 +1353,17 @@ static int simplify_away(const char *path, int pathlen,
 {
 	int i;
 
-	if (pathspec)
-		GUARD_PATHSPEC(pathspec,
-			       PATHSPEC_FROMTOP |
-			       PATHSPEC_MAXDEPTH |
-			       PATHSPEC_LITERAL |
-			       PATHSPEC_GLOB |
-			       PATHSPEC_ICASE |
-			       PATHSPEC_EXCLUDE);
-
 	if (!pathspec || !pathspec->nr)
 		return 0;
 
+	GUARD_PATHSPEC(pathspec,
+		       PATHSPEC_FROMTOP |
+		       PATHSPEC_MAXDEPTH |
+		       PATHSPEC_LITERAL |
+		       PATHSPEC_GLOB |
+		       PATHSPEC_ICASE |
+		       PATHSPEC_EXCLUDE);
+
 	for (i = 0; i < pathspec->nr; i++) {
 		const struct pathspec_item *item = &pathspec->items[i];
 		int len = item->nowildcard_len;
@@ -1394,18 +1393,17 @@ static int exclude_matches_pathspec(const char *path, int pathlen,
 {
 	int i;
 
-	if (pathspec)
-		GUARD_PATHSPEC(pathspec,
-			       PATHSPEC_FROMTOP |
-			       PATHSPEC_MAXDEPTH |
-			       PATHSPEC_LITERAL |
-			       PATHSPEC_GLOB |
-			       PATHSPEC_ICASE |
-			       PATHSPEC_EXCLUDE);
-
 	if (!pathspec || !pathspec->nr)
 		return 0;
 
+	GUARD_PATHSPEC(pathspec,
+		       PATHSPEC_FROMTOP |
+		       PATHSPEC_MAXDEPTH |
+		       PATHSPEC_LITERAL |
+		       PATHSPEC_GLOB |
+		       PATHSPEC_ICASE |
+		       PATHSPEC_EXCLUDE);
+
 	for (i = 0; i < pathspec->nr; i++) {
 		const struct pathspec_item *item = &pathspec->items[i];
 		int len = item->nowildcard_len;
diff --git a/pathspec.c b/pathspec.c
index bcf3ba039..ff2509ddd 100644
--- a/pathspec.c
+++ b/pathspec.c
@@ -417,7 +417,7 @@ static void NORETURN unsupported_magic(const char *pattern,
 			strbuf_addstr(&sb, ", ");
 
 		if (m->mnemonic)
-			strbuf_addf(&sb, "'%s' (mnemonic: '%c')",
+			strbuf_addf(&sb, _("'%s' (mnemonic: '%c')"),
 				    m->name, m->mnemonic);
 		else
 			strbuf_addf(&sb, "'%s'", m->name);

-- 
2.11.0.390.gc69c2f50cf-goog


^ permalink raw reply related

* [PATCH v5 01/16] mv: remove use of deprecated 'get_pathspec()'
From: Brandon Williams @ 2017-01-04 18:03 UTC (permalink / raw)
  To: git; +Cc: Brandon Williams, sbeller, pclouds, gitster
In-Reply-To: <20170104180411.150000-1-bmwill@google.com>

Convert the 'internal_copy_pathspec()' function to 'prefix_path()'
instead of using the deprecated 'get_pathspec()' interface.  Also,
rename 'internal_copy_pathspec()' to 'internal_prefix_pathspec()' to be
more descriptive of what the funciton is actually doing.

In addition to this, fix a memory leak caused by only duplicating some
of the pathspec elements.  Instead always duplicate all of the the
pathspec elements as an intermediate step (with modificationed based on
the passed in flags).  This way the intermediate strings can then be
freed after getting the result from 'prefix_path()'.

Signed-off-by: Brandon Williams <bmwill@google.com>
---
 builtin/mv.c | 50 +++++++++++++++++++++++++++++++-------------------
 1 file changed, 31 insertions(+), 19 deletions(-)

diff --git a/builtin/mv.c b/builtin/mv.c
index 2f43877bc..4e86dc523 100644
--- a/builtin/mv.c
+++ b/builtin/mv.c
@@ -4,6 +4,7 @@
  * Copyright (C) 2006 Johannes Schindelin
  */
 #include "builtin.h"
+#include "pathspec.h"
 #include "lockfile.h"
 #include "dir.h"
 #include "cache-tree.h"
@@ -19,31 +20,42 @@ static const char * const builtin_mv_usage[] = {
 #define DUP_BASENAME 1
 #define KEEP_TRAILING_SLASH 2
 
-static const char **internal_copy_pathspec(const char *prefix,
-					   const char **pathspec,
-					   int count, unsigned flags)
+static const char **internal_prefix_pathspec(const char *prefix,
+					     const char **pathspec,
+					     int count, unsigned flags)
 {
 	int i;
 	const char **result;
+	int prefixlen = prefix ? strlen(prefix) : 0;
 	ALLOC_ARRAY(result, count + 1);
-	COPY_ARRAY(result, pathspec, count);
-	result[count] = NULL;
+
+	/* Create an intermediate copy of the pathspec based on the flags */
 	for (i = 0; i < count; i++) {
-		int length = strlen(result[i]);
+		int length = strlen(pathspec[i]);
 		int to_copy = length;
+		char *it;
 		while (!(flags & KEEP_TRAILING_SLASH) &&
-		       to_copy > 0 && is_dir_sep(result[i][to_copy - 1]))
+		       to_copy > 0 && is_dir_sep(pathspec[i][to_copy - 1]))
 			to_copy--;
-		if (to_copy != length || flags & DUP_BASENAME) {
-			char *it = xmemdupz(result[i], to_copy);
-			if (flags & DUP_BASENAME) {
-				result[i] = xstrdup(basename(it));
-				free(it);
-			} else
-				result[i] = it;
+
+		it = xmemdupz(pathspec[i], to_copy);
+		if (flags & DUP_BASENAME) {
+			result[i] = xstrdup(basename(it));
+			free(it);
+		} else {
+			result[i] = it;
 		}
 	}
-	return get_pathspec(prefix, result);
+	result[count] = NULL;
+
+	/* Prefix the pathspec and free the old intermediate strings */
+	for (i = 0; i < count; i++) {
+		const char *match = prefix_path(prefix, prefixlen, result[i]);
+		free((char *) result[i]);
+		result[i] = match;
+	}
+
+	return result;
 }
 
 static const char *add_slash(const char *path)
@@ -130,7 +142,7 @@ int cmd_mv(int argc, const char **argv, const char *prefix)
 	if (read_cache() < 0)
 		die(_("index file corrupt"));
 
-	source = internal_copy_pathspec(prefix, argv, argc, 0);
+	source = internal_prefix_pathspec(prefix, argv, argc, 0);
 	modes = xcalloc(argc, sizeof(enum update_mode));
 	/*
 	 * Keep trailing slash, needed to let
@@ -140,16 +152,16 @@ int cmd_mv(int argc, const char **argv, const char *prefix)
 	flags = KEEP_TRAILING_SLASH;
 	if (argc == 1 && is_directory(argv[0]) && !is_directory(argv[1]))
 		flags = 0;
-	dest_path = internal_copy_pathspec(prefix, argv + argc, 1, flags);
+	dest_path = internal_prefix_pathspec(prefix, argv + argc, 1, flags);
 	submodule_gitfile = xcalloc(argc, sizeof(char *));
 
 	if (dest_path[0][0] == '\0')
 		/* special case: "." was normalized to "" */
-		destination = internal_copy_pathspec(dest_path[0], argv, argc, DUP_BASENAME);
+		destination = internal_prefix_pathspec(dest_path[0], argv, argc, DUP_BASENAME);
 	else if (!lstat(dest_path[0], &st) &&
 			S_ISDIR(st.st_mode)) {
 		dest_path[0] = add_slash(dest_path[0]);
-		destination = internal_copy_pathspec(dest_path[0], argv, argc, DUP_BASENAME);
+		destination = internal_prefix_pathspec(dest_path[0], argv, argc, DUP_BASENAME);
 	} else {
 		if (argc != 1)
 			die(_("destination '%s' is not a directory"), dest_path[0]);
-- 
2.11.0.390.gc69c2f50cf-goog


^ permalink raw reply related

* [PATCH v5 05/16] pathspec: remove the deprecated get_pathspec function
From: Brandon Williams @ 2017-01-04 18:04 UTC (permalink / raw)
  To: git; +Cc: Brandon Williams, sbeller, pclouds, gitster
In-Reply-To: <20170104180411.150000-1-bmwill@google.com>

Now that all callers of the old 'get_pathspec' interface have been
migrated to use the new pathspec struct interface it can be removed
from the codebase.

Since there are no more users of the '_raw' field in the pathspec struct
it can also be removed.  This patch also removes the old functionality
of modifying the const char **argv array that was passed into
parse_pathspec.  Instead the constructed 'match' string (which is a
pathspec element with the prefix prepended) is only stored in its
corresponding pathspec_item entry.

Signed-off-by: Brandon Williams <bmwill@google.com>
---
 Documentation/technical/api-setup.txt |  2 --
 cache.h                               |  1 -
 pathspec.c                            | 42 +++--------------------------------
 pathspec.h                            |  1 -
 4 files changed, 3 insertions(+), 43 deletions(-)

diff --git a/Documentation/technical/api-setup.txt b/Documentation/technical/api-setup.txt
index 540e45568..eb1fa9853 100644
--- a/Documentation/technical/api-setup.txt
+++ b/Documentation/technical/api-setup.txt
@@ -27,8 +27,6 @@ parse_pathspec(). This function takes several arguments:
 
 - prefix and args come from cmd_* functions
 
-get_pathspec() is obsolete and should never be used in new code.
-
 parse_pathspec() helps catch unsupported features and reject them
 politely. At a lower level, different pathspec-related functions may
 not support the same set of features. Such pathspec-sensitive
diff --git a/cache.h b/cache.h
index a50a61a19..0f80e01bd 100644
--- a/cache.h
+++ b/cache.h
@@ -514,7 +514,6 @@ extern void set_git_work_tree(const char *tree);
 
 #define ALTERNATE_DB_ENVIRONMENT "GIT_ALTERNATE_OBJECT_DIRECTORIES"
 
-extern const char **get_pathspec(const char *prefix, const char **pathspec);
 extern void setup_work_tree(void);
 extern const char *setup_git_directory_gently(int *);
 extern const char *setup_git_directory(void);
diff --git a/pathspec.c b/pathspec.c
index 22ca74a12..1f918cbae 100644
--- a/pathspec.c
+++ b/pathspec.c
@@ -103,7 +103,7 @@ static void prefix_short_magic(struct strbuf *sb, int prefixlen,
  */
 static unsigned prefix_pathspec(struct pathspec_item *item,
 				unsigned *p_short_magic,
-				const char **raw, unsigned flags,
+				unsigned flags,
 				const char *prefix, int prefixlen,
 				const char *elt)
 {
@@ -240,7 +240,7 @@ static unsigned prefix_pathspec(struct pathspec_item *item,
 		if (!match)
 			die(_("%s: '%s' is outside repository"), elt, copyfrom);
 	}
-	*raw = item->match = match;
+	item->match = match;
 	/*
 	 * Prefix the pathspec (keep all magic) and assign to
 	 * original. Useful for passing to another command.
@@ -381,8 +381,6 @@ void parse_pathspec(struct pathspec *pathspec,
 
 	/* No arguments with prefix -> prefix pathspec */
 	if (!entry) {
-		static const char *raw[2];
-
 		if (flags & PATHSPEC_PREFER_FULL)
 			return;
 
@@ -394,10 +392,7 @@ void parse_pathspec(struct pathspec *pathspec,
 		item->original = prefix;
 		item->nowildcard_len = item->len = strlen(prefix);
 		item->prefix = item->len;
-		raw[0] = prefix;
-		raw[1] = NULL;
 		pathspec->nr = 1;
-		pathspec->_raw = raw;
 		return;
 	}
 
@@ -415,7 +410,6 @@ void parse_pathspec(struct pathspec *pathspec,
 	pathspec->nr = n;
 	ALLOC_ARRAY(pathspec->items, n);
 	item = pathspec->items;
-	pathspec->_raw = argv;
 	prefixlen = prefix ? strlen(prefix) : 0;
 
 	for (i = 0; i < n; i++) {
@@ -423,7 +417,7 @@ void parse_pathspec(struct pathspec *pathspec,
 		entry = argv[i];
 
 		item[i].magic = prefix_pathspec(item + i, &short_magic,
-						argv + i, flags,
+						flags,
 						prefix, prefixlen, entry);
 		if ((flags & PATHSPEC_LITERAL_PATH) &&
 		    !(magic_mask & PATHSPEC_LITERAL))
@@ -457,36 +451,6 @@ void parse_pathspec(struct pathspec *pathspec,
 	}
 }
 
-/*
- * N.B. get_pathspec() is deprecated in favor of the "struct pathspec"
- * based interface - see pathspec.c:parse_pathspec().
- *
- * Arguments:
- *  - prefix - a path relative to the root of the working tree
- *  - pathspec - a list of paths underneath the prefix path
- *
- * Iterates over pathspec, prepending each path with prefix,
- * and return the resulting list.
- *
- * If pathspec is empty, return a singleton list containing prefix.
- *
- * If pathspec and prefix are both empty, return an empty list.
- *
- * This is typically used by built-in commands such as add.c, in order
- * to normalize argv arguments provided to the built-in into a list of
- * paths to process, all relative to the root of the working tree.
- */
-const char **get_pathspec(const char *prefix, const char **pathspec)
-{
-	struct pathspec ps;
-	parse_pathspec(&ps,
-		       PATHSPEC_ALL_MAGIC &
-		       ~(PATHSPEC_FROMTOP | PATHSPEC_LITERAL),
-		       PATHSPEC_PREFER_CWD,
-		       prefix, pathspec);
-	return ps._raw;
-}
-
 void copy_pathspec(struct pathspec *dst, const struct pathspec *src)
 {
 	*dst = *src;
diff --git a/pathspec.h b/pathspec.h
index 59809e479..70a592e91 100644
--- a/pathspec.h
+++ b/pathspec.h
@@ -19,7 +19,6 @@
 #define PATHSPEC_ONESTAR 1	/* the pathspec pattern satisfies GFNM_ONESTAR */
 
 struct pathspec {
-	const char **_raw; /* get_pathspec() result, not freed by clear_pathspec() */
 	int nr;
 	unsigned int has_wildcard:1;
 	unsigned int recursive:1;
-- 
2.11.0.390.gc69c2f50cf-goog


^ permalink raw reply related

* [PATCH v5 06/16] pathspec: copy and free owned memory
From: Brandon Williams @ 2017-01-04 18:04 UTC (permalink / raw)
  To: git; +Cc: Brandon Williams, sbeller, pclouds, gitster
In-Reply-To: <20170104180411.150000-1-bmwill@google.com>

The 'original' string entry in a pathspec_item is only duplicated some
of the time, instead always make a copy of the original and take
ownership of the memory.

Since both 'match' and 'original' string entries in a pathspec_item are
owned by the pathspec struct, they need to be freed when clearing the
pathspec struct (in 'clear_pathspec()') and duplicated when copying the
pathspec struct (in 'copy_pathspec()').

Also change the type of 'match' and 'original' to 'char *' in order to
more explicitly show the ownership of the memory.

Signed-off-by: Brandon Williams <bmwill@google.com>
---
 pathspec.c | 23 +++++++++++++++++++----
 pathspec.h |  4 ++--
 2 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/pathspec.c b/pathspec.c
index 1f918cbae..b8faa8f46 100644
--- a/pathspec.c
+++ b/pathspec.c
@@ -259,8 +259,9 @@ static unsigned prefix_pathspec(struct pathspec_item *item,
 		}
 		strbuf_addstr(&sb, match);
 		item->original = strbuf_detach(&sb, NULL);
-	} else
-		item->original = elt;
+	} else {
+		item->original = xstrdup(elt);
+	}
 	item->len = strlen(item->match);
 	item->prefix = prefixlen;
 
@@ -388,8 +389,8 @@ void parse_pathspec(struct pathspec *pathspec,
 			die("BUG: PATHSPEC_PREFER_CWD requires arguments");
 
 		pathspec->items = item = xcalloc(1, sizeof(*item));
-		item->match = prefix;
-		item->original = prefix;
+		item->match = xstrdup(prefix);
+		item->original = xstrdup(prefix);
 		item->nowildcard_len = item->len = strlen(prefix);
 		item->prefix = item->len;
 		pathspec->nr = 1;
@@ -453,13 +454,27 @@ void parse_pathspec(struct pathspec *pathspec,
 
 void copy_pathspec(struct pathspec *dst, const struct pathspec *src)
 {
+	int i;
+
 	*dst = *src;
 	ALLOC_ARRAY(dst->items, dst->nr);
 	COPY_ARRAY(dst->items, src->items, dst->nr);
+
+	for (i = 0; i < dst->nr; i++) {
+		dst->items[i].match = xstrdup(src->items[i].match);
+		dst->items[i].original = xstrdup(src->items[i].original);
+	}
 }
 
 void clear_pathspec(struct pathspec *pathspec)
 {
+	int i;
+
+	for (i = 0; i < pathspec->nr; i++) {
+		free(pathspec->items[i].match);
+		free(pathspec->items[i].original);
+	}
 	free(pathspec->items);
 	pathspec->items = NULL;
+	pathspec->nr = 0;
 }
diff --git a/pathspec.h b/pathspec.h
index 70a592e91..49fd823dd 100644
--- a/pathspec.h
+++ b/pathspec.h
@@ -25,8 +25,8 @@ struct pathspec {
 	unsigned magic;
 	int max_depth;
 	struct pathspec_item {
-		const char *match;
-		const char *original;
+		char *match;
+		char *original;
 		unsigned magic;
 		int len, prefix;
 		int nowildcard_len;
-- 
2.11.0.390.gc69c2f50cf-goog


^ permalink raw reply related

* Re: [PATCH v4 0/5] road to reentrant real_path
From: Brandon Williams @ 2017-01-04 18:13 UTC (permalink / raw)
  To: Jeff King
  Cc: Torsten Bögershausen, git, sbeller, jacob.keller, gitster,
	ramsay, j6t, pclouds, larsxschneider
In-Reply-To: <20170104070107.huse2a6thz737epv@sigill.intra.peff.net>

On 01/04, Jeff King wrote:
> On Wed, Jan 04, 2017 at 07:56:02AM +0100, Torsten Bögershausen wrote:
> 
> > On 04.01.17 01:48, Jeff King wrote:
> > > On Tue, Jan 03, 2017 at 11:09:18AM -0800, Brandon Williams wrote:
> > > 
> > >> Only change with v4 is in [1/5] renaming the #define MAXSYMLINKS back to
> > >> MAXDEPTH due to a naming conflict brought up by Lars Schneider.
> > > 
> > > Hmm. Isn't MAXSYMLINKS basically what you want here, though? It what's
> > > what all other similar functions will be using.
> > > 
> > > The only problem was that we were redefining the macro. So maybe:
> > > 
> > >   #ifndef MAXSYMLINKS
> > >   #define MAXSYMLINKS 5
> > >   #endif
> > > 
> > > would be a good solution?
> > Why 5  ? (looking at the  20..30 below)
> > And why 5 on one system and e.g. on my Mac OS
> > #define MAXSYMLINKS     32  
> 
> I mentioned "5" because that is the current value of MAXDEPTH. I do
> think it would be reasonable to bump it to something higher.
> 
> > Would the same value value for all Git installations on all platforms make sense?
> > #define GITMAXSYMLINKS 20
> 
> I think it's probably more important to match the rest of the OS, so
> that open("foo") and realpath("foo") behave similarly on the same
> system. Though I think even that isn't always possible, as the limit is
> dynamic on some systems.
> 
> I think the idea of the 20-30 range is that it's small enough to catch
> an infinite loop quickly, and large enough that nobody will ever hit it
> in practice. :)

I agree that we should have similar guarantees as the OS provides,
especially if the OS already has MAXSYMLINKS defined.  What then, should
the fall back value be if the OS doesn't have this defined?  5 like we
have done historically, or something around the 20-30 range as Torsten
suggests?

-- 
Brandon Williams

^ permalink raw reply

* Re: [PATCH v4 1/4] Avoid Coverity warning about unfree()d git_exec_path()
From: Stefan Beller @ 2017-01-04 18:09 UTC (permalink / raw)
  To: Johannes Schindelin
  Cc: git@vger.kernel.org, Junio C Hamano, David Aguilar,
	Dennis Kaarsemaker, Paul Sbarra
In-Reply-To: <alpine.DEB.2.20.1701032231400.3469@virtualbox>

On Tue, Jan 3, 2017 at 1:33 PM, Johannes Schindelin
<Johannes.Schindelin@gmx.de> wrote:
>
> This patch was originally only to appease Coverity, but it actually *does*
> plug a very real memory leak: previously, *every* call to git_exec_path()
> *possibly* returned a newly-malloc()ed buffer. Now, the first call will
> store that pointer in a static variable and reuse it later.
>
> Could you maybe help me with improving the commit message?

As someone not familiar with that area of code, this explained it
enough for me to understand, so maybe:

    exec_cmd: do not leak via git_exec_path

    Every call to git_exec_path() possibly returned a newly-malloc()ed
    buffer. Now, the first call will allocate the buffer and subsequent
    calls return a pointer to it, which then prevents leaking memory
    on each call.

The return value of a "const char *" hints to the caller, that the memory
is not owned by the caller, do we need to be explicit there (i.e. a comment
declaring the memory ownership? Probably not.)

Thanks,
Stefan

^ permalink raw reply

* Re: [PATCH v4 4/5] real_path: have callers use real_pathdup and strbuf_realpath
From: Brandon Williams @ 2017-01-04 18:14 UTC (permalink / raw)
  To: Jacob Keller
  Cc: Git mailing list, Stefan Beller, Jeff King, Junio C Hamano,
	Ramsay Jones, Torsten Bögershausen, Johannes Sixt,
	Duy Nguyen, Lars Schneider
In-Reply-To: <CA+P7+xrgt+aTF4ibJ139=WihwHwG_m01bjAaF5-VW=Rk8u1ykA@mail.gmail.com>

On 01/03, Jacob Keller wrote:
> On Tue, Jan 3, 2017 at 11:09 AM, Brandon Williams <bmwill@google.com> wrote:
> > Migrate callers of real_path() who duplicate the retern value to use
> > real_pathdup or strbuf_realpath.
> 
> Nit: s/retern/return

Thanks for catching that, I'll fix that in the next reroll.

-- 
Brandon Williams

^ permalink raw reply

* Re: [PATCH v4 0/5] road to reentrant real_path
From: Stefan Beller @ 2017-01-04 18:22 UTC (permalink / raw)
  To: Brandon Williams
  Cc: Jeff King, Torsten Bögershausen, git@vger.kernel.org,
	Jacob Keller, Junio C Hamano, Ramsay Jones, Johannes Sixt,
	Duy Nguyen, Lars Schneider
In-Reply-To: <20170104181318.GC69227@google.com>

On Wed, Jan 4, 2017 at 10:13 AM, Brandon Williams <bmwill@google.com> wrote:
> On 01/04, Jeff King wrote:
>> On Wed, Jan 04, 2017 at 07:56:02AM +0100, Torsten Bögershausen wrote:
>>
>> > On 04.01.17 01:48, Jeff King wrote:
>> > > On Tue, Jan 03, 2017 at 11:09:18AM -0800, Brandon Williams wrote:
>> > >
>> > >> Only change with v4 is in [1/5] renaming the #define MAXSYMLINKS back to
>> > >> MAXDEPTH due to a naming conflict brought up by Lars Schneider.
>> > >
>> > > Hmm. Isn't MAXSYMLINKS basically what you want here, though? It what's
>> > > what all other similar functions will be using.
>> > >
>> > > The only problem was that we were redefining the macro. So maybe:
>> > >
>> > >   #ifndef MAXSYMLINKS
>> > >   #define MAXSYMLINKS 5
>> > >   #endif
>> > >
>> > > would be a good solution?
>> > Why 5  ? (looking at the  20..30 below)
>> > And why 5 on one system and e.g. on my Mac OS
>> > #define MAXSYMLINKS     32
>>
>> I mentioned "5" because that is the current value of MAXDEPTH. I do
>> think it would be reasonable to bump it to something higher.
>>
>> > Would the same value value for all Git installations on all platforms make sense?
>> > #define GITMAXSYMLINKS 20
>>
>> I think it's probably more important to match the rest of the OS, so
>> that open("foo") and realpath("foo") behave similarly on the same
>> system. Though I think even that isn't always possible, as the limit is
>> dynamic on some systems.
>>
>> I think the idea of the 20-30 range is that it's small enough to catch
>> an infinite loop quickly, and large enough that nobody will ever hit it
>> in practice. :)
>
> I agree that we should have similar guarantees as the OS provides,
> especially if the OS already has MAXSYMLINKS defined.  What then, should
> the fall back value be if the OS doesn't have this defined?  5 like we
> have done historically, or something around the 20-30 range as Torsten
> suggests?

As a fallback I'd rather go for a larger number than too small.
The reason for the existence is just to break an infinite loop
(and report an error? which the current code doesn't quite do,
but your series actually does).

If the number is too large, then it takes a bit longer to generate the error
message, but the error path is no big deal w.r.t. performance, so it's fine
for it taking a bit longer.

If the number is too low, then we may hinder people from getting actual
work done, (i.e. they have to figure out what the problem is and recompile
git), so I'd think a larger number is not harmful. So 32?

>
> --
> Brandon Williams

^ permalink raw reply

* Re: [PATCH 2/2] pathspec: give better message for submodule related pathspec error
From: Stefan Beller @ 2017-01-04 18:46 UTC (permalink / raw)
  To: Jeff King; +Cc: Junio C Hamano, git@vger.kernel.org, Brandon Williams
In-Reply-To: <20170104075506.sa5oa5bheykswkwn@sigill.intra.peff.net>

On Tue, Jan 3, 2017 at 11:55 PM, Jeff King <peff@peff.net> wrote:
> But as this commit message needs to stand on its own, rather than as part of a
> larger discussion thread, it might be worth expanding "one of the cases"
> here. And talking about what's happening to the other cases.
>
> Like:
>
>   This assertion triggered for cases where there wasn't a programming
>   bug, but just bogus input. In particular, if the user asks for a
>   pathspec that is inside a submodule, we shouldn't assert() or
>   die("BUG"); we should tell the user their request is bogus.
>
>   We'll retain the assertion for non-submodule cases, though. We don't
>   know of any cases that would trigger this, but it _would_ be
>   indicative of a programming error, and we should catch it here.

makes sense.

>
> or something. Writing the first paragraph made me wonder if a better
> solution, though, would be to catch and complain about this case
> earlier. IOW, this _is_ a programming bug, because we're violating some
> assumption of the pathspec code. And whatever is putting that item into
> the pathspec list is what should be fixed.
>
> I haven't looked closely enough to have a real opinion on that, though.

Well I think you get different behavior with different flags enabled, i.e.
the test provided is a cornercase (as "git add ." in the submodule should
not yell at us IF PATHSPEC_STRIP_SUBMODULE_SLASH_EXPENSIVE
were set, in my understanding of the code, so maybe the test rather adds
a ./file/with/characters inside the submodule directory)

I think a valid long term vision would be to have

    $ git -C submodule add file
    $ echo $?
    0

to behave the same as

    $ git add submodule/file
    advice/hint: adding file inside of a submodule
    $ echo $?
    0
    $ git -c submodule.iKnowWhatIDo add submodule/anotherfile
    $ echo $?
    0

Brandon, who is refactoring the pathspec stuff currently may have
an opinion if we could catch it earlier and still have beautiful code.

Thanks,
Stefan

> Given the discussion, this comment seems funny now. Who cares about
> "historically"? It should probably be something like:
>
>   /*
>    * This case can be triggered by the user pointing us to a pathspec
>    * inside a submodule, which is an input error. Detect that here
>    * and complain, but fallback in the non-submodule case to a BUG,
>    * as we have no idea what would trigger that.
>    */

Makes sense.

>
> -Peff

^ permalink raw reply

* Re: [PATCH 2/4] t7610: make tests more independent and debuggable
From: Stefan Beller @ 2017-01-04 20:27 UTC (permalink / raw)
  To: Richard Hansen; +Cc: git@vger.kernel.org, David Aguilar, Johannes Sixt
In-Reply-To: <20170104005042.51530-3-hansenr@google.com>

On Tue, Jan 3, 2017 at 4:50 PM, Richard Hansen <hansenr@google.com> wrote:
> If a test fails it might leave the repository in a strange state.  Add
> 'git reset --hard' at the beginning of each test to increase the odds
> of passing when an earlier test fails.

So each test is cleaning up the previous test, which *may* confuse
a reader ("how is the reset --hard relevant for this test? Oooh it's
just a cleanup").

We could put it another way by having each test itself make clean
up after itself via

  test_when_finished "git reset --hard" &&
  ..

at the beginning of each test.
This would produce the same order of operations, i.e. a
reset run between each test, but semantically tells the reader
that the reset is part of the current test cleaning up after itself,
as "reset" is operation for this particular test to cleanup.
Does that make sense?


>
> Also use test-specific branches to avoid interfering with later tests
> and to make the tests easier to debug.

That sounds great!
Though in the code I only spot one occurrence for

+       git checkout -b test$test_count branch1 &&

so maybe that could be part of the first patch in the series?

Thanks,
Stefan

^ permalink raw reply

* Re: [PATCH 2/2] pathspec: give better message for submodule related pathspec error
From: Brandon Williams @ 2017-01-04 20:53 UTC (permalink / raw)
  To: Stefan Beller; +Cc: Jeff King, Junio C Hamano, git@vger.kernel.org
In-Reply-To: <CAGZ79kY3oVfn-xH4RQR9jMoKxoQUtF5HezY9HPUZGbx9KP-S5Q@mail.gmail.com>

On 01/04, Stefan Beller wrote:
> On Tue, Jan 3, 2017 at 11:55 PM, Jeff King <peff@peff.net> wrote:
> > But as this commit message needs to stand on its own, rather than as part of a
> > larger discussion thread, it might be worth expanding "one of the cases"
> > here. And talking about what's happening to the other cases.
> >
> > Like:
> >
> >   This assertion triggered for cases where there wasn't a programming
> >   bug, but just bogus input. In particular, if the user asks for a
> >   pathspec that is inside a submodule, we shouldn't assert() or
> >   die("BUG"); we should tell the user their request is bogus.
> >
> >   We'll retain the assertion for non-submodule cases, though. We don't
> >   know of any cases that would trigger this, but it _would_ be
> >   indicative of a programming error, and we should catch it here.
> 
> makes sense.
> 
> >
> > or something. Writing the first paragraph made me wonder if a better
> > solution, though, would be to catch and complain about this case
> > earlier. IOW, this _is_ a programming bug, because we're violating some
> > assumption of the pathspec code. And whatever is putting that item into
> > the pathspec list is what should be fixed.
> >
> > I haven't looked closely enough to have a real opinion on that, though.
> 
> Well I think you get different behavior with different flags enabled, i.e.
> the test provided is a cornercase (as "git add ." in the submodule should
> not yell at us IF PATHSPEC_STRIP_SUBMODULE_SLASH_EXPENSIVE
> were set, in my understanding of the code, so maybe the test rather adds
> a ./file/with/characters inside the submodule directory)
> 
> I think a valid long term vision would be to have
> 
>     $ git -C submodule add file
>     $ echo $?
>     0
> 
> to behave the same as
> 
>     $ git add submodule/file
>     advice/hint: adding file inside of a submodule
>     $ echo $?
>     0
>     $ git -c submodule.iKnowWhatIDo add submodule/anotherfile
>     $ echo $?
>     0
> 
> Brandon, who is refactoring the pathspec stuff currently may have
> an opinion if we could catch it earlier and still have beautiful code.
> 
> Thanks,
> Stefan
> 
> > Given the discussion, this comment seems funny now. Who cares about
> > "historically"? It should probably be something like:
> >
> >   /*
> >    * This case can be triggered by the user pointing us to a pathspec
> >    * inside a submodule, which is an input error. Detect that here
> >    * and complain, but fallback in the non-submodule case to a BUG,
> >    * as we have no idea what would trigger that.
> >    */
> 
> Makes sense.
> 
> >
> > -Peff

So there are really two different things going on in the pathspec code
with regards to submodules.

The case that this series is trying to solve is not because the user
provided a pathspec into a submodule, but rather they are executing in
the context of the submodule with bogus state.  Typically this bogus
state has something to do with the submodule's .gitdir being blown away
(like in the last test (3/3) added in this patch).  Because the
submodule doesn't have a .gitdir, it searches backward in the directory
hierarchy for a .gitdir and it happens to find the superproject's gitdir
and uses that as its own .gitdir.  When this happens test 3/3 catches
that assert with the prefix being "sub/" and match being "sub" (since
the submodule slash was removed).  The condition doesn't trigger when
you supply a pathspec of "./b/a" assuming you have a file 'a' in
directory 'b' inside the submodule, since the prefix would still be
"sub/" while the match string would be "sub/b/a".  Coincidentally the
check that PATHSPEC_STRIP_SUBMODULE_SLASH_EXPENSIVE does, does in fact
catch it (if using say the 'git add' command).

This leads me into the second case.  If
PATHSPEC_STRIP_SUBMODULE_SLASH_EXPENSIVE is set, then any pathspec which
decends into a submodule will indeed be caught and cause and error (as
was happens in test 2/3 in this patch).

So in my opinion, the assert at the end of constructing a
pathspec object probably isn't the best place for determining if the
submodule's gitdir has been destroyed and instead it has fallen back to
its parent's gitdir.  A check for something like this should happen much
sooner.

There are cases where it is advantages to be able to supply a pathspec
into a submodule without it erroring out (git grep --recurse-submodules
is one example).  So right now the current method for not allowing a
pathspec into a submodule is to pass the STRIP_SUBMODULE_SLASH_EXPENSIVE
flag when creating the pathspec object.

-- 
Brandon Williams

^ permalink raw reply

* Re: [PATCH v4 0/5] road to reentrant real_path
From: Jacob Keller @ 2017-01-04 21:46 UTC (permalink / raw)
  To: Stefan Beller
  Cc: Brandon Williams, Jeff King, Torsten Bögershausen,
	git@vger.kernel.org, Junio C Hamano, Ramsay Jones, Johannes Sixt,
	Duy Nguyen, Lars Schneider
In-Reply-To: <CAGZ79kbdNuGe038Wb9OR1SKq-XYtsPrLsn6XueO6zsKKGFYiNg@mail.gmail.com>

On Wed, Jan 4, 2017 at 10:22 AM, Stefan Beller <sbeller@google.com> wrote:
> On Wed, Jan 4, 2017 at 10:13 AM, Brandon Williams <bmwill@google.com> wrote:
>> On 01/04, Jeff King wrote:
>>> On Wed, Jan 04, 2017 at 07:56:02AM +0100, Torsten Bögershausen wrote:
>>>
>>> > On 04.01.17 01:48, Jeff King wrote:
>>> > > On Tue, Jan 03, 2017 at 11:09:18AM -0800, Brandon Williams wrote:
>>> > >
>>> > >> Only change with v4 is in [1/5] renaming the #define MAXSYMLINKS back to
>>> > >> MAXDEPTH due to a naming conflict brought up by Lars Schneider.
>>> > >
>>> > > Hmm. Isn't MAXSYMLINKS basically what you want here, though? It what's
>>> > > what all other similar functions will be using.
>>> > >
>>> > > The only problem was that we were redefining the macro. So maybe:
>>> > >
>>> > >   #ifndef MAXSYMLINKS
>>> > >   #define MAXSYMLINKS 5
>>> > >   #endif
>>> > >
>>> > > would be a good solution?
>>> > Why 5  ? (looking at the  20..30 below)
>>> > And why 5 on one system and e.g. on my Mac OS
>>> > #define MAXSYMLINKS     32
>>>
>>> I mentioned "5" because that is the current value of MAXDEPTH. I do
>>> think it would be reasonable to bump it to something higher.
>>>
>>> > Would the same value value for all Git installations on all platforms make sense?
>>> > #define GITMAXSYMLINKS 20
>>>
>>> I think it's probably more important to match the rest of the OS, so
>>> that open("foo") and realpath("foo") behave similarly on the same
>>> system. Though I think even that isn't always possible, as the limit is
>>> dynamic on some systems.
>>>
>>> I think the idea of the 20-30 range is that it's small enough to catch
>>> an infinite loop quickly, and large enough that nobody will ever hit it
>>> in practice. :)
>>
>> I agree that we should have similar guarantees as the OS provides,
>> especially if the OS already has MAXSYMLINKS defined.  What then, should
>> the fall back value be if the OS doesn't have this defined?  5 like we
>> have done historically, or something around the 20-30 range as Torsten
>> suggests?
>
> As a fallback I'd rather go for a larger number than too small.
> The reason for the existence is just to break an infinite loop
> (and report an error? which the current code doesn't quite do,
> but your series actually does).
>
> If the number is too large, then it takes a bit longer to generate the error
> message, but the error path is no big deal w.r.t. performance, so it's fine
> for it taking a bit longer.
>
> If the number is too low, then we may hinder people from getting actual
> work done, (i.e. they have to figure out what the problem is and recompile
> git), so I'd think a larger number is not harmful. So 32?
>

I think I agree as well.

Thanks,
Jake

>>
>> --
>> Brandon Williams

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox