Git development

Git development
 help / color / mirror / Atom feed

* [PATCH v5 4/5] urlmatch: include host in urlmatch ranking
From: Patrick Steinhardt @ 2017-01-31  9:01 UTC (permalink / raw)
  To: git; +Cc: Patrick Steinhardt, Junio C Hamano, Patrick Steinhardt
In-Reply-To: <cover.1485853153.git.ps@pks.im>

From: Patrick Steinhardt <patrick.steinhardt@elego.de>

In order to be able to rank positive matches by `urlmatch`, we inspect
the path length and user part to decide whether a match is better than
another match. As all other parts are matched exactly between both URLs,
this is the correct thing to do right now.

In the future, though, we want to introduce wild cards for the domain
part. When doing this, it does not make sense anymore to only compare
the path lengths. Instead, we also want to compare the domain lengths to
determine which of both URLs matches the host part more closely.

Signed-off-by: Patrick Steinhardt <patrick.steinhardt@elego.de>
---
 t/t1300-repo-config.sh | 33 ++++++++++++++++++++++++++++
 urlmatch.c             | 59 +++++++++++++++++++++++++++++---------------------
 urlmatch.h             |  3 ++-
 3 files changed, 69 insertions(+), 26 deletions(-)

diff --git a/t/t1300-repo-config.sh b/t/t1300-repo-config.sh
index 923bfc5a2..6c844d519 100755
--- a/t/t1300-repo-config.sh
+++ b/t/t1300-repo-config.sh
@@ -1177,6 +1177,39 @@ test_expect_success 'urlmatch' '
 	test_cmp expect actual
 '
 
+test_expect_success 'urlmatch favors more specific URLs' '
+	cat >.git/config <<-\EOF &&
+	[http "https://example.com/"]
+		cookieFile = /tmp/root.txt
+	[http "https://example.com/subdirectory"]
+		cookieFile = /tmp/subdirectory.txt
+	[http "https://user@example.com/"]
+		cookieFile = /tmp/user.txt
+	[http "https://averylonguser@example.com/"]
+		cookieFile = /tmp/averylonguser.txt
+	EOF
+
+	echo http.cookiefile /tmp/root.txt >expect &&
+	git config --get-urlmatch HTTP https://example.com >actual &&
+	test_cmp expect actual &&
+
+	echo http.cookiefile /tmp/subdirectory.txt >expect &&
+	git config --get-urlmatch HTTP https://example.com/subdirectory >actual &&
+	test_cmp expect actual &&
+
+	echo http.cookiefile /tmp/subdirectory.txt >expect &&
+	git config --get-urlmatch HTTP https://example.com/subdirectory/nested >actual &&
+	test_cmp expect actual &&
+
+	echo http.cookiefile /tmp/user.txt >expect &&
+	git config --get-urlmatch HTTP https://user@example.com/ >actual &&
+	test_cmp expect actual &&
+
+	echo http.cookiefile /tmp/subdirectory.txt >expect &&
+	git config --get-urlmatch HTTP https://averylonguser@example.com/subdirectory >actual &&
+	test_cmp expect actual
+'
+
 # good section hygiene
 test_expect_failure 'unsetting the last key in a section removes header' '
 	cat >.git/config <<-\EOF &&
diff --git a/urlmatch.c b/urlmatch.c
index e328905eb..f79887825 100644
--- a/urlmatch.c
+++ b/urlmatch.c
@@ -426,7 +426,7 @@ static size_t url_match_prefix(const char *url,
 
 static int match_urls(const struct url_info *url,
 		      const struct url_info *url_prefix,
-		      int *exactusermatch)
+		      struct urlmatch_item *match)
 {
 	/*
 	 * url_prefix matches url if the scheme, host and port of url_prefix
@@ -445,8 +445,8 @@ static int match_urls(const struct url_info *url,
 	 * contained a user name or false if url_prefix did not have a
 	 * user name.  If there is no match *exactusermatch is left untouched.
 	 */
-	int usermatched = 0;
-	int pathmatchlen;
+	char usermatched = 0;
+	size_t pathmatchlen;
 
 	if (!url || !url_prefix || !url->url || !url_prefix->url)
 		return 0;
@@ -483,22 +483,38 @@ static int match_urls(const struct url_info *url,
 		url->url + url->path_off,
 		url_prefix->url + url_prefix->path_off,
 		url_prefix->url_len - url_prefix->path_off);
+	if (!pathmatchlen)
+		return 0; /* paths do not match */
 
-	if (pathmatchlen && exactusermatch)
-		*exactusermatch = usermatched;
-	return pathmatchlen;
+	if (match) {
+		match->hostmatch_len = url_prefix->host_len;
+		match->pathmatch_len = pathmatchlen;
+		match->user_matched = usermatched;
+	}
+
+	return 1;
+}
+
+static int cmp_matches(const struct urlmatch_item *a,
+		       const struct urlmatch_item *b)
+{
+	if (a->hostmatch_len != b->hostmatch_len)
+		return a->hostmatch_len < b->hostmatch_len ? -1 : 1;
+	if (a->pathmatch_len != b->pathmatch_len)
+		return a->pathmatch_len < b->pathmatch_len ? -1 : 1;
+	if (a->user_matched != b->user_matched)
+		return b->user_matched ? -1 : 1;
+	return 0;
 }
 
 int urlmatch_config_entry(const char *var, const char *value, void *cb)
 {
 	struct string_list_item *item;
 	struct urlmatch_config *collect = cb;
-	struct urlmatch_item *matched;
+	struct urlmatch_item matched = {0};
 	struct url_info *url = &collect->url;
 	const char *key, *dot;
 	struct strbuf synthkey = STRBUF_INIT;
-	size_t matched_len = 0;
-	int user_matched = 0;
 	int retval;
 
 	if (!skip_prefix(var, collect->section, &key) || *(key++) != '.') {
@@ -516,9 +532,9 @@ int urlmatch_config_entry(const char *var, const char *value, void *cb)
 		free(config_url);
 		if (!norm_url)
 			return 0;
-		matched_len = match_urls(url, &norm_info, &user_matched);
+		retval = match_urls(url, &norm_info, &matched);
 		free(norm_url);
-		if (!matched_len)
+		if (!retval)
 			return 0;
 		key = dot + 1;
 	}
@@ -528,24 +544,17 @@ int urlmatch_config_entry(const char *var, const char *value, void *cb)
 
 	item = string_list_insert(&collect->vars, key);
 	if (!item->util) {
-		matched = xcalloc(1, sizeof(*matched));
-		item->util = matched;
+		item->util = xcalloc(1, sizeof(matched));
 	} else {
-		matched = item->util;
-		/*
-		 * Is our match shorter?  Is our match the same
-		 * length, and without user while the current
-		 * candidate is with user?  Then we cannot use it.
-		 */
-		if (matched_len < matched->matched_len ||
-		    ((matched_len == matched->matched_len) &&
-		     (!user_matched && matched->user_matched)))
+		if (cmp_matches(&matched, item->util) <= 0)
+			 /*
+			  * Our match is worse than the old one,
+			  * we cannot use it.
+			  */
 			return 0;
-		/* Otherwise, replace it with this one. */
 	}
 
-	matched->matched_len = matched_len;
-	matched->user_matched = user_matched;
+	memcpy(item->util, &matched, sizeof(matched));
 	strbuf_addstr(&synthkey, collect->section);
 	strbuf_addch(&synthkey, '.');
 	strbuf_addstr(&synthkey, key);
diff --git a/urlmatch.h b/urlmatch.h
index 0ea812b03..37ee5da85 100644
--- a/urlmatch.h
+++ b/urlmatch.h
@@ -34,7 +34,8 @@ struct url_info {
 extern char *url_normalize(const char *, struct url_info *);
 
 struct urlmatch_item {
-	size_t matched_len;
+	size_t hostmatch_len;
+	size_t pathmatch_len;
 	char user_matched;
 };
 
-- 
2.11.0


^ permalink raw reply related

* [PATCH v5 3/5] urlmatch: split host and port fields in `struct url_info`
From: Patrick Steinhardt @ 2017-01-31  9:01 UTC (permalink / raw)
  To: git; +Cc: Patrick Steinhardt, Junio C Hamano, Patrick Steinhardt
In-Reply-To: <cover.1485853153.git.ps@pks.im>

From: Patrick Steinhardt <patrick.steinhardt@elego.de>

The `url_info` structure contains information about a normalized URL
with the URL's components being represented by different fields. The
host and port part though are to be accessed by the same `host` field,
so that getting the host and/or port separately becomes more involved
than really necessary.

To make the port more readily accessible, split up the host and port
fields. Namely, the `host_len` will not include the port length anymore
and a new `port_off` field has been added which includes the offset to
the port, if available.

The only user of these fields is `url_normalize_1`. This change makes it
easier later on to treat host and port differently when introducing
globs for domains.

Signed-off-by: Patrick Steinhardt <patrick.steinhardt@elego.de>
---
 urlmatch.c | 16 ++++++++++++----
 urlmatch.h |  9 +++++----
 2 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/urlmatch.c b/urlmatch.c
index d350478c0..e328905eb 100644
--- a/urlmatch.c
+++ b/urlmatch.c
@@ -104,7 +104,7 @@ static char *url_normalize_1(const char *url, struct url_info *out_info, char al
 	struct strbuf norm;
 	size_t spanned;
 	size_t scheme_len, user_off=0, user_len=0, passwd_off=0, passwd_len=0;
-	size_t host_off=0, host_len=0, port_len=0, path_off, path_len, result_len;
+	size_t host_off=0, host_len=0, port_off=0, port_len=0, path_off, path_len, result_len;
 	const char *slash_ptr, *at_ptr, *colon_ptr, *path_start;
 	char *result;
 
@@ -263,6 +263,7 @@ static char *url_normalize_1(const char *url, struct url_info *out_info, char al
 				return NULL;
 			}
 			strbuf_addch(&norm, ':');
+			port_off = norm.len;
 			strbuf_add(&norm, url, slash_ptr - url);
 			port_len = slash_ptr - url;
 		}
@@ -270,7 +271,7 @@ static char *url_normalize_1(const char *url, struct url_info *out_info, char al
 		url = slash_ptr;
 	}
 	if (host_off)
-		host_len = norm.len - host_off;
+		host_len = norm.len - host_off - (port_len ? port_len + 1 : 0);
 
 
 	/*
@@ -378,6 +379,7 @@ static char *url_normalize_1(const char *url, struct url_info *out_info, char al
 		out_info->passwd_len = passwd_len;
 		out_info->host_off = host_off;
 		out_info->host_len = host_len;
+		out_info->port_off = port_off;
 		out_info->port_len = port_len;
 		out_info->path_off = path_off;
 		out_info->path_len = path_len;
@@ -464,11 +466,17 @@ static int match_urls(const struct url_info *url,
 		usermatched = 1;
 	}
 
-	/* check the host and port */
+	/* check the host */
 	if (url_prefix->host_len != url->host_len ||
 	    strncmp(url->url + url->host_off,
 		    url_prefix->url + url_prefix->host_off, url->host_len))
-		return 0; /* host names and/or ports do not match */
+		return 0; /* host names do not match */
+
+	/* check the port */
+	if (url_prefix->port_len != url->port_len ||
+	    strncmp(url->url + url->port_off,
+		    url_prefix->url + url_prefix->port_off, url->port_len))
+		return 0; /* ports do not match */
 
 	/* check the path */
 	pathmatchlen = url_match_prefix(
diff --git a/urlmatch.h b/urlmatch.h
index 528862adc..0ea812b03 100644
--- a/urlmatch.h
+++ b/urlmatch.h
@@ -18,11 +18,12 @@ struct url_info {
 	size_t passwd_len;	/* length of passwd; if passwd_off != 0 but
 				   passwd_len == 0, an empty passwd was given */
 	size_t host_off;	/* offset into url to start of host name (0 => none) */
-	size_t host_len;	/* length of host name; this INCLUDES any ':portnum';
+	size_t host_len;	/* length of host name;
 				 * file urls may have host_len == 0 */
-	size_t port_len;	/* if a portnum is present (port_len != 0), it has
-				 * this length (excluding the leading ':') at the
-				 * end of the host name (always 0 for file urls) */
+	size_t port_off;	/* offset into url to start of port number (0 => none) */
+	size_t port_len;	/* if a portnum is present (port_off != 0), it has
+				 * this length (excluding the leading ':') starting
+				 * from port_off (always 0 for file urls) */
 	size_t path_off;	/* offset into url to the start of the url path;
 				 * this will always point to a '/' character
 				 * after the url has been normalized */
-- 
2.11.0


^ permalink raw reply related

* [PATCH v5 0/5] urlmatch: allow wildcard-based matches
From: Patrick Steinhardt @ 2017-01-31  9:01 UTC (permalink / raw)
  To: git; +Cc: Patrick Steinhardt, Junio C Hamano
In-Reply-To: <20170123130635.29577-1-patrick.steinhardt@elego.de>

Hi,

this is version 5 of my patch series. The previous version can
be found at [1]. The use case is to be able to configure an HTTP
proxy for all subdomains of a domain where there are hundreds of
subdomains.

This includes only a single change, interdiff is included below.
The previous version had an embarassing bug because a variable
was not properly initialized in all cases, leading to undefined
behavior. I also verified that the patches work on top of
4e59582ff (Seventh batch for 2.12, 2017-01-23), where Junio
reported the test failures.

Regards
Patrick

Patrick Steinhardt (5):
  mailmap: add Patrick Steinhardt's work address
  urlmatch: enable normalization of URLs with globs
  urlmatch: split host and port fields in `struct url_info`
  urlmatch: include host in urlmatch ranking
  urlmatch: allow globbing for the URL host part

 .mailmap                 |   1 +
 Documentation/config.txt |   5 +-
 t/t1300-repo-config.sh   | 105 ++++++++++++++++++++++++++++++++++++
 urlmatch.c               | 138 +++++++++++++++++++++++++++++++++++------------
 urlmatch.h               |  12 +++--
 5 files changed, 220 insertions(+), 41 deletions(-)

-- 
2.11.0

diff --git a/urlmatch.c b/urlmatch.c
index bb5267732..6c12f1a48 100644
--- a/urlmatch.c
+++ b/urlmatch.c
@@ -552,7 +552,7 @@ int urlmatch_config_entry(const char *var, const char *value, void *cb)
 {
 	struct string_list_item *item;
 	struct urlmatch_config *collect = cb;
-	struct urlmatch_item matched;
+	struct urlmatch_item matched = {0};
 	struct url_info *url = &collect->url;
 	const char *key, *dot;
 	struct strbuf synthkey = STRBUF_INIT;

^ permalink raw reply related

* [PATCH v5 1/5] mailmap: add Patrick Steinhardt's work address
From: Patrick Steinhardt @ 2017-01-31  9:01 UTC (permalink / raw)
  To: git; +Cc: Patrick Steinhardt, Junio C Hamano, Patrick Steinhardt
In-Reply-To: <cover.1485853153.git.ps@pks.im>

From: Patrick Steinhardt <patrick.steinhardt@elego.de>

Signed-off-by: Patrick Steinhardt <patrick.steinhardt@elego.de>
---
 .mailmap | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.mailmap b/.mailmap
index 9c87a3840..ea59205b9 100644
--- a/.mailmap
+++ b/.mailmap
@@ -177,6 +177,7 @@ Paolo Bonzini <bonzini@gnu.org> <paolo.bonzini@lu.unisi.ch>
 Pascal Obry <pascal@obry.net> <pascal.obry@gmail.com>
 Pascal Obry <pascal@obry.net> <pascal.obry@wanadoo.fr>
 Pat Notz <patnotz@gmail.com> <pknotz@sandia.gov>
+Patrick Steinhardt <ps@pks.im> <patrick.steinhardt@elego.de>
 Paul Mackerras <paulus@samba.org> <paulus@dorrigo.(none)>
 Paul Mackerras <paulus@samba.org> <paulus@pogo.(none)>
 Peter Baumann <waste.manager@gmx.de> <Peter.B.Baumann@stud.informatik.uni-erlangen.de>
-- 
2.11.0


^ permalink raw reply related

* [PATCH v5 2/5] urlmatch: enable normalization of URLs with globs
From: Patrick Steinhardt @ 2017-01-31  9:01 UTC (permalink / raw)
  To: git; +Cc: Patrick Steinhardt, Junio C Hamano, Patrick Steinhardt
In-Reply-To: <cover.1485853153.git.ps@pks.im>

From: Patrick Steinhardt <patrick.steinhardt@elego.de>

The `url_normalize` function is used to validate and normalize URLs. As
such, it does not allow for some special characters to be part of the
URLs that are to be normalized. As we want to allow using globs in some
configuration keys making use of URLs, namely `http.<url>.<key>`, but
still normalize them, we need to somehow enable some additional allowed
characters.

To do this without having to change all callers of `url_normalize`,
where most do not actually want globbing at all, we split off another
function `url_normalize_1`. This function accepts an additional
parameter `allow_globs`, which is subsequently called by `url_normalize`
with `allow_globs=0`.

As of now, this function is not used with globbing enabled. A caller
will be added in the following commit.

Signed-off-by: Patrick Steinhardt <patrick.steinhardt@elego.de>
---
 urlmatch.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/urlmatch.c b/urlmatch.c
index 132d342bc..d350478c0 100644
--- a/urlmatch.c
+++ b/urlmatch.c
@@ -63,7 +63,7 @@ static int append_normalized_escapes(struct strbuf *buf,
 	return 1;
 }

-char *url_normalize(const char *url, struct url_info *out_info)
+static char *url_normalize_1(const char *url, struct url_info *out_info, char allow_globs)
 {
 	/*
 	 * Normalize NUL-terminated url using the following rules:
@@ -191,7 +191,12 @@ char *url_normalize(const char *url, struct url_info *out_info)
 		strbuf_release(&norm);
 		return NULL;
 	}
-	spanned = strspn(url, URL_HOST_CHARS);
+
+	if (allow_globs)
+		spanned = strspn(url, URL_HOST_CHARS "*");
+	else
+		spanned = strspn(url, URL_HOST_CHARS);
+
 	if (spanned < colon_ptr - url) {
 		/* Host name has invalid characters */
 		if (out_info) {
@@ -380,6 +385,11 @@ char *url_normalize(const char *url, struct url_info *out_info)
 	return result;
 }

+char *url_normalize(const char *url, struct url_info *out_info)
+{
+	return url_normalize_1(url, out_info, 0);
+}
+
 static size_t url_match_prefix(const char *url,
 			       const char *url_prefix,
 			       size_t url_prefix_len)
-- 
2.11.0

^ permalink raw reply related

* [PATCH v5 5/5] urlmatch: allow globbing for the URL host part
From: Patrick Steinhardt @ 2017-01-31  9:01 UTC (permalink / raw)
  To: git; +Cc: Patrick Steinhardt, Junio C Hamano, Patrick Steinhardt
In-Reply-To: <cover.1485853153.git.ps@pks.im>

From: Patrick Steinhardt <patrick.steinhardt@elego.de>

The URL matching function computes for two URLs whether they match not.
The match is performed by splitting up the URL into different parts and
then doing an exact comparison with the to-be-matched URL.

The main user of `urlmatch` is the configuration subsystem. It allows to
set certain configurations based on the URL which is being connected to
via keys like `http.<url>.*`. A common use case for this is to set
proxies for only some remotes which match the given URL. Unfortunately,
having exact matches for all parts of the URL can become quite tedious
in some setups. Imagine for example a corporate network where there are
dozens or even hundreds of subdomains, which would have to be configured
individually.

Allow users to write an asterisk '*' in place of any 'host' or
'subdomain' label as part of the host name.  For example,
"http.https://*.example.com.proxy" sets "http.proxy" for all direct
subdomains of "https://example.com", e.g. "https://foo.example.com", but
not "https://foo.bar.example.com".

Signed-off-by: Patrick Steinhardt <patrick.steinhardt@elego.de>
Helped-by: Junio C Hamano <gitster@pobox.com>
---
 Documentation/config.txt |  5 +++-
 t/t1300-repo-config.sh   | 72 ++++++++++++++++++++++++++++++++++++++++++++++++
 urlmatch.c               | 49 +++++++++++++++++++++++++++++---
 3 files changed, 121 insertions(+), 5 deletions(-)

diff --git a/Documentation/config.txt b/Documentation/config.txt
index af2ae4cc0..ee155d8a6 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -1914,7 +1914,10 @@ http.<url>.*::
   must match exactly between the config key and the URL.
 
 . Host/domain name (e.g., `example.com` in `https://example.com/`).
-  This field must match exactly between the config key and the URL.
+  This field must match between the config key and the URL. It is
+  possible to specify a `*` as part of the host name to match all subdomains
+  at this level. `https://*.example.com/` for example would match
+  `https://foo.example.com/`, but not `https://foo.bar.example.com/`.
 
 . Port number (e.g., `8080` in `http://example.com:8080/`).
   This field must match exactly between the config key and the URL.
diff --git a/t/t1300-repo-config.sh b/t/t1300-repo-config.sh
index 6c844d519..052f12021 100755
--- a/t/t1300-repo-config.sh
+++ b/t/t1300-repo-config.sh
@@ -1187,6 +1187,18 @@ test_expect_success 'urlmatch favors more specific URLs' '
 		cookieFile = /tmp/user.txt
 	[http "https://averylonguser@example.com/"]
 		cookieFile = /tmp/averylonguser.txt
+	[http "https://preceding.example.com"]
+		cookieFile = /tmp/preceding.txt
+	[http "https://*.example.com"]
+		cookieFile = /tmp/wildcard.txt
+	[http "https://*.example.com/wildcardwithsubdomain"]
+		cookieFile = /tmp/wildcardwithsubdomain.txt
+	[http "https://trailing.example.com"]
+		cookieFile = /tmp/trailing.txt
+	[http "https://user@*.example.com/"]
+		cookieFile = /tmp/wildcardwithuser.txt
+	[http "https://sub.example.com/"]
+		cookieFile = /tmp/sub.txt
 	EOF
 
 	echo http.cookiefile /tmp/root.txt >expect &&
@@ -1207,6 +1219,66 @@ test_expect_success 'urlmatch favors more specific URLs' '
 
 	echo http.cookiefile /tmp/subdirectory.txt >expect &&
 	git config --get-urlmatch HTTP https://averylonguser@example.com/subdirectory >actual &&
+	test_cmp expect actual &&
+
+	echo http.cookiefile /tmp/preceding.txt >expect &&
+	git config --get-urlmatch HTTP https://preceding.example.com >actual &&
+	test_cmp expect actual &&
+
+	echo http.cookiefile /tmp/wildcard.txt >expect &&
+	git config --get-urlmatch HTTP https://wildcard.example.com >actual &&
+	test_cmp expect actual &&
+
+	echo http.cookiefile /tmp/sub.txt >expect &&
+	git config --get-urlmatch HTTP https://sub.example.com/wildcardwithsubdomain >actual &&
+	test_cmp expect actual &&
+
+	echo http.cookiefile /tmp/trailing.txt >expect &&
+	git config --get-urlmatch HTTP https://trailing.example.com >actual &&
+	test_cmp expect actual &&
+
+	echo http.cookiefile /tmp/sub.txt >expect &&
+	git config --get-urlmatch HTTP https://user@sub.example.com >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'urlmatch with wildcard' '
+	cat >.git/config <<-\EOF &&
+	[http]
+		sslVerify
+	[http "https://*.example.com"]
+		sslVerify = false
+		cookieFile = /tmp/cookie.txt
+	EOF
+
+	test_expect_code 1 git config --bool --get-urlmatch doesnt.exist https://good.example.com >actual &&
+	test_must_be_empty actual &&
+
+	echo true >expect &&
+	git config --bool --get-urlmatch http.SSLverify https://example.com >actual &&
+	test_cmp expect actual &&
+
+	echo true >expect &&
+	git config --bool --get-urlmatch http.SSLverify https://good-example.com >actual &&
+	test_cmp expect actual &&
+
+	echo true >expect &&
+	git config --bool --get-urlmatch http.sslverify https://deep.nested.example.com >actual &&
+	test_cmp expect actual &&
+
+	echo false >expect &&
+	git config --bool --get-urlmatch http.sslverify https://good.example.com >actual &&
+	test_cmp expect actual &&
+
+	{
+		echo http.cookiefile /tmp/cookie.txt &&
+		echo http.sslverify false
+	} >expect &&
+	git config --get-urlmatch HTTP https://good.example.com >actual &&
+	test_cmp expect actual &&
+
+	echo http.sslverify >expect &&
+	git config --get-urlmatch HTTP https://more.example.com.au >actual &&
 	test_cmp expect actual
 '
 
diff --git a/urlmatch.c b/urlmatch.c
index f79887825..6c12f1a48 100644
--- a/urlmatch.c
+++ b/urlmatch.c
@@ -63,6 +63,49 @@ static int append_normalized_escapes(struct strbuf *buf,
 	return 1;
 }
 
+static const char *end_of_token(const char *s, int c, size_t n)
+{
+	const char *next = memchr(s, c, n);
+	if (!next)
+		next = s + n;
+	return next;
+}
+
+static int match_host(const struct url_info *url_info,
+		      const struct url_info *pattern_info)
+{
+	const char *url = url_info->url + url_info->host_off;
+	const char *pat = pattern_info->url + pattern_info->host_off;
+	int url_len = url_info->host_len;
+	int pat_len = pattern_info->host_len;
+
+	while (url_len && pat_len) {
+		const char *url_next = end_of_token(url, '.', url_len);
+		const char *pat_next = end_of_token(pat, '.', pat_len);
+
+		if (pat_next == pat + 1 && pat[0] == '*')
+			/* wildcard matches anything */
+			;
+		else if ((pat_next - pat) == (url_next - url) &&
+			 !memcmp(url, pat, url_next - url))
+			/* the components are the same */
+			;
+		else
+			return 0; /* found an unmatch */
+
+		if (url_next < url + url_len)
+			url_next++;
+		url_len -= url_next - url;
+		url = url_next;
+		if (pat_next < pat + pat_len)
+			pat_next++;
+		pat_len -= pat_next - pat;
+		pat = pat_next;
+	}
+
+	return (!url_len && !pat_len);
+}
+
 static char *url_normalize_1(const char *url, struct url_info *out_info, char allow_globs)
 {
 	/*
@@ -467,9 +510,7 @@ static int match_urls(const struct url_info *url,
 	}
 
 	/* check the host */
-	if (url_prefix->host_len != url->host_len ||
-	    strncmp(url->url + url->host_off,
-		    url_prefix->url + url_prefix->host_off, url->host_len))
+	if (!match_host(url, url_prefix))
 		return 0; /* host names do not match */
 
 	/* check the port */
@@ -528,7 +569,7 @@ int urlmatch_config_entry(const char *var, const char *value, void *cb)
 		struct url_info norm_info;
 
 		config_url = xmemdupz(key, dot - key);
-		norm_url = url_normalize(config_url, &norm_info);
+		norm_url = url_normalize_1(config_url, &norm_info, 1);
 		free(config_url);
 		if (!norm_url)
 			return 0;
-- 
2.11.0


^ permalink raw reply related

* Re: [PATCH v4 0/5] urlmatch: allow wildcard-based matches
From: Patrick Steinhardt @ 2017-01-31  8:26 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Patrick Steinhardt, Philip Oakley
In-Reply-To: <xmqqfuk0tb3j.fsf@gitster.mtv.corp.google.com>

[-- Attachment #1: Type: text/plain, Size: 1373 bytes --]

On Mon, Jan 30, 2017 at 02:52:00PM -0800, Junio C Hamano wrote:
> Junio C Hamano <gitster@pobox.com> writes:
> 
> > Patrick Steinhardt <patrick.steinhardt@elego.de> writes:
> >
> >>  - I realized that with my patches, "ranking" of URLs was broken.
> >>    Previously, we've always taken the longest matching URL. As
> >>    previously, only the user and path could actually differ, only
> >>    these two components were used for the comparison. I've
> >>    changed this now to also include the host part so that URLs
> >>    with a longer host will take precedence. This resulted in a
> >>    the patch 4.
> >
> > Good thinking.  I was wondering about this, too.
> >
> > Thanks.  Will read it through and replace.
> 
> Ugh.  When applied on top of 4e59582ff7 ("Seventh batch for 2.12",
> 2017-01-23), Git fails its self-test in t5551 #31 (I do not run with
> EXPENSIVE so some tests liks #30 are skipped, if it matters).
> 

Thanks for letting me know. I didn't have any errors on my other
machine, but was actually able to reproduce test failures on this
one.

Embarassingly, I forgot to zero-initialize the `struct
urlmatch_item`, leading to undefined behavior when searching for
configuration keys without a dot inside. This is the case for
nearly all keys, except for example the ones with a URL inside.

Will re-send a fixed version.

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 801 bytes --]

^ permalink raw reply

* Re: gitconfig get out of sync with submodule entries on branch switch
From: Benjamin Schindler @ 2017-01-31  7:46 UTC (permalink / raw)
  To: Brandon Williams; +Cc: git, sbeller
In-Reply-To: <20170130175118.GA35626@google.com>

Hi Brandon

I did try your suggestion, so basically:

git checkout branch
git submodule init
git submodule update

Unfortunately, I still have two entries in my git config this way. It 
seems that git submodule update only considers submodules listed in 
.gitmodules.

The background of my question is this - we have a jenkins farm which 
needs to switch branches continuously in an automated fashion and this 
needs to work even in when submodules are around. I did however, just 
now, find a reliable way to switch a branch, keeping the gitconfig in sync:
The basic workflow for switching a branch is:
git submodule deinit .
git checkout branch
git submodule init
git submodule update

Because the .git folder of the submodules are not within the submodule 
directories, this is, while still quite heavy-handed, reasonably fast 
and robust. At least it is better than deleting the entire repository 
every time a branch switch is issued.

Regards

Benjamin Schindler

On 30.01.2017 18:51, Brandon Williams wrote:
> On 01/30, Benjamin Schindler wrote:
>> Hi
>>
>> Consider the following usecase: I have the master branch where I
>> have a submodule A. I create a branch where I rename the submodule
>> to be in the directory B. After doing all of this, everything looks
>> good.
>> Now, I switch back to master. The first oddity is, that it fails to
>> remove the folder B because there are still files in there:
>>
>> bschindler@metis ~/Projects/submodule_test (testbranch) $ git
>> checkout master
>> warning: unable to rmdir other_submodule: Directory not empty
>> Switched to branch 'master'
>>
>> Git submodule deinit on B fails because the submodule is not known
>> to git anymore (after all, the folder B exists only in the other
>> branch). I can easily just remove the folder B from disk and
>> initialize the submodule A again, so all seems good.
>>
>> However, what is not good is that the submodule b is still known in
>> .git/config. This is in particular a problem for us, because I know
>> a number of tools which use git config to retrieve the submodule
>> list. Is it therefore a bug that upon branch switch, the submodule
>> gets deregistered, but its entry in .git/config remains?
>>
>> thanks a lot
>> Benjamin Schindler
>>
>> P.s. I did not subscribe to the mailing list, please add me at least
>> do CC. Thanks
> submodules and checkout don't really play nicely with each other at the
> moment.  Stefan (cc'd) is currently working on a patch series to improve
> the behavior of checkout with submodules.  Currently, if you want to
> ensure you have a good working state after a checkout you should run
> `git submodule update` to update all of the submoules.  As far as your
> submodule still being listed in the config, that should be expected
> given the scenario you described.
>
> If I'm understanding you correctly, A and B are both the same submodule
> just renamed on a different branch.  The moment you add a submoule to a
> repository it is given a name which is fixed.  Typically this is the
> path from the root of the repository.  The thing is, since you are able
> to freely move a submodule, its path can change.  To account for this
> there is the .gitmodules file which allows you to do a lookup from
> submodule name to the path at which it exists (or vice versa).  The
> submodules that are stored in .git/config are those which are
> 'initialize' or rather the submodules in which you are interested in and
> will be updated by `git submodule update`.  So given your scenario you
> should only have a single submodule in .git/config and the .gitmodules
> file should have a single entry with a differing path for each branch.
>
> Hopefully this gives you a bit more information to work with.  Since
> Stefan has been working with this more recently than me he may have some
> more input.
>


^ permalink raw reply

* Git clonebundles
From: Stefan Saasen @ 2017-01-31  7:00 UTC (permalink / raw)
  To: Git Mailing List

Hi all,

Bitbucket recently added support for Mercurial’s clonebundle extension
(http://gregoryszorc.com/blog/2015/10/22/cloning-improvements-in-mercurial-3.6/).
Mercurial’s clone bundles allow the Mercurial client to seed a repository using
a bundle file instead of dynamically generating a bundle for the client.

Mercurial clonebundles?
~~~~~~~~~~~~~~~~~~~~~~~

With Mercurial clonebundles the high level clone sequence looks like this:

1. The command "hg clone URL"  attempts to clone the repository at URL.
2. If a bundle file exists for the repository, the existence of the file
`clonebundles.manifest` causes the server to advertise the `clonebundle`
capability (capabilities lookup is the first command the client issues).
3. In the above case the client then executes the command "clonebundles".
4. The manifest file will be returned.
5. The client then selects a bundle file to download from the list of URLs
advertised in the manifests file, to seed the repository.
6. To update the repository the last step involves fetching the latest changes.

Why is this useful?
~~~~~~~~~~~~~~~~~~~

The fact that clone bundles can be distributed as static files enables us to
use static file servers for bundle distribution. Users have also reported
latency improvements for clone operations of popular Mercurial repositories.
Additionally this significantly reduces the resource usage of clone operations,
as clone operations are reduced to simpler fetches to resolve the delta between
the current repository and the downloaded bundle state.

clonebundles for git?
~~~~~~~~~~~~~~~~~~~~~

We recently looked into how this concept could be translated to git. This is
not a new idea and has been discussed before (more on that later) but our
success with the Mercurial clonebundle rollout prompted us to revisit this
topic.

We believe that bringing a similar concept to git could have the following
benefits:

* Improved clone times for users that clone large git repositories, especially
  if bundle file distribution leverages global CDNs.
* Improved scalability of git for managing large popular repositories.
  Offloading a significant portion of the clone resource usage to CDNs or static
  file hosts.

Our current proof-of-concept to explore this space, closely follows
the approach from Mercurial outlined above.

* An `/info/bundle` path returns a bundle manifest (over HTTP)
* The bundle manifest contains a simple list of URLs with some additional meta
  data that allows the client to select a suitable bundle download URL
* The bundle download URL points to a bundle file generated using `git bundle
  create` including all the relevant refs as a self contained repository seed.
* The client probes the target URL with a `GET` request to $URL/info/bundle and
  downloads the bundle file if present.
* The repository will be created based on the downloaded bundle (downloading a
  static file allows resumable downloads or parallel downloads of chunks if the
  file/web server supports range requests).
* A `git fetch` and the appropriate checkout then updates the "cloned"
  repository to match the latest upstream state.

The proof-of-concept was built as an external binary `git-clone2` that
mimics the behaviour of the `git clone` command, so unfortunately I
can't provide any patches to git to demonstrate the behaviour.

Ultimately our proof-of-concept is built around a few core ideas:

* Re-use the existing bundle format as a single-file, self-contained
repository representation.
* Introduce a bundle manifest (accessible at `$URL/info/bundle`) that allows
  the client to resolve a suitable bundle download URL.
* Teach the `git clone` command to accept and prefer seeding a repository using
  a static bundle file that is advertised in a bundle manifest.
* Re-use as much as possible of the existing commands and in particular the
  `git bundle` machinery to seed the repository and to create the static bundle
  file.
* We accept additional storage requirements for the bundle files in addition to
  the actual repository content in pack-files or loose objects.
Hosting providers
  or system administrators are free to decide how many bundles to advertise and
  how frequently the bundles are updated.
* It targets the "seed from a bundle file" use case, with resumable clones just
  being a potential side-effect.

Some of the problems that need to be solved with an approach like this are:

* Bundle advertisement/bundle negotiation: We considered advertising a
  new capability "clonebundle" as part of the rev advertisement
capabilities list.
  This would allow clients that support clonebundles to abort the clone attempt
  and resolve a suitable bundle URL from a bundle manifest at `$URL/info/bundle`
  instead. For HTTP this would amount to an early termination when
retrieving the
  ref-advertisement.
  Note: We didn't pursue this for our proof-of-concept so we didn't
explore whether
  this is feasible.
* Uniform approach for the supported transports: Our proof-of-concept
only supports HTTP as
  a transport. Ideally the clonebundle capability could be supported by all
  available transports (of which at least ssh would be highly desirable).
* Bundle manifest and bundle download: It is unclear whose responsibility it is
  to generate the bundle manifest with the bundle download URLs. Most likely the
  bundle files will be served using a webserver or CDN, so download
URL generation
  should not be a core git responsibility. For hosting purpose we envision that
  the bundle manifest might contain dynamic download URLs with personalised
  access tokens with expiry.
* Bundle generation: Similar to the above it is unclear how bundle
generation is handled.
  For hosting purposes, the operator would likely want to influence
when and how bundles are generated.

Prior art
~~~~~~~~~

Our proof-of-concept is built on top of ideas that have been
circulating for a while. We are aware of a number of proposed changes
in this space:

* Jeff King's work on network bundles:
https://github.com/peff/git/commit/17e2409df37edd0c49ef7d35f47a7695f9608900
* Nguyễn Thái Ngọc Duy's work on "[PATCH 0/8] Resumable clone
revisited, proof of concept":
https://www.spinics.net/lists/git/msg267260.html
* Resumable clone work by Kevin Wern:
https://public-inbox.org/git/1473984742-12516-1-git-send-email-kevin.m.wern@gmail.com/

Whilst the above mentioned proposals/proposed changes are in a similar
space, I would be interest to understand whether there is any
consensus on the general idea of supporting static bundle files as a
mechanism to seed a repository?
I would also appreciate any pointers to other discussions in this area.

Best regards,
Stefan Saasen & Erik van Zijst; Atlassian Bitbucket

^ permalink raw reply

* Re: [PATCH 1/4] git-prompt.sh: add submodule indicator
From: Junio C Hamano @ 2017-01-31  3:11 UTC (permalink / raw)
  To: Benjamin Fuchs; +Cc: git, szeder.dev, sbeller, sandals, ville.skytta
In-Reply-To: <c872072a-4754-051d-81e7-1e2166560733@benjaminfuchs.de>

Benjamin Fuchs <email@benjaminfuchs.de> writes:

> In [2/4] I got rid of the loop by feedback of Gábor.
> Sorry if my patch wasn't well formed.

While it might be the way other development communities work, in the
Git development community, we do not work that way when presenting
your second and subsequent attempt to the community.

Having the initial draft from the original developers that records
the bugs and misdesigns in an earlier parts of a series and separate
patches that record how the problems were fixed to arrive at the
final shape of the codebase might be interesting to the original
developers, and they may even find such a history valuable, but in
the context of the history that will be recorded in the official
tree of the project for eternity, that just adds useless noise.

Instead of keeping the original, in which problems were pointed out,
and adding later commits to correct them incrementally, a patch is
"rerolled".  That is, you are expected to learn from the review
comments and pretend as if you did the work from scratch and you
already possessed the wisdom lent by the reviewers when you started
your work.  In the "rerolled" patches you send, you pretend as if
you worked without making mistakes you made in the earlier rounds at
all, producing (more) perfect patches from the beginning.  

In reality, you may locally be using Git tools like rebase,
cherry-pick and "add -p" while preparing these "rerolled" rounds of
patches, but the name of the game is to hide that effort from the
public and pretend to be a perfect human, recording the result of
exercising your best ability in the official history ;-).

So this is OK:

    0/3: I want to improve X, and for that I identified that I need
    A, B and C done.  A or B alone is already an improvement, but A
    and B together makes it even more useful, and implementation of
    C requires patches to do A and B.

    1/3: do A
    2/3: do B
    3/3: do C, building on A and B

This is not:

    0/3: I want to improve X, and for that I need to do C.
    1/3: I couldn't do C, and I did A instead.
    2/3: A was totally useless. I fix it to do B.
    3/3: B is not very useful, either. I fix it to do C.

^ permalink raw reply

* Re: difflame
From: Edmundo Carmona Antoranz @ 2017-01-31  2:37 UTC (permalink / raw)
  To: Jeff King; +Cc: Junio C Hamano, Git List
In-Reply-To: <CAOc6etZZSgeBRwQA4C4Ag5A48W8tAAArdOaaKxkTOVvVGi+EpQ@mail.gmail.com>

Maybe a little work on blame to get the actual revision where some
lines were "deleted"?

Something like git blame --blame-deletion that could be based on --reverse?

On Mon, Jan 30, 2017 at 7:35 PM, Edmundo Carmona Antoranz
<eantoranz@gmail.com> wrote:
> I'm thinking of something like this:
>
> Say I just discovered a problem in a file.... I want to see who worked
> on it since some revision that I know is working fine (or even
> something as generic as HEAD~100..). It could be a number of people
> with different revisions. I would diff to see what changed.... and
> blame the added lines (blame reverse to try to get as close as
> possible with a single command in case I want to see what happened
> with something that was deleted). If I could get blame information of
> added/deleted lines in a single run, that would help a lot.
>
> Lo and behold: difflame.
>
>
>
> On Mon, Jan 30, 2017 at 5:26 PM, Jeff King <peff@peff.net> wrote:
>> On Mon, Jan 30, 2017 at 01:08:41PM -0800, Junio C Hamano wrote:
>>
>>> Jeff King <peff@peff.net> writes:
>>>
>>> > On Tue, Jan 17, 2017 at 11:24:02PM -0600, Edmundo Carmona Antoranz wrote:
>>> >
>>> >> For a very long time I had wanted to get the output of diff to include
>>> >> blame information as well (to see when something was added/removed).
>>> >
>>> > This is something I've wanted, too. The trickiest part, though, is
>>> > blaming deletions, because git-blame only tracks the origin of content,
>>> > not the origin of a change.
>>>
>>> Hmph, this is a comment without looking at what difflame does
>>> internally, so you can ignore me if I am misunderstood what problem
>>> you are pointing out, but I am not sure how "tracks the origin of
>>> content" could be a problem.
>>>
>>> If output from "git show" says this:
>>>
>>>       --- a/file
>>>       +++ b/file
>>>       @@ -1,5 +1,6 @@
>>>        a
>>>        b
>>>       -c
>>>       +C
>>>       +D
>>>        d
>>>        e
>>>
>>> in order to annotate lines 'a', 'b', 'd', and 'e' for their origin,
>>> you would run 'blame' on the commit the above output was taken from
>>> (or its parent---they are in the context so either would be OK).
>>>
>>> You know where 'C' and 'D' came from already.  It's the commit you
>>> are feeding "git show".
>>
>> I think the point (or at least as I understand it) is that the diff is
>> not "git show" for a particular commit. It could be part of a much
>> larger diff that covers many commits.
>>
>> As a non-hypothetical instance, I have a fork of git.git that has
>> various enhancements. I want to feed those enhancements upstream. I need
>> to know which commits should be cherry-picked to get those various
>> enhancements.
>>
>> Looking at "log origin..fork" tells me too many commits, because it
>> includes ones which aren't useful anymore. Either because they already
>> went upstream, or because they were cherry-picked to the fork and their
>> upstream counterparts merged (or even equivalent commits made upstream
>> that obsoleted the features).
>>
>> Looking at "git diff origin fork" tells me what the actual differences
>> are, but it doesn't show me which commits are responsible for them.
>>
>> I can "git blame" each individual line of the diff (starting with "fork"
>> as the tip), but that doesn't work for lines that no longer exist (i.e.,
>> when the interesting change is a deletion).
>>
>>> In order to run blame to find where 'c' came from, you need to start
>>> at the _parent_ of the commit the above output came from, and the
>>> hunk header shows which line range to find the final 'c'.
>>
>> So perhaps that explains my comment more. "blame" is not good for
>> finding which commit took away a line. I've tried using "blame
>> --reverse", but it shows you the parent of the commit you are looking
>> for, which is slightly annoying. :)
>>
>> "git log -S" is probably a better tool for finding that.
>>
>> -Peff

^ permalink raw reply

* Re: [PATCH] blame: draft of line format
From: Edmundo Carmona Antoranz @ 2017-01-31  2:34 UTC (permalink / raw)
  To: Git List, Jeff King, pranit.bauva; +Cc: Edmundo Carmona Antoranz
In-Reply-To: <20170131022830.8538-1-eantoranz@gmail.com>

I'm basing in on the "pretty API" so that we don't have to reinvent
the wheel. I have already noticed that many of the formatting options
available for pretty are not working... I'm sure it would require some
work setting up the call to pretty api but the basic is laid out
there.

Let me know of your thoughts.

^ permalink raw reply

* [PATCH] blame: draft of line format
From: Edmundo Carmona Antoranz @ 2017-01-31  2:28 UTC (permalink / raw)
  To: git; +Cc: Edmundo Carmona Antoranz

---
 builtin/blame.c | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/builtin/blame.c b/builtin/blame.c
index 126b8c9e5..89c1a862d 100644
--- a/builtin/blame.c
+++ b/builtin/blame.c
@@ -52,6 +52,7 @@ static int xdl_opts;
 static int abbrev = -1;
 static int no_whole_file_rename;
 static int show_progress;
+static char *format_line;
 
 static struct date_mode blame_date_mode = { DATE_ISO8601 };
 static size_t blame_date_width;
@@ -1931,6 +1932,19 @@ static void emit_porcelain(struct scoreboard *sb, struct blame_entry *ent,
 		putchar('\n');
 }
 
+static void pretty_info(char* revid, struct blame_entry *ent, struct strbuf *rev_buffer)
+{
+	struct pretty_print_context ctx = {0};
+	struct rev_info rev;
+
+	struct strbuf format = STRBUF_INIT;
+	strbuf_addstr(&format, format_line);
+	ctx.fmt = CMIT_FMT_USERFORMAT;
+	get_commit_format(format.buf, &rev);
+	pretty_print_commit(&ctx, ent->suspect->commit, rev_buffer);
+	strbuf_release(&format);
+}
+
 static void emit_other(struct scoreboard *sb, struct blame_entry *ent, int opt)
 {
 	int cnt;
@@ -1939,11 +1953,15 @@ static void emit_other(struct scoreboard *sb, struct blame_entry *ent, int opt)
 	struct commit_info ci;
 	char hex[GIT_SHA1_HEXSZ + 1];
 	int show_raw_time = !!(opt & OUTPUT_RAW_TIMESTAMP);
+	struct strbuf line_revision_buf = STRBUF_INIT;
 
 	get_commit_info(suspect->commit, &ci, 1);
 	sha1_to_hex_r(hex, suspect->commit->object.oid.hash);
 
 	cp = nth_line(sb, ent->lno);
+
+	if (format_line)
+		pretty_info(hex, ent, &line_revision_buf);
 	for (cnt = 0; cnt < ent->num_lines; cnt++) {
 		char ch;
 		int length = (opt & OUTPUT_LONG_OBJECT_NAME) ? GIT_SHA1_HEXSZ : abbrev;
@@ -1968,6 +1986,10 @@ static void emit_other(struct scoreboard *sb, struct blame_entry *ent, int opt)
 			       format_time(ci.author_time, ci.author_tz.buf,
 					   show_raw_time),
 			       ent->lno + 1 + cnt);
+		} else if (format_line) {
+			printf("%s", line_revision_buf.buf);
+			printf(" %*d) ",
+			       max_digits, ent->lno + 1 + cnt);
 		} else {
 			if (opt & OUTPUT_SHOW_SCORE)
 				printf(" %*d %02d",
@@ -2007,6 +2029,7 @@ static void emit_other(struct scoreboard *sb, struct blame_entry *ent, int opt)
 	if (sb->final_buf_size && cp[-1] != '\n')
 		putchar('\n');
 
+	strbuf_release(&line_revision_buf);
 	commit_info_destroy(&ci);
 }
 
@@ -2605,6 +2628,7 @@ int cmd_blame(int argc, const char **argv, const char *prefix)
 
 		OPT_BIT(0, "minimal", &xdl_opts, N_("Spend extra cycles to find better match"), XDF_NEED_MINIMAL),
 		OPT_STRING('S', NULL, &revs_file, N_("file"), N_("Use revisions from <file> instead of calling git-rev-list")),
+		OPT_STRING(0, "format-line", &format_line, N_("format-line"), N_("Use pretty format for revisions")),
 		OPT_STRING(0, "contents", &contents_from, N_("file"), N_("Use <file>'s contents as the final image")),
 		{ OPTION_CALLBACK, 'C', NULL, &opt, N_("score"), N_("Find line copies within and across files"), PARSE_OPT_OPTARG, blame_copy_callback },
 		{ OPTION_CALLBACK, 'M', NULL, &opt, N_("score"), N_("Find line movements within and across files"), PARSE_OPT_OPTARG, blame_move_callback },
-- 
2.11.0.rc1


^ permalink raw reply related

* Re: difflame
From: Edmundo Carmona Antoranz @ 2017-01-31  1:35 UTC (permalink / raw)
  To: Jeff King; +Cc: Junio C Hamano, Git List
In-Reply-To: <20170130232559.krdxkt4dq4lfv4rj@sigill.intra.peff.net>

I'm thinking of something like this:

Say I just discovered a problem in a file.... I want to see who worked
on it since some revision that I know is working fine (or even
something as generic as HEAD~100..). It could be a number of people
with different revisions. I would diff to see what changed.... and
blame the added lines (blame reverse to try to get as close as
possible with a single command in case I want to see what happened
with something that was deleted). If I could get blame information of
added/deleted lines in a single run, that would help a lot.

Lo and behold: difflame.



On Mon, Jan 30, 2017 at 5:26 PM, Jeff King <peff@peff.net> wrote:
> On Mon, Jan 30, 2017 at 01:08:41PM -0800, Junio C Hamano wrote:
>
>> Jeff King <peff@peff.net> writes:
>>
>> > On Tue, Jan 17, 2017 at 11:24:02PM -0600, Edmundo Carmona Antoranz wrote:
>> >
>> >> For a very long time I had wanted to get the output of diff to include
>> >> blame information as well (to see when something was added/removed).
>> >
>> > This is something I've wanted, too. The trickiest part, though, is
>> > blaming deletions, because git-blame only tracks the origin of content,
>> > not the origin of a change.
>>
>> Hmph, this is a comment without looking at what difflame does
>> internally, so you can ignore me if I am misunderstood what problem
>> you are pointing out, but I am not sure how "tracks the origin of
>> content" could be a problem.
>>
>> If output from "git show" says this:
>>
>>       --- a/file
>>       +++ b/file
>>       @@ -1,5 +1,6 @@
>>        a
>>        b
>>       -c
>>       +C
>>       +D
>>        d
>>        e
>>
>> in order to annotate lines 'a', 'b', 'd', and 'e' for their origin,
>> you would run 'blame' on the commit the above output was taken from
>> (or its parent---they are in the context so either would be OK).
>>
>> You know where 'C' and 'D' came from already.  It's the commit you
>> are feeding "git show".
>
> I think the point (or at least as I understand it) is that the diff is
> not "git show" for a particular commit. It could be part of a much
> larger diff that covers many commits.
>
> As a non-hypothetical instance, I have a fork of git.git that has
> various enhancements. I want to feed those enhancements upstream. I need
> to know which commits should be cherry-picked to get those various
> enhancements.
>
> Looking at "log origin..fork" tells me too many commits, because it
> includes ones which aren't useful anymore. Either because they already
> went upstream, or because they were cherry-picked to the fork and their
> upstream counterparts merged (or even equivalent commits made upstream
> that obsoleted the features).
>
> Looking at "git diff origin fork" tells me what the actual differences
> are, but it doesn't show me which commits are responsible for them.
>
> I can "git blame" each individual line of the diff (starting with "fork"
> as the tip), but that doesn't work for lines that no longer exist (i.e.,
> when the interesting change is a deletion).
>
>> In order to run blame to find where 'c' came from, you need to start
>> at the _parent_ of the commit the above output came from, and the
>> hunk header shows which line range to find the final 'c'.
>
> So perhaps that explains my comment more. "blame" is not good for
> finding which commit took away a line. I've tried using "blame
> --reverse", but it shows you the parent of the commit you are looking
> for, which is slightly annoying. :)
>
> "git log -S" is probably a better tool for finding that.
>
> -Peff

^ permalink raw reply

* Re: [RFC] Proof of concept: Support multiple authors
From: Cornelius Schumacher @ 2017-01-31  0:54 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Christian Couder, Josh Triplett
In-Reply-To: <xmqqinowuvd7.fsf@gitster.mtv.corp.google.com>

On Monday 30 January 2017 12:48:52 Junio C Hamano wrote:
> 
>   https://public-inbox.org/git/?q=gmane:83880
>   https://public-inbox.org/git/?q=gmane:146223
>   https://public-inbox.org/git/?q=gmane:146886

Thanks for putting the links together. That's very useful as a reference.

> The older discussions already cited the cost to update both in-tree
> and out-of-tree tools not to barf when they see such a commit object
> and one of the reason why we would not want to do this, and Ted Ts'o
> gave us another excellent reason why not to do multiple author
> header lines in a commit object header, i.e. "How often is that all
> of the authors are completely equal?"

Just to give a bit of context: In the pair programming environment where I 
work we usually use non-personalized workstations and switch the keyboard 
between the two people working together quite frequently, sometimes every few 
minutes, or even within writing a commit message. There the person pressing 
the return button on the commit really does not have a special role. In this 
style of working I think it feels like the fairest and most practical 
assumption to treat all authors as equal.

> My advice to those who want to record credit to multiple authors is
> to treat the commit author line as recording the primary contact
> when there is a question on the commit and nothing else.  Anything
> with richer semantics is better done in the trailer by enriching the
> support of trailer lines with interpret-trailers framework.

Thanks for the advice. I think I will explore this direction a little bit 
further and see how handling a situation of multiple authors could be better 
done in this framework.

-- 
Cornelius Schumacher <schumacher@kde.org>

^ permalink raw reply

* Re: [ANNOUNCE] Git Merge Contributor Summit topic planning
From: Jeff King @ 2017-01-31  0:59 UTC (permalink / raw)
  To: git
In-Reply-To: <20170131004804.p5sule4rh2xrgtwe@sigill.intra.peff.net>

On Tue, Jan 31, 2017 at 01:48:05AM +0100, Jeff King wrote:

> The list of topics is totally open. If you're coming and have something
> you'd like to present or discuss, then propose it here. If you're _not_
> coming, you may still chime in with input on topics, but please don't
> suggest a topic unless somebody who is there will agree to lead the
> discussion.

Here are the two topics I plan on bringing:

  - Git / Software Freedom Conservancy yearly report. I'll plan to give
    a rundown of the past year's activities and financials, along with
    some open questions that could benefit from community input.

  - The git-scm.com website: who runs that thing, anyway? An overview
    of the site, how it's managed, and what it needs.

I plan to send out detailed emails on both topics to the list on
Wednesday, and then follow-up with a summary of any useful in-person
discussion (since obviously not everybody will be at the summit).

I'd encourage anybody with a topic to present to consider doing
something similar.

-Peff

^ permalink raw reply

* [ANNOUNCE] Git Merge Contributor Summit topic planning
From: Jeff King @ 2017-01-31  0:48 UTC (permalink / raw)
  To: git

The Contributor Summit is only a few days away; I'd like to work out a
few bits of logistics ahead of time.

We're up to 26 attendees. The room layout will probably be three big
round-tables, with a central projector. We should be able to have
everybody pay attention to a single speaker, or break into 3 separate
conversations.

The list of topics is totally open. If you're coming and have something
you'd like to present or discuss, then propose it here. If you're _not_
coming, you may still chime in with input on topics, but please don't
suggest a topic unless somebody who is there will agree to lead the
discussion.

We'll write the final list on a whiteboard on Thursday morning, vote on
what looks good, and then work our way down the list.  Topics don't
_have_ to be proposed here ahead of time, but I'd encourage people to do
so as it leaves time for others to consider them and possibly do any
background thinking or research.

The rough schedule is:

  0830 to 0930 - registration, breakfast, milling about and socializing;
                 be aware that Git Merge Workshop attendees will be
		 doing the same things in the same space, so show up
		 with enough time to navigate a bit of a crowd.

  0930 to 1215 - we retire to our Fortress of Solitude to talk about
                 Very Important Git Things

  1215 to 1330 - lunch

  1330 to 1500 - Very Important Git Things, part deux. The end time
		 isn't a hard deadline, so we can go as late as 1600 if
		 the discussion keeps up.

There's no organized dinner planned. At our size, I think it's probably
most productive to let people form small groups for dinner if they want
to. But if somebody is really interested in trying to do a big group
reservation, they are welcome to try to organize it.

-Peff

^ permalink raw reply

* Re: [PATCH] Completion: Add support for --submodule=diff
From: Jacob Keller @ 2017-01-31  0:10 UTC (permalink / raw)
  To: peterjclaw; +Cc: Git mailing list, Junio C Hamano, SZEDER Gábor
In-Reply-To: <20161204144127.28452-1-peterjclaw@gmail.com>

On Sun, Dec 4, 2016 at 6:41 AM,  <peterjclaw@gmail.com> wrote:
> From: Peter Law <PeterJCLaw@gmail.com>
>
> Teach git-completion.bash about the 'diff' option to 'git diff
> --submodule=', which was added in Git 2.11.
>
> Signed-off-by: Peter Law <PeterJCLaw@gmail.com>
> ---
>  contrib/completion/git-completion.bash | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/contrib/completion/git-completion.bash b/contrib/completion/git-completion.bash
> index 21016bf8d..ab11e7371 100644
> --- a/contrib/completion/git-completion.bash
> +++ b/contrib/completion/git-completion.bash
> @@ -1206,7 +1206,7 @@ _git_describe ()
>
>  __git_diff_algorithms="myers minimal patience histogram"
>
> -__git_diff_submodule_formats="log short"
> +__git_diff_submodule_formats="diff log short"
>
>  __git_diff_common_options="--stat --numstat --shortstat --summary
>                         --patch-with-stat --name-only --name-status --color
> --
> 2.11.0
>

Yep, this looks good to me.

Thanks,
Jake

^ permalink raw reply

* Re: [PATCH 1/4] git-prompt.sh: add submodule indicator
From: Benjamin Fuchs @ 2017-01-31  0:10 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, szeder.dev, sbeller, sandals, ville.skytta
In-Reply-To: <xmqqr33krtww.fsf@gitster.mtv.corp.google.com>

Hi Junio,
thanks for your reply. Some of your suggestions are covered by my rework 
in Patch [2/4].
In [2/4] I got rid of the loop by feedback of Gábor.
Sorry if my patch wasn't well formed.

On 31.01.2017 00:48, Junio C Hamano wrote:
> Benjamin Fuchs <email@benjaminfuchs.de> writes:
>
>> I expirienced that working with submodules can be confusing. This indicator
>> will make you notice very easy when you switch into a submodule.
> I am not quite sure what the above wants to say.  If you work on two
> projects, A and B, then having something that quickly reminds you
> which one you are in is beneficial and people often do so by having
> the current directory name in the prompt.
>
> The log message needs to explain why working on a submodule C of a
> project A is any more special, i.e. why are the existing ways the
> users are using to remind them between A and B cannot be used for C.
A submodule can be anywhere in your parent git repository. And while 
walking through the
parent repository it is sometime a good reminder to know when to just 
entered a submodule.
Because now all git command will work on the submodule. I guess this 
indicator is a good way
to show that.
And with patch [2/4] (and fixed it with [3/4]) I also introduced the 
"dirty" indicator for the submodule, which can
show you, that your current checked out state in the submodule is not 
the one committed
to the parent.

>
>> diff --git a/contrib/completion/git-prompt.sh b/contrib/completion/git-prompt.sh
>> index 97eacd7..4c82e7f 100644
>> --- a/contrib/completion/git-prompt.sh
>> +++ b/contrib/completion/git-prompt.sh
>> @@ -93,6 +93,10 @@
>>   # directory is set up to be ignored by git, then set
>>   # GIT_PS1_HIDE_IF_PWD_IGNORED to a nonempty value. Override this on the
>>   # repository level by setting bash.hideIfPwdIgnored to "false".
>> +#
>> +# If you would like __git_ps1 to indicate that you are in a submodule,
>> +# set GIT_PS1_SHOWSUBMODULE. In this case a "sub:" will be added before
>> +# the branch name.
>>   
>>   # check whether printf supports -v
>>   __git_printf_supports_v=
>> @@ -284,6 +288,32 @@ __git_eread ()
>>   	test -r "$f" && read "$@" <"$f"
>>   }
>>   
>> +# __git_is_submodule
>> +# Based on:
>> +# http://stackoverflow.com/questions/7359204/git-command-line-know-if-in-submodule
>> +__git_is_submodule ()
> It seems like this function is checking if you are "IN" submodule,
> not "IS" submodule.  A misnamed function?
Reworked in Patch [2/4]
>> +{
>> +	local git_dir parent_git module_name path
>> +	# Find the root of this git repo, then check if its parent dir is also a repo
>> +	git_dir="$(git rev-parse --show-toplevel)"
>> +	module_name="$(basename "$git_dir")"
> This does not give "module_name" in the sense the word is used in
> the submodule subsystem.  If this variable is useful, call that
> with "path" in its name (I do not think it alone is useful at all).
Reworked in Patch [2/4]
>
>> +	parent_git="$(cd "$git_dir/.." && git rev-parse --show-toplevel 2> /dev/null)"
>> +	if [[ -n $parent_git ]]; then
>> +		# List all the submodule paths for the parent repo
>> +		while read path
>> +		do
>> +			if [[ "$path" != "$module_name" ]]; then continue; fi
>> +			if [[ -d "$git_dir/../$path" ]];    then return 0; fi
>> +		done < <(cd $parent_git && git submodule --quiet foreach 'echo $path' 2> /dev/null)
> Instead of doing this "loop over submodules and just get true or
> false", it may make a lot more sense to find the module name and
> report that.  That would allow you to have the actual submodule
> name, not a generic "sub:" which does not help the users to tell
> which one of 47 submodules they have in their top-level project
> they are currently in.
>
> If your projects are layed out like so
>
> 	/home/bf/projects/A/
> 	/home/bf/projects/A/lib/B/ -- submodule
> 	/home/bf/projects/A/Doc/ -- another submodule
> 	/home/bf/projects/A/Doc/technical -- a subdirectory of Doc submodule
>
> and you are in /home/bf/projects/A/Doc/technical/ subdirectory, your
> $git_dir variable (which is grossly misnamed, too; call it "top"
> perhaps?) would be "/home/bf/projects/A/Doc" and then your
> $parent_git variable (again, that is misnamed; call it
> "parent_top"?) would be "/home/bf/projects/A".  The submodule path
> to the submodule you are currently in is "Doc" (you learn it as the
> difference between these two).
>
> You can ask the top-level project the name of the submodule that is
> currently at "Doc" with "submodule--helper name".  Unless the project
> has moved it since it first added the submodule, the module name may
> also be "Doc", but if it were moved, it may be different.
>
> And that "module name" is a more useful thing than a hardcoded
> string "sub:" to use in the prompt, I would think.
Reworked in Patch [2/4].


^ permalink raw reply

* Re: [PATCH 1/4] git-prompt.sh: add submodule indicator
From: Junio C Hamano @ 2017-01-30 23:48 UTC (permalink / raw)
  To: Benjamin Fuchs; +Cc: git, szeder.dev, sbeller, sandals, ville.skytta
In-Reply-To: <1485809065-11978-2-git-send-email-email@benjaminfuchs.de>

Benjamin Fuchs <email@benjaminfuchs.de> writes:

> I expirienced that working with submodules can be confusing. This indicator
> will make you notice very easy when you switch into a submodule.

I am not quite sure what the above wants to say.  If you work on two
projects, A and B, then having something that quickly reminds you
which one you are in is beneficial and people often do so by having
the current directory name in the prompt.  

The log message needs to explain why working on a submodule C of a
project A is any more special, i.e. why are the existing ways the
users are using to remind them between A and B cannot be used for C.

> diff --git a/contrib/completion/git-prompt.sh b/contrib/completion/git-prompt.sh
> index 97eacd7..4c82e7f 100644
> --- a/contrib/completion/git-prompt.sh
> +++ b/contrib/completion/git-prompt.sh
> @@ -93,6 +93,10 @@
>  # directory is set up to be ignored by git, then set
>  # GIT_PS1_HIDE_IF_PWD_IGNORED to a nonempty value. Override this on the
>  # repository level by setting bash.hideIfPwdIgnored to "false".
> +#
> +# If you would like __git_ps1 to indicate that you are in a submodule,
> +# set GIT_PS1_SHOWSUBMODULE. In this case a "sub:" will be added before
> +# the branch name.
>  
>  # check whether printf supports -v
>  __git_printf_supports_v=
> @@ -284,6 +288,32 @@ __git_eread ()
>  	test -r "$f" && read "$@" <"$f"
>  }
>  
> +# __git_is_submodule
> +# Based on:
> +# http://stackoverflow.com/questions/7359204/git-command-line-know-if-in-submodule
> +__git_is_submodule ()

It seems like this function is checking if you are "IN" submodule,
not "IS" submodule.  A misnamed function?

> +{
> +	local git_dir parent_git module_name path
> +	# Find the root of this git repo, then check if its parent dir is also a repo
> +	git_dir="$(git rev-parse --show-toplevel)"
> +	module_name="$(basename "$git_dir")"

This does not give "module_name" in the sense the word is used in
the submodule subsystem.  If this variable is useful, call that
with "path" in its name (I do not think it alone is useful at all).

> +	parent_git="$(cd "$git_dir/.." && git rev-parse --show-toplevel 2> /dev/null)"
> +	if [[ -n $parent_git ]]; then
> +		# List all the submodule paths for the parent repo
> +		while read path
> +		do
> +			if [[ "$path" != "$module_name" ]]; then continue; fi
> +			if [[ -d "$git_dir/../$path" ]];    then return 0; fi
> +		done < <(cd $parent_git && git submodule --quiet foreach 'echo $path' 2> /dev/null)

Instead of doing this "loop over submodules and just get true or
false", it may make a lot more sense to find the module name and
report that.  That would allow you to have the actual submodule
name, not a generic "sub:" which does not help the users to tell
which one of 47 submodules they have in their top-level project
they are currently in.

If your projects are layed out like so

	/home/bf/projects/A/
	/home/bf/projects/A/lib/B/ -- submodule
	/home/bf/projects/A/Doc/ -- another submodule
	/home/bf/projects/A/Doc/technical -- a subdirectory of Doc submodule

and you are in /home/bf/projects/A/Doc/technical/ subdirectory, your
$git_dir variable (which is grossly misnamed, too; call it "top"
perhaps?) would be "/home/bf/projects/A/Doc" and then your
$parent_git variable (again, that is misnamed; call it
"parent_top"?) would be "/home/bf/projects/A".  The submodule path
to the submodule you are currently in is "Doc" (you learn it as the
difference between these two).

You can ask the top-level project the name of the submodule that is
currently at "Doc" with "submodule--helper name".  Unless the project
has moved it since it first added the submodule, the module name may
also be "Doc", but if it were moved, it may be different.

And that "module name" is a more useful thing than a hardcoded
string "sub:" to use in the prompt, I would think.

^ permalink raw reply

* Re: [PATCH v3 2/3] refs: add option core.logAllRefUpdates = always
From: Jeff King @ 2017-01-30 23:37 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: cornelius.weig, git
In-Reply-To: <xmqq37g0us5p.fsf@gitster.mtv.corp.google.com>

On Mon, Jan 30, 2017 at 01:58:10PM -0800, Junio C Hamano wrote:

> >     When writing the test for git-tag, I realized that the option
> >     --no-create-reflog to git-tag does not take precedence over
> >     logAllRefUpdate=always. IOW the setting cannot be overridden on the command
> >     line. Do you think this is a defect or would it not be desirable to have this
> >     feature anyway?
> 
> "--no-create-reflog" should override the configuration set to "true"
> or "always".  Also "--create-reflog" should override the
> configuration set to "false".
> 
> If the problem was inherited from the original code before your
> change (e.g. you set logAllRefUpdates to true and then did
> "update-ref --no-create-reflog refs/heads/foo".  Does the code
> before your change ignore the command lne option and create a reflog
> for the branch?), then it would be ideal to fix the bug before this
> series as a preparatory fix.  If the problem was introduced by this
> patch set, then we would need a fix not to introduce it ;-)

I hadn't thought about that. I think "git branch --no-create-reflog" has
the same problem in the existing code.

I suspect nobody cares much in practice. Even if you say "don't create a
reflog now", if you have core.logAllRefUpdates turned on, then it's
likely that some _other_ operation will create the reflog later
accidentally (e.g., as soon as you "git checkout foo && git commit",
you'll get a reflog). I think you're fighting an uphill battle to turn
logAllRefUpdates on and then try to disable some reflogs selectively.

So I agree the current behavior is quietly broken, which is not good.
But I wonder if "--no-create-reflog" is really sane in the first place,
and whether we might be better off to simply disallow it.

-Peff

^ permalink raw reply

* Re: difflame
From: Jeff King @ 2017-01-30 23:26 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Edmundo Carmona Antoranz, Git List
In-Reply-To: <xmqqd1f4uug6.fsf@gitster.mtv.corp.google.com>

On Mon, Jan 30, 2017 at 01:08:41PM -0800, Junio C Hamano wrote:

> Jeff King <peff@peff.net> writes:
> 
> > On Tue, Jan 17, 2017 at 11:24:02PM -0600, Edmundo Carmona Antoranz wrote:
> >
> >> For a very long time I had wanted to get the output of diff to include
> >> blame information as well (to see when something was added/removed).
> >
> > This is something I've wanted, too. The trickiest part, though, is
> > blaming deletions, because git-blame only tracks the origin of content,
> > not the origin of a change.
> 
> Hmph, this is a comment without looking at what difflame does
> internally, so you can ignore me if I am misunderstood what problem
> you are pointing out, but I am not sure how "tracks the origin of
> content" could be a problem.
> 
> If output from "git show" says this:
> 
> 	--- a/file
> 	+++ b/file
> 	@@ -1,5 +1,6 @@
> 	 a
> 	 b
> 	-c
> 	+C
> 	+D
> 	 d
> 	 e
> 
> in order to annotate lines 'a', 'b', 'd', and 'e' for their origin,
> you would run 'blame' on the commit the above output was taken from
> (or its parent---they are in the context so either would be OK).
> 
> You know where 'C' and 'D' came from already.  It's the commit you
> are feeding "git show".

I think the point (or at least as I understand it) is that the diff is
not "git show" for a particular commit. It could be part of a much
larger diff that covers many commits.

As a non-hypothetical instance, I have a fork of git.git that has
various enhancements. I want to feed those enhancements upstream. I need
to know which commits should be cherry-picked to get those various
enhancements.

Looking at "log origin..fork" tells me too many commits, because it
includes ones which aren't useful anymore. Either because they already
went upstream, or because they were cherry-picked to the fork and their
upstream counterparts merged (or even equivalent commits made upstream
that obsoleted the features).

Looking at "git diff origin fork" tells me what the actual differences
are, but it doesn't show me which commits are responsible for them.

I can "git blame" each individual line of the diff (starting with "fork"
as the tip), but that doesn't work for lines that no longer exist (i.e.,
when the interesting change is a deletion).

> In order to run blame to find where 'c' came from, you need to start
> at the _parent_ of the commit the above output came from, and the
> hunk header shows which line range to find the final 'c'.

So perhaps that explains my comment more. "blame" is not good for
finding which commit took away a line. I've tried using "blame
--reverse", but it shows you the parent of the commit you are looking
for, which is slightly annoying. :)

"git log -S" is probably a better tool for finding that.

-Peff

^ permalink raw reply

* Re: [PATCH] merge-recursive: make "CONFLICT (rename/delete)" message show both paths
From: Junio C Hamano @ 2017-01-30 23:21 UTC (permalink / raw)
  To: Matt McCutchen; +Cc: git
In-Reply-To: <1485636764.2482.2.camel@mattmccutchen.net>

Matt McCutchen <matt@mattmccutchen.net> writes:

> The current message printed by "git merge-recursive" for a rename/delete
> conflict is like this:
>
> CONFLICT (rename/delete): new-path deleted in HEAD and renamed in
> other-branch. Version other-branch of new-path left in tree.
>
> To be more helpful, the message should show both paths of the rename and
> state that the deletion occurred at the old path, not the new path.  So
> change the message to the following format:
>
> CONFLICT (rename/delete): old-path deleted in HEAD and renamed to
> new-path in other-branch. Version other-branch of new-path left in tree.

Sounds like a sensible goal.

> Please check that my refactoring is indeed correct!  All the existing tests pass
> for me, but the existing test coverage of these conflict messages looks poor.

This unfortunately is doing a bit too many things at once from that
point of view.  I need to reserve a solid quiet 20-minutes without
distraction to check it, which I am hoping to do tonight.

Thanks.

>
>  merge-recursive.c              | 117 ++++++++++++++++++++++-------------------
>  t/t6045-merge-rename-delete.sh |  23 ++++++++
>  2 files changed, 86 insertions(+), 54 deletions(-)
>  create mode 100755 t/t6045-merge-rename-delete.sh
>
> diff --git a/merge-recursive.c b/merge-recursive.c
> index d327209..e8fce10 100644
> --- a/merge-recursive.c
> +++ b/merge-recursive.c
> @@ -1061,16 +1061,20 @@ static int merge_file_one(struct merge_options *o,
>  }
>  
>  static int handle_change_delete(struct merge_options *o,
> -				 const char *path,
> +				 const char *path, const char *old_path,
>  				 const struct object_id *o_oid, int o_mode,
> -				 const struct object_id *a_oid, int a_mode,
> -				 const struct object_id *b_oid, int b_mode,
> +				 const struct object_id *changed_oid,
> +				 int changed_mode,
> +				 const char *change_branch,
> +				 const char *delete_branch,
>  				 const char *change, const char *change_past)
>  {
> -	char *renamed = NULL;
> +	char *alt_path = NULL;
> +	const char *update_path = path;
>  	int ret = 0;
> +
>  	if (dir_in_way(path, !o->call_depth, 0)) {
> -		renamed = unique_path(o, path, a_oid ? o->branch1 : o->branch2);
> +		update_path = alt_path = unique_path(o, path, change_branch);
>  	}
>  
>  	if (o->call_depth) {
> @@ -1081,43 +1085,43 @@ static int handle_change_delete(struct merge_options *o,
>  		 */
>  		ret = remove_file_from_cache(path);
>  		if (!ret)
> -			ret = update_file(o, 0, o_oid, o_mode,
> -					  renamed ? renamed : path);
> -	} else if (!a_oid) {
> -		if (!renamed) {
> -			output(o, 1, _("CONFLICT (%s/delete): %s deleted in %s "
> -			       "and %s in %s. Version %s of %s left in tree."),
> -			       change, path, o->branch1, change_past,
> -			       o->branch2, o->branch2, path);
> -			ret = update_file(o, 0, b_oid, b_mode, path);
> -		} else {
> -			output(o, 1, _("CONFLICT (%s/delete): %s deleted in %s "
> -			       "and %s in %s. Version %s of %s left in tree at %s."),
> -			       change, path, o->branch1, change_past,
> -			       o->branch2, o->branch2, path, renamed);
> -			ret = update_file(o, 0, b_oid, b_mode, renamed);
> -		}
> +			ret = update_file(o, 0, o_oid, o_mode, update_path);
>  	} else {
> -		if (!renamed) {
> -			output(o, 1, _("CONFLICT (%s/delete): %s deleted in %s "
> -			       "and %s in %s. Version %s of %s left in tree."),
> -			       change, path, o->branch2, change_past,
> -			       o->branch1, o->branch1, path);
> +		if (!alt_path) {
> +			if (!old_path) {
> +				output(o, 1, _("CONFLICT (%s/delete): %s deleted in %s "
> +				       "and %s in %s. Version %s of %s left in tree."),
> +				       change, path, delete_branch, change_past,
> +				       change_branch, change_branch, path);
> +			} else {
> +				output(o, 1, _("CONFLICT (%s/delete): %s deleted in %s "
> +				       "and %s to %s in %s. Version %s of %s left in tree."),
> +				       change, old_path, delete_branch, change_past, path,
> +				       change_branch, change_branch, path);
> +			}
>  		} else {
> -			output(o, 1, _("CONFLICT (%s/delete): %s deleted in %s "
> -			       "and %s in %s. Version %s of %s left in tree at %s."),
> -			       change, path, o->branch2, change_past,
> -			       o->branch1, o->branch1, path, renamed);
> -			ret = update_file(o, 0, a_oid, a_mode, renamed);
> +			if (!old_path) {
> +				output(o, 1, _("CONFLICT (%s/delete): %s deleted in %s "
> +				       "and %s in %s. Version %s of %s left in tree at %s."),
> +				       change, path, delete_branch, change_past,
> +				       change_branch, change_branch, path, alt_path);
> +			} else {
> +				output(o, 1, _("CONFLICT (%s/delete): %s deleted in %s "
> +				       "and %s to %s in %s. Version %s of %s left in tree at %s."),
> +				       change, old_path, delete_branch, change_past, path,
> +				       change_branch, change_branch, path, alt_path);
> +			}
>  		}
>  		/*
> -		 * No need to call update_file() on path when !renamed, since
> -		 * that would needlessly touch path.  We could call
> -		 * update_file_flags() with update_cache=0 and update_wd=0,
> -		 * but that's a no-op.
> +		 * No need to call update_file() on path when change_branch ==
> +		 * o->branch1 && !alt_path, since that would needlessly touch
> +		 * path.  We could call update_file_flags() with update_cache=0
> +		 * and update_wd=0, but that's a no-op.
>  		 */
> +		if (change_branch != o->branch1 || alt_path)
> +			ret = update_file(o, 0, changed_oid, changed_mode, update_path);
>  	}
> -	free(renamed);
> +	free(alt_path);
>  
>  	return ret;
>  }
> @@ -1125,28 +1129,17 @@ static int handle_change_delete(struct merge_options *o,
>  static int conflict_rename_delete(struct merge_options *o,
>  				   struct diff_filepair *pair,
>  				   const char *rename_branch,
> -				   const char *other_branch)
> +				   const char *delete_branch)
>  {
>  	const struct diff_filespec *orig = pair->one;
>  	const struct diff_filespec *dest = pair->two;
> -	const struct object_id *a_oid = NULL;
> -	const struct object_id *b_oid = NULL;
> -	int a_mode = 0;
> -	int b_mode = 0;
> -
> -	if (rename_branch == o->branch1) {
> -		a_oid = &dest->oid;
> -		a_mode = dest->mode;
> -	} else {
> -		b_oid = &dest->oid;
> -		b_mode = dest->mode;
> -	}
>  
>  	if (handle_change_delete(o,
>  				 o->call_depth ? orig->path : dest->path,
> +				 o->call_depth ? NULL : orig->path,
>  				 &orig->oid, orig->mode,
> -				 a_oid, a_mode,
> -				 b_oid, b_mode,
> +				 &dest->oid, dest->mode,
> +				 rename_branch, delete_branch,
>  				 _("rename"), _("renamed")))
>  		return -1;
>  
> @@ -1665,11 +1658,27 @@ static int handle_modify_delete(struct merge_options *o,
>  				 struct object_id *a_oid, int a_mode,
>  				 struct object_id *b_oid, int b_mode)
>  {
> +	const char *modify_branch, *delete_branch;
> +	struct object_id *changed_oid;
> +	int changed_mode;
> +
> +	if (a_oid) {
> +		modify_branch = o->branch1;
> +		delete_branch = o->branch2;
> +		changed_oid = a_oid;
> +		changed_mode = a_mode;
> +	} else {
> +		modify_branch = o->branch2;
> +		delete_branch = o->branch1;
> +		changed_oid = b_oid;
> +		changed_mode = b_mode;
> +	}
> +
>  	return handle_change_delete(o,
> -				    path,
> +				    path, NULL,
>  				    o_oid, o_mode,
> -				    a_oid, a_mode,
> -				    b_oid, b_mode,
> +				    changed_oid, changed_mode,
> +				    modify_branch, delete_branch,
>  				    _("modify"), _("modified"));
>  }
>  
> diff --git a/t/t6045-merge-rename-delete.sh b/t/t6045-merge-rename-delete.sh
> new file mode 100755
> index 0000000..8f33244
> --- /dev/null
> +++ b/t/t6045-merge-rename-delete.sh
> @@ -0,0 +1,23 @@
> +#!/bin/sh
> +
> +test_description='Merge-recursive rename/delete conflict message'
> +. ./test-lib.sh
> +
> +test_expect_success 'rename/delete' '
> +echo foo >A &&
> +git add A &&
> +git commit -m "initial" &&
> +
> +git checkout -b rename &&
> +git mv A B &&
> +git commit -m "rename" &&
> +
> +git checkout master &&
> +git rm A &&
> +git commit -m "delete" &&
> +
> +test_must_fail git merge --strategy=recursive rename >output &&
> +test_i18ngrep "CONFLICT (rename/delete): A deleted in HEAD and renamed to B in rename. Version rename of B left in tree." output
> +'
> +
> +test_done

^ permalink raw reply

* Re: [PATCH 0/5] introduce SWAP macro
From: Junio C Hamano @ 2017-01-30 23:20 UTC (permalink / raw)
  To: René Scharfe; +Cc: Git List, Johannes Schindelin
In-Reply-To: <8ef4c833-45bd-6831-0683-6d01f30aa518@web.de>

René Scharfe <l.s.r@web.de> writes:

> Exchanging the value of two variables requires declaring a temporary
> variable and repeating their names.  The swap macro in apply.c
> simplifies this for its callers without changing the compiled binary.
> Polish this macro and export it, then use it throughout the code to
> reduce repetition and hide the declaration of temporary variables.

All looked reasonable (modulo "swap tree2 and tree1???" ordering).
Also the object-id ones looked good.

Thanks.

^ permalink raw reply

* Re: [PATCH] Completion: Add support for --submodule=diff
From: Junio C Hamano @ 2017-01-30 23:03 UTC (permalink / raw)
  To: Peter Law; +Cc: git, Jacob Keller, szeder
In-Reply-To: <CAKoneT+Bn+MdbeNnPJsu23rbLCZ=jxADNVtpNefw9zNYMq26dA@mail.gmail.com>

Peter Law <peterjclaw@gmail.com> writes:

>> Teach git-completion.bash about the 'diff' option to 'git diff
>> --submodule=', which was added in Git 2.11.
>
> I posted this patch back in December, but I've not heard anything. I'm
> sure as maintainers you're all quite busy, but I was wondering how
> long it usually takes to get a response to patches? (also whether I'd
> gotten some part of the submission process wrong?)

When there is clear "subsystem maintainer(s)" in the area, I try to
refrain from commenting until they speak up, and completion scripts
are one of these areas.  I usually ping them after a few days but in
December with holidays and things people are usually slow, and so
was I X-<.

Will pick it up.  Thanks for a reminder; you absolutely did the
right thing (i.e. sending it out with people who are likely to know
about the area CC'ed, waiting for a while and then sending a
reminder).

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox