[RFC/PATCH] pull: set-upstream implementation

git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* [RFC/PATCH] pull: set-upstream implementation
@ 2016-05-25 15:25 Erwan Mathoniere
  2016-05-25 18:09 ` Junio C Hamano
  2016-06-06  9:34 ` [RFC/PATCH v2] pull: add --set-upstream Erwan Mathoniere
  0 siblings, 2 replies; 12+ messages in thread
From: Erwan Mathoniere @ 2016-05-25 15:25 UTC (permalink / raw)
  To: git
  Cc: tom.russello, samuel.groot, Erwan Mathoniere, Jordan De Gea,
	Matthieu Moy

Implements pull [--set-upstream | -u] which sets the remote tracking
branch to the one the user just pulled from.

	git pull [--set-upstream | -u] <remote> <branch>

After successfully fetched from <remote>, sets branch.<name>.remote to
<remote> and branch.<name>.merge to follow <remote>/<branch>

Signed-off-by: Erwan Mathoniere <erwan.mathoniere@grenoble-inp.org>
Signed-off-by: Jordan De Gea <jordan.de-gea@grenoble-inp.org>
Signed-off-by: Matthieu Moy <matthieu.moy@grenoble-inp.fr>
---
Hello,

`git push --set-upstream <remote> <branch>` sets the remote and merge config
variables for <branch> after successfully pushed.

This patch implements an equivalent for `git pull` or
`git branch --set-upstream-to=<remote>/<branch>`.

Difficulties:
	- I can't figure out what remote branch sould be tracked
	in that case: `git pull -u origin :master`
	- Is the result of 'resolve_ref_unsafe' should be freed ?

What's your opinion ?


 Documentation/git-pull.txt |  7 +++++
 builtin/pull.c             | 61 +++++++++++++++++++++++++++++++++++++
 t/t5544-pull-upstream.sh   | 75 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 143 insertions(+)
 create mode 100755 t/t5544-pull-upstream.sh

diff --git a/Documentation/git-pull.txt b/Documentation/git-pull.txt
index d033b25..3a2e0b7 100644
--- a/Documentation/git-pull.txt
+++ b/Documentation/git-pull.txt
@@ -93,6 +93,13 @@ OPTIONS
 	has to be called afterwards to bring the work tree up to date with the
 	merge result.
 
+-u::
+--set-upstream::
+	For each branch that is successfully pulled, add
+	upstream (tracking) reference, used by argument-less
+	linkgit:git-push[1] and other commands. For more information,
+	see 'branch.<name>.merge' in linkgit:git-config[1].
+
 Options related to merging
 ~~~~~~~~~~~~~~~~~~~~~~~~~~
 
diff --git a/builtin/pull.c b/builtin/pull.c
index 1d7333c..e096858 100644
--- a/builtin/pull.c
+++ b/builtin/pull.c
@@ -17,6 +17,7 @@
 #include "revision.h"
 #include "tempfile.h"
 #include "lockfile.h"
+#include "branch.h"
 
 enum rebase_type {
 	REBASE_INVALID = -1,
@@ -109,6 +110,9 @@ static char *opt_unshallow;
 static char *opt_update_shallow;
 static char *opt_refmap;
 
+/* Options about upstream */
+static int opt_set_upstream;
+
 static struct option pull_options[] = {
 	/* Shared options */
 	OPT__VERBOSITY(&opt_verbosity),
@@ -116,6 +120,9 @@ static struct option pull_options[] = {
 		N_("force progress reporting"),
 		PARSE_OPT_NOARG),
 
+	/* Options about upstream */
+	OPT_BOOL('u', "set-upstream", &opt_set_upstream, N_("set upstream for git pull/status")),
+
 	/* Options passed to git-merge or git-rebase */
 	OPT_GROUP(N_("Options related to merging")),
 	{ OPTION_CALLBACK, 'r', "rebase", &opt_rebase,
@@ -829,6 +836,57 @@ static int run_rebase(const unsigned char *curr_head,
 	return ret;
 }
 
+static int set_pull_upstream(const char *repo, const char **refspecs_name)
+{
+	unsigned char sha1[GIT_SHA1_RAWSZ];
+	const char *local_branch, *remote_branch;
+	struct strbuf remote_name;
+	struct refspec *refspecs;
+	int nr_refspec, i;
+
+	if (repo == NULL) {
+		warning(N_("no remote was specified"));
+		return 1;
+	}
+
+	for (nr_refspec = 0; refspecs_name[nr_refspec] != NULL; nr_refspec++);
+
+	if (nr_refspec == 0) {
+		warning(N_("no refspec was specified"));
+		return 1;
+	}
+
+	refspecs = parse_fetch_refspec(nr_refspec, refspecs_name);
+
+	for (i = 0; i < nr_refspec; i++) {
+		if (refspecs[i].src != NULL && strlen(refspecs[i].src) > 0) {
+			remote_branch = refspecs[i].src;
+		} else {
+			warning(N_("not yet implemented"));
+			continue;
+		}
+
+		if (refspecs[i].dst != NULL && strlen(refspecs[i].dst) > 0) {
+			local_branch = refspecs[i].dst;
+		} else {
+			// TODO : Should it be freed ?
+			local_branch = resolve_ref_unsafe("HEAD", 0, sha1, NULL);
+			skip_prefix(local_branch, "refs/heads/", &local_branch);
+		}
+
+		strbuf_init(&remote_name, strlen(repo) + strlen(remote_branch) + 1);
+		strbuf_addf(&remote_name, "%s/%s", repo, remote_branch);
+
+		create_branch(local_branch, local_branch, remote_name.buf, 0, 0, 0, opt_verbosity < 0, BRANCH_TRACK_OVERRIDE);
+
+		strbuf_release(&remote_name);
+	}
+
+	free_refspec(nr_refspec, refspecs);
+
+	return 0;
+}
+
 int cmd_pull(int argc, const char **argv, const char *prefix)
 {
 	const char *repo, **refspecs;
@@ -884,6 +942,9 @@ int cmd_pull(int argc, const char **argv, const char *prefix)
 	if (opt_dry_run)
 		return 0;
 
+	if (opt_set_upstream)
+		set_pull_upstream(repo, refspecs);
+
 	if (get_sha1("HEAD", curr_head))
 		hashclr(curr_head);
 
diff --git a/t/t5544-pull-upstream.sh b/t/t5544-pull-upstream.sh
new file mode 100755
index 0000000..38eb51d
--- /dev/null
+++ b/t/t5544-pull-upstream.sh
@@ -0,0 +1,75 @@
+#!/bin/sh
+
+test_description='pull with --set-upstream'
+. ./test-lib.sh
+. "$TEST_DIRECTORY"/lib-terminal.sh
+
+ensure_fresh_upstream() {
+	rm -rf parent &&
+	git init parent &&
+	cd parent &&
+	(
+		echo content >file &&
+		git add file &&
+		git commit -m one &&
+		git checkout -b other &&
+		echo content >file2 &&
+		git add file2 &&
+		git commit -m two &&
+		git checkout -b master2 &&
+		git checkout master
+	) &&
+	cd ..
+}
+
+test_expect_success 'setup bare parent' '
+	ensure_fresh_upstream &&
+	git remote add upstream parent &&
+	git pull upstream master
+'
+
+check_config() {
+	(echo $2; echo $3) >expect.$1
+	(git config branch.$1.remote
+	 git config branch.$1.merge) >actual.$1
+	test_cmp expect.$1 actual.$1
+}
+
+test_expect_success 'pull -u master' '
+	git pull -u upstream master &&
+	check_config master upstream refs/heads/master
+'
+
+test_expect_success 'pull -u master:other' '
+	git pull -u upstream master:other &&
+	check_config other upstream refs/heads/master
+'
+
+test_expect_success 'pull -u --dry-run other:other' '
+	git pull -u --dry-run upstream other:other &&
+	check_config other upstream refs/heads/master
+'
+
+test_expect_success 'pull -u master2:master2 master:other' '
+	git branch master2 &&
+	git pull -u upstream master2:master2 master:other &&
+	check_config master2 upstream refs/heads/master2 &&
+	check_config other upstream refs/heads/master
+'
+
+test_expect_success 'pull -u HEAD' '
+	git clone parent son &&
+	cd son &&
+	git checkout -b headbranch &&
+	git pull -u origin HEAD &&
+	check_config headbranch origin refs/heads/master
+'
+
+test_expect_success TTY 'quiet pull' '
+	ensure_fresh_upstream &&
+
+	test_terminal git pull -u --quiet upstream master 2>&1 | tee output &&
+	test_cmp /dev/null output
+'
+
+test_done
-- 
2.8.2.562.gdbf6e3e

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [RFC/PATCH] pull: set-upstream implementation
  2016-05-25 15:25 [RFC/PATCH] pull: set-upstream implementation Erwan Mathoniere
@ 2016-05-25 18:09 ` Junio C Hamano
  2016-05-29 20:00   ` Erwan Mathoniere
  2016-06-06  9:34 ` [RFC/PATCH v2] pull: add --set-upstream Erwan Mathoniere
  1 sibling, 1 reply; 12+ messages in thread
From: Junio C Hamano @ 2016-05-25 18:09 UTC (permalink / raw)
  To: Erwan Mathoniere
  Cc: git, tom.russello, samuel.groot, Jordan De Gea, Matthieu Moy

Erwan Mathoniere <erwan.mathoniere@grenoble-inp.org> writes:

> Subject: Re: [RFC/PATCH] pull: set-upstream implementation

If this were multi-patch series and one of the other patches were
"pull: set-upstream design" or something, then it might have been
understandable, but otherwise the word "implementation" sits rather
strangely in the title of this patch.

> Implements pull [--set-upstream | -u] which sets the remote tracking
> branch to the one the user just pulled from.
>
> 	git pull [--set-upstream | -u] <remote> <branch>
>
> After successfully fetched from <remote>, sets branch.<name>.remote to
> <remote> and branch.<name>.merge to follow <remote>/<branch>
>
> Signed-off-by: Erwan Mathoniere <erwan.mathoniere@grenoble-inp.org>
> Signed-off-by: Jordan De Gea <jordan.de-gea@grenoble-inp.org>
> Signed-off-by: Matthieu Moy <matthieu.moy@grenoble-inp.fr>
> ---
> Hello,
>
> `git push --set-upstream <remote> <branch>` sets the remote and merge config
> variables for <branch> after successfully pushed.
>
> This patch implements an equivalent for `git pull` or
> `git branch --set-upstream-to=<remote>/<branch>`.
>
> Difficulties:
> 	- I can't figure out what remote branch sould be tracked
> 	in that case: `git pull -u origin :master`

What does the command do without "-u"?

> 	- Is the result of 'resolve_ref_unsafe' should be freed ?

Check its function signature--it returns "const char *" which is a
sign that the memory does not belong to you (i.e. the caller), and
you should never free it.

>  Documentation/git-pull.txt |  7 +++++
>  builtin/pull.c             | 61 +++++++++++++++++++++++++++++++++++++
>  t/t5544-pull-upstream.sh   | 75 ++++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 143 insertions(+)
>  create mode 100755 t/t5544-pull-upstream.sh
>
> diff --git a/Documentation/git-pull.txt b/Documentation/git-pull.txt
> index d033b25..3a2e0b7 100644
> --- a/Documentation/git-pull.txt
> +++ b/Documentation/git-pull.txt
> @@ -93,6 +93,13 @@ OPTIONS
>  	has to be called afterwards to bring the work tree up to date with the
>  	merge result.
>  
> +-u::
> +--set-upstream::
> +	For each branch that is successfully pulled, add
> +	upstream (tracking) reference, used by argument-less
> +	linkgit:git-push[1] and other commands. For more information,
> +	see 'branch.<name>.merge' in linkgit:git-config[1].

To me, "For each branch" hints that I could do this:

	git pull --set-upstream git://git.kernel.org/r/e/p/o foo bar

and the command would do something to 'foo' and 'bar'.

But I suspect that is not what is going on and that is not what you
wanted to achieve.

A crucial piece of information that is lacking in the above is where
<name> comes from.  The same issue exists in your proposed commit
log message.

I think that you meant to add a feature to add branch.<name>.remote
and branch.<name>.merge configuration variables for the current
branch whose name is <name>, and the values to be recorded for these
two configuration variables are the same as those given on the
command line.  For example "git checkout -b topic master && git pull
origin that" would set "branch.topic.remote" and
"branch.topic.merge" to "origin" and "that", respectively.

Without explaining what <name> is, "For more information" that
refers to 'branch.<name>.merge' does not help the readers much.

	Side note.  It is an understandable mistake.  After one
	spent a lot of effort designing, implementing and debugging
	a feature, by the time one describes what it does, some
	assumptions one made earlier becomes so fundamental in one's
	mind that one forgets that it is not obvious to others.

There a few design decisions you must have made that needs to be
either described fully or at least hinted here, too, such as (not
exhaustive and in random order):

 * What should happen when the current branch already has these two
   configuration variables defined?  Should the user get a warning
   when this changes the setting from what was already there?

 * What should happen when the remote is specified as a Git URL, not
   as a remote nickname?  You want a nickname for the value to place
   in "branch.<name>.remote".

    - Should Git find an existing remote that points at the URL and
      use it?  If so, and if the value we are about to place in
      "branch.<name>.merge" is outside its configured fetch refspec,
      should Git tweak the fetch refspec of the existing remote?

    - Should Git create a new remote nickname for the URL?  If so,
      what should the fetch refspec be for the newly created remote?
      Should it fetch only the branch we fetched just now?

 * What should happen when more than one ref is given?
   branch_get_upstream() can return only one ref; should Git choose
   one of them at random?  Should Git warn and turn this into a
   no-op?

 * What should happen when the ref given to pull is not a branch
   (e.g. a tag)?  A tag, meant to be a stable anchoring point, is
   not something suitable to be set as branch.<name>.merge, even
   though it might be technically possible to configure it as such.
   Should Git warn and turn this into a no-op?

> diff --git a/builtin/pull.c b/builtin/pull.c
> index 1d7333c..e096858 100644
> --- a/builtin/pull.c
> +++ b/builtin/pull.c
> @@ -829,6 +836,57 @@ static int run_rebase(const unsigned char *curr_head,
>  	return ret;
>  }
>  
> +static int set_pull_upstream(const char *repo, const char **refspecs_name)
> +{
> +	unsigned char sha1[GIT_SHA1_RAWSZ];
> +	const char *local_branch, *remote_branch;
> +	struct strbuf remote_name;
> +	struct refspec *refspecs;

Style.  Name a pointer that points at the first element of an array
as singular, so that "element[4]", not "elements[4]", becomes the
way to refer to its fourth element.

> +	int nr_refspec, i;
> +
> +	if (repo == NULL) {
> +		warning(N_("no remote was specified"));
> +		return 1;
> +	}
> +
> +	for (nr_refspec = 0; refspecs_name[nr_refspec] != NULL; nr_refspec++);

Style.  Give an empty statement its own line to make it stand out,
i.e.

	for (....)
        	; /* just counting */

> +	if (nr_refspec == 0) {
> +		warning(N_("no refspec was specified"));
> +		return 1;
> +	}

OK, this is "git pull -u <REMOTE>" without any explicit branch name;
the pull may have already used configured one (or it may have fetched
nothing); there is nothing to do here either way.

> +	refspecs = parse_fetch_refspec(nr_refspec, refspecs_name);
> +
> +	for (i = 0; i < nr_refspec; i++) {
> +		if (refspecs[i].src != NULL && strlen(refspecs[i].src) > 0) {

Style.  Lose the " != NULL" and useless strlen().  I.e.

	if (refspec[i].src && *refspec[i].src)

More importantly, what would you see here in .src when your command
line were:

	$ git pull -u <REMOTE> "refs/heads/*:refs/remotes/foo/*"

Hint: check .pattern to make sure it is false.  Skip them at the top
of the loop body, perhaps.

> +			remote_branch = refspecs[i].src;
> +		} else {
> +			warning(N_("not yet implemented"));

What do you plan to implement here?

> +			continue;
> +		}
> +
> +		if (refspecs[i].dst != NULL && strlen(refspecs[i].dst) > 0) {
> +			local_branch = refspecs[i].dst;
> +		} else {
> +			// TODO : Should it be freed ?

Style.  No "// comment".

> +			local_branch = resolve_ref_unsafe("HEAD", 0, sha1, NULL);
> +			skip_prefix(local_branch, "refs/heads/", &local_branch);

What happens here if your user did this?

	$ git checkout HEAD^0 && git pull -u <REMOTE> ...

You do not have "local_branch" here.  Check the condition, warn and
turn it into no-op (I do not think "pull -u" is important enough to
fail the entire command).

> +		}
> +
> +		strbuf_init(&remote_name, strlen(repo) + strlen(remote_branch) + 1);
> +		strbuf_addf(&remote_name, "%s/%s", repo, remote_branch);

What happens here if your user did this?

	$ git pull -u git://git.kernel.org/r/e/p/o master

Specifically, what is "repo" at this point?

> +		create_branch(local_branch, local_branch, remote_name.buf, 0, 0, 0, opt_verbosity < 0, BRANCH_TRACK_OVERRIDE);

I thought you were only setting up configurations for existing
branch.  Why do you call create_branch() on the existing
local_branch, which is supposed to be the current one?

Perhaps create_branch() API is too crappy and it needs to be
properly refactored in preparation for this patch.  I dunno.

What does it mean for this loop to execute more than once, flipping
the configuration for the current branch number of times?  The last
one wins?  Does it even make sense from the end-user's point of
view?

> +
> +		strbuf_release(&remote_name);
> +	}
> +
> +	free_refspec(nr_refspec, refspecs);
> +
> +	return 0;
> +}
> +
>  int cmd_pull(int argc, const char **argv, const char *prefix)
>  {
>  	const char *repo, **refspecs;
> @@ -884,6 +942,9 @@ int cmd_pull(int argc, const char **argv, const char *prefix)
>  	if (opt_dry_run)
>  		return 0;
>  
> +	if (opt_set_upstream)
> +		set_pull_upstream(repo, refspecs);
> +
>  	if (get_sha1("HEAD", curr_head))
>  		hashclr(curr_head);
>  
> diff --git a/t/t5544-pull-upstream.sh b/t/t5544-pull-upstream.sh
> new file mode 100755
> index 0000000..38eb51d
> --- /dev/null
> +++ b/t/t5544-pull-upstream.sh
> @@ -0,0 +1,75 @@
> +#!/bin/sh
> +
> +test_description='pull with --set-upstream'
> +. ./test-lib.sh
> +. "$TEST_DIRECTORY"/lib-terminal.sh
> +
> +ensure_fresh_upstream() {

Style:

	ensure_fresh_upstream () {

> +	rm -rf parent &&
> +	git init parent &&
> +	cd parent &&
> +	(
> +		echo content >file &&
> +		git add file &&
> +		git commit -m one &&
> +		git checkout -b other &&
> +		echo content >file2 &&
> +		git add file2 &&
> +		git commit -m two &&
> +		git checkout -b master2 &&
> +		git checkout master
> +	) &&
> +	cd ..

The tests typically do

	... some stuff ... &&
        (
		cd elsewhere &&
                ... more stuff ...
	) &&
        ... yet more stuff ...

to make sure that even after any of "... more stuff ..." fails, the
caller of this sequence will find it in an expected and stable
place.  As written in your patch, if any of the "... more stuff ..."
fails, the caller will be in "parent" directory, not in the original
directory, because your "cd .." will not be exectued.

I wonder if you are completely missing the point of using a subshell
here?

> +}
> +
> +test_expect_success 'setup bare parent' '
> +	ensure_fresh_upstream &&
> +	git remote add upstream parent &&
> +	git pull upstream master
> +'
> +
> +check_config() {
> +	(echo $2; echo $3) >expect.$1

	test_write_lines "$2" "$3" >"expect.$1"

Unless you want your variable reference is split at $IFS, quote
your variables.

> +	(git config branch.$1.remote
> +	 git config branch.$1.merge) >actual.$1
> +	test_cmp expect.$1 actual.$1

It is not _wrong_ per-se, but I do not think ".$1" suffix is adding
any value.  I'd drop it if I were doing this patch.

> +}
> +
> +test_expect_success 'pull -u master' '
> +	git pull -u upstream master &&
> +	check_config master upstream refs/heads/master
> +'
> +
> +test_expect_success 'pull -u master:other' '
> +	git pull -u upstream master:other &&
> +	check_config other upstream refs/heads/master
> +'
> +
> +test_expect_success 'pull -u --dry-run other:other' '
> +	git pull -u --dry-run upstream other:other &&
> +	check_config other upstream refs/heads/master
> +'
> +
> +test_expect_success 'pull -u master2:master2 master:other' '
> +	git branch master2 &&
> +	git pull -u upstream master2:master2 master:other &&
> +	check_config master2 upstream refs/heads/master2 &&
> +	check_config other upstream refs/heads/master
> +'
> +
> +test_expect_success 'pull -u HEAD' '
> +	git clone parent son &&
> +	cd son &&
> +	git checkout -b headbranch &&
> +	git pull -u origin HEAD &&
> +	check_config headbranch origin refs/heads/master

Do not "chdir" outside a subshell.

I think the next test will not work if this test, specifically the
first two steps that creates a new repository "son" and moves into
it, fails.  Don't create such an interdependency between tests when
you can avoid it.

Hint. Immediately after creating "parent", in the same test, clone
it into "son".  Everybody needs to depend on that "setup bare
parent" test anyway, so that is not making things worse.  And then
update this test like so:

	test_expect_success 'title' '
        	(
                	cd son &&
                        git checkout -b headbranch &&
                        ...
		)
	'

making sure that outside test_expect_success block, the process will
stay at the original directory, no matter which tests fail or get
skipped.

> +'
> +
> +test_expect_success TTY 'quiet pull' '
> +	ensure_fresh_upstream &&
> +
> +	test_terminal git pull -u --quiet upstream master 2>&1 | tee output &&
> +	test_cmp /dev/null output
> +'

There is no test that makes sure that the new feature does not kick
in when it should not.  E.g. pulling from somewhere that is not a
configured remote.  For example, 

> +test_done

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC/PATCH] pull: set-upstream implementation
  2016-05-25 18:09 ` Junio C Hamano
@ 2016-05-29 20:00   ` Erwan Mathoniere
  0 siblings, 0 replies; 12+ messages in thread
From: Erwan Mathoniere @ 2016-05-29 20:00 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: git, tom.russello, samuel.groot, Jordan De Gea, Matthieu Moy



On 25/05/2016 20:09, Junio C Hamano wrote:
 > Erwan Mathoniere <erwan.mathoniere@grenoble-inp.org> writes:
 >> Difficulties:
 >>     - I can't figure out what remote branch sould be tracked
 >>     in that case: `git pull -u origin :master`
 >
 > What does the command do without "-u"?

After some research, I think it creates a new branch if the ref doesn't 
exist. Otherwise it does nothing. So no branch should be tracked in that 
case.

 >> +-u::
 >> +--set-upstream::
 >> +    For each branch that is successfully pulled, add
 >> +    upstream (tracking) reference, used by argument-less
 >> +    linkgit:git-push[1] and other commands. For more information,
 >> +    see 'branch.<name>.merge' in linkgit:git-config[1].
 >
 > A crucial piece of information that is lacking in the above is where
 > <name> comes from.  The same issue exists in your proposed commit
 > log message.

You're right, documentation is unclear. I'll sort it out.

 > There a few design decisions you must have made that needs to be
 > either described fully or at least hinted here, too, such as (not
 > exhaustive and in random order):
 >
 >  * What should happen when the current branch already has these two
 >    configuration variables defined?  Should the user get a warning
 >    when this changes the setting from what was already there?

Neither `git pull --set-upstream` nor `git branch --set-upstream-to` 
warns the user in such case. So `git pull --set-upstream` should simply 
override the previous configuration the same way these commands do.


 >  * What should happen when the remote is specified as a Git URL, not
 >    as a remote nickname?  You want a nickname for the value to place
 >    in "branch.<name>.remote".
 >
 >     - Should Git find an existing remote that points at the URL and
 >       use it?  If so, and if the value we are about to place in
 >       "branch.<name>.merge" is outside its configured fetch refspec,
 >       should Git tweak the fetch refspec of the existing remote?
 >
 >     - Should Git create a new remote nickname for the URL?  If so,
 >       what should the fetch refspec be for the newly created remote?
 >       Should it fetch only the branch we fetched just now?

Still in comparison with `git push --set-upstream`, when using a Git URL 
as <repo>, "push" just sets "branch.<name>.remote" to the URL given in 
argument (even if there is a configured remote with this URL).
`git pull --set-upstream` should work the same way.

 >  * What should happen when more than one ref is given?
 >    branch_get_upstream() can return only one ref; should Git choose
 >    one of them at random?  Should Git warn and turn this into a
 >    no-op?

It depends whether refspecs given in arguments refers to the same branch 
or not. I discuss this point later in this email.

 >  * What should happen when the ref given to pull is not a branch
 >    (e.g. a tag)?  A tag, meant to be a stable anchoring point, is
 >    not something suitable to be set as branch.<name>.merge, even
 >    though it might be technically possible to configure it as such.
 >    Should Git warn and turn this into a no-op?

`pull --set-upstream` should only set "branch.<name>.merge" to remote 
branches. So yes a check must be done with eventually a no-op.

 >> +    refspecs = parse_fetch_refspec(nr_refspec, refspecs_name);
 >> +
 >> +    for (i = 0; i < nr_refspec; i++) {
 >> +        if (refspecs[i].src != NULL && strlen(refspecs[i].src) > 0) {
 >
 > More importantly, what would you see here in .src when your command
 > line were:
 >
 >     $ git pull -u <REMOTE> "refs/heads/*:refs/remotes/foo/*"
 >
 > Hint: check .pattern to make sure it is false.  Skip them at the top
 > of the loop body, perhaps.

This case wasn't handled at all, thanks for the hint.

 >> +            remote_branch = refspecs[i].src;
 >> +        } else {
 >> +            warning(N_("not yet implemented"));
 >
 > What do you plan to implement here?

Now that I understand `git pull -u origin :master`, there's nothing to 
implement here, except maybe a warning?

 >> +            local_branch = resolve_ref_unsafe("HEAD", 0, sha1, NULL);
 >> +            skip_prefix(local_branch, "refs/heads/", &local_branch);
 >
 > What happens here if your user did this?
 >
 >     $ git checkout HEAD^0 && git pull -u <REMOTE> ...
 >
 > You do not have "local_branch" here.  Check the condition, warn and
 > turn it into no-op (I do not think "pull -u" is important enough to
 > fail the entire command).

"pull -u" should indeed only work for branches, I'll put a check on this.
But I'm not really sure what procedure I should use to fully resolve 
refs. In git code, sometimes "dwim_ref" is used, sometimes it's 
"resolve_ref_unsafe" and haven't found any documentation on this.

 >> +        }
 >> +
 >> +        strbuf_init(&remote_name, strlen(repo) + 
strlen(remote_branch) + 1);
 >> +        strbuf_addf(&remote_name, "%s/%s", repo, remote_branch);
 >
 > What happens here if your user did this?
 >
 >     $ git pull -u git://git.kernel.org/r/e/p/o master
 >
 > Specifically, what is "repo" at this point?
 >
 >> +        create_branch(local_branch, local_branch, remote_name.buf, 
0, 0, 0, opt_verbosity < 0, BRANCH_TRACK_OVERRIDE);
 >
 > I thought you were only setting up configurations for existing
 > branch.  Why do you call create_branch() on the existing
 > local_branch, which is supposed to be the current one?

We took the example of `git branch --set-upstream-to=<remote>/<branch>` 
for this one. Perhaps using directly install_branch_config() is more 
relevant here.
It allows to set a non-configured remote (I mean in the .git/config) as 
the remote for the branch (with the full URL), fixing the example you 
gave me and working like `git push -u git://git.kernel.org/r/e/p/o master`.


 > What does it mean for this loop to execute more than once, flipping
 > the configuration for the current branch number of times?  The last
 > one wins?  Does it even make sense from the end-user's point of
 > view?

The loop is here to handle when the user uses multiple 
'remote_branch:local_branch' notation.
(e.g. `git pull -u <remote> master:master other:other`)

For now, the last one wins. But I agree, it doesn't really make sense.
The simplest solution from the technical point of view would be to no-op 
when multiple refs are passed in arguments.
However `git pull -u <remote> master:master other:other` seems legit.

So another solution would be to warn and no-op if the same branch config 
is modified twice.

 >> +    rm -rf parent &&
 >> +    git init parent &&
 >> +    cd parent &&
 >> +    (
 >> +        echo content >file &&
 >> +        git add file &&
 >> +        git commit -m one &&
 >> +        git checkout -b other &&
 >> +        echo content >file2 &&
 >> +        git add file2 &&
 >> +        git commit -m two &&
 >> +        git checkout -b master2 &&
 >> +        git checkout master
 >> +    ) &&
 >> +    cd ..
 >
 > The tests typically do
 >
 >     ... some stuff ... &&
 >         (
 >         cd elsewhere &&
 >                 ... more stuff ...
 >     ) &&
 >         ... yet more stuff ...
 >
 > to make sure that even after any of "... more stuff ..." fails, the
 > caller of this sequence will find it in an expected and stable
 > place.  As written in your patch, if any of the "... more stuff ..."
 > fails, the caller will be in "parent" directory, not in the original
 > directory, because your "cd .." will not be exectued.
 >
 > I wonder if you are completely missing the point of using a subshell
 > here?

You quoted the same issue later and indeed I didn't use it correctly.

 >> +check_config() {
 >> +    (echo $2; echo $3) >expect.$1
 >
 >     test_write_lines "$2" "$3" >"expect.$1"
 >
 > Unless you want your variable reference is split at $IFS, quote
 > your variables.
 >

 >> +    (git config branch.$1.remote
 >> +     git config branch.$1.merge) >actual.$1
 >> +    test_cmp expect.$1 actual.$1
 >
 > It is not _wrong_ per-se, but I do not think ".$1" suffix is adding
 > any value.  I'd drop it if I were doing this patch.

I took these functions from "t/t5523-push-upstream.sh" and I didn't 
closely look at it. I'll clean it up.

 >> +'
 >> +
 >> +test_expect_success TTY 'quiet pull' '
 >> +    ensure_fresh_upstream &&
 >> +
 >> +    test_terminal git pull -u --quiet upstream master 2>&1 | tee 
output &&
 >> +    test_cmp /dev/null output
 >> +'
 >
 > There is no test that makes sure that the new feature does not kick
 > in when it should not.  E.g. pulling from somewhere that is not a
 > configured remote.  For example,
 >

Yes you're right. I'll reshape tests and create new ones to cover more 
use cases.

Thanks for the time you spent for this huge and very useful feedback.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [RFC/PATCH v2] pull: add --set-upstream
  2016-05-25 15:25 [RFC/PATCH] pull: set-upstream implementation Erwan Mathoniere
  2016-05-25 18:09 ` Junio C Hamano
@ 2016-06-06  9:34 ` Erwan Mathoniere
  2016-06-06 15:54   ` Matthieu Moy
  2016-06-06 16:29   ` Philip Oakley
  1 sibling, 2 replies; 12+ messages in thread
From: Erwan Mathoniere @ 2016-06-06  9:34 UTC (permalink / raw)
  To: git
  Cc: jordan.de-gea, samuel.groot, erwan.mathoniere, tom.russello,
	gitster, Matthieu Moy

Implement `git pull [--set-upstream | -u] <remote> <refspecs>` that set
tracking to the remote branch the user just pulled from.

After successfully pulling from `<remote>`, for each `<refspec>`
described in format `<remote_branch>:<local_branch>`, set
`branch.<local_branch>.remote` to `<remote>` and
`branch.<local_branch>.merge` to `refs/heads/<remote_branch>`. If
`<refspec>` lacks `<local_branch>` in the previous format or directly
refers to a branch, use the current branch as `<local_branch>` in the
above configuration setting.

`git push` has already its `--set-upstream`, it makes sense to have its
symmetrical for `git pull`.

For a beginner, when trying to use argumentless `git pull` without
tracking information set, advising to use 
`git branch --set-upstream-to` to set upstream can be quite confusing.
Using this `git pull --set-upstream` is easier and more natural.

Signed-off-by: Erwan Mathoniere <erwan.mathoniere@grenoble-inp.org>
Signed-off-by: Jordan De Gea <jordan.de-gea@grenoble-inp.org>
Signed-off-by: Matthieu Moy <matthieu.moy@grenoble-inp.fr>
---

Changes from v1:
- Code reshaped to :
  * warn + no-op when pulling from or to something that isn't a branch
or a configured remote
  * set upstream only after successfully merging/rebasing
- More relevant documentation
- Tests reshaped to be more independent from each others
- More tests (tags, detached heads, non-configured remote...)


For now, the documentation is quite hard to understand, but I didn't
figure how to explain without using too technical words. Should it stay
as it is or should I write something similar the above commit message?

Allowing to set non-configured repository as upstream isn't easy to
handle since the type of refspec must be checked and this is done by
verifying the existence of the remote-tracking branch at
`refs/remotes/<remote>/<branch>`.


 Documentation/git-pull.txt |  18 +++++
 builtin/pull.c             | 106 ++++++++++++++++++++++++++++-
 t/t5544-pull-upstream.sh   | 164 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 285 insertions(+), 3 deletions(-)
 create mode 100755 t/t5544-pull-upstream.sh

diff --git a/Documentation/git-pull.txt b/Documentation/git-pull.txt
index d033b25..6ae5e58 100644
--- a/Documentation/git-pull.txt
+++ b/Documentation/git-pull.txt
@@ -93,6 +93,24 @@ OPTIONS
 	has to be called afterwards to bring the work tree up to date with the
 	merge result.
 
+-u::
+--set-upstream::
+	After successfully pulling from explicitly given <repository> and
+	<refspecs>, set the configuration of the local branches pulled on, so
+	that each one tracks the remote branch pulled from. If a configuration
+	already exists, it is overwriten. For example, with `git pull -u origin
+	branch` the current branch will track `branch` from `origin`.
++
+If two or more branches are pulled on the same local branch, only the last one
+in arguments will be tracked.
++
+The given <repository> must be a configured remote. Can only set tracking to
+remote branches (e.g. can't set upstream to remote HEAD).
++
+Works symmetrically as `--set-upstream` for linkgit:git-push[1]. Allow using
+argumentless linkgit:git-pull[1] and other commands.  For more information, see
+`branch.<name>.merge` in linkgit:git-config[1].
+
 Options related to merging
 ~~~~~~~~~~~~~~~~~~~~~~~~~~
 
diff --git a/builtin/pull.c b/builtin/pull.c
index 1d7333c..d9823d5 100644
--- a/builtin/pull.c
+++ b/builtin/pull.c
@@ -17,6 +17,7 @@
 #include "revision.h"
 #include "tempfile.h"
 #include "lockfile.h"
+#include "branch.h"
 
 enum rebase_type {
 	REBASE_INVALID = -1,
@@ -109,6 +110,9 @@ static char *opt_unshallow;
 static char *opt_update_shallow;
 static char *opt_refmap;
 
+/* Options about upstream */
+static int opt_set_upstream;
+
 static struct option pull_options[] = {
 	/* Shared options */
 	OPT__VERBOSITY(&opt_verbosity),
@@ -116,6 +120,9 @@ static struct option pull_options[] = {
 		N_("force progress reporting"),
 		PARSE_OPT_NOARG),
 
+	/* Options about upstream */
+	OPT_BOOL('u', "set-upstream", &opt_set_upstream, N_("set upstream for git pull/status")),
+
 	/* Options passed to git-merge or git-rebase */
 	OPT_GROUP(N_("Options related to merging")),
 	{ OPTION_CALLBACK, 'r', "rebase", &opt_rebase,
@@ -497,6 +504,10 @@ static void NORETURN die_no_merge_candidates(const char *repo, const char **refs
 		fprintf(stderr, "\n");
 		fprintf_ln(stderr, _("If you wish to set tracking information for this branch you can do so with:"));
 		fprintf(stderr, "\n");
+		fprintf_ln(stderr, "    git pull -u %s %s", _("<remote>"), _("<branch>"));
+		fprintf(stderr, "\n");
+		fprintf_ln(stderr, _("or"));
+		fprintf(stderr, "\n");
 		fprintf_ln(stderr, "    git branch --set-upstream-to=%s/%s %s\n",
 				remote_name, _("<branch>"), curr_branch->name);
 	} else
@@ -829,8 +840,92 @@ static int run_rebase(const unsigned char *curr_head,
 	return ret;
 }
 
+static void set_pull_upstream(const char *repo, const char **refspecs_name)
+{
+	unsigned char sha1[GIT_SHA1_RAWSZ];
+	struct refspec *refspec;
+	struct branch *branch;
+	struct remote *remote;
+	struct strbuf buf;
+	struct refspec tracking_refspec;
+	int nr_refspec, i, flags;
+
+	if (!repo) {
+		warning(_("a remote must be specified to set the upstream"));
+		return;
+	}
+
+	remote = remote_get(repo);
+	if (!remote) {
+		warning(_("cannot set upstream: "
+			"'%s' is not a configured remote"), repo);
+	}
+
+	for (nr_refspec = 0; refspecs_name[nr_refspec] != NULL; nr_refspec++)
+		; /* just counting */
+
+	if (nr_refspec == 0) {
+		warning(_("a remote branch must be specified to set the upstream"));
+		return;
+	}
+
+	strbuf_init(&buf, 0);
+	refspec = parse_fetch_refspec(nr_refspec, refspecs_name);
+
+	for (i = 0; i < nr_refspec; i++) {
+		if (refspec[i].pattern) {
+			warning(_("upstream cannot be set for patterns"));
+			continue;
+		}
+
+		branch = branch_get(refspec[i].dst);
+		if (!branch || !ref_exists(branch->refname)) {
+			if (!refspec[i].dst || !*refspec[i].dst)
+				warning(_("could not set upstream of HEAD when "
+					"it does not point to any branch."));
+			else
+				warning(_("cannot set upstream: "
+					"'%s' is not a branch"), refspec[i].dst);
+
+			continue;
+		}
+
+		if (!refspec[i].src || !*refspec[i].src) {
+			warning(_("remote branch must be specified "
+				"to set upstream"));
+			continue;
+		}
+		
+		strbuf_reset(&buf);
+		strbuf_addf(&buf, "refs/heads/%s", refspec[i].src);
+		memset(&tracking_refspec, 0, sizeof(struct refspec));
+		tracking_refspec.src = buf.buf;
+
+		if (remote_find_tracking(remote, &tracking_refspec)) {
+			warning(_("cannot set upstream: "
+				"no such remote branch '%s'"), refspec[i].src);
+			continue;
+		}
+
+		if (!resolve_ref_unsafe(tracking_refspec.dst, RESOLVE_REF_READING,
+					sha1, &flags)
+				|| (flags & REF_ISSYMREF)) {
+			warning(_("cannot set upstream: "
+				"no such remote branch '%s'"), refspec[i].src);
+			continue;
+		}
+
+		install_branch_config((opt_verbosity >= 0 ? BRANCH_CONFIG_VERBOSE : 0),
+		      branch->name, repo, buf.buf);
+	}
+
+	free_refspec(nr_refspec, refspec);
+	strbuf_release(&buf);
+}
+
 int cmd_pull(int argc, const char **argv, const char *prefix)
 {
+	int ret;
 	const char *repo, **refspecs;
 	struct sha1_array merge_heads = SHA1_ARRAY_INIT;
 	unsigned char orig_head[GIT_SHA1_RAWSZ], curr_head[GIT_SHA1_RAWSZ];
@@ -918,11 +1013,16 @@ int cmd_pull(int argc, const char **argv, const char *prefix)
 	if (is_null_sha1(orig_head)) {
 		if (merge_heads.nr > 1)
 			die(_("Cannot merge multiple branches into empty head."));
-		return pull_into_void(*merge_heads.sha1, curr_head);
+		ret = pull_into_void(*merge_heads.sha1, curr_head);
 	} else if (opt_rebase) {
 		if (merge_heads.nr > 1)
 			die(_("Cannot rebase onto multiple branches."));
-		return run_rebase(curr_head, *merge_heads.sha1, rebase_fork_point);
+		ret = run_rebase(curr_head, *merge_heads.sha1, rebase_fork_point);
 	} else
-		return run_merge();
+		ret = run_merge();
+
+	if (opt_set_upstream && ret < 128)
+		set_pull_upstream(repo, refspecs);
+
+	return ret;
 }
diff --git a/t/t5544-pull-upstream.sh b/t/t5544-pull-upstream.sh
new file mode 100755
index 0000000..59f009d
--- /dev/null
+++ b/t/t5544-pull-upstream.sh
@@ -0,0 +1,164 @@
+#!/bin/sh
+
+test_description='pull with --set-upstream'
+. ./test-lib.sh
+. "$TEST_DIRECTORY"/lib-terminal.sh
+
+test_config_unchanged () {
+	git config --list --local >original
+	"$@"
+	git config --list --local >modified
+	test_cmp original modified
+}
+
+check_config () {
+	(echo "$2"; echo "$3") >expect
+	(git config branch.$1.remote
+	 git config branch.$1.merge) >actual
+	test_cmp expect actual
+}
+
+test_expect_success 'setup repos' '
+	git init parent &&
+	(
+		cd parent &&
+		echo content >file &&
+		git add file &&
+		git commit -am one &&
+		git tag initial_tag &&
+		git checkout -b master2 &&
+		echo content_modified >file &&
+		git commit -am "file modification"
+		git checkout -b other master &&
+		echo content >file2 &&
+		git add file2 &&
+		git commit -am two &&
+		git checkout -b other2
+	) &&
+	git init step_parent &&
+	(
+		cd step_parent &&
+		echo step_content >step_file &&
+		git add step_file &&
+		git commit -m step_one
+	) &&
+	git remote add upstream parent &&
+	git remote add step_upstream step_parent &&
+	git pull upstream master &&
+	git branch other
+'
+
+test_expect_success 'pull -u master' '
+	git pull -u upstream master &&
+	check_config master upstream refs/heads/master
+'
+
+test_expect_success 'pull -u takes the last branch as upstream' '
+	test_might_fail git config --unset branch.master.merge &&
+	test_might_fail git config --unset branch.master.remote &&
+	git pull -u upstream master master2 &&
+	check_config master upstream refs/heads/master2
+'
+
+test_expect_success 'pull -u master:other' '
+	git pull -u upstream master:other &&
+	check_config other upstream refs/heads/master
+'
+
+
+test_expect_success 'pull -u tracking non-local branch' '
+	git checkout -b master2_local &&
+	git pull -u upstream master2 &&
+	check_config master2_local upstream refs/heads/master2
+'
+
+
+test_expect_success 'pull -u --dry-run other:other' '
+	git config branch.other.merge refs/heads/master &&
+	git config branch.other.remote upstream &&
+	git pull -u --dry-run upstream other:other &&
+	check_config other upstream refs/heads/master
+'
+
+test_expect_success 'pull -u master2:master2 master:other' '
+	git branch master2 &&
+	git pull -u upstream master2:master2 master:other &&
+	check_config master2 upstream refs/heads/master2 &&
+	check_config other upstream refs/heads/master
+'
+
+test_expect_success 'pull -u HEAD does not track origin/HEAD nor remote HEAD on origin' '
+	git checkout -b other_head master &&
+	git fetch upstream other &&
+	git remote set-head upstream other &&
+	test_config_unchanged git pull -u upstream HEAD
+'
+
+test_expect_success 'pull -u sets upstream when merge conflicts occur' '
+	git checkout -b master_edited master &&
+	echo conflict >file2 &&
+	git add file2 &&
+	git commit -am conflict &&
+	test_must_fail git pull -u upstream other &&
+	git rm file2 &&
+	git commit &&
+	check_config master_edited upstream refs/heads/other
+'
+
+test_expect_success 'pull -u should not work when merging unrelated histories' '
+	git checkout master &&
+	test_config_unchanged test_must_fail git pull -u step_parent master
+'
+
+test_expect_success 'pull -u sets upstream after rebasing' '
+	git checkout -b other_rebased other &&
+	git config branch.other_rebased.merge master &&
+	ls .git/refs/remotes/upstream &&
+	git pull -r -u upstream master2 &&
+	git branch --set-upstream-to=upstream/master2 &&
+	ls .git/refs/remotes/upstream &&
+	check_config other_rebased upstream refs/heads/master2
+'
+
+test_expect_success 'pull -u refs/heads/*:refs/remotes/origin/* should not work' '
+	git checkout master &&
+	test_config_unchanged git pull -u upstream "refs/heads/*:refs/remotes/upstream/*"
+'
+
+test_expect_success 'pull -u master:refs/remotes/origin/master should not work' '
+	test_config_unchanged git pull -u upstream master:refs/remotes/upstream/master
+'
+
+test_expect_success 'pull -u with a tag should not work' '
+	git checkout master &&
+	test_config_unchanged git pull -u upstream initial_tag
+'
+
+test_expect_success 'pull -u on detached head should not work' '
+	git checkout HEAD^0 &&
+	test_config_unchanged git pull -u upstream master2 &&
+	git checkout -
+'
+
+test_expect_success 'pull -u with an unconfigured remote should not work' '
+	git checkout master &&
+	git clone parent unconfigured_parent &&
+	test_config_unchanged git pull -u "$(pwd)/unconfigured_parent" other2
+'
+
+test_expect_success 'pull -u with a modified remote.fetch' '
+	git remote add origin_modified parent &&
+	git push upstream master:custom_branch &&
+	git config remote.origin_modified.fetch "+refs/heads/*:refs/remotes/custom/*" &&
+	git checkout -b lonely_branch master &&
+	git pull -u origin_modified custom_branch &&
+	check_config lonely_branch origin_modified refs/heads/custom_branch
+'
+
+test_expect_success TTY 'quiet pull' '
+	git checkout master &&
+	test_terminal git pull -u --quiet upstream master 2>&1 | tee output &&
+	test_cmp /dev/null output
+'
+
+test_done
-- 
2.8.2.662.g22d3535.dirty

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [RFC/PATCH v2] pull: add --set-upstream
  2016-06-06  9:34 ` [RFC/PATCH v2] pull: add --set-upstream Erwan Mathoniere
@ 2016-06-06 15:54   ` Matthieu Moy
  2016-06-06 19:06     ` Junio C Hamano
  2016-06-07 12:43     ` Erwan Mathoniere
  2016-06-06 16:29   ` Philip Oakley
  1 sibling, 2 replies; 12+ messages in thread
From: Matthieu Moy @ 2016-06-06 15:54 UTC (permalink / raw)
  To: Erwan Mathoniere; +Cc: git, jordan.de-gea, samuel.groot, tom.russello, gitster

Erwan Mathoniere <erwan.mathoniere@grenoble-inp.org> writes:

> @@ -497,6 +504,10 @@ static void NORETURN die_no_merge_candidates(const char *repo, const char **refs
>  		fprintf(stderr, "\n");
>  		fprintf_ln(stderr, _("If you wish to set tracking information for this branch you can do so with:"));
>  		fprintf(stderr, "\n");
> +		fprintf_ln(stderr, "    git pull -u %s %s", _("<remote>"), _("<branch>"));

I'd rather use the long syntax "--set-upstream" in the advice. It gives
a hint to the user about what the command is actually going to do.

> +static void set_pull_upstream(const char *repo, const char **refspecs_name)
> +{
> +	unsigned char sha1[GIT_SHA1_RAWSZ];

The trend in the codebase is to use object_id instead of these char
sha1[] arrays. See the output of "git log --grep object_id" for details.

> +	strbuf_init(&buf, 0);
> +	refspec = parse_fetch_refspec(nr_refspec, refspecs_name);
> +
> +	for (i = 0; i < nr_refspec; i++) {
> +		if (refspec[i].pattern) {
> +			warning(_("upstream cannot be set for patterns"));
> +			continue;
> +		}
> +
> +		branch = branch_get(refspec[i].dst);
> +		if (!branch || !ref_exists(branch->refname)) {
> +			if (!refspec[i].dst || !*refspec[i].dst)
> +				warning(_("could not set upstream of HEAD when "
> +					"it does not point to any branch."));
> +			else
> +				warning(_("cannot set upstream: "
> +					"'%s' is not a branch"), refspec[i].dst);

Inconsistent message: "could not"/"cannot".

For this kind of code, it greatly helps to have comments refering to the
input syntax for each branch of each conditionals. I'm not familiar with
this part of the code, but if I understood correctly, the above would be

if ()
	/* refspec: <branch>: */
        warning(...);
else
	/* refspec: <branch>:<no-such-branch> */
        warning(...);

I think it would make sense to actually raise an error (i.e. set the
exit status of the "git pull" process to a non-zero value) when such
issue occur. OTOH, "git push --set-upstream" does not even issue a
warning when trying to --set-upstream to a tag for example, so not
raising an error is consistant.

>  int cmd_pull(int argc, const char **argv, const char *prefix)
>  {
> +	int ret;
>  	const char *repo, **refspecs;
>  	struct sha1_array merge_heads = SHA1_ARRAY_INIT;
>  	unsigned char orig_head[GIT_SHA1_RAWSZ], curr_head[GIT_SHA1_RAWSZ];
> @@ -918,11 +1013,16 @@ int cmd_pull(int argc, const char **argv, const char *prefix)
>  	if (is_null_sha1(orig_head)) {
>  		if (merge_heads.nr > 1)
>  			die(_("Cannot merge multiple branches into empty head."));
> -		return pull_into_void(*merge_heads.sha1, curr_head);
> +		ret = pull_into_void(*merge_heads.sha1, curr_head);
>  	} else if (opt_rebase) {
>  		if (merge_heads.nr > 1)
>  			die(_("Cannot rebase onto multiple branches."));
> -		return run_rebase(curr_head, *merge_heads.sha1, rebase_fork_point);
> +		ret = run_rebase(curr_head, *merge_heads.sha1, rebase_fork_point);
>  	} else
> -		return run_merge();
> +		ret = run_merge();
> +
> +	if (opt_set_upstream && ret < 128)

Shouldn't this be "ret == 0"?

> --- /dev/null
> +++ b/t/t5544-pull-upstream.sh
> @@ -0,0 +1,164 @@
> +#!/bin/sh
> +
> +test_description='pull with --set-upstream'
> +. ./test-lib.sh
> +. "$TEST_DIRECTORY"/lib-terminal.sh
> +
> +test_config_unchanged () {
> +	git config --list --local >original
> +	"$@"
> +	git config --list --local >modified
> +	test_cmp original modified
> +}

The test passes if "$@" fails. You should &&-chain the lines here to
catch things like crashes or unexpected "exit 1" in git.

> +check_config () {

Perhaps "check_upstream" is more expressive.

> +	(echo "$2"; echo "$3") >expect

What happened to Junio's suggestion with test_write_lines?

> +test_expect_success 'setup repos' '
> +	git init parent &&
> +	(
> +		cd parent &&
> +		echo content >file &&
> +		git add file &&
> +		git commit -am one &&

test_commit can make this code simpler.

> +		echo content_modified >file &&
> +		git commit -am "file modification"

Likewise.

> +test_expect_success 'pull -u master' '
> +	git pull -u upstream master &&
> +	check_config master upstream refs/heads/master
> +'
> +
> +test_expect_success 'pull -u takes the last branch as upstream' '
> +	test_might_fail git config --unset branch.master.merge &&
> +	test_might_fail git config --unset branch.master.remote &&
> +	git pull -u upstream master master2 &&
> +	check_config master upstream refs/heads/master2
> +'
> +
> +test_expect_success 'pull -u master:other' '
> +	git pull -u upstream master:other &&
> +	check_config other upstream refs/heads/master
> +'
> +
> +

Nit: two blank lines instead of one.

> +test_expect_success 'pull -u refs/heads/*:refs/remotes/origin/* should not work' '
> +	git checkout master &&
> +	test_config_unchanged git pull -u upstream "refs/heads/*:refs/remotes/upstream/*"
> +'
> +
> +test_expect_success 'pull -u master:refs/remotes/origin/master should not work' '
> +	test_config_unchanged git pull -u upstream master:refs/remotes/upstream/master
> +'
> +
> +test_expect_success 'pull -u with a tag should not work' '
> +	git checkout master &&
> +	test_config_unchanged git pull -u upstream initial_tag
> +'
> +
> +test_expect_success 'pull -u on detached head should not work' '
> +	git checkout HEAD^0 &&
> +	test_config_unchanged git pull -u upstream master2 &&
> +	git checkout -
> +'

For all these "test_config_unchanged", it would be nice to check that
the error message is the right one too.

-- 
Matthieu Moy
http://www-verimag.imag.fr/~moy/

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC/PATCH v2] pull: add --set-upstream
  2016-06-06 15:54   ` Matthieu Moy
@ 2016-06-06 19:06     ` Junio C Hamano
  2016-06-07  7:06       ` Matthieu Moy
  2016-06-07 13:15       ` Erwan Mathoniere
  2016-06-07 12:43     ` Erwan Mathoniere
  1 sibling, 2 replies; 12+ messages in thread
From: Junio C Hamano @ 2016-06-06 19:06 UTC (permalink / raw)
  To: Matthieu Moy
  Cc: Erwan Mathoniere, git, jordan.de-gea, samuel.groot, tom.russello

Matthieu Moy <Matthieu.Moy@grenoble-inp.fr> writes:

>> +test_config_unchanged () {
>> +	git config --list --local >original
>> +	"$@"
>> +	git config --list --local >modified
>> +	test_cmp original modified
>> +}
>
> The test passes if "$@" fails. You should &&-chain the lines here to
> catch things like crashes or unexpected "exit 1" in git.

That is true, but allowing "$@" failure may be deliberate.  After
all, "git pull -u there that", when pulling that from there fails,
may not want to update the upstream tracking information.

But I am unhappy with a more serious problem with the tests in this
patch.  They assume that "-u" option will forever be the only thing
that is allowed to modify the configuration during "git pull -u".
It should never make such an assumption.

The only thing these additional tests later in the patch (ommitted)
want to check, if I understand them correctly, is that when -u is
used on a ref that shouldn't be tracked from the given remote then 
remote.<that remote>.merge etc. are not updated.  Make a list of the
configuration variables the feature cares about, and check them and
ignore changes to any other variable.  Somebody else's feature that
will be added to "git pull" may have legitimate reason to update
configuration variables that are not releated to this feature, and
you shouldn't be writing your test for your feature in such a way
to forbid such a new feature by others from being added.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC/PATCH v2] pull: add --set-upstream
  2016-06-06 19:06     ` Junio C Hamano
@ 2016-06-07  7:06       ` Matthieu Moy
  2016-06-07 12:54         ` Erwan Mathoniere
  2016-06-07 13:15       ` Erwan Mathoniere
  1 sibling, 1 reply; 12+ messages in thread
From: Matthieu Moy @ 2016-06-07  7:06 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Erwan Mathoniere, git, jordan.de-gea, samuel.groot, tom.russello

Junio C Hamano <gitster@pobox.com> writes:

> Matthieu Moy <Matthieu.Moy@grenoble-inp.fr> writes:
>
>>> +test_config_unchanged () {
>>> +	git config --list --local >original
>>> +	"$@"
>>> +	git config --list --local >modified
>>> +	test_cmp original modified
>>> +}
>>
>> The test passes if "$@" fails. You should &&-chain the lines here to
>> catch things like crashes or unexpected "exit 1" in git.
>
> That is true, but allowing "$@" failure may be deliberate. 

I don't think so:

+test_expect_success 'pull -u should not work when merging unrelated histories' '
+	git checkout master &&
+	test_config_unchanged test_must_fail git pull -u step_parent master
+'

;-)

-- 
Matthieu Moy
http://www-verimag.imag.fr/~moy/

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC/PATCH v2] pull: add --set-upstream
  2016-06-07  7:06       ` Matthieu Moy
@ 2016-06-07 12:54         ` Erwan Mathoniere
  0 siblings, 0 replies; 12+ messages in thread
From: Erwan Mathoniere @ 2016-06-07 12:54 UTC (permalink / raw)
  To: Matthieu Moy, Junio C Hamano
  Cc: git, jordan.de-gea, samuel.groot, tom.russello



On 07/06/2016 09:06, Matthieu Moy wrote:
> Junio C Hamano <gitster@pobox.com> writes:
>
>> Matthieu Moy <Matthieu.Moy@grenoble-inp.fr> writes:
>>
>>>> +test_config_unchanged () {
>>>> +	git config --list --local >original
>>>> +	"$@"
>>>> +	git config --list --local >modified
>>>> +	test_cmp original modified
>>>> +}
>>>
>>> The test passes if "$@" fails. You should &&-chain the lines here to
>>> catch things like crashes or unexpected "exit 1" in git.
>>
>> That is true, but allowing "$@" failure may be deliberate.
>
> I don't think so:
>
> +test_expect_success 'pull -u should not work when merging unrelated histories' '
> +	git checkout master &&
> +	test_config_unchanged test_must_fail git pull -u step_parent master
> +'
>
> ;-)
>

When writing `test_config_unchanged` procedure, I wanted to return the 
comparison even if "$@" failed. But it's indeed not consistent with the 
way I wrote the tests.

Adding "&&" between instructions and using `test_must_fail` when calling 
the procedure is clearer and more logical.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC/PATCH v2] pull: add --set-upstream
  2016-06-06 19:06     ` Junio C Hamano
  2016-06-07  7:06       ` Matthieu Moy
@ 2016-06-07 13:15       ` Erwan Mathoniere
  1 sibling, 0 replies; 12+ messages in thread
From: Erwan Mathoniere @ 2016-06-07 13:15 UTC (permalink / raw)
  To: Junio C Hamano, Matthieu Moy
  Cc: git, jordan.de-gea, samuel.groot, tom.russello



On 06/06/2016 21:06, Junio C Hamano wrote:
>
> But I am unhappy with a more serious problem with the tests in this
> patch.  They assume that "-u" option will forever be the only thing
> that is allowed to modify the configuration during "git pull -u".
> It should never make such an assumption.
>
> The only thing these additional tests later in the patch (ommitted)
> want to check, if I understand them correctly, is that when -u is
> used on a ref that shouldn't be tracked from the given remote then
> remote.<that remote>.merge etc. are not updated.  Make a list of the
> configuration variables the feature cares about, and check them and
> ignore changes to any other variable.  Somebody else's feature that
> will be added to "git pull" may have legitimate reason to update
> configuration variables that are not releated to this feature, and
> you shouldn't be writing your test for your feature in such a way
> to forbid such a new feature by others from being added.

I asked myself these questions but I came to the wrong conclusion since 
I considered that testing if `--set-upstream` doesn't alter any 
configuration var it shouldn't was also important. But there is no 
reason "git pull -u" modify the configuration in such a chaotic way.

I'll apply your suggestions, thanks.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC/PATCH v2] pull: add --set-upstream
  2016-06-06 15:54   ` Matthieu Moy
  2016-06-06 19:06     ` Junio C Hamano
@ 2016-06-07 12:43     ` Erwan Mathoniere
  1 sibling, 0 replies; 12+ messages in thread
From: Erwan Mathoniere @ 2016-06-07 12:43 UTC (permalink / raw)
  To: Matthieu Moy; +Cc: git, jordan.de-gea, samuel.groot, tom.russello, gitster



On 06/06/2016 17:54, Matthieu Moy wrote:
> Erwan Mathoniere <erwan.mathoniere@grenoble-inp.org> writes:
>
>> @@ -497,6 +504,10 @@ static void NORETURN die_no_merge_candidates(const char *repo, const char **refs
>>  		fprintf(stderr, "\n");
>>  		fprintf_ln(stderr, _("If you wish to set tracking information for this branch you can do so with:"));
>>  		fprintf(stderr, "\n");
>> +		fprintf_ln(stderr, "    git pull -u %s %s", _("<remote>"), _("<branch>"));
>
> I'd rather use the long syntax "--set-upstream" in the advice. It gives
> a hint to the user about what the command is actually going to do.

You're right, I'll change it.

>> +static void set_pull_upstream(const char *repo, const char **refspecs_name)
>> +{
>> +	unsigned char sha1[GIT_SHA1_RAWSZ];
>
> The trend in the codebase is to use object_id instead of these char
> sha1[] arrays. See the output of "git log --grep object_id" for details.

I'll dig into it, thanks.

>> +	strbuf_init(&buf, 0);
>> +	refspec = parse_fetch_refspec(nr_refspec, refspecs_name);
>> +
>> +	for (i = 0; i < nr_refspec; i++) {
>> +		if (refspec[i].pattern) {
>> +			warning(_("upstream cannot be set for patterns"));
>> +			continue;
>> +		}
>> +
>> +		branch = branch_get(refspec[i].dst);
>> +		if (!branch || !ref_exists(branch->refname)) {
>> +			if (!refspec[i].dst || !*refspec[i].dst)
>> +				warning(_("could not set upstream of HEAD when "
>> +					"it does not point to any branch."));
>> +			else
>> +				warning(_("cannot set upstream: "
>> +					"'%s' is not a branch"), refspec[i].dst);
>
> Inconsistent message: "could not"/"cannot".

I copied/pasted the first warning from another portion of code in order 
to avoid useless translation work, but it isn't really relevant in that 
case.

> For this kind of code, it greatly helps to have comments refering to the
> input syntax for each branch of each conditionals. I'm not familiar with
> this part of the code, but if I understood correctly, the above would be
>
> if ()
> 	/* refspec: <branch>: */
>         warning(...);
> else
> 	/* refspec: <branch>:<no-such-branch> */
>         warning(...);

Good idea, some part aren't so easy to link to the input.

> I think it would make sense to actually raise an error (i.e. set the
> exit status of the "git pull" process to a non-zero value) when such
> issue occur. OTOH, "git push --set-upstream" does not even issue a
> warning when trying to --set-upstream to a tag for example, so not
> raising an error is consistant.

Since the idea is to have a similar behavior to `git push 
--set-upstream`, I think it's not relevant to raise an error when `git 
push` doesn't. And for a minor feature, it seems disproportionate to 
consider a `git pull -u ...` as a failure when the entire process 
succeeded except for the upstream part.

>>  int cmd_pull(int argc, const char **argv, const char *prefix)
>>  {
>> +	int ret;
>>  	const char *repo, **refspecs;
>>  	struct sha1_array merge_heads = SHA1_ARRAY_INIT;
>>  	unsigned char orig_head[GIT_SHA1_RAWSZ], curr_head[GIT_SHA1_RAWSZ];
>> @@ -918,11 +1013,16 @@ int cmd_pull(int argc, const char **argv, const char *prefix)
>>  	if (is_null_sha1(orig_head)) {
>>  		if (merge_heads.nr > 1)
>>  			die(_("Cannot merge multiple branches into empty head."));
>> -		return pull_into_void(*merge_heads.sha1, curr_head);
>> +		ret = pull_into_void(*merge_heads.sha1, curr_head);
>>  	} else if (opt_rebase) {
>>  		if (merge_heads.nr > 1)
>>  			die(_("Cannot rebase onto multiple branches."));
>> -		return run_rebase(curr_head, *merge_heads.sha1, rebase_fork_point);
>> +		ret = run_rebase(curr_head, *merge_heads.sha1, rebase_fork_point);
>>  	} else
>> -		return run_merge();
>> +		ret = run_merge();
>> +
>> +	if (opt_set_upstream && ret < 128)
>
> Shouldn't this be "ret == 0"?

The feature activates after rebasing/merging. When there are merge 
conflicts, `run_merge` doesn't return a non-null error code and we want 
to set upstream even in that case.
On the other hand, when merge fails (e.g. histories aren't related) it 
stops itself by invoking `die` and returning a >= 128 error code. We 
don't want to proceed in that case.

>> +check_config () {
>
> Perhaps "check_upstream" is more expressive.

I took this procedure from t5523-push-upstream, but you're right it's 
more relevant.

>> +	(echo "$2"; echo "$3") >expect
>
> What happened to Junio's suggestion with test_write_lines?

Oups,  small oversight.

>> +test_expect_success 'setup repos' '
>> +	git init parent &&
>> +	(
>> +		cd parent &&
>> +		echo content >file &&
>> +		git add file &&
>> +		git commit -am one &&
>
> test_commit can make this code simpler.
>
>> +		echo content_modified >file &&
>> +		git commit -am "file modification"
>
> Likewise.
>

Thanks, I'll take a look at it.

>> +test_expect_success 'pull -u with a tag should not work' '
>> +	git checkout master &&
>> +	test_config_unchanged git pull -u upstream initial_tag
>> +'
>> +
>> +test_expect_success 'pull -u on detached head should not work' '
>> +	git checkout HEAD^0 &&
>> +	test_config_unchanged git pull -u upstream master2 &&
>> +	git checkout -
>> +'
>
> For all these "test_config_unchanged", it would be nice to check that
> the error message is the right one too.

Ok, I'll add checks on that.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC/PATCH v2] pull: add --set-upstream
  2016-06-06  9:34 ` [RFC/PATCH v2] pull: add --set-upstream Erwan Mathoniere
  2016-06-06 15:54   ` Matthieu Moy
@ 2016-06-06 16:29   ` Philip Oakley
  2016-06-07 13:42     ` Erwan Mathoniere
  1 sibling, 1 reply; 12+ messages in thread
From: Philip Oakley @ 2016-06-06 16:29 UTC (permalink / raw)
  To: Erwan Mathoniere, git
  Cc: jordan.de-gea, samuel.groot, erwan.mathoniere, tom.russello,
	gitster, Matthieu Moy

From: "Erwan Mathoniere" <erwan.mathoniere@grenoble-inp.org>
> Implement `git pull [--set-upstream | -u] <remote> <refspecs>` that set
> tracking to the remote branch the user just pulled from.
>
> After successfully pulling from `<remote>`, for each `<refspec>`
> described in format `<remote_branch>:<local_branch>`, set
> `branch.<local_branch>.remote` to `<remote>` and
> `branch.<local_branch>.merge` to `refs/heads/<remote_branch>`. If
> `<refspec>` lacks `<local_branch>` in the previous format or directly
> refers to a branch, use the current branch as `<local_branch>` in the
> above configuration setting.
>
> `git push` has already its `--set-upstream`, it makes sense to have its
> symmetrical for `git pull`.
>
> For a beginner, when trying to use argumentless `git pull` without
> tracking information set, advising to use
> `git branch --set-upstream-to` to set upstream can be quite confusing.
> Using this `git pull --set-upstream` is easier and more natural.
>
> Signed-off-by: Erwan Mathoniere <erwan.mathoniere@grenoble-inp.org>
> Signed-off-by: Jordan De Gea <jordan.de-gea@grenoble-inp.org>
> Signed-off-by: Matthieu Moy <matthieu.moy@grenoble-inp.fr>
> ---
>
> Changes from v1:
> - Code reshaped to :
>  * warn + no-op when pulling from or to something that isn't a branch
> or a configured remote
>  * set upstream only after successfully merging/rebasing
> - More relevant documentation
> - Tests reshaped to be more independent from each others
> - More tests (tags, detached heads, non-configured remote...)
>
>
> For now, the documentation is quite hard to understand, but I didn't
> figure how to explain without using too technical words. Should it stay
> as it is or should I write something similar the above commit message?
>
> Allowing to set non-configured repository as upstream isn't easy to
> handle since the type of refspec must be checked and this is done by
> verifying the existence of the remote-tracking branch at
> `refs/remotes/<remote>/<branch>`.
>
>
> Documentation/git-pull.txt |  18 +++++
> builtin/pull.c             | 106 ++++++++++++++++++++++++++++-
> t/t5544-pull-upstream.sh   | 164 
> +++++++++++++++++++++++++++++++++++++++++++++
> 3 files changed, 285 insertions(+), 3 deletions(-)
> create mode 100755 t/t5544-pull-upstream.sh
>
> diff --git a/Documentation/git-pull.txt b/Documentation/git-pull.txt
> index d033b25..6ae5e58 100644
> --- a/Documentation/git-pull.txt
> +++ b/Documentation/git-pull.txt
> @@ -93,6 +93,24 @@ OPTIONS
>  has to be called afterwards to bring the work tree up to date with the
>  merge result.
>
> +-u::
> +--set-upstream::
> + After successfully pulling from explicitly given <repository> and

s/from explicitly/from an explicitly/

> + <refspecs>, set the configuration of the local branches pulled on, so

s/branches pulled on/branches that were pulled/

> + that each one tracks the remote branch pulled from. If a configuration
> + already exists, it is overwriten. For example, with `git pull -u origin
> + branch` the current branch will track `branch` from `origin`.
> ++
> +If two or more branches are pulled on the same local branch, only the 
> last one
> +in arguments will be tracked.

Is this specific to this pull --setupstream or a general worning ? i.e. that 
a second entry is created in the config file, or that only the last branch 
refspec will be added?

> ++
> +The given <repository> must be a configured remote. Can only set tracking 
> to
> +remote branches (e.g. can't set upstream to remote HEAD).
> ++
> +Works symmetrically as `--set-upstream` for linkgit:git-push[1]. Allow 
> using
> +argumentless linkgit:git-pull[1] and other commands.  For more 
> information, see
> +`branch.<name>.merge` in linkgit:git-config[1].
> +
> Options related to merging
> ~~~~~~~~~~~~~~~~~~~~~~~~~~
>
> diff --git a/builtin/pull.c b/builtin/pull.c
[snip] 

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC/PATCH v2] pull: add --set-upstream
  2016-06-06 16:29   ` Philip Oakley
@ 2016-06-07 13:42     ` Erwan Mathoniere
  0 siblings, 0 replies; 12+ messages in thread
From: Erwan Mathoniere @ 2016-06-07 13:42 UTC (permalink / raw)
  To: Philip Oakley, git
  Cc: jordan.de-gea, samuel.groot, tom.russello, gitster, Matthieu Moy

On 06/06/2016 18:29, Philip Oakley wrote:
>> + that each one tracks the remote branch pulled from. If a configuration
>> + already exists, it is overwriten. For example, with `git pull -u origin
>> + branch` the current branch will track `branch` from `origin`.
>> ++
>> +If two or more branches are pulled on the same local branch, only the
>> last one
>> +in arguments will be tracked.
>
> Is this specific to this pull --setupstream or a general worning ? i.e.
> that a second entry is created in the config file, or that only the last
> branch refspec will be added?

Only the last branch will be added. More precisely, its behavior is just 
like `git push --set-upstream`. If you do `git push --set-upstream 
master:master master:other`, git will change its configuration twice and 
will print out:

$ git push --set-upstream origin master:master master:other
[...]
Branch master set up to track remote branch master from origin.
Branch master set up to track remote branch other from origin.

And at the end, "master" will only track "other" from origin and both 
"branch.master.{merge, remote}" will be set once.
So for now, `git pull --set-upstream` does the same and for example, on 
master:

$ git pull --set-upstream origin master other
[...]
Branch master set up to track remote branch master from origin.
Branch master set up to track remote branch other from origin.

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2016-06-07 13:42 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2016-05-25 15:25 [RFC/PATCH] pull: set-upstream implementation Erwan Mathoniere
2016-05-25 18:09 ` Junio C Hamano
2016-05-29 20:00   ` Erwan Mathoniere
2016-06-06  9:34 ` [RFC/PATCH v2] pull: add --set-upstream Erwan Mathoniere
2016-06-06 15:54   ` Matthieu Moy
2016-06-06 19:06     ` Junio C Hamano
2016-06-07  7:06       ` Matthieu Moy
2016-06-07 12:54         ` Erwan Mathoniere
2016-06-07 13:15       ` Erwan Mathoniere
2016-06-07 12:43     ` Erwan Mathoniere
2016-06-06 16:29   ` Philip Oakley
2016-06-07 13:42     ` Erwan Mathoniere

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).