git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] git-fetch: avoid local fetching from alternate (again)
@ 2007-11-07  2:41 Shawn O. Pearce
  2007-11-07  6:24 ` Junio C Hamano
  2007-11-07  8:12 ` Johannes Sixt
  0 siblings, 2 replies; 7+ messages in thread
From: Shawn O. Pearce @ 2007-11-07  2:41 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

Back in e3c6f240fd9c5bdeb33f2d47adc859f37935e2df Junio taught
git-fetch to avoid copying objects when we are fetching from
a repository that is already registered as an alternate object
database.  In such a case there is no reason to copy any objects
as we can already obtain them through the alternate.

However we need to ensure the objects are all reachable, so we
run `git rev-list --objects $theirs --not --all` to verify this.
If any object is missing or unreadable then we need to instead copy
the objects from the remote.  When a missing object is detected
the git-rev-list process will exit with a non-zero exit status,
making this condition quite easy to detect.

Although git-fetch is currently a builtin (and so is rev-list) we
really cannot invoke the traverse_objects() API at this point in the
transport code.  The object walker within traverse_objects() calls
die() as soon as it finds an object it cannot read.  If that happens
we want to resume the fetch process by calling do_fetch_pack(),
instead of terminating.  To get aroaund this we spawn git-rev-list
into a background process to prevent a die() from killing the
foreground fetch process.

We aren't interested in the output of rev-list (a list of SHA-1
object names that are reachable) or its errors (a "spurious" error
about an object not being found as we need to copy it) so we redirect
both stdout and stderr to /dev/null.

Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
---
 run-command.c |    6 ++++--
 run-command.h |    1 +
 transport.c   |   48 +++++++++++++++++++++++++++++++++++++++++++++---
 3 files changed, 50 insertions(+), 5 deletions(-)

diff --git a/run-command.c b/run-command.c
index d99a6c4..476d00c 100644
--- a/run-command.c
+++ b/run-command.c
@@ -41,7 +41,7 @@ int start_command(struct child_process *cmd)
 		cmd->close_out = 1;
 	}
 
-	need_err = cmd->err < 0;
+	need_err = !cmd->no_stderr && cmd->err < 0;
 	if (need_err) {
 		if (pipe(fderr) < 0) {
 			if (need_in)
@@ -87,7 +87,9 @@ int start_command(struct child_process *cmd)
 			close(cmd->out);
 		}
 
-		if (need_err) {
+		if (cmd->no_stderr)
+			dup_devnull(2);
+		else if (need_err) {
 			dup2(fderr[1], 2);
 			close_pair(fderr);
 		}
diff --git a/run-command.h b/run-command.h
index 94e1e9d..1fc781d 100644
--- a/run-command.h
+++ b/run-command.h
@@ -23,6 +23,7 @@ struct child_process {
 	unsigned close_out:1;
 	unsigned no_stdin:1;
 	unsigned no_stdout:1;
+	unsigned no_stderr:1;
 	unsigned git_cmd:1; /* if this is to be git sub-command */
 	unsigned stdout_to_stderr:1;
 };
diff --git a/transport.c b/transport.c
index f4577b7..8505a84 100644
--- a/transport.c
+++ b/transport.c
@@ -615,17 +615,56 @@ static struct ref *get_refs_via_connect(struct transport *transport)
 	return refs;
 }
 
+static int fetch_local_nocopy(struct transport *transport,
+			       int nr_heads, struct ref **to_fetch)
+{
+	struct stat sb;
+	struct child_process revlist;
+	char **argv;
+	int i, j, err;
+
+	if (stat(transport->url, &sb) || !S_ISDIR(sb.st_mode))
+		return -1;
+
+	i = 0;
+	argv = xmalloc(sizeof(*argv) * (nr_heads + 5));
+	argv[i++] = xstrdup("rev-list");
+	argv[i++] = xstrdup("--objects");
+	for (j = 0; j < nr_heads; j++)
+		argv[i++] = xstrdup(sha1_to_hex(to_fetch[j]->old_sha1));
+	argv[i++] = xstrdup("--not");
+	argv[i++] = xstrdup("--all");
+	argv[i++] = NULL;
+
+	memset(&revlist, 0, sizeof(revlist));
+	revlist.argv = (const char**)argv;
+	revlist.git_cmd = 1;
+	revlist.no_stdin = 1;
+	revlist.no_stdout = 1;
+	revlist.no_stderr = 1;
+	err = start_command(&revlist);
+	if (!err)
+		err |= finish_command(&revlist);
+
+	for (i = 0; argv[i]; i++)
+		free(argv[i]);
+	free(argv);
+	return err;
+}
+
 static int fetch_refs_via_pack(struct transport *transport,
 			       int nr_heads, struct ref **to_fetch)
 {
 	struct git_transport_data *data = transport->data;
-	char **heads = xmalloc(nr_heads * sizeof(*heads));
-	char **origh = xmalloc(nr_heads * sizeof(*origh));
+	char **heads, **origh;
 	struct ref *refs;
-	char *dest = xstrdup(transport->url);
+	char *dest;
 	struct fetch_pack_args args;
 	int i;
 
+	if (!fetch_local_nocopy(transport, nr_heads, to_fetch))
+		return 0;
+
 	memset(&args, 0, sizeof(args));
 	args.uploadpack = data->uploadpack;
 	args.keep_pack = data->keep;
@@ -634,6 +673,9 @@ static int fetch_refs_via_pack(struct transport *transport,
 	args.verbose = transport->verbose > 0;
 	args.depth = data->depth;
 
+	heads = xmalloc(nr_heads * sizeof(*heads));
+	origh = xmalloc(nr_heads * sizeof(*origh));
+	dest = xstrdup(transport->url);
 	for (i = 0; i < nr_heads; i++)
 		origh[i] = heads[i] = xstrdup(to_fetch[i]->name);
 	refs = fetch_pack(&args, dest, nr_heads, heads, &transport->pack_lockfile);
-- 
1.5.3.5.1590.gfadfad

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH] git-fetch: avoid local fetching from alternate (again)
  2007-11-07  2:41 [PATCH] git-fetch: avoid local fetching from alternate (again) Shawn O. Pearce
@ 2007-11-07  6:24 ` Junio C Hamano
  2007-11-07  7:45   ` Junio C Hamano
                     ` (2 more replies)
  2007-11-07  8:12 ` Johannes Sixt
  1 sibling, 3 replies; 7+ messages in thread
From: Junio C Hamano @ 2007-11-07  6:24 UTC (permalink / raw)
  To: Shawn O. Pearce; +Cc: git

"Shawn O. Pearce" <spearce@spearce.org> writes:

> Back in e3c6f240fd9c5bdeb33f2d47adc859f37935e2df Junio taught
> git-fetch to avoid copying objects when we are fetching from
> a repository that is already registered as an alternate object
> database.  In such a case there is no reason to copy any objects
> as we can already obtain them through the alternate.

Well spotted.  It would be a good idea to commit the big comment
from contrib/examples/git-fetch.sh to fetch_local_nocopy()
function, which would have made us realize that the patch does
not refrain from applying this optimization even when shallow
is in effect.  But I think that is actually a good change.

The run-command change and the main part of the fix are
logically independent.

The regression the patch fixes should be testable with a
script.  Please have a new test for it.

Thanks.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] git-fetch: avoid local fetching from alternate (again)
  2007-11-07  6:24 ` Junio C Hamano
@ 2007-11-07  7:45   ` Junio C Hamano
  2007-11-07 20:32   ` Junio C Hamano
  2007-11-08  8:04   ` Shawn O. Pearce
  2 siblings, 0 replies; 7+ messages in thread
From: Junio C Hamano @ 2007-11-07  7:45 UTC (permalink / raw)
  To: Shawn O. Pearce; +Cc: git

Junio C Hamano <gitster@pobox.com> writes:

> "Shawn O. Pearce" <spearce@spearce.org> writes:
>
>> Back in e3c6f240fd9c5bdeb33f2d47adc859f37935e2df Junio taught
>> git-fetch to avoid copying objects when we are fetching from
>> a repository that is already registered as an alternate object
>> database.  In such a case there is no reason to copy any objects
>> as we can already obtain them through the alternate.
>
> Well spotted.  It would be a good idea to commit the big comment
> from contrib/examples/git-fetch.sh to fetch_local_nocopy()
> function, which would have made us realize that the patch does
> not refrain from applying this optimization even when shallow
> is in effect.  But I think that is actually a good change.

Having thought about this further by writing that comment myself
(patch attached), I suspect that the test at the beginning of
the function to see if we are talking to another local
repository is not necessary.  Even if we are _not_ fetching from
the remote, we could have all the necessary objects connected,
albeit the chance of that is rather slim.  But that means the
rev-list test will error out rather quickly because objects near
the tips are more likely to be missing, and if we are talking
about a true remote connection, networking cost will most likely
outweigh the cost to do this checking.

---

 transport.c |   14 ++++++++++++++
 1 files changed, 14 insertions(+), 0 deletions(-)

diff --git a/transport.c b/transport.c
index 0604dc6..a887491 100644
--- a/transport.c
+++ b/transport.c
@@ -614,6 +614,20 @@ static struct ref *get_refs_via_connect(const struct transport *transport)
 	return refs;
 }
 
+/*
+ * We would want to bypass the object transfer altogether if
+ * everything we are going to fetch already exists and connected
+ * locally.
+ *
+ * The refs we are going to fetch are in to_fetch (nr_heads in
+ * total).  If running
+ *
+ *  $ git-rev-list --objects to_fetch[0] to_fetch[1] ... --not --all
+ *
+ * does not error out, that means everything reachable from the
+ * refs we are going to fetch exists and is connected to some of
+ * our existing refs.
+ */
 static int fetch_local_nocopy(struct transport *transport,
 			       int nr_heads, struct ref **to_fetch)
 {

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH] git-fetch: avoid local fetching from alternate (again)
  2007-11-07  2:41 [PATCH] git-fetch: avoid local fetching from alternate (again) Shawn O. Pearce
  2007-11-07  6:24 ` Junio C Hamano
@ 2007-11-07  8:12 ` Johannes Sixt
  1 sibling, 0 replies; 7+ messages in thread
From: Johannes Sixt @ 2007-11-07  8:12 UTC (permalink / raw)
  To: Shawn O. Pearce; +Cc: Junio C Hamano, git

Shawn O. Pearce schrieb:
> +	err = start_command(&revlist);
> +	if (!err)
> +		err |= finish_command(&revlist);

There's a short-hand:

	err = run_command(&revlist);

-- Hannes

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] git-fetch: avoid local fetching from alternate (again)
  2007-11-07  6:24 ` Junio C Hamano
  2007-11-07  7:45   ` Junio C Hamano
@ 2007-11-07 20:32   ` Junio C Hamano
  2007-11-08  7:35     ` Shawn O. Pearce
  2007-11-08  8:04   ` Shawn O. Pearce
  2 siblings, 1 reply; 7+ messages in thread
From: Junio C Hamano @ 2007-11-07 20:32 UTC (permalink / raw)
  To: Shawn O. Pearce; +Cc: git

Junio C Hamano <gitster@pobox.com> writes:

> Well spotted.  It would be a good idea to commit the big comment
> from contrib/examples/git-fetch.sh to fetch_local_nocopy()
> function, which would have made us realize that the patch does
> not refrain from applying this optimization even when shallow
> is in effect.  But I think that is actually a good change.

I take this back.  This regresses badly.

Why?

Because the optimization is useless when we are trying to deepen
the shallow history.  When you are trying to deepen a shallow
history and the tips of remotes haven't moved since you fetched
from there the last time, you have everything near the tip, and
becuse your history is shallow, your ancestry chain is
cauterized to make it appear that the history is complete.  The
rev-list reachability test would not fail as we expect.

The breakage can be seen with t5500.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] git-fetch: avoid local fetching from alternate (again)
  2007-11-07 20:32   ` Junio C Hamano
@ 2007-11-08  7:35     ` Shawn O. Pearce
  0 siblings, 0 replies; 7+ messages in thread
From: Shawn O. Pearce @ 2007-11-08  7:35 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

Junio C Hamano <gitster@pobox.com> wrote:
> Junio C Hamano <gitster@pobox.com> writes:
> 
> > Well spotted.  It would be a good idea to commit the big comment
> > from contrib/examples/git-fetch.sh to fetch_local_nocopy()
> > function, which would have made us realize that the patch does
> > not refrain from applying this optimization even when shallow
> > is in effect.  But I think that is actually a good change.
> 
> I take this back.  This regresses badly.
> 
> Why?

Whoops.  Good catch.  This is why you had a check for the shallow
history file in git-fetch.sh before you took this optimization path.
My fault for not including it in this patch.
 
> Because the optimization is useless when we are trying to deepen
> the shallow history.  When you are trying to deepen a shallow
> history and the tips of remotes haven't moved since you fetched
> from there the last time, you have everything near the tip, and
> becuse your history is shallow, your ancestry chain is
> cauterized to make it appear that the history is complete.  The
> rev-list reachability test would not fail as we expect.

What about just inserting a check to see if --depth was supplied
to git-fetch on the command line?  If so then we are deepening the
history and we just bypass the rev-list "fast path" test.

I will be posting an updated patch (series now) shortly.

-- 
Shawn.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] git-fetch: avoid local fetching from alternate (again)
  2007-11-07  6:24 ` Junio C Hamano
  2007-11-07  7:45   ` Junio C Hamano
  2007-11-07 20:32   ` Junio C Hamano
@ 2007-11-08  8:04   ` Shawn O. Pearce
  2 siblings, 0 replies; 7+ messages in thread
From: Shawn O. Pearce @ 2007-11-08  8:04 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

Junio C Hamano <gitster@pobox.com> wrote:
> "Shawn O. Pearce" <spearce@spearce.org> writes:
> 
> > Back in e3c6f240fd9c5bdeb33f2d47adc859f37935e2df Junio taught
> > git-fetch to avoid copying objects when we are fetching from
> > a repository that is already registered as an alternate object
> > database.  In such a case there is no reason to copy any objects
> > as we can already obtain them through the alternate.
> 
> The regression the patch fixes should be testable with a
> script.  Please have a new test for it.

Hmmph.  t5502-quickfetch should have covered this.  It obviously
wasn't testing the right thing here.  I'll figure out why and post
a patch to fix t5502.

-- 
Shawn.

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2007-11-08  8:05 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-11-07  2:41 [PATCH] git-fetch: avoid local fetching from alternate (again) Shawn O. Pearce
2007-11-07  6:24 ` Junio C Hamano
2007-11-07  7:45   ` Junio C Hamano
2007-11-07 20:32   ` Junio C Hamano
2007-11-08  7:35     ` Shawn O. Pearce
2007-11-08  8:04   ` Shawn O. Pearce
2007-11-07  8:12 ` Johannes Sixt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).