git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC PATCH] Reduce the number of connects when fetching
@ 2007-11-04 21:28 Daniel Barkalow
  2007-11-06  1:51 ` Junio C Hamano
  0 siblings, 1 reply; 6+ messages in thread
From: Daniel Barkalow @ 2007-11-04 21:28 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

This shares the connection between getting the remote ref list and
getting objects in the first batch. (A second connection is still used
to follow tags)

It passes all of the tests, and should be fine, but I don't entirely 
understand the git protocol, so I'd like people who know it better to 
check this over. In particular, I don't know if there's a way to have the 
connection end up in a state where objects for more refs can be requested 
after some refs have been requested and the resulting objects read.

The idea is to keep the open connection in the data for the transport in 
between getting the list of refs and doing anything further. This 
therefore moves the connection-handling aspects outside of fetch-pack() 
and handles them primarily in transport.c.

This is on top of current next.
---
 builtin-fetch-pack.c |   74 ++++++++++++++++++++++++++-----------------------
 fetch-pack.h         |    2 +
 transport.c          |   41 +++++++++++++++++++--------
 3 files changed, 70 insertions(+), 47 deletions(-)

diff --git a/builtin-fetch-pack.c b/builtin-fetch-pack.c
index 862652b..7783c05 100644
--- a/builtin-fetch-pack.c
+++ b/builtin-fetch-pack.c
@@ -7,6 +7,7 @@
 #include "pack.h"
 #include "sideband.h"
 #include "fetch-pack.h"
+#include "remote.h"
 #include "run-command.h"
 
 static int transfer_unpack_limit = -1;
@@ -558,14 +559,14 @@ static int get_pack(int xd[2], char **pack_lockfile)
 }
 
 static struct ref *do_fetch_pack(int fd[2],
+		const struct ref *orig_ref,
 		int nr_match,
 		char **match,
 		char **pack_lockfile)
 {
-	struct ref *ref;
+	struct ref *ref = copy_ref_list(orig_ref);
 	unsigned char sha1[20];
 
-	get_remote_heads(fd[0], &ref, 0, NULL, 0);
 	if (is_repository_shallow() && !server_supports("shallow"))
 		die("Server does not support shallow clients");
 	if (server_supports("multi_ack")) {
@@ -583,10 +584,6 @@ static struct ref *do_fetch_pack(int fd[2],
 			fprintf(stderr, "Server supports side-band\n");
 		use_sideband = 1;
 	}
-	if (!ref) {
-		packet_flush(fd[1]);
-		die("no matching remote head");
-	}
 	if (everything_local(&ref, nr_match, match)) {
 		packet_flush(fd[1]);
 		goto all_done;
@@ -660,7 +657,7 @@ static void fetch_pack_setup(void)
 int cmd_fetch_pack(int argc, const char **argv, const char *prefix)
 {
 	int i, ret, nr_heads;
-	struct ref *ref;
+	struct ref *ref = NULL;
 	char *dest = NULL, **heads;
 
 	nr_heads = 0;
@@ -716,9 +713,34 @@ int cmd_fetch_pack(int argc, const char **argv, const char *prefix)
 	if (!dest)
 		usage(fetch_pack_usage);
 
-	ref = fetch_pack(&args, dest, nr_heads, heads, NULL);
+	int fd[2];
+	struct child_process *conn = git_connect(fd, (char *)dest, args.uploadpack,
+                          args.verbose ? CONNECT_VERBOSE : 0);
+	if (conn) {
+		get_remote_heads(fd[0], &ref, 0, NULL, 0);
+
+		ref = fetch_pack(&args, fd, conn, ref, dest, nr_heads, heads, NULL);
+		close(fd[0]);
+		close(fd[1]);
+		if (finish_connect(conn))
+			ref = NULL;
+	} else {
+		ref = NULL;
+	}
 	ret = !ref;
 
+	if (!ret && nr_heads) {
+		/* If the heads to pull were given, we should have
+		 * consumed all of them by matching the remote.
+		 * Otherwise, 'git-fetch remote no-such-ref' would
+		 * silently succeed without issuing an error.
+		 */
+		for (i = 0; i < nr_heads; i++)
+			if (heads[i] && heads[i][0]) {
+				error("no such remote ref %s", heads[i]);
+				ret = 1;
+			}
+	}
 	while (ref) {
 		printf("%s %s\n",
 		       sha1_to_hex(ref->old_sha1), ref->name);
@@ -729,16 +751,15 @@ int cmd_fetch_pack(int argc, const char **argv, const char *prefix)
 }
 
 struct ref *fetch_pack(struct fetch_pack_args *my_args,
+		       int fd[], pid_t pid,
+		       const struct ref *ref,
 		const char *dest,
 		int nr_heads,
 		char **heads,
 		char **pack_lockfile)
 {
-	int i, ret;
-	int fd[2];
-	struct child_process *conn;
-	struct ref *ref;
 	struct stat st;
+	struct ref *ref_cpy;
 
 	fetch_pack_setup();
 	memcpy(&args, my_args, sizeof(args));
@@ -747,29 +768,15 @@ struct ref *fetch_pack(struct fetch_pack_args *my_args,
 			st.st_mtime = 0;
 	}
 
-	conn = git_connect(fd, (char *)dest, args.uploadpack,
-                          args.verbose ? CONNECT_VERBOSE : 0);
 	if (heads && nr_heads)
 		nr_heads = remove_duplicates(nr_heads, heads);
-	ref = do_fetch_pack(fd, nr_heads, heads, pack_lockfile);
-	close(fd[0]);
-	close(fd[1]);
-	ret = finish_connect(conn);
-
-	if (!ret && nr_heads) {
-		/* If the heads to pull were given, we should have
-		 * consumed all of them by matching the remote.
-		 * Otherwise, 'git-fetch remote no-such-ref' would
-		 * silently succeed without issuing an error.
-		 */
-		for (i = 0; i < nr_heads; i++)
-			if (heads[i] && heads[i][0]) {
-				error("no such remote ref %s", heads[i]);
-				ret = 1;
-			}
+	if (!ref) {
+		packet_flush(fd[1]);
+		die("no matching remote head");
 	}
+	ref_cpy = do_fetch_pack(fd, ref, nr_heads, heads, pack_lockfile);
 
-	if (!ret && args.depth > 0) {
+	if (args.depth > 0) {
 		struct cache_time mtime;
 		char *shallow = git_path("shallow");
 		int fd;
@@ -798,8 +805,5 @@ struct ref *fetch_pack(struct fetch_pack_args *my_args,
 		}
 	}
 
-	if (ret)
-		ref = NULL;
-
-	return ref;
+	return ref_cpy;
 }
diff --git a/fetch-pack.h b/fetch-pack.h
index a7888ea..8d35ef6 100644
--- a/fetch-pack.h
+++ b/fetch-pack.h
@@ -16,6 +16,8 @@ struct fetch_pack_args
 };
 
 struct ref *fetch_pack(struct fetch_pack_args *args,
+		int fd[], struct child_process *conn,
+		const struct ref *ref,
 		const char *dest,
 		int nr_heads,
 		char **heads,
diff --git a/transport.c b/transport.c
index f4577b7..175cf2c 100644
--- a/transport.c
+++ b/transport.c
@@ -567,6 +567,8 @@ struct git_transport_data {
 	unsigned thin : 1;
 	unsigned keep : 1;
 	int depth;
+	struct child_process *conn;
+	int fd[2];
 	const char *uploadpack;
 	const char *receivepack;
 };
@@ -597,20 +599,20 @@ static int set_git_option(struct transport *connection,
 	return 1;
 }
 
+static int connect_setup(struct transport *transport)
+{
+	struct git_transport_data *data = transport->data;
+	data->conn = git_connect(data->fd, transport->url, data->uploadpack, 0);
+	return 0;
+}
+
 static struct ref *get_refs_via_connect(struct transport *transport)
 {
 	struct git_transport_data *data = transport->data;
 	struct ref *refs;
-	int fd[2];
-	char *dest = xstrdup(transport->url);
-	struct child_process *conn = git_connect(fd, dest, data->uploadpack, 0);
 
-	get_remote_heads(fd[0], &refs, 0, NULL, 0);
-	packet_flush(fd[1]);
-
-	finish_connect(conn);
-
-	free(dest);
+	connect_setup(transport);
+	get_remote_heads(data->fd[0], &refs, 0, NULL, 0);
 
 	return refs;
 }
@@ -621,7 +623,7 @@ static int fetch_refs_via_pack(struct transport *transport,
 	struct git_transport_data *data = transport->data;
 	char **heads = xmalloc(nr_heads * sizeof(*heads));
 	char **origh = xmalloc(nr_heads * sizeof(*origh));
-	struct ref *refs;
+	const struct ref *refs;
 	char *dest = xstrdup(transport->url);
 	struct fetch_pack_args args;
 	int i;
@@ -636,13 +638,27 @@ static int fetch_refs_via_pack(struct transport *transport,
 
 	for (i = 0; i < nr_heads; i++)
 		origh[i] = heads[i] = xstrdup(to_fetch[i]->name);
-	refs = fetch_pack(&args, dest, nr_heads, heads, &transport->pack_lockfile);
+
+	refs = transport_get_remote_refs(transport);
+	if (!data->conn) {
+		struct ref *refs_tmp;
+		connect_setup(transport);
+		get_remote_heads(data->fd[0], &refs_tmp, 0, NULL, 0);
+		free_refs(refs_tmp);
+	}
+
+	refs = fetch_pack(&args, data->fd, data->conn, transport->remote_refs, 
+			  dest, nr_heads, heads, &transport->pack_lockfile);
+	close(data->fd[0]);
+	close(data->fd[1]);
+	if (finish_connect(data->conn))
+		refs = NULL;
+	data->conn = NULL;
 
 	for (i = 0; i < nr_heads; i++)
 		free(origh[i]);
 	free(origh);
 	free(heads);
-	free_refs(refs);
 	free(dest);
 	return 0;
 }
@@ -723,6 +739,7 @@ struct transport *transport_get(struct remote *remote, const char *url)
 		ret->disconnect = disconnect_git;
 
 		data->thin = 1;
+		data->conn = NULL;
 		data->uploadpack = "git-upload-pack";
 		if (remote && remote->uploadpack)
 			data->uploadpack = remote->uploadpack;
-- 
1.5.3.4.1206.g5f96

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [RFC PATCH] Reduce the number of connects when fetching
  2007-11-04 21:28 [RFC PATCH] Reduce the number of connects when fetching Daniel Barkalow
@ 2007-11-06  1:51 ` Junio C Hamano
  2007-11-06  3:04   ` Daniel Barkalow
  2007-11-06  8:13   ` Johannes Sixt
  0 siblings, 2 replies; 6+ messages in thread
From: Junio C Hamano @ 2007-11-06  1:51 UTC (permalink / raw)
  To: Daniel Barkalow; +Cc: git

Daniel Barkalow <barkalow@iabervon.org> writes:

> The idea is to keep the open connection in the data for the transport in 
> between getting the list of refs and doing anything further. This 
> therefore moves the connection-handling aspects outside of fetch-pack() 
> and handles them primarily in transport.c.

The idea is very sound.  The scripted version of git-fetch used
a separate ls-remote only because peek-remote and fetch-pack
were separate programs.

> ... In particular, I don't know if there's a way to have the 
> connection end up in a state where objects for more refs can be requested 
> after some refs have been requested and the resulting objects read.

The upload-pack protocol goes "S: here are what I have, C: I
want these, C: I have these, S: ok, continue, C: I have these,
S: ok, continue, C: I have these, S: ok, I've heard enough, C:
done, S: packfile is here", so after packfile generation starts
there is nothing further the downloader can say.

Otherwise you would be able to do the tag following using the
same connection, but that is unfortunately not a case.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [RFC PATCH] Reduce the number of connects when fetching
  2007-11-06  1:51 ` Junio C Hamano
@ 2007-11-06  3:04   ` Daniel Barkalow
  2007-11-06  5:03     ` Junio C Hamano
  2007-11-06  8:13   ` Johannes Sixt
  1 sibling, 1 reply; 6+ messages in thread
From: Daniel Barkalow @ 2007-11-06  3:04 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

On Mon, 5 Nov 2007, Junio C Hamano wrote:

> Daniel Barkalow <barkalow@iabervon.org> writes:
> 
> > The idea is to keep the open connection in the data for the transport in 
> > between getting the list of refs and doing anything further. This 
> > therefore moves the connection-handling aspects outside of fetch-pack() 
> > and handles them primarily in transport.c.
> 
> The idea is very sound.  The scripted version of git-fetch used
> a separate ls-remote only because peek-remote and fetch-pack
> were separate programs.

I figured that had to be the case, due to the way the protocol acts at the 
beginning.

> > ... In particular, I don't know if there's a way to have the 
> > connection end up in a state where objects for more refs can be requested 
> > after some refs have been requested and the resulting objects read.
> 
> The upload-pack protocol goes "S: here are what I have, C: I
> want these, C: I have these, S: ok, continue, C: I have these,
> S: ok, continue, C: I have these, S: ok, I've heard enough, C:
> done, S: packfile is here", so after packfile generation starts
> there is nothing further the downloader can say.
> 
> Otherwise you would be able to do the tag following using the
> same connection, but that is unfortunately not a case.

It would be nice if this could continue: "C: I also want these, S: ok, 
heard enough, C: done, S: another packfile is here"; we should be able to 
identify the end of the packfile on both ends to resume doing other 
things.

Or, maybe, "C: I also want these single objects, S: here's a thin pack of 
them", since it's exclusively tags pointing to objects we have just 
gotten.

	-Daniel
*This .sig left intentionally blank*

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [RFC PATCH] Reduce the number of connects when fetching
  2007-11-06  3:04   ` Daniel Barkalow
@ 2007-11-06  5:03     ` Junio C Hamano
  0 siblings, 0 replies; 6+ messages in thread
From: Junio C Hamano @ 2007-11-06  5:03 UTC (permalink / raw)
  To: Daniel Barkalow; +Cc: git

Daniel Barkalow <barkalow@iabervon.org> writes:

> On Mon, 5 Nov 2007, Junio C Hamano wrote:
>
>> The upload-pack protocol goes "S: here are what I have, C: I
>> want these, C: I have these, S: ok, continue, C: I have these,
>> S: ok, continue, C: I have these, S: ok, I've heard enough, C:
>> done, S: packfile is here", so after packfile generation starts
>> there is nothing further the downloader can say.
>> 
>> Otherwise you would be able to do the tag following using the
>> same connection, but that is unfortunately not a case.
>
> It would be nice if this could continue...

You would need a protocol extension for this, as the protocol
defines all the remainder after the have-ack exchange to be
intended for unpack-objects, not just the data for a single pack
that immediately follows the exchange.  Mysteriously,
unpack-objects even has code to write out the remainder after
the pack data.

The protocol extension would probably need to depend on the
existence of sideband.  Make the sending side signal the end of
the pack data over the sideband after sending a pack, and then
both sides can go back to the "C: I want these too, S: Ok, here
it is" exchange.  You may optionally want to do another have-ack
exchange here, but for the purpose of tag following I suspect
there is no need for that.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [RFC PATCH] Reduce the number of connects when fetching
  2007-11-06  1:51 ` Junio C Hamano
  2007-11-06  3:04   ` Daniel Barkalow
@ 2007-11-06  8:13   ` Johannes Sixt
  2007-11-06  8:34     ` Junio C Hamano
  1 sibling, 1 reply; 6+ messages in thread
From: Johannes Sixt @ 2007-11-06  8:13 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Daniel Barkalow, git

Junio C Hamano schrieb:
> Daniel Barkalow <barkalow@iabervon.org> writes:
>> ... In particular, I don't know if there's a way to have the 
>> connection end up in a state where objects for more refs can be requested 
>> after some refs have been requested and the resulting objects read.
> 
> The upload-pack protocol goes "S: here are what I have, C: I
> want these, C: I have these, S: ok, continue, C: I have these,
> S: ok, continue, C: I have these, S: ok, I've heard enough, C:
> done, S: packfile is here", so after packfile generation starts
> there is nothing further the downloader can say.
> 
> Otherwise you would be able to do the tag following using the
> same connection, but that is unfortunately not a case.

How about:

S: here are what I have
C: I want these
C: want tags   <-- new
C: I have these
S: ok, continue
C: I have these
S: ok, continue
C: I have these
S: ok, these are the tags  <-- new
S: I've heard enough
C: done
S: packfile is here

The tags that the server provides are those (and only those[*]) that 
reference objects in the packfile that it's going to send.

[*] This way the client doesn't have to figure out which tags it wants; as a 
side-effect it won't accidentally fetch tags for objects that it happens to 
have in the repository, but aren't reachable from any ref (like what used to 
happen).

-- Hannes

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [RFC PATCH] Reduce the number of connects when fetching
  2007-11-06  8:13   ` Johannes Sixt
@ 2007-11-06  8:34     ` Junio C Hamano
  0 siblings, 0 replies; 6+ messages in thread
From: Junio C Hamano @ 2007-11-06  8:34 UTC (permalink / raw)
  To: Johannes Sixt; +Cc: Daniel Barkalow, git

Johannes Sixt <j.sixt@viscovery.net> writes:

> The tags that the server provides are those (and only those[*]) that
> reference objects in the packfile that it's going to send.
>
> [*] This way the client doesn't have to figure out which tags it
> wants; ...

Yes, but that shifts the burden to the sending side which is
always bad.  We want to make the client work as much as possible
when it is practical.

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2007-11-06  8:34 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-11-04 21:28 [RFC PATCH] Reduce the number of connects when fetching Daniel Barkalow
2007-11-06  1:51 ` Junio C Hamano
2007-11-06  3:04   ` Daniel Barkalow
2007-11-06  5:03     ` Junio C Hamano
2007-11-06  8:13   ` Johannes Sixt
2007-11-06  8:34     ` Junio C Hamano

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).