git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Jonathan Tan <jonathantanmy@google.com>
To: git@vger.kernel.org
Cc: Jonathan Tan <jonathantanmy@google.com>
Subject: [RFC 1/4] server-endpoint: serve refs without advertisement
Date: Mon, 10 Apr 2017 13:46:07 -0700	[thread overview]
Message-ID: <247c6b74c9d375f3d34996c1e4465761ee9d49a1.1491851452.git.jonathantanmy@google.com> (raw)
In-Reply-To: <cover.1491851452.git.jonathantanmy@google.com>
In-Reply-To: <cover.1491851452.git.jonathantanmy@google.com>

Introduce a new binary that can act as an endpoint to serve refs without
first sending the ref advertisement (a list of all ref names and
associated hashes that the server contains). For very large
repositories, including an internal Android repository with more than
700000 refs, this would save tens of megabytes of network bandwidth
during each fetch.

This endpoint handles ref namespaces and "uploadpack.hiderefs" by
itself, and handles other functionality by invoking upload-pack and
acting as an intermediary (therefore having to know the relatively
minute details of the fetch-pack/upload-pack protocol).

Note: There is still an issue with the handling of "deepen" lines. The
documentation for the pack protocol states that "deepen 0" is the same
as not specifying any depth, but upload-pack does not seem to accept
"deepen 0". I'm not sure whether it is better to change the
documentation or the code. I generally prefer to change the code in such
cases, but treating "deepen 0" (and similar things like "deepen 000")
differently from other "deepen"s would require multiple components
(upload-pack, fetch-pack, and now server-endpoint) to know about this
special case, so I'm inclined to just forbid it, as the current code
does.

Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
---
 .gitignore        |   1 +
 Makefile          |   2 +
 server-endpoint.c | 228 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 231 insertions(+)
 create mode 100644 server-endpoint.c

diff --git a/.gitignore b/.gitignore
index 833ef3b0b..761e06d2c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -140,6 +140,7 @@
 /git-rm
 /git-send-email
 /git-send-pack
+/git-server-endpoint
 /git-sh-i18n
 /git-sh-i18n--envsubst
 /git-sh-setup
diff --git a/Makefile b/Makefile
index c80fec292..0d3813772 100644
--- a/Makefile
+++ b/Makefile
@@ -603,6 +603,7 @@ PROGRAM_OBJS += shell.o
 PROGRAM_OBJS += show-index.o
 PROGRAM_OBJS += upload-pack.o
 PROGRAM_OBJS += remote-testsvn.o
+PROGRAM_OBJS += server-endpoint.o
 
 # Binary suffix, set to .exe for Windows builds
 X =
@@ -673,6 +674,7 @@ BINDIR_PROGRAMS_NEED_X += git-upload-pack
 BINDIR_PROGRAMS_NEED_X += git-receive-pack
 BINDIR_PROGRAMS_NEED_X += git-upload-archive
 BINDIR_PROGRAMS_NEED_X += git-shell
+BINDIR_PROGRAMS_NEED_X += git-server-endpoint
 
 BINDIR_PROGRAMS_NO_X += git-cvsserver
 
diff --git a/server-endpoint.c b/server-endpoint.c
new file mode 100644
index 000000000..a9c0c7c94
--- /dev/null
+++ b/server-endpoint.c
@@ -0,0 +1,228 @@
+#include "cache.h"
+#include "pkt-line.h"
+#include "refs.h"
+#include "revision.h"
+#include "run-command.h"
+
+/* Usage strings handed to parse_options() in cmd_main(). */
+static const char * const server_endpoint_usage[] = {
+	N_("git server-endpoint [<options>] <dir>"),
+	NULL
+};
+
+/*
+ * Capability list that send_want() appends to the first "want" line it
+ * writes to upload-pack.
+ */
+static const char *capabilities = "multi_ack_detailed side-band-64k shallow";
+
+/*
+ * State shared by fetch_ref(), handle_want() and send_want() while the
+ * client's "want" lines are being rewritten for upload-pack.
+ */
+struct handle_want_data {
+	/* fd the "want" pkt-lines are written to; fetch_ref() sets it to cmd.in */
+	int upload_pack_in_fd;
+	/* set once the "want" line carrying the capabilities was written */
+	int capabilities_sent;
+	/* namespaced name of every ref a "want" line was written for */
+	struct string_list sent_namespaced_names;
+};
+
+/*
+ * Write a "want <oid>" pkt-line for one ref to upload-pack.
+ *
+ * Passed to for_each_glob_ref() as its callback and also called directly
+ * by handle_want(). Refs for which ref_is_hidden() reports true, and refs
+ * whose namespaced name was already recorded in sent_namespaced_names,
+ * are skipped silently. The first line actually written also carries the
+ * capability list.
+ *
+ * namespaced_name: ref name including any namespace prefix
+ * oid: object id sent in the "want" line
+ * flags: unused
+ * handle_want_data: pointer to the struct handle_want_data to update
+ *
+ * Always returns 0.
+ */
+static int send_want(const char *namespaced_name, const struct object_id *oid,
+		     int flags, void *handle_want_data)
+{
+	struct handle_want_data *data = handle_want_data;
+
+	if (ref_is_hidden(strip_namespace(namespaced_name), namespaced_name))
+		return 0;
+	if (string_list_lookup(&data->sent_namespaced_names, namespaced_name))
+		return 0;
+
+	string_list_insert(&data->sent_namespaced_names, namespaced_name);
+
+	if (data->capabilities_sent) {
+		packet_write_fmt(data->upload_pack_in_fd, "want %s\n",
+				 oid_to_hex(oid));
+	} else {
+		/*
+		 * The pack protocol defines the first want as
+		 * "want" SP obj-id SP capability-list, so the object id
+		 * and the capabilities must be separated by a space.
+		 */
+		packet_write_fmt(data->upload_pack_in_fd, "want %s %s\n",
+				 oid_to_hex(oid), capabilities);
+		data->capabilities_sent = 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Turn the argument of one client "want" line into "want" lines for
+ * upload-pack.
+ *
+ * arg is prefixed with the current git namespace. If arg contains glob
+ * special characters, send_want() is run for every ref matching the
+ * namespaced pattern; otherwise it is run once, and only when the
+ * namespaced ref can be read.
+ */
+static void handle_want(const char *arg, struct handle_want_data *data)
+{
+	struct object_id oid;
+	char *full_name = xstrfmt("%s%s", get_git_namespace(), arg);
+
+	if (has_glob_specials(arg))
+		for_each_glob_ref(send_want, full_name, data);
+	else if (!read_ref(full_name, oid.hash))
+		send_want(full_name, &oid, 0, data);
+
+	free(full_name);
+}
+
+/*
+ * Serve one "fetch-refs" request by spawning "git upload-pack ." and
+ * relaying the conversation between the client (fd 0 / fd 1) and it.
+ *
+ * The client's "want" lines are rewritten through handle_want(); "shallow"
+ * and "deepen*" lines and the later negotiation lines are copied through.
+ * Just before relaying the packet that triggers it (a bare "ACK <oid>"
+ * seen after "done" or after an "ACK <oid> ready", or a "NAK" seen after
+ * "done"), one "wanted <oid> <ref>" line is written to the client for
+ * every ref a "want" was sent for.
+ *
+ * stateless_rpc: if nonzero, "--stateless-rpc" is passed to upload-pack,
+ * its initial output is not drained, and the copy loop runs only once.
+ *
+ * Returns 0 on success, or -1 if upload-pack could not be started or
+ * finish_command() reports a failure.
+ */
+static int fetch_ref(int stateless_rpc)
+{
+	struct child_process cmd = CHILD_PROCESS_INIT;
+	/* argv[2] is a spare slot for the optional "--stateless-rpc". */
+	static const char *argv[] = {
+		"upload-pack", ".", NULL, NULL
+	};
+	struct handle_want_data handle_want_data = {0, 0, STRING_LIST_INIT_DUP};
+
+	char *line;
+	int size;
+
+	/* Set when a "deepen*" line was forwarded (see the wants loop). */
+	int upload_pack_will_respond = 0;
+	/* Set once the "wanted" lines have been written to the client. */
+	int wanted_refs_sent = 0;
+
+	if (stateless_rpc)
+		argv[2] = "--stateless-rpc";
+	cmd.argv = argv;
+	cmd.git_cmd = 1;
+	/* -1 asks run-command to set up pipes to the child's stdin/stdout. */
+	cmd.in = -1;
+	cmd.out = -1;
+
+	if (start_command(&cmd))
+		goto error;
+
+	handle_want_data.upload_pack_in_fd = cmd.in;
+
+	if (!stateless_rpc) {
+		/* Drain the initial ref advertisement (until flush-pkt). */
+		while (packet_read_line(cmd.out, NULL))
+			;
+	}
+
+	/* Send the wants. Upload-pack will not respond to this unless a depth
+	 * request is made. Lines matching none of the prefixes below are
+	 * dropped. */
+	while ((line = packet_read_line(0, NULL))) {
+		const char *arg;
+		if (skip_prefix(line, "want ", &arg)) {
+			handle_want(arg, &handle_want_data);
+		} else if (starts_with(line, "shallow ")) {
+			packet_write_fmt(cmd.in, "%s", line);
+		} else if (starts_with(line, "deepen ") ||
+			   starts_with(line, "deepen-since ") ||
+			   starts_with(line, "deepen-not ")) {
+			packet_write_fmt(cmd.in, "%s", line);
+			upload_pack_will_respond = 1;
+		}
+	}
+	packet_flush(cmd.in);
+
+	/* Relay upload-pack's reply to the depth request, then a flush-pkt. */
+	if (upload_pack_will_respond) {
+		while ((line = packet_read_line(cmd.out, NULL))) {
+			packet_write_fmt(1, "%s", line);
+		}
+		packet_flush(1);
+	}
+
+	/* Continue to copy the conversation. */
+	do {
+		char buffer[LARGE_PACKET_DATA_MAX];
+		char size_buffer[5]; /* 4 bytes + NUL */
+		int done_received = 0;
+		int ready_received = 0;
+		int options = PACKET_READ_CHOMP_NEWLINE;
+
+		/*
+		 * Client -> upload-pack: forward lines until a flush-pkt or
+		 * "done". After "done" no flush-pkt is written to upload-pack.
+		 */
+		while ((line = packet_read_line(0, NULL))) {
+			packet_write_fmt(cmd.in, "%s", line);
+			if (!strcmp(line, "done")) {
+				done_received = 1;
+				/* "done" also marks the end of the request. */
+				goto after_flush;
+			}
+		}
+		packet_flush(cmd.in);
+after_flush:
+		/* upload-pack -> client: relay packets until a flush-pkt. */
+		while ((size = packet_read(cmd.out, NULL, NULL, buffer,
+					   sizeof(buffer), options))) {
+			int send_wanted_refs = 0;
+			if (!wanted_refs_sent) {
+				/*
+				 * The length checks tell a bare "ACK <oid>"
+				 * apart from "ACK <oid> ready"; the two
+				 * spaces in "ACK  ready" stand for the
+				 * separators on either side of the oid.
+				 */
+				if ((done_received || ready_received) &&
+				    size == strlen("ACK ") + GIT_SHA1_HEXSZ &&
+				    starts_with(buffer, "ACK "))
+					send_wanted_refs = 1;
+				else if (done_received && !strcmp(buffer, "NAK"))
+					send_wanted_refs = 1;
+				else if (size == strlen("ACK  ready") + GIT_SHA1_HEXSZ &&
+					 starts_with(buffer, "ACK ") &&
+					 !strcmp(buffer + strlen("ACK  ") + GIT_SHA1_HEXSZ, "ready"))
+					ready_received = 1;
+			}
+			if (send_wanted_refs) {
+				struct string_list_item *item;
+				/*
+				 * Re-read every ref a "want" was sent for and
+				 * report it to the client; a ref that cannot
+				 * be read at this point is fatal.
+				 */
+				for_each_string_list_item(item,
+							  &handle_want_data.sent_namespaced_names) {
+					struct object_id oid;
+					if (read_ref(item->string, oid.hash))
+						die("something happened");
+					packet_write_fmt(1, "wanted %s %s",
+							 oid_to_hex(&oid),
+							 strip_namespace(item->string));
+				}
+				wanted_refs_sent = 1;
+				/* Do not chomp any more characters because
+				 * binary data (packfile) is about to be sent.
+				 */
+				options = 0;
+			}
+			/*
+			 * Re-frame the packet just read: a 4-hex-digit length
+			 * (which counts the 4 header bytes) and the payload.
+			 */
+			sprintf(size_buffer, "%04x", size + 4);
+			write_or_die(1, size_buffer, 4);
+			write_or_die(1, buffer, size);
+			if (!wanted_refs_sent && !strcmp(buffer, "NAK")) {
+				/* NAK before we send wanted refs marks the end
+				 * of the response. */
+				goto after_flush_2;
+			}
+		}
+		/* Forward the flush-pkt that ended the read loop above. */
+		packet_flush(1);
+after_flush_2:
+		;
+	} while (!stateless_rpc && !wanted_refs_sent);
+
+	close(cmd.in);
+	cmd.in = -1;
+	close(cmd.out);
+	cmd.out = -1;
+
+	if (finish_command(&cmd))
+		return -1;
+
+	return 0;
+
+error:
+
+	/* Close whichever pipe ends are still open. */
+	if (cmd.in >= 0)
+		close(cmd.in);
+	if (cmd.out >= 0)
+		close(cmd.out);
+	return -1;
+}
+
+/*
+ * Configuration callback registered with git_config() in cmd_main():
+ * passes each variable to parse_hide_refs_config() for the "uploadpack"
+ * section and returns its result. The third parameter is unused.
+ */
+static int server_endpoint_config(const char *var, const char *value, void *unused)
+{
+	return parse_hide_refs_config(var, value, "uploadpack");
+}
+
+/*
+ * Entry point of git-server-endpoint.
+ *
+ * Parses the options, enters the repository named by the single remaining
+ * argument, loads the configuration through server_endpoint_config(), and
+ * then reads one pkt-line from stdin naming the command to run. Only
+ * "fetch-refs" is supported; it is served by fetch_ref(), whose result is
+ * returned. Any other command, including a flush-pkt, is fatal.
+ */
+int cmd_main(int argc, const char **argv)
+{
+	int stateless_rpc = 0;
+
+	struct option options[] = {
+		OPT_BOOL(0, "stateless-rpc", &stateless_rpc,
+			 N_("quit after a single request/response exchange")),
+		OPT_END()
+	};
+
+	char *line;
+
+	packet_trace_identity("server-endpoint");
+	check_replace_refs = 0;
+
+	argc = parse_options(argc, argv, NULL, options, server_endpoint_usage, 0);
+
+	if (argc != 1)
+		die("must have 1 arg");
+
+	if (!enter_repo(argv[0], 0))
+		die("does not appear to be a git repository");
+	git_config(server_endpoint_config, NULL);
+
+	/*
+	 * packet_read_line() returns NULL for a flush-pkt, so the result
+	 * must be checked before it is handed to strcmp().
+	 */
+	line = packet_read_line(0, NULL);
+	if (!line || strcmp(line, "fetch-refs"))
+		die("only fetch-refs is supported");
+
+	return fetch_ref(stateless_rpc);
+}
-- 
2.12.2.715.g7642488e1d-goog


  reply	other threads:[~2017-04-10 20:46 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-04-10 20:46 [RFC 0/4] Implementation of fetch-blobs and fetch-refs Jonathan Tan
2017-04-10 20:46 ` Jonathan Tan [this message]
2017-04-10 20:46 ` [RFC 2/4] fetch-pack: refactor "want" pkt-line generation Jonathan Tan
2017-04-10 20:46 ` [RFC 3/4] fetch-pack: support new server endpoint Jonathan Tan
2017-04-10 20:46 ` [RFC 4/4] server-endpoint: serve blobs by hash Jonathan Tan
2017-06-30 22:41 ` [RFC 0/4] Implementation of fetch-blobs and fetch-refs Stefan Beller

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=247c6b74c9d375f3d34996c1e4465761ee9d49a1.1491851452.git.jonathantanmy@google.com \
    --to=jonathantanmy@google.com \
    --cc=git@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).