From: "Derrick Stolee via GitGitGadget" <gitgitgadget@gmail.com>
To: git@vger.kernel.org
Cc: gitster@pobox.com, me@ttaylorr.com, aevar@gmail.com,
newren@gmail.com, Derrick Stolee <derrickstolee@github.com>,
Derrick Stolee <derrickstolee@github.com>
Subject: [PATCH 08/25] bundle: implement 'fetch' command for direct bundles
Date: Wed, 23 Feb 2022 18:30:46 +0000 [thread overview]
Message-ID: <b993e7b47104f1d1740ab39d7ef3dc5bbccecbfc.1645641063.git.gitgitgadget@gmail.com> (raw)
In-Reply-To: <pull.1160.git.1645641063.gitgitgadget@gmail.com>
From: Derrick Stolee <derrickstolee@github.com>
The 'git bundle fetch <uri>' command will be used to download one or
more bundles from a specified '<uri>'. The implementation being added
here focuses only on downloading a file from '<uri>' and unbundling it
if it is a valid bundle file.
If it is not a bundle file, then we currently die(), but a later change
will attempt to interpret it as a table of contents with possibly
multiple bundles listed, along with other metadata for each bundle.
That explains a bit why cmd_bundle_fetch() has three steps carefully
commented, including a "stack" that currently can only hold one bundle.
We will later update this while loop to push onto the stack when
necessary.
RFC-TODO: Add documentation to Documentation/git-bundle.txt
RFC-TODO: Add direct tests of 'git bundle fetch' when the URI is a
bundle file.
RFC-TODO: Split out the docs and subcommand boilerplate into its own
commit.
Signed-off-by: Derrick Stolee <derrickstolee@github.com>
---
builtin/bundle.c | 261 +++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 261 insertions(+)
diff --git a/builtin/bundle.c b/builtin/bundle.c
index 8187b7df739..0e06f1756d1 100644
--- a/builtin/bundle.c
+++ b/builtin/bundle.c
@@ -3,6 +3,10 @@
#include "parse-options.h"
#include "cache.h"
#include "bundle.h"
+#include "run-command.h"
+#include "hashmap.h"
+#include "object-store.h"
+#include "refs.h"
/*
* Basic handler for bundle files to connect repositories via sneakernet.
@@ -13,6 +17,7 @@
static const char * const builtin_bundle_usage[] = {
N_("git bundle create [<options>] <file> <git-rev-list args>"),
+ N_("git bundle fetch [<options>] <uri>"),
N_("git bundle list-heads <file> [<refname>...]"),
N_("git bundle unbundle <file> [<refname>...]"),
N_("git bundle verify [<options>] <file>"),
@@ -24,6 +29,11 @@ static const char * const builtin_bundle_create_usage[] = {
NULL
};
+/*
+ * Usage text for 'git bundle fetch'. Uses the same "[<options>] <uri>"
+ * form as the summary line in builtin_bundle_usage; cmd_bundle_fetch()
+ * does not parse a --filter option, so none is advertised here.
+ */
+static const char * const builtin_bundle_fetch_usage[] = {
+	N_("git bundle fetch [<options>] <uri>"),
+	NULL
+};
+
static const char * const builtin_bundle_list_heads_usage[] = {
N_("git bundle list-heads <file> [<refname>...]"),
NULL
@@ -131,6 +141,255 @@ cleanup:
return ret;
}
+/**
+ * The remote_bundle_info struct contains the necessary data for
+ * the list of bundles advertised by a table of contents. If the
+ * bundle URI instead contains a single bundle, then this struct
+ * can represent a single bundle without a 'uri' but with a
+ * tempfile storing its current location on disk.
+ *
+ * The embedded 'ent' member is a hashmap entry; no hashmap is
+ * populated by the code visible in this patch (presumably the
+ * table-of-contents parser will key entries by 'id' -- TODO confirm).
+ */
+struct remote_bundle_info {
+ struct hashmap_entry ent;
+
+ /**
+ * The 'id' is a name given to the bundle for reference
+ * by other bundle infos.
+ */
+ char *id;
+
+ /**
+ * The 'uri' is the location of the remote bundle so
+ * it can be downloaded on-demand. This will be NULL
+ * if there was no table of contents.
+ *
+ * NOTE(review): cmd_bundle_fetch() currently stores the
+ * command-line URI here even for a directly-fetched bundle,
+ * and the die() messages print this field, so in practice it
+ * is non-NULL on that path. Confirm which contract is intended.
+ */
+ char *uri;
+
+ /**
+ * The 'next_id' string, if non-NULL, contains the 'id'
+ * for a bundle that contains the prerequisites for this
+ * bundle. Used by table of contents to allow fetching
+ * a portion of a repository incrementally.
+ *
+ * cmd_bundle_fetch() checks this when a bundle's
+ * prerequisites are missing; a NULL value there is fatal.
+ */
+ char *next_id;
+
+ /**
+ * A table of contents can include a timestamp for the
+ * bundle as a heuristic for describing a list of bundles
+ * in order of recency.
+ */
+ timestamp_t timestamp;
+
+ /**
+ * If the bundle has been downloaded, then 'file' is a
+ * filename storing its contents. Otherwise, 'file' is
+ * an empty string.
+ *
+ * cmd_bundle_fetch() tests 'file.len' to decide whether
+ * the bundle still needs to be downloaded.
+ */
+ struct strbuf file;
+
+ /**
+ * The 'stack_next' pointer allows this struct to form
+ * a stack. A NULL value marks the bottom of the stack.
+ */
+ struct remote_bundle_info *stack_next;
+};
+
+/*
+ * Download the contents found at 'uri' into the local path 'file' by
+ * spawning "git-remote-https" and writing a single "get <uri> <file>"
+ * command, followed by a blank line, to its standard input.
+ *
+ * Dies if the helper cannot be started, if the command cannot be
+ * written to it, or if the helper exits with a failure.
+ */
+static void download_uri_to_file(const char *uri, const char *file)
+{
+	struct child_process cp = CHILD_PROCESS_INIT;
+	FILE *child_in;
+	int failed;
+
+	strvec_pushl(&cp.args, "git-remote-https", "origin", uri, NULL);
+	cp.in = -1;
+	cp.out = -1;
+
+	if (start_command(&cp))
+		die(_("failed to start remote helper"));
+
+	child_in = fdopen(cp.in, "w");
+	if (!child_in)
+		die(_("cannot write to child process"));
+
+	/*
+	 * fclose() flushes the buffered command, so a write error may
+	 * only surface there; check both calls.
+	 */
+	if (fprintf(child_in, "get %s %s\n\n", uri, file) < 0)
+		die(_("cannot write to child process"));
+	if (fclose(child_in))
+		die(_("cannot write to child process"));
+
+	/*
+	 * Requesting a pipe with 'cp.out = -1' leaves its read end open
+	 * in this process. Close it only after the helper has exited so
+	 * the helper never writes to a closed pipe.
+	 */
+	failed = finish_command(&cp);
+	close(cp.out);
+
+	if (failed)
+		die(_("remote helper failed"));
+}
+
+/*
+ * Store in 'name' a path produced by odb_mkstemp() from the pattern
+ * "bundles/tmp_uri_XXXXXX". The file itself is closed and removed
+ * again before returning, so only the name is handed back.
+ *
+ * Dies if the temporary file cannot be created.
+ */
+static void find_temp_filename(struct strbuf *name)
+{
+	/*
+	 * Find a temporary filename that is available. This is briefly
+	 * racy, but unlikely to collide.
+	 */
+	int tmp_fd = odb_mkstemp(name, "bundles/tmp_uri_XXXXXX");
+
+	if (tmp_fd < 0)
+		die(_("failed to create temporary file"));
+
+	close(tmp_fd);
+	unlink(name->buf);
+}
+
+/*
+ * Run "git bundle unbundle" on the downloaded file for 'info' and
+ * record the branches it reports under "refs/bundles/".
+ *
+ * Each output line is expected to look like "<oid> <refname>". For a
+ * refname of the form "refs/heads/<name>", the ref
+ * "refs/bundles/<name>" is updated to <oid>. Lines without a space,
+ * with an unparsable object id, or naming a ref outside "refs/heads/"
+ * are skipped.
+ *
+ * The downloaded file is removed once the child has succeeded. Dies
+ * if the child cannot be started, if its output cannot be opened for
+ * reading, or if it exits with a failure.
+ */
+static void unbundle_fetched_bundle(struct remote_bundle_info *info)
+{
+	struct child_process cp = CHILD_PROCESS_INIT;
+	FILE *f;
+	struct strbuf line = STRBUF_INIT;
+	struct strbuf bundle_ref = STRBUF_INIT;
+	size_t bundle_prefix_len;
+
+	strvec_pushl(&cp.args, "bundle", "unbundle",
+		     info->file.buf, NULL);
+	cp.git_cmd = 1;
+	cp.out = -1;
+
+	if (start_command(&cp))
+		die(_("failed to start 'unbundle' process"));
+
+	strbuf_addstr(&bundle_ref, "refs/bundles/");
+	bundle_prefix_len = bundle_ref.len;
+
+	f = fdopen(cp.out, "r");
+	if (!f)
+		die(_("cannot read from 'unbundle' process"));
+
+	while (strbuf_getline(&line, f) != EOF) {
+		struct object_id oid, old_oid;
+		const char *refname, *branch_name, *end;
+		char *space;
+		int has_old;
+
+		strbuf_trim_trailing_newline(&line);
+
+		space = strchr(line.buf, ' ');
+
+		if (!space)
+			continue;
+
+		refname = space + 1;
+		*space = '\0';
+
+		/* Never hand an unparsed (uninitialized) oid to update_ref(). */
+		if (parse_oid_hex(line.buf, &oid, &end))
+			continue;
+
+		if (!skip_prefix(refname, "refs/heads/", &branch_name))
+			continue;
+
+		/* Reuse the buffer: keep the prefix, replace the branch name. */
+		strbuf_setlen(&bundle_ref, bundle_prefix_len);
+		strbuf_addstr(&bundle_ref, branch_name);
+
+		has_old = !read_ref(bundle_ref.buf, &old_oid);
+
+		update_ref("bundle fetch", bundle_ref.buf, &oid,
+			   has_old ? &old_oid : NULL,
+			   REF_SKIP_OID_VERIFICATION,
+			   UPDATE_REFS_MSG_ON_ERR);
+	}
+
+	/* Closing the stream also closes the pipe descriptor in cp.out. */
+	fclose(f);
+	strbuf_release(&line);
+	strbuf_release(&bundle_ref);
+
+	if (finish_command(&cp))
+		die(_("failed to unbundle bundle from '%s'"), info->uri);
+
+	unlink_or_warn(info->file.buf);
+}
+
+/*
+ * Implement 'git bundle fetch <uri>': download the file at <uri> and,
+ * if it is a bundle whose prerequisites are all present in the object
+ * database, unbundle it.
+ *
+ * Dies when run outside a repository, when the download fails, when
+ * the downloaded data is not a bundle, or when a bundle has missing
+ * prerequisites and no 'next_id'. Returns 0 otherwise.
+ */
+static int cmd_bundle_fetch(int argc, const char **argv, const char *prefix)
+{
+	int ret = 0;
+	/* Parsed from --progress below; not consulted elsewhere in this function. */
+	int progress = isatty(2);
+	char *bundle_uri;
+	struct remote_bundle_info first_file = {
+		.file = STRBUF_INIT,
+	};
+	struct remote_bundle_info *stack = NULL;
+
+	struct option options[] = {
+		OPT_BOOL(0, "progress", &progress,
+			 N_("show progress meter")),
+		OPT_END()
+	};
+
+	argc = parse_options_cmd_bundle(argc, argv, prefix,
+			builtin_bundle_fetch_usage, options, &bundle_uri);
+
+	if (!startup_info->have_repository)
+		die(_("'fetch' requires a repository"));
+
+	/*
+	 * Step 1: determine protocol for uri, and download contents to
+	 * a temporary location.
+	 */
+	first_file.uri = bundle_uri;
+	find_temp_filename(&first_file.file);
+	download_uri_to_file(bundle_uri, first_file.file.buf);
+
+	/*
+	 * Step 2: Check if the file is a bundle (if so, add it to the
+	 * stack and move to step 3).
+	 */
+
+	if (is_bundle(first_file.file.buf, 1)) {
+		/* The simple case: only one file, no stack to worry about. */
+		stack = &first_file;
+	} else {
+		/* TODO: Expect and parse a table of contents. */
+		die(_("unexpected data at bundle URI"));
+	}
+
+	/*
+	 * Step 3: For each bundle in the stack:
+	 * 	i. If not downloaded to a temporary file, download it.
+	 * 	ii. Once downloaded, check that its prerequisites are in
+	 * 	    the object database. If not, then push its dependent
+	 * 	    bundle onto the stack. (Fail if no such bundle exists.)
+	 * 	iii. If all prerequisites are present, then unbundle the
+	 * 	     temporary file and pop the bundle from the stack.
+	 */
+	while (stack) {
+		int valid = 1;
+		int bundle_fd;
+		struct string_list_item *prereq;
+		/* BUNDLE_HEADER_INIT suffices; no extra init call is needed. */
+		struct bundle_header header = BUNDLE_HEADER_INIT;
+
+		if (!stack->file.len) {
+			find_temp_filename(&stack->file);
+			download_uri_to_file(stack->uri, stack->file.buf);
+			if (!is_bundle(stack->file.buf, 1))
+				die(_("file downloaded from '%s' is not a bundle"), stack->uri);
+		}
+
+		bundle_fd = read_bundle_header(stack->file.buf, &header);
+		if (bundle_fd < 0)
+			die(_("failed to read bundle from '%s'"), stack->uri);
+
+		/* One missing prerequisite is enough to reject the bundle. */
+		for_each_string_list_item(prereq, &header.prerequisites) {
+			struct object_info info = OBJECT_INFO_INIT;
+			struct object_id *oid = prereq->util;
+
+			if (oid_object_info_extended(the_repository, oid, &info,
+						     OBJECT_INFO_QUICK)) {
+				valid = 0;
+				break;
+			}
+		}
+
+		close(bundle_fd);
+		bundle_header_release(&header);
+
+		if (valid) {
+			unbundle_fetched_bundle(stack);
+		} else if (stack->next_id) {
+			/*
+			 * Load the next bundle from the hashtable and
+			 * push it onto the stack.
+			 *
+			 * Nothing is loaded yet, so a bundle reaching
+			 * this branch is popped below without being
+			 * unbundled.
+			 */
+		} else {
+			die(_("bundle from '%s' has missing prerequisites and no dependent bundle"),
+			    stack->uri);
+		}
+
+		stack = stack->stack_next;
+	}
+
+	/* The temporary filename buffer is owned by this function. */
+	strbuf_release(&first_file.file);
+	free(bundle_uri);
+	return ret;
+}
+
static int cmd_bundle_list_heads(int argc, const char **argv, const char *prefix) {
struct bundle_header header = BUNDLE_HEADER_INIT;
int bundle_fd = -1;
@@ -209,6 +468,8 @@ int cmd_bundle(int argc, const char **argv, const char *prefix)
else if (!strcmp(argv[0], "create"))
result = cmd_bundle_create(argc, argv, prefix);
+ else if (!strcmp(argv[0], "fetch"))
+ result = cmd_bundle_fetch(argc, argv, prefix);
else if (!strcmp(argv[0], "list-heads"))
result = cmd_bundle_list_heads(argc, argv, prefix);
else if (!strcmp(argv[0], "unbundle"))
--
gitgitgadget
next prev parent reply other threads:[~2022-02-23 18:31 UTC|newest]
Thread overview: 46+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-02-23 18:30 [PATCH 00/25] [RFC] Bundle URIs Derrick Stolee via GitGitGadget
2022-02-23 18:30 ` [PATCH 01/25] docs: document bundle URI standard Derrick Stolee via GitGitGadget
2022-03-02 2:28 ` Elijah Newren
2022-03-02 14:06 ` Derrick Stolee
2022-03-03 2:19 ` Elijah Newren
2022-03-03 2:44 ` Derrick Stolee
2022-02-23 18:30 ` [PATCH 02/25] bundle: alphabetize subcommands better Derrick Stolee via GitGitGadget
2022-03-11 13:47 ` Ævar Arnfjörð Bjarmason
2022-02-23 18:30 ` [PATCH 03/25] dir: extract starts_with_dot[_dot]_slash() Derrick Stolee via GitGitGadget
2022-02-23 18:30 ` [PATCH 04/25] remote: move relative_url() Derrick Stolee via GitGitGadget
2022-02-23 18:30 ` [PATCH 05/25] remote: allow relative_url() to return an absolute url Derrick Stolee via GitGitGadget
2022-02-23 18:30 ` [PATCH 06/25] http: make http_get_file() external Derrick Stolee via GitGitGadget
2022-02-23 18:30 ` [PATCH 07/25] remote-curl: add 'get' capability Derrick Stolee via GitGitGadget
2022-02-23 18:30 ` Derrick Stolee via GitGitGadget [this message]
2022-02-23 18:30 ` [PATCH 09/25] bundle: parse table of contents during 'fetch' Derrick Stolee via GitGitGadget
2022-02-23 18:30 ` [PATCH 10/25] bundle: add --filter option to 'fetch' Derrick Stolee via GitGitGadget
2022-03-11 13:44 ` Ævar Arnfjörð Bjarmason
2022-02-23 18:30 ` [PATCH 11/25] bundle: allow relative URLs in table of contents Derrick Stolee via GitGitGadget
2022-03-11 13:42 ` Ævar Arnfjörð Bjarmason
2022-02-23 18:30 ` [PATCH 12/25] bundle: make it easy to call 'git bundle fetch' Derrick Stolee via GitGitGadget
2022-02-23 18:30 ` [PATCH 13/25] clone: add --bundle-uri option Derrick Stolee via GitGitGadget
2022-02-23 18:30 ` [PATCH 14/25] clone: --bundle-uri cannot be combined with --depth Derrick Stolee via GitGitGadget
2022-02-23 18:30 ` [PATCH 15/25] config: add git_config_get_timestamp() Derrick Stolee via GitGitGadget
2022-03-11 13:32 ` Ævar Arnfjörð Bjarmason
2022-02-23 18:30 ` [PATCH 16/25] bundle: only fetch bundles if timestamp is new Derrick Stolee via GitGitGadget
2022-02-23 18:30 ` [PATCH 17/25] fetch: fetch bundles before fetching original data Derrick Stolee via GitGitGadget
2022-02-23 18:30 ` [PATCH 18/25] connect.c: refactor sending of agent & object-format Ævar Arnfjörð Bjarmason via GitGitGadget
2022-02-23 18:30 ` [PATCH 19/25] protocol-caps: implement cap_features() Derrick Stolee via GitGitGadget
2022-02-23 18:30 ` [PATCH 20/25] serve: understand but do not advertise 'features' capability Derrick Stolee via GitGitGadget
2022-02-23 18:30 ` [PATCH 21/25] serve: advertise 'features' when config exists Derrick Stolee via GitGitGadget
2022-02-23 18:31 ` [PATCH 22/25] connect: implement get_recommended_features() Derrick Stolee via GitGitGadget
2022-02-23 18:31 ` [PATCH 23/25] transport: add connections for 'features' capability Derrick Stolee via GitGitGadget
2022-02-23 18:31 ` [PATCH 24/25] clone: use server-recommended bundle URI Derrick Stolee via GitGitGadget
2022-02-23 18:31 ` [PATCH 25/25] t5601: basic bundle URI test Derrick Stolee via GitGitGadget
2022-02-23 22:17 ` [PATCH 00/25] [RFC] Bundle URIs Ævar Arnfjörð Bjarmason
2022-02-24 14:11 ` Derrick Stolee
2022-03-04 13:30 ` Derrick Stolee
2022-03-04 14:49 ` Ævar Arnfjörð Bjarmason
2022-03-04 15:12 ` Derrick Stolee
2022-03-08 17:15 ` Derrick Stolee
2022-03-10 14:45 ` Johannes Schindelin
2022-04-07 19:08 ` Derrick Stolee
2022-04-08 9:15 ` Ævar Arnfjörð Bjarmason
2022-04-08 13:13 ` Derrick Stolee
2022-04-08 18:26 ` Junio C Hamano
2022-03-08 8:18 ` Teng Long
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=b993e7b47104f1d1740ab39d7ef3dc5bbccecbfc.1645641063.git.gitgitgadget@gmail.com \
--to=gitgitgadget@gmail.com \
--cc=aevar@gmail.com \
--cc=derrickstolee@github.com \
--cc=git@vger.kernel.org \
--cc=gitster@pobox.com \
--cc=me@ttaylorr.com \
--cc=newren@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.