* [PATCH] builtin/clone: teach git-clone(1) the --ref= argument
@ 2024-09-27 8:54 Toon Claes
2024-09-27 19:20 ` Junio C Hamano
0 siblings, 1 reply; 6+ messages in thread
From: Toon Claes @ 2024-09-27 8:54 UTC (permalink / raw)
To: git; +Cc: Toon Claes
Add option `--ref` to git-clone(1). This enables the user to clone and
checkout the given named reference. It's pretty similar to --branch and
while --branch takes a branch name or tag name, it doesn't take a fully
qualified reference. This allows the user to clone a reference that
doesn't start with refs/heads or refs/tags. This can be useful when the
server uses custom references.
Allow the user to clone a certain ref, similar to --branch.
Signed-off-by: Toon Claes <toon@iotcl.com>
---
Documentation/git-clone.txt | 9 ++++-
builtin/clone.c | 67 ++++++++++++++++++++++++++++++++-----
t/t5612-clone-refspec.sh | 35 +++++++++++++++++++
3 files changed, 102 insertions(+), 9 deletions(-)
diff --git a/Documentation/git-clone.txt b/Documentation/git-clone.txt
index 8e925db7e9..7f75454aae 100644
--- a/Documentation/git-clone.txt
+++ b/Documentation/git-clone.txt
@@ -215,6 +215,13 @@ objects from the source repository into a pack in the cloned repository.
`--branch` can also take tags and detaches the HEAD at that commit
in the resulting repository.
+`--ref` _<name>_::
+ This detaches HEAD and makes it point to the commit where the _<name>_
+ reference is pointing to. In a non-bare repository, this is the ref that
+ will be checked out.
+ Can be used in conjunction with `--single-branch` and `--no-tags` to
+ clone only the given ref. Cannot be combined with `--branch`.
+
`-u` _<upload-pack>_::
`--upload-pack` _<upload-pack>_::
When given, and the repository to clone from is accessed
@@ -259,7 +266,7 @@ corresponding `--mirror` and `--no-tags` options instead.
`--`[`no-`]`single-branch`::
Clone only the history leading to the tip of a single branch,
- either specified by the `--branch` option or the primary
+ either specified by the `--branch` or `--ref` option or the primary
branch remote's `HEAD` points at.
Further fetches into the resulting repository will only update the
remote-tracking branch for the branch this option was used for the
diff --git a/builtin/clone.c b/builtin/clone.c
index e77339c847..384923703d 100644
--- a/builtin/clone.c
+++ b/builtin/clone.c
@@ -69,6 +69,7 @@ static char *option_template, *option_depth, *option_since;
static char *option_origin = NULL;
static char *remote_name = NULL;
static char *option_branch = NULL;
+static char *option_ref = NULL;
static struct string_list option_not = STRING_LIST_INIT_NODUP;
static const char *real_git_dir;
static const char *ref_format;
@@ -141,6 +142,8 @@ static struct option builtin_clone_options[] = {
N_("use <name> instead of 'origin' to track upstream")),
OPT_STRING('b', "branch", &option_branch, N_("branch"),
N_("checkout <branch> instead of the remote's HEAD")),
+ OPT_STRING(0, "ref", &option_ref, N_("ref"),
+ N_("checkout <ref> (detached) instead of the remote's HEAD")),
OPT_STRING('u', "upload-pack", &option_upload_pack, N_("path"),
N_("path to git-upload-pack on the remote")),
OPT_STRING(0, "depth", &option_depth, N_("depth"),
@@ -531,32 +534,64 @@ static struct ref *wanted_peer_refs(const struct ref *refs,
if (option_single_branch) {
struct ref *remote_head = NULL;
- if (!option_branch)
+ if (!option_branch && !option_ref)
remote_head = guess_remote_head(head, refs, 0);
else {
free_one_ref(head);
local_refs = head = NULL;
tail = &local_refs;
- remote_head = copy_ref(find_remote_branch(refs, option_branch));
+ if (option_branch)
+ remote_head = copy_ref(find_remote_branch(refs, option_branch));
+ else
+ remote_head = copy_ref(find_ref_by_name(refs, option_ref));
}
if (!remote_head && option_branch)
warning(_("Could not find remote branch %s to clone."),
option_branch);
+ else if (!remote_head && option_ref)
+ warning(_("Could not find remote ref %s to clone."),
+ option_ref);
else {
int i;
for (i = 0; i < refspec->nr; i++)
get_fetch_map(remote_head, &refspec->items[i],
&tail, 0);
- /* if --branch=tag, pull the requested tag explicitly */
- get_fetch_map(remote_head, &tag_refspec, &tail, 0);
+ if (option_ref) {
+ struct strbuf spec = STRBUF_INIT;
+ struct refspec_item ref_refspec;
+
+ strbuf_addf(&spec, "%s:%s", option_ref, option_ref);
+ refspec_item_init(&ref_refspec, spec.buf, 0);
+
+ get_fetch_map(remote_head, &ref_refspec, &tail, 0);
+
+ refspec_item_clear(&ref_refspec);
+ strbuf_release(&spec);
+ } else {
+ /* if --branch=tag, pull the requested tag explicitly */
+ get_fetch_map(remote_head, &tag_refspec, &tail, 0);
+ }
}
free_refs(remote_head);
} else {
int i;
for (i = 0; i < refspec->nr; i++)
get_fetch_map(refs, &refspec->items[i], &tail, 0);
+
+ if (option_ref) {
+ struct strbuf spec = STRBUF_INIT;
+ struct refspec_item ref_refspec;
+
+ strbuf_addf(&spec, "%s:%s", option_ref, option_ref);
+ refspec_item_init(&ref_refspec, spec.buf, 0);
+
+ get_fetch_map(refs, &ref_refspec, &tail, 0);
+
+ refspec_item_clear(&ref_refspec);
+ strbuf_release(&spec);
+ }
}
if (!option_mirror && !option_single_branch && !option_no_tags)
@@ -684,10 +719,15 @@ static void update_head(const struct ref *our, const struct ref *remote,
} else if (our) {
struct commit *c = lookup_commit_reference(the_repository,
&our->old_oid);
- /* --branch specifies a non-branch (i.e. tags), detach HEAD */
- refs_update_ref(get_main_ref_store(the_repository), msg,
- "HEAD", &c->object.oid, NULL, REF_NO_DEREF,
- UPDATE_REFS_DIE_ON_ERR);
+ if (c)
+ /* --branch specifies a non-branch (i.e. tags), detach HEAD */
+ refs_update_ref(get_main_ref_store(the_repository), msg,
+ "HEAD", &c->object.oid, NULL, REF_NO_DEREF,
+ UPDATE_REFS_DIE_ON_ERR);
+ else
+ refs_update_ref(get_main_ref_store(the_repository), msg,
+ "HEAD", &our->old_oid, NULL, REF_NO_DEREF,
+ UPDATE_REFS_DIE_ON_ERR);
} else if (remote) {
/*
* We know remote HEAD points to a non-branch, or
@@ -898,6 +938,9 @@ static void write_refspec_config(const char *src_ref_prefix,
else
strbuf_addf(&value, "+%s:%s%s", our_head_points_at->name,
branch_top->buf, option_branch);
+ } else if (option_ref) {
+ strbuf_addf(&value, "+%s:%s", our_head_points_at->name,
+ our_head_points_at->name);
} else if (remote_head_points_at) {
const char *head = remote_head_points_at->name;
if (!skip_prefix(head, "refs/heads/", &head))
@@ -1383,6 +1426,9 @@ int cmd_clone(int argc,
if (option_branch)
expand_ref_prefix(&transport_ls_refs_options.ref_prefixes,
option_branch);
+ if (option_ref)
+ strvec_push(&transport_ls_refs_options.ref_prefixes,
+ option_ref);
if (!option_no_tags)
strvec_push(&transport_ls_refs_options.ref_prefixes,
"refs/tags/");
@@ -1468,6 +1514,11 @@ int cmd_clone(int argc,
if (!our_head_points_at)
die(_("Remote branch %s not found in upstream %s"),
option_branch, remote_name);
+ } else if (option_ref) {
+ our_head_points_at = find_ref_by_name(mapped_refs, option_ref);
+ if (!our_head_points_at)
+ die(_("Remote ref %s not found in upstream %s"),
+ option_ref, remote_name);
} else if (remote_head_points_at) {
our_head_points_at = remote_head_points_at;
} else if (remote_head) {
diff --git a/t/t5612-clone-refspec.sh b/t/t5612-clone-refspec.sh
index 72762de977..51452bdd6f 100755
--- a/t/t5612-clone-refspec.sh
+++ b/t/t5612-clone-refspec.sh
@@ -17,6 +17,7 @@ test_expect_success 'setup' '
git tag two &&
echo three >file &&
git commit -a -m three &&
+ git update-ref refs/some/three HEAD &&
git checkout -b side &&
echo four >file &&
git commit -a -m four &&
@@ -236,4 +237,38 @@ test_expect_success '--single-branch with detached' '
test_must_be_empty actual
'
+test_expect_success 'with --ref' '
+ git clone --ref=refs/some/three . dir_ref &&
+ git -C dir_ref for-each-ref refs > refs &&
+ sed -e "/HEAD$/d" \
+ -e "s|/remotes/origin/|/heads/|" refs >actual &&
+ git for-each-ref refs >expect &&
+ test_cmp expect actual
+'
+
+test_expect_success 'with --ref and --no-tags' '
+ git clone --ref=refs/some/three --no-tags . dir_ref_notags &&
+ git -C dir_ref_notags for-each-ref refs > refs &&
+ sed -e "/HEAD$/d" \
+ -e "s|/remotes/origin/|/heads/|" refs >actual &&
+ git for-each-ref refs/heads >expect &&
+ git for-each-ref refs/some >>expect &&
+ test_cmp expect actual
+'
+
+test_expect_success '--single-branch with --ref' '
+ git clone --single-branch --ref=refs/some/three . dir_single_ref &&
+ git -C dir_single_ref for-each-ref refs > actual &&
+ git for-each-ref refs/some >expect &&
+ git for-each-ref refs/tags >>expect &&
+ test_cmp expect actual
+'
+
+test_expect_success '--single-branch with --ref and --no-tags' '
+ git clone --single-branch --ref=refs/some/three --no-tags . dir_single_ref_notags &&
+ git -C dir_single_ref_notags for-each-ref refs > actual &&
+ git for-each-ref refs/some >expect &&
+ test_cmp expect actual
+'
+
test_done
--
2.46.0
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [PATCH] builtin/clone: teach git-clone(1) the --ref= argument
2024-09-27 8:54 [PATCH] builtin/clone: teach git-clone(1) the --ref= argument Toon Claes
@ 2024-09-27 19:20 ` Junio C Hamano
2024-09-27 19:29 ` brian m. carlson
0 siblings, 1 reply; 6+ messages in thread
From: Junio C Hamano @ 2024-09-27 19:20 UTC (permalink / raw)
To: Toon Claes; +Cc: git
Toon Claes <toon@iotcl.com> writes:
> Add option `--ref` to git-clone(1). This enables the user to clone and
> checkout the given named reference.
Define "checkout the reference". A ref that is outside of
"refs/heads/" MUST NOT be pointed by HEAD, so giving a tag with this
option should result in an initial working tree on a detached HEAD.
If it is what the new option does (which is similar to the way how
we handle a tag when it was given with --branch), we should be more
explicit that the ref is *not* checked out, but a deteached HEAD is
created to point at the commit referenced. I think the documentation
part of this patch does a much better job at it.
> It's pretty similar to --branch and
> while --branch takes a branch name or tag name, it doesn't take a fully
> qualified reference. This allows the user to clone a reference that
> doesn't start with refs/heads or refs/tags. This can be useful when the
> server uses custom references.
"when the server uses custom references" is a rather weak
explanation.
The answer to "Doctor, it hurts when I turn my elbow in this
unnatural direction" is usually "Well, do not do it then". The
answer to "Doctor, I cannot use the --branch option because I use
non branches to keep track of some histories" should be the same.
Why do you want to turn your elbow in an unnatural angle in the
first place?
Stepping one level higher.
The usual way to compose a log message of this project is to
- Give an observation on how the current system work in the present
tense (so no need to say "Currently X is Y", just "X is Y"), and
discuss what you perceive as a problem in it.
- Propose a solution (optional---often, problem description
trivially leads to an obvious solution in reader's minds).
- Give commands to the codebase to "become like so".
in this order. The first paragraph that describes what the commit
wants to do is better moved down to the last. Justify why you want
to turn your elbow in a way other people do not usually do (or why
you want unusual hierarchies on the server side), explain that it
hurts (or say --branches is limited to branches and tags), and then
finally give a solution.
> +`--ref` _<name>_::
> + This detaches HEAD and makes it point to the commit where the _<name>_
> + reference is pointing to. In a non-bare repository, this is the ref that
> + will be checked out.
> + Can be used in conjunction with `--single-branch` and `--no-tags` to
> + clone only the given ref. Cannot be combined with `--branch`.
OK, so it is an error to give both --branch and --ref at the same
time?
If you say "git clone --branch foo --branch bar ...", wouldn't the
usual last-one-wins rule apply? Shouldn't this work the same way,
meaning "git clone --branch foo --ref refs/some/one ..." would
behave just like "git clone --ref refs/some/one ...", without
treating the "foo" branch any specially?
What if --ref is not pointing at a commit-ish? If I in any
repository with non-empty history checked out do this:
$ git update-ref refs/some/one HEAD:
can you clone from this repository with
$ git clone --ref=refs/some/one $URL
and what happens at the end?
> diff --git a/builtin/clone.c b/builtin/clone.c
> index e77339c847..384923703d 100644
> --- a/builtin/clone.c
> +++ b/builtin/clone.c
> @@ -69,6 +69,7 @@ static char *option_template, *option_depth, *option_since;
> static char *option_origin = NULL;
> static char *remote_name = NULL;
> static char *option_branch = NULL;
> +static char *option_ref = NULL;
If you want to make "With --branch and --ref, the last one wins",
this would not be sufficient.
> @@ -141,6 +142,8 @@ static struct option builtin_clone_options[] = {
> N_("use <name> instead of 'origin' to track upstream")),
> OPT_STRING('b', "branch", &option_branch, N_("branch"),
> N_("checkout <branch> instead of the remote's HEAD")),
> + OPT_STRING(0, "ref", &option_ref, N_("ref"),
> + N_("checkout <ref> (detached) instead of the remote's HEAD")),
Ditto.
> @@ -531,32 +534,64 @@ static struct ref *wanted_peer_refs(const struct ref *refs,
> if (option_single_branch) {
> struct ref *remote_head = NULL;
>
> - if (!option_branch)
> + if (!option_branch && !option_ref)
> remote_head = guess_remote_head(head, refs, 0);
> else {
> free_one_ref(head);
> local_refs = head = NULL;
> tail = &local_refs;
> - remote_head = copy_ref(find_remote_branch(refs, option_branch));
> + if (option_branch)
> + remote_head = copy_ref(find_remote_branch(refs, option_branch));
> + else
> + remote_head = copy_ref(find_ref_by_name(refs, option_ref));
This makes --ref ignored when --branch is given. Intended?
> diff --git a/t/t5612-clone-refspec.sh b/t/t5612-clone-refspec.sh
> index 72762de977..51452bdd6f 100755
> --- a/t/t5612-clone-refspec.sh
> +++ b/t/t5612-clone-refspec.sh
> @@ -17,6 +17,7 @@ test_expect_success 'setup' '
> git tag two &&
> echo three >file &&
> git commit -a -m three &&
> + git update-ref refs/some/three HEAD &&
> git checkout -b side &&
> echo four >file &&
> git commit -a -m four &&
> @@ -236,4 +237,38 @@ test_expect_success '--single-branch with detached' '
> test_must_be_empty actual
> '
>
> +test_expect_success 'with --ref' '
> + git clone --ref=refs/some/three . dir_ref &&
> + git -C dir_ref for-each-ref refs > refs &&
Lose SP between ">" and "refs".
> + sed -e "/HEAD$/d" \
> + -e "s|/remotes/origin/|/heads/|" refs >actual &&
> + git for-each-ref refs >expect &&
> + test_cmp expect actual
> +'
> +
> +test_expect_success 'with --ref and --no-tags' '
> + git clone --ref=refs/some/three --no-tags . dir_ref_notags &&
> + git -C dir_ref_notags for-each-ref refs > refs &&
> + sed -e "/HEAD$/d" \
> + -e "s|/remotes/origin/|/heads/|" refs >actual &&
> + git for-each-ref refs/heads >expect &&
> + git for-each-ref refs/some >>expect &&
> + test_cmp expect actual
> +'
> +
> +test_expect_success '--single-branch with --ref' '
> + git clone --single-branch --ref=refs/some/three . dir_single_ref &&
> + git -C dir_single_ref for-each-ref refs > actual &&
> + git for-each-ref refs/some >expect &&
I would expect refs/some/three and nothing else comes from refs/some
hierarchy, as the clone was done with "--ref=refs/some/three". Is
there a particular reason why this for-each-ref is looser and take
anything under "refs/some/"?
> + git for-each-ref refs/tags >>expect &&
This is because the above is not giving --no-tags, I guess.
> + test_cmp expect actual
> +'
> +
> +test_expect_success '--single-branch with --ref and --no-tags' '
> + git clone --single-branch --ref=refs/some/three --no-tags . dir_single_ref_notags &&
> + git -C dir_single_ref_notags for-each-ref refs > actual &&
> + git for-each-ref refs/some >expect &&
Same comment on the looseness of this one. Lack of refs/tags is
expected and it is a good thing to test that.
> + test_cmp expect actual
> +'
There may be others, but three cases that are not tested but should
be I spotted offhand are:
* "clone --ref=refs/some/three --no-ref" acts as if we gave no
"--refs" at all.
* interaction when "--ref=refs/some/three" and "--branch=main" are
given at the same time.
* the command gracefully fail when the given ref points at a tree
or a blob.
Thanks.
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] builtin/clone: teach git-clone(1) the --ref= argument
2024-09-27 19:20 ` Junio C Hamano
@ 2024-09-27 19:29 ` brian m. carlson
2024-09-27 20:05 ` Junio C Hamano
2024-09-27 20:10 ` Kristoffer Haugsbakk
0 siblings, 2 replies; 6+ messages in thread
From: brian m. carlson @ 2024-09-27 19:29 UTC (permalink / raw)
To: Junio C Hamano; +Cc: Toon Claes, git
[-- Attachment #1: Type: text/plain, Size: 2067 bytes --]
On 2024-09-27 at 19:20:14, Junio C Hamano wrote:
> Toon Claes <toon@iotcl.com> writes:
>
> > It's pretty similar to --branch and
> > while --branch takes a branch name or tag name, it doesn't take a fully
> > qualified reference. This allows the user to clone a reference that
> > doesn't start with refs/heads or refs/tags. This can be useful when the
> > server uses custom references.
>
> "when the server uses custom references" is a rather weak
> explanation.
>
> The answer to "Doctor, it hurts when I turn my elbow in this
> unnatural direction" is usually "Well, do not do it then". The
> answer to "Doctor, I cannot use the --branch option because I use
> non branches to keep track of some histories" should be the same.
> Why do you want to turn your elbow in an unnatural angle in the
> first place?
I can't speak for what Toon intended here, but GitHub uses some
references under `refs/pull` that are used for tracking pull requests.
We even have some in the Git repository on GitHub:
% git ls-remote upstream refs/pull/* | head -n 5
f0d0fd3a5985d5e588da1e1d11c85fba0ae132f8 refs/pull/10/head
c8198f6c2c9fc529b25988dfaf5865bae5320cb5 refs/pull/10/merge
d604669e32e847c2ba5010c89895dd707ba45f55 refs/pull/100/head
55ab0c9399879683b4cc6e1baea5dc41484ca52f refs/pull/100/merge
08d39e0bb5b9dbd16e9e4c2250e75848718c453b refs/pull/1000/head
These are not kept under `refs/heads` because `refs/heads` belongs to
the user, but it is generally useful to check them out in case of very
large changes or changes with complex binary files which don't render
well in the web interface (among other reasons) that might need to be
inspected for code review. So I think this is a generally useful
feature, although I agree that perhaps the commit message might explain
the benefits in a more concrete way for those who don't already
understand the utility of the feature (such as our illustrious
maintainer).
--
brian m. carlson (they/them or he/him)
Toronto, Ontario, CA
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 262 bytes --]
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] builtin/clone: teach git-clone(1) the --ref= argument
2024-09-27 19:29 ` brian m. carlson
@ 2024-09-27 20:05 ` Junio C Hamano
2024-09-27 20:10 ` Kristoffer Haugsbakk
1 sibling, 0 replies; 6+ messages in thread
From: Junio C Hamano @ 2024-09-27 20:05 UTC (permalink / raw)
To: brian m. carlson; +Cc: Toon Claes, git
"brian m. carlson" <sandals@crustytoothpaste.net> writes:
> I can't speak for what Toon intended here, but GitHub uses some
> references under `refs/pull` that are used for tracking pull requests.
Ah, that is an excellent example that can be used to strengthen the
motivation part of this commit.
A user may want to clone only "refs/pull/$n/head" to take a
peek, but unlike refs/heads/ and refs/tags/, for which the
"--branch" option can be used, there is no good simple way to do
so with "git clone".
would make a good intro paragraph for a proposed log message.
> So I think this is a generally useful feature, although I agree
> that perhaps the commit message might explain the benefits in a
> more concrete way for those who don't already understand the
> utility of the feature (such as our illustrious maintainer).
Yes, not seeing refs/pull/ without being told was slight lack of
imagination on my part, but it is a good idea to spell out the
motivation in concrete terms when we can.
Thanks.
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] builtin/clone: teach git-clone(1) the --ref= argument
2024-09-27 19:29 ` brian m. carlson
2024-09-27 20:05 ` Junio C Hamano
@ 2024-09-27 20:10 ` Kristoffer Haugsbakk
1 sibling, 0 replies; 6+ messages in thread
From: Kristoffer Haugsbakk @ 2024-09-27 20:10 UTC (permalink / raw)
To: brian m. carlson, Junio C Hamano; +Cc: Toon Claes, git
On Fri, Sep 27, 2024, at 21:29, brian m. carlson wrote:
> On 2024-09-27 at 19:20:14, Junio C Hamano wrote:
>> Toon Claes <toon@iotcl.com> writes:
>>
>> > It's pretty similar to --branch and
>> > while --branch takes a branch name or tag name, it doesn't take a fully
>> > qualified reference. This allows the user to clone a reference that
>> > doesn't start with refs/heads or refs/tags. This can be useful when the
>> > server uses custom references.
>>
>> "when the server uses custom references" is a rather weak
>> explanation.
>>
>> The answer to "Doctor, it hurts when I turn my elbow in this
>> unnatural direction" is usually "Well, do not do it then". The
>> answer to "Doctor, I cannot use the --branch option because I use
>> non branches to keep track of some histories" should be the same.
>> Why do you want to turn your elbow in an unnatural angle in the
>> first place?
>
> I can't speak for what Toon intended here, but GitHub uses some
> references under `refs/pull` that are used for tracking pull requests.
> We even have some in the Git repository on GitHub:
>
> % git ls-remote upstream refs/pull/* | head -n 5
> f0d0fd3a5985d5e588da1e1d11c85fba0ae132f8 refs/pull/10/head
> c8198f6c2c9fc529b25988dfaf5865bae5320cb5 refs/pull/10/merge
> d604669e32e847c2ba5010c89895dd707ba45f55 refs/pull/100/head
> 55ab0c9399879683b4cc6e1baea5dc41484ca52f refs/pull/100/merge
> 08d39e0bb5b9dbd16e9e4c2250e75848718c453b refs/pull/1000/head
>
> These are not kept under `refs/heads` because `refs/heads` belongs to
> the user, but it is generally useful to check them out in case of very
> large changes or changes with complex binary files which don't render
> well in the web interface (among other reasons) that might need to be
> inspected for code review. So I think this is a generally useful
> feature, although I agree that perhaps the commit message might explain
> the benefits in a more concrete way for those who don't already
> understand the utility of the feature (such as our illustrious
> maintainer).
I’ve seen a few times that a change is proposed with a commit message
that says that it allows you to do X. And plenty of motivation is
provided in a narrow (technical) sense of how X makes things nicer.
But without explaining why you’d want to do X. Then someone needs to
ask the the submitter why. Then they say that they need it at
<organization> to do something specific.
It’s certainly nice to have all that information in the commit message.
--
Kristoffer Haugsbakk
^ permalink raw reply [flat|nested] 6+ messages in thread
* [PATCH] fetch: use bundle URIs when having creationToken heuristic
@ 2024-07-22 8:07 Toon Claes
2024-09-27 9:04 ` [PATCH] builtin/clone: teach git-clone(1) the --ref= argument Toon Claes
0 siblings, 1 reply; 6+ messages in thread
From: Toon Claes @ 2024-07-22 8:07 UTC (permalink / raw)
To: git; +Cc: Toon Claes
At the moment, bundle URIs are only used on git-clone(1). For a clone
the use of bundle URI is trivial, because repository is empty so
downloading bundles will never result in downloading objects that are in
the repository already.
For git-fetch(1), this more complicated to use bundle URI. We want to
avoid downloading bundles that only contains objects that are in the
local repository already.
One way to achieve this is possible when the "creationToken" heuristic
is used for bundle URIs. With this heuristic, the server sends along a
creationToken for each bundle. When a client downloads bundles with such
creationToken, the highest known value is written to
`fetch.bundleCreationToken` in the git-config. The next time bundles are
advertised by the server, bundles with a lower creationToken value are
ignored. This was already implemented by 7903efb717 (bundle-uri:
download in creationToken order, 2023-01-31) in
fetch_bundles_by_token().
Using the creationToken heuristic is optional, but without it the
client has no idea which bundles are new and which only have objects the
client already has.
With this knowledge, make git-fetch(1) get bundle URI information from
the server, but only use bundle URIs if the creationToken heuristic is
used.
The code added in builtin/fetch.c is very similar to the code in
builtin/clone.c, so while at it make sure we always clean up the bundles
list advertised by the server.
Signed-off-by: Toon Claes <toon@iotcl.com>
---
builtin/clone.c | 13 +++++-----
builtin/fetch.c | 17 +++++++++++++
t/t5584-fetch-bundle-uri.sh | 49 +++++++++++++++++++++++++++++++++++++
3 files changed, 73 insertions(+), 6 deletions(-)
create mode 100755 t/t5584-fetch-bundle-uri.sh
diff --git a/builtin/clone.c b/builtin/clone.c
index af6017d41a..29f0470aea 100644
--- a/builtin/clone.c
+++ b/builtin/clone.c
@@ -1406,9 +1406,9 @@ int cmd_clone(int argc, const char **argv, const char *prefix)
git_config_set_gently("fetch.bundleuri", bundle_uri);
} else {
/*
- * Populate transport->got_remote_bundle_uri and
- * transport->bundle_uri. We might get nothing.
- */
+ * Populate transport->bundles. We might get nothing or fail
+ * trying, but clone can continue without bundles as well.
+ */
transport_get_remote_bundle_uri(transport);
if (transport->bundles &&
@@ -1419,10 +1419,11 @@ int cmd_clone(int argc, const char **argv, const char *prefix)
else if (fetch_bundle_list(the_repository,
transport->bundles))
warning(_("failed to fetch advertised bundles"));
- } else {
- clear_bundle_list(transport->bundles);
- FREE_AND_NULL(transport->bundles);
}
+
+ clear_bundle_list(transport->bundles);
+ if (transport->bundles)
+ FREE_AND_NULL(transport->bundles);
}
if (refs)
diff --git a/builtin/fetch.c b/builtin/fetch.c
index 693f02b958..1e944f81af 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -1694,6 +1694,23 @@ static int do_fetch(struct transport *transport,
retcode = 1;
}
+ /*
+ * Populate transport->bundles. We might get nothing or fail
+ * trying, but fetch can continue without bundles as well.
+ */
+ transport_get_remote_bundle_uri(transport);
+
+ if (transport->bundles &&
+ hashmap_get_size(&transport->bundles->bundles) &&
+ (transport->bundles->heuristic == BUNDLE_HEURISTIC_CREATIONTOKEN)) {
+ if (fetch_bundle_list(the_repository, transport->bundles))
+ warning(_("failed to fetch advertised bundles"));
+ }
+
+ clear_bundle_list(transport->bundles);
+ if (transport->bundles)
+ FREE_AND_NULL(transport->bundles);
+
if (fetch_and_consume_refs(&display_state, transport, transaction, ref_map,
&fetch_head, config)) {
retcode = 1;
diff --git a/t/t5584-fetch-bundle-uri.sh b/t/t5584-fetch-bundle-uri.sh
new file mode 100755
index 0000000000..6c2383646e
--- /dev/null
+++ b/t/t5584-fetch-bundle-uri.sh
@@ -0,0 +1,49 @@
+#!/bin/sh
+
+test_description='test use of bundle URI in "git fetch"'
+
+GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
+export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
+
+. ./test-lib.sh
+
+test_expect_success 'set up repos and bundles' '
+ git init source &&
+ test_commit -C source A &&
+ git clone --no-local source go-A-to-C &&
+ test_commit -C source B &&
+ git clone --no-local source go-B-to-C &&
+ git clone --no-local source go-B-to-D &&
+ git -C source bundle create B.bundle main &&
+ test_commit -C source C &&
+ git -C source bundle create B-to-C.bundle B..main &&
+ git -C source config uploadpack.advertiseBundleURIs true &&
+ git -C source config bundle.version 1 &&
+ git -C source config bundle.mode all &&
+ git -C source config bundle.heuristic creationToken &&
+ git -C source config bundle.bundle-B.uri "file://$(pwd)/source/B.bundle" &&
+ git -C source config bundle.bundle-B.creationToken 1 &&
+ git -C source config bundle.bundle-B-to-C.uri "file://$(pwd)/source/B-to-C.bundle" &&
+ git -C source config bundle.bundle-B-to-C.creationToken 2
+'
+
+test_expect_success 'fetches one bundle URI to get up-to-date' '
+ git -C go-B-to-C -c transfer.bundleURI=true fetch origin &&
+ test 1 = $(ls go-B-to-C/.git/objects/bundles | wc -l) &&
+ test 2 = $(git -C go-B-to-C config fetch.bundleCreationToken)
+'
+
+test_expect_success 'fetches two bundle URIs to get up-to-date' '
+ git -C go-A-to-C -c transfer.bundleURI=true fetch origin &&
+ test 2 = $(ls go-A-to-C/.git/objects/bundles | wc -l) &&
+ test 2 = $(git -C go-A-to-C config fetch.bundleCreationToken)
+'
+
+test_expect_success 'fetches one bundle URI and objects from remote' '
+ test_commit -C source D &&
+ git -C go-B-to-D -c transfer.bundleURI=true fetch origin &&
+ test 1 = $(ls go-B-to-D/.git/objects/bundles | wc -l) &&
+ test 2 = $(git -C go-B-to-D config fetch.bundleCreationToken)
+'
+
+test_done
--
2.45.2
^ permalink raw reply related [flat|nested] 6+ messages in thread
end of thread, other threads:[~2024-09-27 20:10 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-09-27 8:54 [PATCH] builtin/clone: teach git-clone(1) the --ref= argument Toon Claes
2024-09-27 19:20 ` Junio C Hamano
2024-09-27 19:29 ` brian m. carlson
2024-09-27 20:05 ` Junio C Hamano
2024-09-27 20:10 ` Kristoffer Haugsbakk
-- strict thread matches above, loose matches on Subject: below --
2024-07-22 8:07 [PATCH] fetch: use bundle URIs when having creationToken heuristic Toon Claes
2024-09-27 9:04 ` [PATCH] builtin/clone: teach git-clone(1) the --ref= argument Toon Claes
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).