* [PATCH] remote: introduce config to set prefetch refs @ 2024-09-09 9:47 Shubham Kanodia via GitGitGadget 2024-09-09 9:51 ` Shubham Kanodia ` (2 more replies) 0 siblings, 3 replies; 27+ messages in thread From: Shubham Kanodia via GitGitGadget @ 2024-09-09 9:47 UTC (permalink / raw) To: git Cc: Patrick Steinhardt [ ], Junio C Hamano [ ], Derrick Stolee [ ], Shubham Kanodia, Shubham Kanodia From: Shubham Kanodia <shubham.kanodia10@gmail.com> This commit introduces a new configuration option, remote.<name>.prefetchref, which allows users to specify specific ref patterns to be prefetched during a git fetch --prefetch operation. The new option accepts a space-separated list of ref patterns. When the --prefetch option is used with git fetch, only the refs matching these patterns will be prefetched, instead of the default behavior of prefetching all fetchable refs. Example usage in .git/config: [remote "origin"] prefetchref = "refs/heads/main refs/heads/feature/*" This change allows users to optimize their prefetch operations, potentially reducing network traffic and improving performance for large repositories with many refs. Signed-off-by: Shubham Kanodia <shubham.kanodia10@gmail.com> --- remote: introduce config to set prefetch refs Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-1782%2Fpastelsky%2Fsk%2Fremote-prefetchref-v1 Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-1782/pastelsky/sk/remote-prefetchref-v1 Pull-Request: https://github.com/gitgitgadget/git/pull/1782 Documentation/config/remote.txt | 6 +++ builtin/fetch.c | 29 +++++++++++++- remote.c | 8 ++++ remote.h | 3 ++ t/t7900-maintenance.sh | 70 +++++++++++++++++++++++++++++++++ 5 files changed, 115 insertions(+), 1 deletion(-) diff --git a/Documentation/config/remote.txt b/Documentation/config/remote.txt index 8efc53e836d..b25d76dd3b1 100644 --- a/Documentation/config/remote.txt +++ b/Documentation/config/remote.txt @@ -33,6 +33,12 @@ remote.<name>.fetch:: The default set of "refspec" for linkgit:git-fetch[1]. See linkgit:git-fetch[1]. +remote.<name>.prefetchref:: + Specify the refs to be prefetched when fetching from this remote. + The value is a space-separated list of ref patterns (e.g., "refs/heads/master refs/heads/develop*"). + These patterns are used as the source part of the refspecs for prefetching. + This can be used to optimize fetch operations by specifying exactly which refs should be prefetched. + remote.<name>.push:: The default set of "refspec" for linkgit:git-push[1]. See linkgit:git-push[1]. diff --git a/builtin/fetch.c b/builtin/fetch.c index b2b5aee5bf2..6e584fa2ebb 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -434,6 +434,30 @@ static void find_non_local_tags(const struct ref *refs, oidset_clear(&fetch_oids); } +static void apply_prefetch_refspec(struct remote *remote, struct refspec *rs) +{ + if (remote->prefetch_refs.nr > 0) { + int i; + for (i = 0; i < remote->prefetch_refs.nr; i++) { + const char *src = remote->prefetch_refs.items[i].string; + struct strbuf dst = STRBUF_INIT; + + strbuf_addf(&dst, "refs/prefetch/%s/", remote->name); + if (starts_with(src, "refs/heads/")) { + strbuf_addstr(&dst, src + 11); + } else if (starts_with(src, "refs/")) { + strbuf_addstr(&dst, src + 5); + } else { + strbuf_addstr(&dst, src); + } + + refspec_appendf(rs, "%s:%s", src, dst.buf); + strbuf_release(&dst); + } + } +} + + static void filter_prefetch_refspec(struct refspec *rs) { int i; @@ -502,8 +526,11 @@ static struct ref *get_ref_map(struct remote *remote, int existing_refs_populated = 0; filter_prefetch_refspec(rs); - if (remote) + if (remote) { filter_prefetch_refspec(&remote->fetch); + if (prefetch) + apply_prefetch_refspec(remote, rs); + } if (rs->nr) { struct refspec *fetch_refspec; diff --git a/remote.c b/remote.c index 8f3dee13186..b46d62b2c47 100644 --- a/remote.c +++ b/remote.c @@ -141,6 +141,7 @@ static struct remote *make_remote(struct remote_state *remote_state, ret->prune = -1; /* unspecified */ ret->prune_tags = -1; /* unspecified */ ret->name = xstrndup(name, len); + string_list_init_dup(&ret->prefetch_refs); refspec_init(&ret->push, REFSPEC_PUSH); refspec_init(&ret->fetch, REFSPEC_FETCH); @@ -166,6 +167,7 @@ static void remote_clear(struct remote *remote) free((char *)remote->uploadpack); FREE_AND_NULL(remote->http_proxy); FREE_AND_NULL(remote->http_proxy_authmethod); + string_list_clear(&remote->prefetch_refs, 0); } static void add_merge(struct branch *branch, const char *name) @@ -456,6 +458,12 @@ static int handle_config(const char *key, const char *value, remote->prune = git_config_bool(key, value); else if (!strcmp(subkey, "prunetags")) remote->prune_tags = git_config_bool(key, value); + else if (!strcmp(subkey, "prefetchref")) { + if (!value) + return config_error_nonbool(key); + string_list_split(&remote->prefetch_refs, value, ' ', -1); + return 0; + } else if (!strcmp(subkey, "url")) { if (!value) return config_error_nonbool(key); diff --git a/remote.h b/remote.h index b901b56746d..c18e68e0d8d 100644 --- a/remote.h +++ b/remote.h @@ -5,6 +5,7 @@ #include "hashmap.h" #include "refspec.h" #include "strvec.h" +#include "string-list.h" struct option; struct transport_ls_refs_options; @@ -77,6 +78,8 @@ struct remote { struct refspec fetch; + struct string_list prefetch_refs; + /* * The setting for whether to fetch tags (as a separate rule from the * configured refspecs); diff --git a/t/t7900-maintenance.sh b/t/t7900-maintenance.sh index abae7a97546..2ad5b922d83 100755 --- a/t/t7900-maintenance.sh +++ b/t/t7900-maintenance.sh @@ -245,6 +245,76 @@ test_expect_success 'prefetch multiple remotes' ' test_subcommand git fetch remote2 $fetchargs <skip-remote1.txt ' +test_expect_success 'prefetch only acts on remote.<name>.prefetchref refs if present' ' + test_create_repo prefetch-test-mixed-patterns && + ( + cd prefetch-test-mixed-patterns && + test_commit initial && + git clone . clone1 && + git clone . clone2 && + + git remote add remote1 "file://$(pwd)/clone1" && + git remote add remote2 "file://$(pwd)/clone2" && + + # Set single prefetchref pattern for remote1 and multiple for remote2 + git config remote.remote1.prefetchref "refs/heads/foo" && + git config remote.remote2.prefetchref "refs/heads/feature/* refs/heads/topic" && + + # Create branches in clone1 and push + ( + cd clone1 && + git checkout -b foo && + test_commit foo-commit && + git checkout -b feature/a && + test_commit feature-a-commit && + git checkout -b other && + test_commit other-commit && + git push origin foo feature/a other + ) && + + # Create branches in clone2 and push + ( + cd clone2 && + git checkout -b topic && + test_commit master-commit && + git checkout -b feature/x && + test_commit feature-x-commit && + git checkout -b feature/y && + test_commit feature-y-commit && + git checkout -b dev && + test_commit dev-commit && + git push origin topic feature/x feature/y dev + ) && + + # Run maintenance prefetch task + GIT_TRACE2_EVENT="$(pwd)/prefetch.txt" git maintenance run --task=prefetch 2>/dev/null && + + # Check that only specified refs were prefetched + fetchargs="--prefetch --prune --no-tags --no-write-fetch-head --recurse-submodules=no --quiet" && + test_subcommand git fetch remote1 $fetchargs <prefetch.txt && + test_subcommand git fetch remote2 $fetchargs <prefetch.txt && + ls -R .git/refs/prefetch && + + # Verify that only specified refs are in the prefetch refs for remote1 + git rev-parse refs/prefetch/remotes/remote1/foo && + test_must_fail git rev-parse refs/prefetch/remotes/remote1/feature/a && + test_must_fail git rev-parse refs/prefetch/remotes/remote1/other && + + # Verify that only specified refs are in the prefetch refs for remote2 + git rev-parse refs/prefetch/remotes/remote2/feature/x && + git rev-parse refs/prefetch/remotes/remote2/feature/y && + git rev-parse refs/prefetch/remotes/remote2/topic && + test_must_fail git rev-parse refs/prefetch/remotes/remote2/dev && + + # Fetch all refs and compare + git fetch --all && + test_cmp_rev refs/remotes/remote1/foo refs/prefetch/remotes/remote1/foo && + test_cmp_rev refs/remotes/remote2/feature/x refs/prefetch/remotes/remote2/feature/x && + test_cmp_rev refs/remotes/remote2/feature/y refs/prefetch/remotes/remote2/feature/y && + test_cmp_rev refs/remotes/remote2/topic refs/prefetch/remotes/remote2/topic + ) +' + test_expect_success 'loose-objects task' ' # Repack everything so we know the state of the object dir git repack -adk && base-commit: 2e7b89e038c0c888acf61f1b4ee5a43d4dd5e94c -- gitgitgadget ^ permalink raw reply related [flat|nested] 27+ messages in thread
* Re: [PATCH] remote: introduce config to set prefetch refs 2024-09-09 9:47 [PATCH] remote: introduce config to set prefetch refs Shubham Kanodia via GitGitGadget @ 2024-09-09 9:51 ` Shubham Kanodia 2024-09-09 16:42 ` Junio C Hamano 2024-09-15 14:08 ` [PATCH v2] " Shubham Kanodia via GitGitGadget 2 siblings, 0 replies; 27+ messages in thread From: Shubham Kanodia @ 2024-09-09 9:51 UTC (permalink / raw) To: Shubham Kanodia via GitGitGadget Cc: git, Patrick Steinhardt [ ], Junio C Hamano [ ], Derrick Stolee [ ], Phillip Wood On Mon, Sep 9, 2024 at 3:17 PM Shubham Kanodia via GitGitGadget <gitgitgadget@gmail.com> wrote: > > From: Shubham Kanodia <shubham.kanodia10@gmail.com> > > This commit introduces a new configuration option, > remote.<name>.prefetchref, which allows users to specify specific > ref patterns to be prefetched during a git fetch --prefetch > operation. > > The new option accepts a space-separated list of ref patterns. > When the --prefetch option is used with git fetch, only the refs > matching these patterns will be prefetched, instead of the > default behavior of prefetching all fetchable refs. > > Example usage in .git/config: > [remote "origin"] > prefetchref = "refs/heads/main refs/heads/feature/*" > > This change allows users to optimize their prefetch operations, potentially > reducing network traffic and improving performance for large repositories > with many refs. > > Signed-off-by: Shubham Kanodia <shubham.kanodia10@gmail.com> > --- > remote: introduce config to set prefetch refs > > Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-1782%2Fpastelsky%2Fsk%2Fremote-prefetchref-v1 > Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-1782/pastelsky/sk/remote-prefetchref-v1 > Pull-Request: https://github.com/gitgitgadget/git/pull/1782 > > Documentation/config/remote.txt | 6 +++ > builtin/fetch.c | 29 +++++++++++++- > remote.c | 8 ++++ > remote.h | 3 ++ > t/t7900-maintenance.sh | 70 +++++++++++++++++++++++++++++++++ > 5 files changed, 115 insertions(+), 1 deletion(-) > > diff --git a/Documentation/config/remote.txt b/Documentation/config/remote.txt > index 8efc53e836d..b25d76dd3b1 100644 > --- a/Documentation/config/remote.txt > +++ b/Documentation/config/remote.txt > @@ -33,6 +33,12 @@ remote.<name>.fetch:: > The default set of "refspec" for linkgit:git-fetch[1]. See > linkgit:git-fetch[1]. > > +remote.<name>.prefetchref:: > + Specify the refs to be prefetched when fetching from this remote. > + The value is a space-separated list of ref patterns (e.g., "refs/heads/master refs/heads/develop*"). > + These patterns are used as the source part of the refspecs for prefetching. > + This can be used to optimize fetch operations by specifying exactly which refs should be prefetched. > + > remote.<name>.push:: > The default set of "refspec" for linkgit:git-push[1]. See > linkgit:git-push[1]. > diff --git a/builtin/fetch.c b/builtin/fetch.c > index b2b5aee5bf2..6e584fa2ebb 100644 > --- a/builtin/fetch.c > +++ b/builtin/fetch.c > @@ -434,6 +434,30 @@ static void find_non_local_tags(const struct ref *refs, > oidset_clear(&fetch_oids); > } > > +static void apply_prefetch_refspec(struct remote *remote, struct refspec *rs) > +{ > + if (remote->prefetch_refs.nr > 0) { > + int i; > + for (i = 0; i < remote->prefetch_refs.nr; i++) { > + const char *src = remote->prefetch_refs.items[i].string; > + struct strbuf dst = STRBUF_INIT; > + > + strbuf_addf(&dst, "refs/prefetch/%s/", remote->name); > + if (starts_with(src, "refs/heads/")) { > + strbuf_addstr(&dst, src + 11); > + } else if (starts_with(src, "refs/")) { > + strbuf_addstr(&dst, src + 5); > + } else { > + strbuf_addstr(&dst, src); > + } > + > + refspec_appendf(rs, "%s:%s", src, dst.buf); > + strbuf_release(&dst); > + } > + } > +} > + > + > static void filter_prefetch_refspec(struct refspec *rs) > { > int i; > @@ -502,8 +526,11 @@ static struct ref *get_ref_map(struct remote *remote, > int existing_refs_populated = 0; > > filter_prefetch_refspec(rs); > - if (remote) > + if (remote) { > filter_prefetch_refspec(&remote->fetch); > + if (prefetch) > + apply_prefetch_refspec(remote, rs); > + } > > if (rs->nr) { > struct refspec *fetch_refspec; > diff --git a/remote.c b/remote.c > index 8f3dee13186..b46d62b2c47 100644 > --- a/remote.c > +++ b/remote.c > @@ -141,6 +141,7 @@ static struct remote *make_remote(struct remote_state *remote_state, > ret->prune = -1; /* unspecified */ > ret->prune_tags = -1; /* unspecified */ > ret->name = xstrndup(name, len); > + string_list_init_dup(&ret->prefetch_refs); > refspec_init(&ret->push, REFSPEC_PUSH); > refspec_init(&ret->fetch, REFSPEC_FETCH); > > @@ -166,6 +167,7 @@ static void remote_clear(struct remote *remote) > free((char *)remote->uploadpack); > FREE_AND_NULL(remote->http_proxy); > FREE_AND_NULL(remote->http_proxy_authmethod); > + string_list_clear(&remote->prefetch_refs, 0); > } > > static void add_merge(struct branch *branch, const char *name) > @@ -456,6 +458,12 @@ static int handle_config(const char *key, const char *value, > remote->prune = git_config_bool(key, value); > else if (!strcmp(subkey, "prunetags")) > remote->prune_tags = git_config_bool(key, value); > + else if (!strcmp(subkey, "prefetchref")) { > + if (!value) > + return config_error_nonbool(key); > + string_list_split(&remote->prefetch_refs, value, ' ', -1); > + return 0; > + } > else if (!strcmp(subkey, "url")) { > if (!value) > return config_error_nonbool(key); > diff --git a/remote.h b/remote.h > index b901b56746d..c18e68e0d8d 100644 > --- a/remote.h > +++ b/remote.h > @@ -5,6 +5,7 @@ > #include "hashmap.h" > #include "refspec.h" > #include "strvec.h" > +#include "string-list.h" > > struct option; > struct transport_ls_refs_options; > @@ -77,6 +78,8 @@ struct remote { > > struct refspec fetch; > > + struct string_list prefetch_refs; > + > /* > * The setting for whether to fetch tags (as a separate rule from the > * configured refspecs); > diff --git a/t/t7900-maintenance.sh b/t/t7900-maintenance.sh > index abae7a97546..2ad5b922d83 100755 > --- a/t/t7900-maintenance.sh > +++ b/t/t7900-maintenance.sh > @@ -245,6 +245,76 @@ test_expect_success 'prefetch multiple remotes' ' > test_subcommand git fetch remote2 $fetchargs <skip-remote1.txt > ' > > +test_expect_success 'prefetch only acts on remote.<name>.prefetchref refs if present' ' > + test_create_repo prefetch-test-mixed-patterns && > + ( > + cd prefetch-test-mixed-patterns && > + test_commit initial && > + git clone . clone1 && > + git clone . clone2 && > + > + git remote add remote1 "file://$(pwd)/clone1" && > + git remote add remote2 "file://$(pwd)/clone2" && > + > + # Set single prefetchref pattern for remote1 and multiple for remote2 > + git config remote.remote1.prefetchref "refs/heads/foo" && > + git config remote.remote2.prefetchref "refs/heads/feature/* refs/heads/topic" && > + > + # Create branches in clone1 and push > + ( > + cd clone1 && > + git checkout -b foo && > + test_commit foo-commit && > + git checkout -b feature/a && > + test_commit feature-a-commit && > + git checkout -b other && > + test_commit other-commit && > + git push origin foo feature/a other > + ) && > + > + # Create branches in clone2 and push > + ( > + cd clone2 && > + git checkout -b topic && > + test_commit master-commit && > + git checkout -b feature/x && > + test_commit feature-x-commit && > + git checkout -b feature/y && > + test_commit feature-y-commit && > + git checkout -b dev && > + test_commit dev-commit && > + git push origin topic feature/x feature/y dev > + ) && > + > + # Run maintenance prefetch task > + GIT_TRACE2_EVENT="$(pwd)/prefetch.txt" git maintenance run --task=prefetch 2>/dev/null && > + > + # Check that only specified refs were prefetched > + fetchargs="--prefetch --prune --no-tags --no-write-fetch-head --recurse-submodules=no --quiet" && > + test_subcommand git fetch remote1 $fetchargs <prefetch.txt && > + test_subcommand git fetch remote2 $fetchargs <prefetch.txt && > + ls -R .git/refs/prefetch && > + > + # Verify that only specified refs are in the prefetch refs for remote1 > + git rev-parse refs/prefetch/remotes/remote1/foo && > + test_must_fail git rev-parse refs/prefetch/remotes/remote1/feature/a && > + test_must_fail git rev-parse refs/prefetch/remotes/remote1/other && > + > + # Verify that only specified refs are in the prefetch refs for remote2 > + git rev-parse refs/prefetch/remotes/remote2/feature/x && > + git rev-parse refs/prefetch/remotes/remote2/feature/y && > + git rev-parse refs/prefetch/remotes/remote2/topic && > + test_must_fail git rev-parse refs/prefetch/remotes/remote2/dev && > + > + # Fetch all refs and compare > + git fetch --all && > + test_cmp_rev refs/remotes/remote1/foo refs/prefetch/remotes/remote1/foo && > + test_cmp_rev refs/remotes/remote2/feature/x refs/prefetch/remotes/remote2/feature/x && > + test_cmp_rev refs/remotes/remote2/feature/y refs/prefetch/remotes/remote2/feature/y && > + test_cmp_rev refs/remotes/remote2/topic refs/prefetch/remotes/remote2/topic > + ) > +' > + > test_expect_success 'loose-objects task' ' > # Repack everything so we know the state of the object dir > git repack -adk && > > base-commit: 2e7b89e038c0c888acf61f1b4ee5a43d4dd5e94c > -- > gitgitgadget This is a continuation of my work, as we deemed that adding a `remote.<remote-name>.prefetch` was unnecessary given that there were already ways to stop fetching refs from a given remote using `skipFetchAll`. Looking at getting early feedback on the direction & implementation here since this isn't as straightforward as my last contribution. I would appreciate any thoughts! The config's name is tentatively `prefetchref,` but I'm open to suggestions. ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] remote: introduce config to set prefetch refs 2024-09-09 9:47 [PATCH] remote: introduce config to set prefetch refs Shubham Kanodia via GitGitGadget 2024-09-09 9:51 ` Shubham Kanodia @ 2024-09-09 16:42 ` Junio C Hamano 2024-09-09 18:21 ` Shubham Kanodia 2024-09-15 14:08 ` [PATCH v2] " Shubham Kanodia via GitGitGadget 2 siblings, 1 reply; 27+ messages in thread From: Junio C Hamano @ 2024-09-09 16:42 UTC (permalink / raw) To: Shubham Kanodia via GitGitGadget Cc: git, Patrick Steinhardt [ ], Derrick Stolee [ ], Shubham Kanodia "Shubham Kanodia via GitGitGadget" <gitgitgadget@gmail.com> writes: > +static void apply_prefetch_refspec(struct remote *remote, struct refspec *rs) > +{ > + if (remote->prefetch_refs.nr > 0) { > + int i; > + for (i = 0; i < remote->prefetch_refs.nr; i++) { > + const char *src = remote->prefetch_refs.items[i].string; > + struct strbuf dst = STRBUF_INIT; > + > + strbuf_addf(&dst, "refs/prefetch/%s/", remote->name); > + if (starts_with(src, "refs/heads/")) { > + strbuf_addstr(&dst, src + 11); > + } else if (starts_with(src, "refs/")) { > + strbuf_addstr(&dst, src + 5); > + } else { > + strbuf_addstr(&dst, src); > + } > + > + refspec_appendf(rs, "%s:%s", src, dst.buf); > + strbuf_release(&dst); > + } > + } > +} > static void filter_prefetch_refspec(struct refspec *rs) > { > int i; > @@ -502,8 +526,11 @@ static struct ref *get_ref_map(struct remote *remote, > int existing_refs_populated = 0; > > filter_prefetch_refspec(rs); > - if (remote) > + if (remote) { > filter_prefetch_refspec(&remote->fetch); > + if (prefetch) > + apply_prefetch_refspec(remote, rs); > + } Hmph, a separate helper function with a separate loop was something I did not expect to see. Looking at the filter_prefetch_refspec(), it already limits what it prefetched to what we usually fetch from the remote, and filteres out the tag namespace. I was hoping that this will _extend_ that existing mechanism, as if by default we have prefetch refspec "!refs/tags/*", which can be replaced by the configuration to say "!refs/tags/* !refs/changes/*", or positive ones like "refs/heads/*". The filtering semantics should be * a refspec whose src matches negated pattern (like !refs/tags/*) is rejected. * if the prefetch_refs has *only* positive patterns, then a refspec whose src does not match *any* of the pattern is rejected. * a refspec that is not rejected is prefetched. But the above still allows what filter_prefetch_refspec() does by default, without any way to narrow it down, and then adds its own entries. This is a half-tangent, but while studying for this topic, I noticed that filter_prefetch_refspec() triggers O(n) loop every time a fetch refspec is skipped. It all comes from 2e03115d (fetch: add --prefetch option, 2021-04-16) but rewriting the loop to use two pointers into the array seemed trivial and the result seemed a bit more readable. Your "further limit the prefetched refs with configuration" feature would probably replace this part of the updated code: + /* skip ones that do not store, or store in refs/tags */ + if (!rs->items[scan].dst || + (rs->items[scan].src && + starts_with(rs->items[scan].src, + ref_namespace[NAMESPACE_TAGS].ref))) { That is, instead of "skip ones that do not store, or store in refs/tags", filtering using the configured value (probably implemented as a helper function) would be used as the condition of the if statement. Thoughts? builtin/fetch.c | 46 ++++++++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git c/builtin/fetch.c w/builtin/fetch.c index 693f02b958..219302ed67 100644 --- c/builtin/fetch.c +++ w/builtin/fetch.c @@ -436,37 +436,38 @@ static void find_non_local_tags(const struct ref *refs, static void filter_prefetch_refspec(struct refspec *rs) { - int i; + int scan, store; if (!prefetch) return; - for (i = 0; i < rs->nr; i++) { + /* + * scan refspec items with 'scan', and decide to either + * mangle and store it in 'store', or omit it from the result. + */ + for (scan = store = 0; scan < rs->nr; scan++, store++) { struct strbuf new_dst = STRBUF_INIT; char *old_dst; const char *sub = NULL; - if (rs->items[i].negative) - continue; - if (!rs->items[i].dst || - (rs->items[i].src && - starts_with(rs->items[i].src, - ref_namespace[NAMESPACE_TAGS].ref))) { - int j; - - free(rs->items[i].src); - free(rs->items[i].dst); - - for (j = i + 1; j < rs->nr; j++) { - rs->items[j - 1] = rs->items[j]; - rs->raw[j - 1] = rs->raw[j]; - } - rs->nr--; - i--; + /* negative ones are kept as-is */ + if (rs->items[scan].negative) { + if (store != scan) + rs->items[store] = rs->items[scan]; continue; } - old_dst = rs->items[i].dst; + /* skip ones that do not store, or store in refs/tags */ + if (!rs->items[scan].dst || + (rs->items[scan].src && + starts_with(rs->items[scan].src, + ref_namespace[NAMESPACE_TAGS].ref))) { + refspec_item_clear(&rs->items[scan]); + store--; /* compensate for loop's auto increment */ + continue; + } + + old_dst = rs->items[scan].dst; strbuf_addstr(&new_dst, ref_namespace[NAMESPACE_PREFETCH].ref); /* @@ -478,11 +479,12 @@ static void filter_prefetch_refspec(struct refspec *rs) sub = old_dst; strbuf_addstr(&new_dst, sub); - rs->items[i].dst = strbuf_detach(&new_dst, NULL); - rs->items[i].force = 1; + rs->items[store].dst = strbuf_detach(&new_dst, NULL); + rs->items[store].force = 1; free(old_dst); } + rs->nr = store; } static struct ref *get_ref_map(struct remote *remote, ^ permalink raw reply related [flat|nested] 27+ messages in thread
* Re: [PATCH] remote: introduce config to set prefetch refs 2024-09-09 16:42 ` Junio C Hamano @ 2024-09-09 18:21 ` Shubham Kanodia 2024-09-09 18:33 ` Junio C Hamano 0 siblings, 1 reply; 27+ messages in thread From: Shubham Kanodia @ 2024-09-09 18:21 UTC (permalink / raw) To: Junio C Hamano Cc: Shubham Kanodia via GitGitGadget, git, Patrick Steinhardt [ ], Derrick Stolee [ ] On Mon, Sep 9, 2024 at 10:12 PM Junio C Hamano <gitster@pobox.com> wrote: > > "Shubham Kanodia via GitGitGadget" <gitgitgadget@gmail.com> writes: > > > +static void apply_prefetch_refspec(struct remote *remote, struct refspec *rs) > > +{ > > + if (remote->prefetch_refs.nr > 0) { > > + int i; > > + for (i = 0; i < remote->prefetch_refs.nr; i++) { > > + const char *src = remote->prefetch_refs.items[i].string; > > + struct strbuf dst = STRBUF_INIT; > > + > > + strbuf_addf(&dst, "refs/prefetch/%s/", remote->name); > > + if (starts_with(src, "refs/heads/")) { > > + strbuf_addstr(&dst, src + 11); > > + } else if (starts_with(src, "refs/")) { > > + strbuf_addstr(&dst, src + 5); > > + } else { > > + strbuf_addstr(&dst, src); > > + } > > + > > + refspec_appendf(rs, "%s:%s", src, dst.buf); > > + strbuf_release(&dst); > > + } > > + } > > +} > > static void filter_prefetch_refspec(struct refspec *rs) > > { > > int i; > > @@ -502,8 +526,11 @@ static struct ref *get_ref_map(struct remote *remote, > > int existing_refs_populated = 0; > > > > filter_prefetch_refspec(rs); > > - if (remote) > > + if (remote) { > > filter_prefetch_refspec(&remote->fetch); > > + if (prefetch) > > + apply_prefetch_refspec(remote, rs); > > + } > > Hmph, a separate helper function with a separate loop was something > I did not expect to see. Looking at the filter_prefetch_refspec(), > it already limits what it prefetched to what we usually fetch from > the remote, and filteres out the tag namespace. I was hoping that > this will _extend_ that existing mechanism, as if by default we > have prefetch refspec "!refs/tags/*", which can be replaced by the > configuration to say "!refs/tags/* !refs/changes/*", or positive > ones like "refs/heads/*". The filtering semantics should be > > * a refspec whose src matches negated pattern (like !refs/tags/*) > is rejected. > > * if the prefetch_refs has *only* positive patterns, then a refspec > whose src does not match *any* of the pattern is rejected. > > * a refspec that is not rejected is prefetched. > > But the above still allows what filter_prefetch_refspec() does by > default, without any way to narrow it down, and then adds its own > entries. > > This is a half-tangent, but while studying for this topic, I noticed > that filter_prefetch_refspec() triggers O(n) loop every time a fetch > refspec is skipped. > > It all comes from 2e03115d (fetch: add --prefetch option, > 2021-04-16) but rewriting the loop to use two pointers into the > array seemed trivial and the result seemed a bit more readable. > > Your "further limit the prefetched refs with configuration" feature > would probably replace this part of the updated code: > > + /* skip ones that do not store, or store in refs/tags */ > + if (!rs->items[scan].dst || > + (rs->items[scan].src && > + starts_with(rs->items[scan].src, > + ref_namespace[NAMESPACE_TAGS].ref))) { > > That is, instead of "skip ones that do not store, or store in refs/tags", > filtering using the configured value (probably implemented as a helper > function) would be used as the condition of the if statement. > > Thoughts? > > builtin/fetch.c | 46 ++++++++++++++++++++++++---------------------- > 1 file changed, 24 insertions(+), 22 deletions(-) > > diff --git c/builtin/fetch.c w/builtin/fetch.c > index 693f02b958..219302ed67 100644 > --- c/builtin/fetch.c > +++ w/builtin/fetch.c > @@ -436,37 +436,38 @@ static void find_non_local_tags(const struct ref *refs, > > static void filter_prefetch_refspec(struct refspec *rs) > { > - int i; > + int scan, store; > > if (!prefetch) > return; > > - for (i = 0; i < rs->nr; i++) { > + /* > + * scan refspec items with 'scan', and decide to either > + * mangle and store it in 'store', or omit it from the result. > + */ > + for (scan = store = 0; scan < rs->nr; scan++, store++) { > struct strbuf new_dst = STRBUF_INIT; > char *old_dst; > const char *sub = NULL; > > - if (rs->items[i].negative) > - continue; > - if (!rs->items[i].dst || > - (rs->items[i].src && > - starts_with(rs->items[i].src, > - ref_namespace[NAMESPACE_TAGS].ref))) { > - int j; > - > - free(rs->items[i].src); > - free(rs->items[i].dst); > - > - for (j = i + 1; j < rs->nr; j++) { > - rs->items[j - 1] = rs->items[j]; > - rs->raw[j - 1] = rs->raw[j]; > - } > - rs->nr--; > - i--; > + /* negative ones are kept as-is */ > + if (rs->items[scan].negative) { > + if (store != scan) > + rs->items[store] = rs->items[scan]; > continue; > } > > - old_dst = rs->items[i].dst; > + /* skip ones that do not store, or store in refs/tags */ > + if (!rs->items[scan].dst || > + (rs->items[scan].src && > + starts_with(rs->items[scan].src, > + ref_namespace[NAMESPACE_TAGS].ref))) { > + refspec_item_clear(&rs->items[scan]); > + store--; /* compensate for loop's auto increment */ > + continue; > + } > + > + old_dst = rs->items[scan].dst; > strbuf_addstr(&new_dst, ref_namespace[NAMESPACE_PREFETCH].ref); > > /* > @@ -478,11 +479,12 @@ static void filter_prefetch_refspec(struct refspec *rs) > sub = old_dst; > strbuf_addstr(&new_dst, sub); > > - rs->items[i].dst = strbuf_detach(&new_dst, NULL); > - rs->items[i].force = 1; > + rs->items[store].dst = strbuf_detach(&new_dst, NULL); > + rs->items[store].force = 1; > > free(old_dst); > } > + rs->nr = store; > } > > static struct ref *get_ref_map(struct remote *remote, How should we handle the related `remote.<remote-name>.fetch` config? In an earlier discussion, it was discussed that — `.prefetchref` should override `.fetch` completely (instead of patching it) which makes sense to me. At the moment my reading is that `filter_prefetch_refspec` still filters / modifies `remote->fetch`. ``` if (remote) { filter_prefetch_refspec(&remote->fetch); } ``` So we'll need to clear refspecs and re-populate perhaps? ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] remote: introduce config to set prefetch refs 2024-09-09 18:21 ` Shubham Kanodia @ 2024-09-09 18:33 ` Junio C Hamano 2024-09-13 6:16 ` Shubham Kanodia 0 siblings, 1 reply; 27+ messages in thread From: Junio C Hamano @ 2024-09-09 18:33 UTC (permalink / raw) To: Shubham Kanodia Cc: Shubham Kanodia via GitGitGadget, git, Patrick Steinhardt [ ], Derrick Stolee [ ] Shubham Kanodia <shubham.kanodia10@gmail.com> writes: > How should we handle the related `remote.<remote-name>.fetch` config? The get_ref_map() helper is what the rest of the code interacts with configured refspec. remote->fetch is handled there and goes through the same filter_prefetch_refspec(). > In an earlier discussion, it was discussed that — > `.prefetchref` should override `.fetch` completely (instead of > patching it) which makes sense to me. Maybe it made sense to you back when it was discussed, but after seeing the current code (before applying this patch), specifically what happens in filter_prefetch_refspec(), it no longer makes much sense to me. Especially it is nonsense to allow .prefetchref to widen the set of refs that are being prefetched beyond what is normally fetched, so we should look at a design that easily allows such a configuration with strong suspicion, I would think. ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] remote: introduce config to set prefetch refs 2024-09-09 18:33 ` Junio C Hamano @ 2024-09-13 6:16 ` Shubham Kanodia 2024-09-13 16:58 ` Junio C Hamano 0 siblings, 1 reply; 27+ messages in thread From: Shubham Kanodia @ 2024-09-13 6:16 UTC (permalink / raw) To: Junio C Hamano Cc: Shubham Kanodia via GitGitGadget, git, Patrick Steinhardt [ ], Derrick Stolee [ ] On Tue, Sep 10, 2024 at 12:03 AM Junio C Hamano <gitster@pobox.com> wrote: > > Shubham Kanodia <shubham.kanodia10@gmail.com> writes: > > > How should we handle the related `remote.<remote-name>.fetch` config? > > The get_ref_map() helper is what the rest of the code interacts with > configured refspec. remote->fetch is handled there and goes through > the same filter_prefetch_refspec(). > > > In an earlier discussion, it was discussed that — > > `.prefetchref` should override `.fetch` completely (instead of > > patching it) which makes sense to me. > > Maybe it made sense to you back when it was discussed, but after > seeing the current code (before applying this patch), specifically > what happens in filter_prefetch_refspec(), it no longer makes much > sense to me. > > Especially it is nonsense to allow .prefetchref to widen the set of > refs that are being prefetched beyond what is normally fetched, so > we should look at a design that easily allows such a configuration > with strong suspicion, I would think. Ideally, a repository should be able to specify (say): remote.origin.fetch=+refs/heads/*:refs/remotes/origin/* remote.origin.prefetchref=refs/heads/main This configuration would maintain the normal behavior for fetches, but only prefetch the main branch. The rationale for this is that the main branch typically serves as the HEAD from which future branches will be forked in an active repository. Regarding: > If prefetch_refs contains only positive patterns, then a refspec whose source > doesn't match any of these patterns is rejected. Simply rejecting a source refspec pattern in `remote.<remote>.fetch` wouldn't achieve our goal here. Ideally, we'd need to create a subset of it. What we're looking for is essentially a pattern intersection between the (fetch) `refs/heads/*` and the (prefetchref) `refs/heads/main`, which in this case would result in `refs/heads/main`. However, if I understand correctly, performing such pattern intersections isn't straightforward in the `filter_prefetch_refspec` function (let me know if there' prior art for pattern intersection) I also believe that allowing negative refs might complicate things without providing a clear use case. For instance, how would we handle the intersection of `fetch` and `prefetchref` if `prefetchref` contained both positive and negative patterns? Given that both `fetch` and `prefetchref` could have multiple patterns, it might be simpler and more intuitive for users if we adopt an "A wins over B" approach. However, I'm interested in hearing your thoughts on this matter. Perhaps I should link to the earlier discussion here — Message ID (CAG=Um+1wTbXn_RN+LOCrpZpSNR_QF582PszxNyhz5anVHtBp+w@mail.gmail.com) ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] remote: introduce config to set prefetch refs 2024-09-13 6:16 ` Shubham Kanodia @ 2024-09-13 16:58 ` Junio C Hamano 2024-09-14 19:35 ` Shubham Kanodia 0 siblings, 1 reply; 27+ messages in thread From: Junio C Hamano @ 2024-09-13 16:58 UTC (permalink / raw) To: Shubham Kanodia Cc: Shubham Kanodia via GitGitGadget, git, Patrick Steinhardt [ ], Derrick Stolee [ ] Shubham Kanodia <shubham.kanodia10@gmail.com> writes: > Ideally, a repository should be able to specify (say): > > remote.origin.fetch=+refs/heads/*:refs/remotes/origin/* > remote.origin.prefetchref=refs/heads/main > > This configuration would maintain the normal behavior for fetches, but > only prefetch the main branch. > The rationale for this is that the main branch typically serves as the > HEAD from which future branches will be forked in an active > repository. Oh, that is 100% agreeable. All I wanted to caution you about was what should happen when remote.origin.prefetchref in the above is replaced to something like: [remote "origin"] fetch = +refs/heads/*:refs/remotes/origin/* prefetchref = refs/notes/* That is, if your refspec used for your real fetch (i.e. "git fetch" without the "--prefetch" option) does not fetch anything from "refs/notes/" hierarchy, prefetching from the hierarchy does not help the real fetch. I do not have a strong preference between marking it as an error and silently ignoring the prefetch but leaning towards the latter, and that is why my suggestion to implement this new "prefetchref" as something that extends the existing filter_prefetch_refspec(), which already filters out refspec that fetches from the refs/tags/ namespace (and the ones that do not store by having NULL in the .dst side). > Regarding: > >> If prefetch_refs contains only positive patterns, then a refspec whose source >> doesn't match any of these patterns is rejected. > > Simply rejecting a source refspec pattern in `remote.<remote>.fetch` > wouldn't achieve our goal here. I used the verb "reject" to mean "filter out", just like a refspec with left-hand-side that begins with "refs/tags/" is filtered out in the current filter_prefetch_refspec(). And that is exactly what we want to achieve our goal here. IOW, you would * read their ref advertisement, and pick only the ones that have a matching pattern in the left-hand-side of a remote.$name.fetch element. With a more recent protocol, remote.$name.fetch may have already participated in narrowing what they advertise to begin with, but the end result is the same. * give it to filter_prefetch_refspec(). * filter_prefetch_refspec() inspects the refspec elements, and rejects ones with no right-hand-side, and ones with left-hand-side that begin with refs/tags/. The current code without your patch already works this way up to this point. * We extend the above filtering so that in addition to the two kinds we currently reject, reject the ones that do not match the prefetchref criteria. This is what is needed to implement "prefetchref configuration limits the set of refs that get prefetched". And what you quoted is a beginning of how "prefetchref configuration limits". It cannot be "add to what filter_prefetch_refspec() did", like done by the implementation in the patch we are discussing. If your configuration were this: [remote "origin"] fetch = +refs/heads/*:refs/remotes/origin/* you would want a way to say things like (1) I want to prefetch everything I usually fetch (2) Among the ones I usually fetch, I only want to prefetch master and next branches. (3) I want to prefetch only refs/heads/jk/* branches, but not refs/heads/jk/wip/* branches. (4) I want to prefetch everything I usually fetch, except for refs/heads/wip/* branches. The case (1) is the simplest. You will leave .prefetchref empty. For the case (2), you would write something like [remote "origin"] prefetchref = refs/heads/master prefetchref = refs/heads/next So, when your prefetchref has all positive patterns, after the existing conditional in filter_prefetch_refspec() passes a refspec whose right-hand-side (i.e., .dst) is not NULL and whose left-hand-side (i.e., .src) does not begin with "refs/tags/", you further inspect and make sure it matches one of these prefetchref patterns. In example (2), if they advertised master, next, and seen branches, refs/heads/seen would be filtered out because it matches neither of the two patterns, so we would end up prefetching master and next branches. For the case (3), you would want to say something like [remote "origin"] prefetchref = refs/heads/jk/* prefetchref = !refs/heads/jk/wip/* Now your prefetchref has some negative pattern. When filtering what the existing conditional in filter_prefetch_refspec() passed, you'd inspect the refspec element and see if it matches any of the positive patterns, and also if it does not match any of the negative ones. refs/heads/next does not match any positive ones and gets rejected. refs/heads/jk/main does match the positive pattern 'refs/heads/jk/*', and it does not match the negative pattern 'refs/heads/jk/wip/*', so it passes and will get prefetched. For the case (4), you would write something like [remote "origin"] prefetchref = !refs/heads/wip/* There is no positive pattern, so if you blindly apply the rule you used for (3) above, everything will get rejected, which is not what you want. refs/heads/main does not match any positive patterns (because there are no positive patterns given), but it does not match any negative ones, so it passes and will get prefetched. The condition to implement the above four cases (which I think covers all the cases we care about, but I won't guarantee it is exhaustive---you'd need to sanity check) would be - If there is 1 or more positive prefetchref patterns, the refspec element must match one of them to be considered for the next rule. Otherwise, it will not be prefetched. - If the refspec element matches any of negative prefetchref patterns, it will not be prefetched. ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] remote: introduce config to set prefetch refs 2024-09-13 16:58 ` Junio C Hamano @ 2024-09-14 19:35 ` Shubham Kanodia 2024-09-14 20:11 ` Junio C Hamano 0 siblings, 1 reply; 27+ messages in thread From: Shubham Kanodia @ 2024-09-14 19:35 UTC (permalink / raw) To: Junio C Hamano Cc: Shubham Kanodia via GitGitGadget, git, Patrick Steinhardt [ ], Derrick Stolee [ ] On Fri, Sep 13, 2024 at 10:28 PM Junio C Hamano <gitster@pobox.com> wrote: > > Shubham Kanodia <shubham.kanodia10@gmail.com> writes: > > > Ideally, a repository should be able to specify (say): > > > > remote.origin.fetch=+refs/heads/*:refs/remotes/origin/* > > remote.origin.prefetchref=refs/heads/main > > > > This configuration would maintain the normal behavior for fetches, but > > only prefetch the main branch. > > The rationale for this is that the main branch typically serves as the > > HEAD from which future branches will be forked in an active > > repository. > > Oh, that is 100% agreeable. All I wanted to caution you about was > what should happen when remote.origin.prefetchref in the above is > replaced to something like: > > [remote "origin"] > fetch = +refs/heads/*:refs/remotes/origin/* > prefetchref = refs/notes/* > > That is, if your refspec used for your real fetch (i.e. "git fetch" > without the "--prefetch" option) does not fetch anything from > "refs/notes/" hierarchy, prefetching from the hierarchy does not > help the real fetch. I do not have a strong preference between > marking it as an error and silently ignoring the prefetch but > leaning towards the latter, and that is why my suggestion to > implement this new "prefetchref" as something that extends the > existing filter_prefetch_refspec(), which already filters out > refspec that fetches from the refs/tags/ namespace (and the ones > that do not store by having NULL in the .dst side). > > > Regarding: > > > >> If prefetch_refs contains only positive patterns, then a refspec whose source > >> doesn't match any of these patterns is rejected. > > > > Simply rejecting a source refspec pattern in `remote.<remote>.fetch` > > wouldn't achieve our goal here. > > I used the verb "reject" to mean "filter out", just like a refspec > with left-hand-side that begins with "refs/tags/" is filtered out > in the current filter_prefetch_refspec(). And that is exactly what > we want to achieve our goal here. > > IOW, you would > > * read their ref advertisement, and pick only the ones that have a > matching pattern in the left-hand-side of a remote.$name.fetch > element. With a more recent protocol, remote.$name.fetch may > have already participated in narrowing what they advertise to > begin with, but the end result is the same. > > * give it to filter_prefetch_refspec(). > > * filter_prefetch_refspec() inspects the refspec elements, and > rejects ones with no right-hand-side, and ones with > left-hand-side that begin with refs/tags/. The current code > without your patch already works this way up to this point. > > * We extend the above filtering so that in addition to the two > kinds we currently reject, reject the ones that do not match the > prefetchref criteria. This is what is needed to implement > "prefetchref configuration limits the set of refs that get > prefetched". > > And what you quoted is a beginning of how "prefetchref configuration > limits". It cannot be "add to what filter_prefetch_refspec() did", > like done by the implementation in the patch we are discussing. > > If your configuration were this: > > [remote "origin"] > fetch = +refs/heads/*:refs/remotes/origin/* > > you would want a way to say things like > > (1) I want to prefetch everything I usually fetch > > (2) Among the ones I usually fetch, I only want to prefetch master > and next branches. > > (3) I want to prefetch only refs/heads/jk/* branches, but not > refs/heads/jk/wip/* branches. > > (4) I want to prefetch everything I usually fetch, except for > refs/heads/wip/* branches. > > The case (1) is the simplest. You will leave .prefetchref empty. > > For the case (2), you would write something like > > [remote "origin"] > prefetchref = refs/heads/master > prefetchref = refs/heads/next > > So, when your prefetchref has all positive patterns, after the > existing conditional in filter_prefetch_refspec() passes a refspec > whose right-hand-side (i.e., .dst) is not NULL and whose > left-hand-side (i.e., .src) does not begin with "refs/tags/", you > further inspect and make sure it matches one of these prefetchref > patterns. In example (2), if they advertised master, next, and seen > branches, refs/heads/seen would be filtered out because it matches > neither of the two patterns, so we would end up prefetching master > and next branches. > > For the case (3), you would want to say something like > > [remote "origin"] > prefetchref = refs/heads/jk/* > prefetchref = !refs/heads/jk/wip/* > > Now your prefetchref has some negative pattern. When filtering what > the existing conditional in filter_prefetch_refspec() passed, you'd > inspect the refspec element and see if it matches any of the > positive patterns, and also if it does not match any of the negative > ones. refs/heads/next does not match any positive ones and gets > rejected. refs/heads/jk/main does match the positive pattern > 'refs/heads/jk/*', and it does not match the negative pattern > 'refs/heads/jk/wip/*', so it passes and will get prefetched. > > For the case (4), you would write something like > > [remote "origin"] > prefetchref = !refs/heads/wip/* > > There is no positive pattern, so if you blindly apply the rule you > used for (3) above, everything will get rejected, which is not what > you want. refs/heads/main does not match any positive patterns > (because there are no positive patterns given), but it does not > match any negative ones, so it passes and will get prefetched. > > The condition to implement the above four cases (which I think > covers all the cases we care about, but I won't guarantee it is > exhaustive---you'd need to sanity check) would be > > - If there is 1 or more positive prefetchref patterns, the refspec > element must match one of them to be considered for the next > rule. Otherwise, it will not be prefetched. > > - If the refspec element matches any of negative prefetchref > patterns, it will not be prefetched. > If we're trying to determine if a pattern (remote.<remote>.prefetchref) is a subset of another or not (remote.<remote>.fetch) (to not accidentally expand the scope beyond `fetch`), we'd need a function that does that pattern-to-pattern. Are you aware of any existing functions that do so? I see `wildmatch`, but that is only used for pattern to full-text comparisons. ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] remote: introduce config to set prefetch refs 2024-09-14 19:35 ` Shubham Kanodia @ 2024-09-14 20:11 ` Junio C Hamano 2024-09-15 14:06 ` Shubham Kanodia 2024-09-15 16:12 ` Junio C Hamano 0 siblings, 2 replies; 27+ messages in thread From: Junio C Hamano @ 2024-09-14 20:11 UTC (permalink / raw) To: Shubham Kanodia Cc: Shubham Kanodia via GitGitGadget, git, Patrick Steinhardt [ ], Derrick Stolee [ ] Shubham Kanodia <shubham.kanodia10@gmail.com> writes: > If we're trying to determine if a pattern > (remote.<remote>.prefetchref) is a subset of another or not > (remote.<remote>.fetch) (to not accidentally expand the scope beyond > `fetch`), > we'd need a function that does that pattern-to-pattern. Are you aware > of any existing functions that do so? There is no such computation for this application. Such a computation might become needed if you wanted to complain that the user gave .prefetchref pattern that would never match what .fetch patterns would allow to pass. But there is no such need. You will first get the advertised refs from the remote. Existing logic filteres them down to what matches configured remote.$name.fetch variable. filter_prefetch_refspec() may further reduces the result by removing those whose .src side begins with "refs/tags/". Now you only look at what survived the above existing filtering, and further narrow it down by picking only ones that match the prefetch condition. If the refspec that survived the filtering by the fetch refspec (and existing logic in filter_prefetch_refspec()) does not satisfy the prefetch condition, it won't be prefetched. Since you are using .prefetch ONLY TO narrow the result down, by definition, you are not adding anything what .fetch configuration would not have fetched. ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] remote: introduce config to set prefetch refs 2024-09-14 20:11 ` Junio C Hamano @ 2024-09-15 14:06 ` Shubham Kanodia 2024-09-15 16:12 ` Junio C Hamano 1 sibling, 0 replies; 27+ messages in thread From: Shubham Kanodia @ 2024-09-15 14:06 UTC (permalink / raw) To: Junio C Hamano Cc: Shubham Kanodia via GitGitGadget, git, Patrick Steinhardt [ ], Derrick Stolee [ ] On Sun, Sep 15, 2024 at 1:41 AM Junio C Hamano <gitster@pobox.com> wrote: > > Shubham Kanodia <shubham.kanodia10@gmail.com> writes: > > > If we're trying to determine if a pattern > > (remote.<remote>.prefetchref) is a subset of another or not > > (remote.<remote>.fetch) (to not accidentally expand the scope beyond > > `fetch`), > > we'd need a function that does that pattern-to-pattern. Are you aware > > of any existing functions that do so? > > There is no such computation for this application. Such a > computation might become needed if you wanted to complain that the > user gave .prefetchref pattern that would never match what .fetch > patterns would allow to pass. But there is no such need. > > You will first get the advertised refs from the remote. > > Existing logic filteres them down to what matches configured > remote.$name.fetch variable. filter_prefetch_refspec() may further > reduces the result by removing those whose .src side begins with > "refs/tags/". > > Now you only look at what survived the above existing filtering, and > further narrow it down by picking only ones that match the prefetch > condition. If the refspec that survived the filtering by the fetch > refspec (and existing logic in filter_prefetch_refspec()) does not > satisfy the prefetch condition, it won't be prefetched. > > Since you are using .prefetch ONLY TO narrow the result down, by > definition, you are not adding anything what .fetch configuration > would not have fetched. > > Ah I see — I assumed you expected all filtering for `prefetch` (existing & new) to happen inside `filter_prefetch_refspec`. But that threw me off, because `filter_prefetch_refspec` doesn't deal with advertised refs from remote, and only patterns. Let me know if the diff in the following mail is closer to what you were expecting? ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] remote: introduce config to set prefetch refs 2024-09-14 20:11 ` Junio C Hamano 2024-09-15 14:06 ` Shubham Kanodia @ 2024-09-15 16:12 ` Junio C Hamano 2024-09-16 4:34 ` Shubham Kanodia 1 sibling, 1 reply; 27+ messages in thread From: Junio C Hamano @ 2024-09-15 16:12 UTC (permalink / raw) To: Shubham Kanodia Cc: Shubham Kanodia via GitGitGadget, git, Patrick Steinhardt [ ], Derrick Stolee [ ] Junio C Hamano <gitster@pobox.com> writes: > Existing logic filteres them down to what matches configured > remote.$name.fetch variable. filter_prefetch_refspec() may further > reduces the result by removing those whose .src side begins with > "refs/tags/". > > Now you only look at what survived the above existing filtering, and > further narrow it down by picking only ones that match the prefetch > condition. If the refspec that survived the filtering by the fetch > refspec (and existing logic in filter_prefetch_refspec()) does not > satisfy the prefetch condition, it won't be prefetched. Sorry, but I misread the code. By the time filter_prefetch_refspec() is called by get_ref_map(), this caller has "remote_refs" linked list that describes each ref it is going to fetch, so conceptually what is left for the prefetch logic to do is to selectively discard the elements on this list that are not worth asking the remote to send new object data for and use the remainder of the list in remote_refs list, and the logic to further limit this list with the prefetchref configuration would fit well here, but filter_prefetch_refspec() does not work on this list at all X-<. So the prefetchref limitation needs to come outside the function. ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] remote: introduce config to set prefetch refs 2024-09-15 16:12 ` Junio C Hamano @ 2024-09-16 4:34 ` Shubham Kanodia 0 siblings, 0 replies; 27+ messages in thread From: Shubham Kanodia @ 2024-09-16 4:34 UTC (permalink / raw) To: Junio C Hamano Cc: Shubham Kanodia via GitGitGadget, git, Patrick Steinhardt [ ], Derrick Stolee [ ] On Sun, Sep 15, 2024 at 9:42 PM Junio C Hamano <gitster@pobox.com> wrote: > > Junio C Hamano <gitster@pobox.com> writes: > > > Existing logic filteres them down to what matches configured > > remote.$name.fetch variable. filter_prefetch_refspec() may further > > reduces the result by removing those whose .src side begins with > > "refs/tags/". > > > > Now you only look at what survived the above existing filtering, and > > further narrow it down by picking only ones that match the prefetch > > condition. If the refspec that survived the filtering by the fetch > > refspec (and existing logic in filter_prefetch_refspec()) does not > > satisfy the prefetch condition, it won't be prefetched. > > Sorry, but I misread the code. > > By the time filter_prefetch_refspec() is called by get_ref_map(), > this caller has "remote_refs" linked list that describes each ref it > is going to fetch, so conceptually what is left for the prefetch > logic to do is to selectively discard the elements on this list that > are not worth asking the remote to send new object data for and use > the remainder of the list in remote_refs list, and the logic to > further limit this list with the prefetchref configuration would fit > well here, but filter_prefetch_refspec() does not work on this list > at all X-<. So the prefetchref limitation needs to come outside the > function. Sure, can you take a look at the latest diff (shared in the last reply)? Moved the prefetchref filtering to `get_ref_map` where advertised refs are available. ^ permalink raw reply [flat|nested] 27+ messages in thread
* [PATCH v2] remote: introduce config to set prefetch refs 2024-09-09 9:47 [PATCH] remote: introduce config to set prefetch refs Shubham Kanodia via GitGitGadget 2024-09-09 9:51 ` Shubham Kanodia 2024-09-09 16:42 ` Junio C Hamano @ 2024-09-15 14:08 ` Shubham Kanodia via GitGitGadget 2024-09-19 10:23 ` [PATCH v3] " Shubham Kanodia via GitGitGadget 2 siblings, 1 reply; 27+ messages in thread From: Shubham Kanodia via GitGitGadget @ 2024-09-15 14:08 UTC (permalink / raw) To: git Cc: Patrick Steinhardt [ ], Junio C Hamano [ ], Derrick Stolee [ ], Shubham Kanodia, Shubham Kanodia From: Shubham Kanodia <shubham.kanodia10@gmail.com> This commit introduces a new configuration option, remote.<name>.prefetchref, which allows users to specify specific ref patterns to be prefetched during a git fetch --prefetch operation. The new option accepts a space-separated list of ref patterns. When the --prefetch option is used with git fetch, only the refs matching these patterns will be prefetched, instead of the default behavior of prefetching all fetchable refs. Example usage in .git/config: [remote "origin"] prefetchref = "refs/heads/main refs/heads/feature/*" This change allows users to optimize their prefetch operations, potentially reducing network traffic and improving performance for large repositories with many refs. Signed-off-by: Shubham Kanodia <shubham.kanodia10@gmail.com> --- remote: introduce config to set prefetch refs Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-1782%2Fpastelsky%2Fsk%2Fremote-prefetchref-v2 Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-1782/pastelsky/sk/remote-prefetchref-v2 Pull-Request: https://github.com/gitgitgadget/git/pull/1782 Range-diff vs v1: 1: 3113d8b8635 ! 1: f9f9e637bfa remote: introduce config to set prefetch refs @@ Documentation/config/remote.txt: remote.<name>.fetch:: linkgit:git-push[1]. ## builtin/fetch.c ## -@@ builtin/fetch.c: static void find_non_local_tags(const struct ref *refs, - oidset_clear(&fetch_oids); +@@ + #include "trace.h" + #include "trace2.h" + #include "bundle-uri.h" ++#include "wildmatch.h" + + #define FORCED_UPDATES_DELAY_WARNING_IN_MS (10 * 1000) + +@@ builtin/fetch.c: static void filter_prefetch_refspec(struct refspec *rs) + } } -+static void apply_prefetch_refspec(struct remote *remote, struct refspec *rs) ++static int matches_prefetch_refs(const char *refname, const struct string_list *prefetch_refs) +{ -+ if (remote->prefetch_refs.nr > 0) { -+ int i; -+ for (i = 0; i < remote->prefetch_refs.nr; i++) { -+ const char *src = remote->prefetch_refs.items[i].string; -+ struct strbuf dst = STRBUF_INIT; -+ -+ strbuf_addf(&dst, "refs/prefetch/%s/", remote->name); -+ if (starts_with(src, "refs/heads/")) { -+ strbuf_addstr(&dst, src + 11); -+ } else if (starts_with(src, "refs/")) { -+ strbuf_addstr(&dst, src + 5); -+ } else { -+ strbuf_addstr(&dst, src); -+ } ++ int i; ++ int has_positive = 0; ++ int matched_positive = 0; ++ int matched_negative = 0; ++ ++ for (i = 0; i < prefetch_refs->nr; i++) { ++ const char *pattern = prefetch_refs->items[i].string; ++ int is_negative = (*pattern == '!'); ++ ++ if (is_negative) ++ pattern++; ++ else ++ has_positive = 1; + -+ refspec_appendf(rs, "%s:%s", src, dst.buf); -+ strbuf_release(&dst); ++ if (wildmatch(pattern, refname, 0) == 0) { ++ if (is_negative) ++ matched_negative = 1; ++ else ++ matched_positive = 1; + } + } ++ ++ if (!has_positive) ++ return !matched_negative; ++ ++ return matched_positive && !matched_negative; +} + + - static void filter_prefetch_refspec(struct refspec *rs) - { - int i; ++static void ref_remove(struct ref **head, struct ref *to_remove) ++{ ++ struct ref **pp, *p; ++ ++ for (pp = head; (p = *pp) != NULL; pp = &p->next) { ++ if (p == to_remove) { ++ *pp = p->next; ++ return; ++ } ++ } ++} ++ + static struct ref *get_ref_map(struct remote *remote, + const struct ref *remote_refs, + struct refspec *rs, @@ builtin/fetch.c: static struct ref *get_ref_map(struct remote *remote, int existing_refs_populated = 0; filter_prefetch_refspec(rs); -- if (remote) -+ if (remote) { ++ + if (remote) filter_prefetch_refspec(&remote->fetch); -+ if (prefetch) -+ apply_prefetch_refspec(remote, rs); + +@@ builtin/fetch.c: static struct ref *get_ref_map(struct remote *remote, + else + ref_map = apply_negative_refspecs(ref_map, &remote->fetch); + ++ /** ++ * Filter out advertised refs that we don't want to fetch during ++ * prefetch if a prefetchref config is set ++ */ ++ if (prefetch && remote->prefetch_refs.nr) { ++ struct ref *ref, *next; ++ for (ref = ref_map; ref; ref = next) { ++ next = ref->next; ++ ++ if (!matches_prefetch_refs(ref->name, &remote->prefetch_refs)) { ++ ref_remove(&ref_map, ref); ++ free_one_ref(ref); ++ } ++ } + } ++ + ref_map = ref_remove_duplicates(ref_map); - if (rs->nr) { - struct refspec *fetch_refspec; + for (rm = ref_map; rm; rm = rm->next) { ## remote.c ## @@ remote.c: static struct remote *make_remote(struct remote_state *remote_state, @@ t/t7900-maintenance.sh: test_expect_success 'prefetch multiple remotes' ' test_subcommand git fetch remote2 $fetchargs <skip-remote1.txt ' -+test_expect_success 'prefetch only acts on remote.<name>.prefetchref refs if present' ' -+ test_create_repo prefetch-test-mixed-patterns && ++test_expect_success 'prefetch with positive prefetch ref patterns' ' ++ test_create_repo filter-prefetch-positive && + ( -+ cd prefetch-test-mixed-patterns && ++ cd filter-prefetch-positive && + test_commit initial && -+ git clone . clone1 && + git clone . clone2 && -+ -+ git remote add remote1 "file://$(pwd)/clone1" && + git remote add remote2 "file://$(pwd)/clone2" && + -+ # Set single prefetchref pattern for remote1 and multiple for remote2 -+ git config remote.remote1.prefetchref "refs/heads/foo" && -+ git config remote.remote2.prefetchref "refs/heads/feature/* refs/heads/topic" && -+ -+ # Create branches in clone1 and push -+ ( -+ cd clone1 && -+ git checkout -b foo && -+ test_commit foo-commit && -+ git checkout -b feature/a && -+ test_commit feature-a-commit && -+ git checkout -b other && -+ test_commit other-commit && -+ git push origin foo feature/a other -+ ) && -+ -+ # Create branches in clone2 and push -+ ( -+ cd clone2 && -+ git checkout -b topic && -+ test_commit master-commit && -+ git checkout -b feature/x && -+ test_commit feature-x-commit && -+ git checkout -b feature/y && -+ test_commit feature-y-commit && -+ git checkout -b dev && -+ test_commit dev-commit && -+ git push origin topic feature/x feature/y dev -+ ) && -+ -+ # Run maintenance prefetch task -+ GIT_TRACE2_EVENT="$(pwd)/prefetch.txt" git maintenance run --task=prefetch 2>/dev/null && -+ -+ # Check that only specified refs were prefetched ++ cd clone2 && ++ git checkout -b feature && test_commit feature-commit-2 && ++ git checkout -b wip/test && test_commit wip-test-commit-2 && ++ git checkout -b topic/x && test_commit topic-x-commit-2 && ++ git push -f origin feature wip/test topic/x&& ++ cd .. && ++ ++ git config remote.remote2.prefetchref "refs/heads/feature" && + fetchargs="--prefetch --prune --no-tags --no-write-fetch-head --recurse-submodules=no --quiet" && -+ test_subcommand git fetch remote1 $fetchargs <prefetch.txt && -+ test_subcommand git fetch remote2 $fetchargs <prefetch.txt && -+ ls -R .git/refs/prefetch && -+ -+ # Verify that only specified refs are in the prefetch refs for remote1 -+ git rev-parse refs/prefetch/remotes/remote1/foo && -+ test_must_fail git rev-parse refs/prefetch/remotes/remote1/feature/a && -+ test_must_fail git rev-parse refs/prefetch/remotes/remote1/other && -+ -+ # Verify that only specified refs are in the prefetch refs for remote2 -+ git rev-parse refs/prefetch/remotes/remote2/feature/x && -+ git rev-parse refs/prefetch/remotes/remote2/feature/y && -+ git rev-parse refs/prefetch/remotes/remote2/topic && -+ test_must_fail git rev-parse refs/prefetch/remotes/remote2/dev && -+ -+ # Fetch all refs and compare -+ git fetch --all && -+ test_cmp_rev refs/remotes/remote1/foo refs/prefetch/remotes/remote1/foo && -+ test_cmp_rev refs/remotes/remote2/feature/x refs/prefetch/remotes/remote2/feature/x && -+ test_cmp_rev refs/remotes/remote2/feature/y refs/prefetch/remotes/remote2/feature/y && -+ test_cmp_rev refs/remotes/remote2/topic refs/prefetch/remotes/remote2/topic ++ GIT_TRACE2_EVENT="$(pwd)/prefetch-positive.txt" git maintenance run --task=prefetch 2>/dev/null && ++ test_subcommand git fetch remote2 $fetchargs <prefetch-positive.txt && ++ ++ git rev-parse refs/prefetch/remotes/remote2/feature && ++ test_must_fail git rev-parse refs/prefetch/remotes/remote2/wip/test && ++ test_must_fail git rev-parse refs/prefetch/remotes/remote2/topic/x ++ ) ++' ++ ++test_expect_success 'prefetch with negative prefetch ref patterns' ' ++ test_create_repo filter-prefetch-negative && ++ ( ++ cd filter-prefetch-negative && ++ test_commit initial && ++ git clone . clone3 && ++ git remote add remote3 "file://$(pwd)/clone3" && ++ cat .git/config && ++ ++ cd clone3 && ++ git checkout -b feature && test_commit feature-commit-3 && ++ git checkout -b wip/test && test_commit wip-test-commit-3 && ++ git checkout -b topic/x && test_commit topic-x-commit-3 && ++ git push -f origin feature wip/test topic/x && ++ cd .. && ++ ++ git config remote.remote3.prefetchref "!refs/heads/wip/*" && ++ fetchargs="--prefetch --prune --no-tags --no-write-fetch-head --recurse-submodules=no --quiet" && ++ GIT_TRACE2_EVENT="$(pwd)/prefetch-negative.txt" git maintenance run --task=prefetch 2>/dev/null && ++ test_subcommand git fetch remote3 $fetchargs <prefetch-negative.txt && ++ git rev-parse refs/prefetch/remotes/remote3/feature && ++ git rev-parse refs/prefetch/remotes/remote3/topic/x && ++ test_must_fail git rev-parse refs/prefetch/remotes/remote3/wip/test ++ ) ++' ++ ++test_expect_success 'prefetch with positive & negative prefetch ref patterns' ' ++ test_create_repo filter-prefetch-mixed && ++ ( ++ cd filter-prefetch-mixed && ++ test_commit initial && ++ git clone . clone4 && ++ git remote add remote4 "file://$(pwd)/clone4" && ++ ++ cd clone4 && ++ git checkout -b feature && test_commit feature-commit-4 && ++ git checkout -b topic/x && test_commit topic-x-commit-4 && ++ git checkout -b topic/y && test_commit topic-y-commit-4 && ++ git push -f origin feature topic/x topic/y && ++ cd .. && ++ ++ git config remote.remote4.prefetchref "refs/heads/topic/* !refs/heads/topic/y" && ++ # git config --add remote.remote4.prefetchref "!refs/topic/y" && ++ cat .git/config && ++ fetchargs="--prefetch --prune --no-tags --no-write-fetch-head --recurse-submodules=no --quiet" && ++ GIT_TRACE2_EVENT="$(pwd)/prefetch-mixed.txt" git maintenance run --task=prefetch 2>/dev/null && ++ test_subcommand git fetch remote4 $fetchargs <prefetch-mixed.txt && ++ ++ test_must_fail git rev-parse refs/prefetch/remotes/remote4/feature && ++ test_must_fail git rev-parse refs/prefetch/remotes/remote4/topic/y && ++ git rev-parse refs/prefetch/remotes/remote4/topic/x + ) +' + Documentation/config/remote.txt | 6 +++ builtin/fetch.c | 61 +++++++++++++++++++++++++ remote.c | 8 ++++ remote.h | 3 ++ t/t7900-maintenance.sh | 80 +++++++++++++++++++++++++++++++++ 5 files changed, 158 insertions(+) diff --git a/Documentation/config/remote.txt b/Documentation/config/remote.txt index 8efc53e836d..b25d76dd3b1 100644 --- a/Documentation/config/remote.txt +++ b/Documentation/config/remote.txt @@ -33,6 +33,12 @@ remote.<name>.fetch:: The default set of "refspec" for linkgit:git-fetch[1]. See linkgit:git-fetch[1]. +remote.<name>.prefetchref:: + Specify the refs to be prefetched when fetching from this remote. + The value is a space-separated list of ref patterns (e.g., "refs/heads/master refs/heads/develop*"). + These patterns are used as the source part of the refspecs for prefetching. + This can be used to optimize fetch operations by specifying exactly which refs should be prefetched. + remote.<name>.push:: The default set of "refspec" for linkgit:git-push[1]. See linkgit:git-push[1]. diff --git a/builtin/fetch.c b/builtin/fetch.c index b2b5aee5bf2..16c8a31c2e1 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -38,6 +38,7 @@ #include "trace.h" #include "trace2.h" #include "bundle-uri.h" +#include "wildmatch.h" #define FORCED_UPDATES_DELAY_WARNING_IN_MS (10 * 1000) @@ -485,6 +486,49 @@ static void filter_prefetch_refspec(struct refspec *rs) } } +static int matches_prefetch_refs(const char *refname, const struct string_list *prefetch_refs) +{ + int i; + int has_positive = 0; + int matched_positive = 0; + int matched_negative = 0; + + for (i = 0; i < prefetch_refs->nr; i++) { + const char *pattern = prefetch_refs->items[i].string; + int is_negative = (*pattern == '!'); + + if (is_negative) + pattern++; + else + has_positive = 1; + + if (wildmatch(pattern, refname, 0) == 0) { + if (is_negative) + matched_negative = 1; + else + matched_positive = 1; + } + } + + if (!has_positive) + return !matched_negative; + + return matched_positive && !matched_negative; +} + + +static void ref_remove(struct ref **head, struct ref *to_remove) +{ + struct ref **pp, *p; + + for (pp = head; (p = *pp) != NULL; pp = &p->next) { + if (p == to_remove) { + *pp = p->next; + return; + } + } +} + static struct ref *get_ref_map(struct remote *remote, const struct ref *remote_refs, struct refspec *rs, @@ -502,6 +546,7 @@ static struct ref *get_ref_map(struct remote *remote, int existing_refs_populated = 0; filter_prefetch_refspec(rs); + if (remote) filter_prefetch_refspec(&remote->fetch); @@ -610,6 +655,22 @@ static struct ref *get_ref_map(struct remote *remote, else ref_map = apply_negative_refspecs(ref_map, &remote->fetch); + /** + * Filter out advertised refs that we don't want to fetch during + * prefetch if a prefetchref config is set + */ + if (prefetch && remote->prefetch_refs.nr) { + struct ref *ref, *next; + for (ref = ref_map; ref; ref = next) { + next = ref->next; + + if (!matches_prefetch_refs(ref->name, &remote->prefetch_refs)) { + ref_remove(&ref_map, ref); + free_one_ref(ref); + } + } + } + ref_map = ref_remove_duplicates(ref_map); for (rm = ref_map; rm; rm = rm->next) { diff --git a/remote.c b/remote.c index 8f3dee13186..b46d62b2c47 100644 --- a/remote.c +++ b/remote.c @@ -141,6 +141,7 @@ static struct remote *make_remote(struct remote_state *remote_state, ret->prune = -1; /* unspecified */ ret->prune_tags = -1; /* unspecified */ ret->name = xstrndup(name, len); + string_list_init_dup(&ret->prefetch_refs); refspec_init(&ret->push, REFSPEC_PUSH); refspec_init(&ret->fetch, REFSPEC_FETCH); @@ -166,6 +167,7 @@ static void remote_clear(struct remote *remote) free((char *)remote->uploadpack); FREE_AND_NULL(remote->http_proxy); FREE_AND_NULL(remote->http_proxy_authmethod); + string_list_clear(&remote->prefetch_refs, 0); } static void add_merge(struct branch *branch, const char *name) @@ -456,6 +458,12 @@ static int handle_config(const char *key, const char *value, remote->prune = git_config_bool(key, value); else if (!strcmp(subkey, "prunetags")) remote->prune_tags = git_config_bool(key, value); + else if (!strcmp(subkey, "prefetchref")) { + if (!value) + return config_error_nonbool(key); + string_list_split(&remote->prefetch_refs, value, ' ', -1); + return 0; + } else if (!strcmp(subkey, "url")) { if (!value) return config_error_nonbool(key); diff --git a/remote.h b/remote.h index b901b56746d..c18e68e0d8d 100644 --- a/remote.h +++ b/remote.h @@ -5,6 +5,7 @@ #include "hashmap.h" #include "refspec.h" #include "strvec.h" +#include "string-list.h" struct option; struct transport_ls_refs_options; @@ -77,6 +78,8 @@ struct remote { struct refspec fetch; + struct string_list prefetch_refs; + /* * The setting for whether to fetch tags (as a separate rule from the * configured refspecs); diff --git a/t/t7900-maintenance.sh b/t/t7900-maintenance.sh index abae7a97546..5b64257eb7d 100755 --- a/t/t7900-maintenance.sh +++ b/t/t7900-maintenance.sh @@ -245,6 +245,86 @@ test_expect_success 'prefetch multiple remotes' ' test_subcommand git fetch remote2 $fetchargs <skip-remote1.txt ' +test_expect_success 'prefetch with positive prefetch ref patterns' ' + test_create_repo filter-prefetch-positive && + ( + cd filter-prefetch-positive && + test_commit initial && + git clone . clone2 && + git remote add remote2 "file://$(pwd)/clone2" && + + cd clone2 && + git checkout -b feature && test_commit feature-commit-2 && + git checkout -b wip/test && test_commit wip-test-commit-2 && + git checkout -b topic/x && test_commit topic-x-commit-2 && + git push -f origin feature wip/test topic/x&& + cd .. && + + git config remote.remote2.prefetchref "refs/heads/feature" && + fetchargs="--prefetch --prune --no-tags --no-write-fetch-head --recurse-submodules=no --quiet" && + GIT_TRACE2_EVENT="$(pwd)/prefetch-positive.txt" git maintenance run --task=prefetch 2>/dev/null && + test_subcommand git fetch remote2 $fetchargs <prefetch-positive.txt && + + git rev-parse refs/prefetch/remotes/remote2/feature && + test_must_fail git rev-parse refs/prefetch/remotes/remote2/wip/test && + test_must_fail git rev-parse refs/prefetch/remotes/remote2/topic/x + ) +' + +test_expect_success 'prefetch with negative prefetch ref patterns' ' + test_create_repo filter-prefetch-negative && + ( + cd filter-prefetch-negative && + test_commit initial && + git clone . clone3 && + git remote add remote3 "file://$(pwd)/clone3" && + cat .git/config && + + cd clone3 && + git checkout -b feature && test_commit feature-commit-3 && + git checkout -b wip/test && test_commit wip-test-commit-3 && + git checkout -b topic/x && test_commit topic-x-commit-3 && + git push -f origin feature wip/test topic/x && + cd .. && + + git config remote.remote3.prefetchref "!refs/heads/wip/*" && + fetchargs="--prefetch --prune --no-tags --no-write-fetch-head --recurse-submodules=no --quiet" && + GIT_TRACE2_EVENT="$(pwd)/prefetch-negative.txt" git maintenance run --task=prefetch 2>/dev/null && + test_subcommand git fetch remote3 $fetchargs <prefetch-negative.txt && + git rev-parse refs/prefetch/remotes/remote3/feature && + git rev-parse refs/prefetch/remotes/remote3/topic/x && + test_must_fail git rev-parse refs/prefetch/remotes/remote3/wip/test + ) +' + +test_expect_success 'prefetch with positive & negative prefetch ref patterns' ' + test_create_repo filter-prefetch-mixed && + ( + cd filter-prefetch-mixed && + test_commit initial && + git clone . clone4 && + git remote add remote4 "file://$(pwd)/clone4" && + + cd clone4 && + git checkout -b feature && test_commit feature-commit-4 && + git checkout -b topic/x && test_commit topic-x-commit-4 && + git checkout -b topic/y && test_commit topic-y-commit-4 && + git push -f origin feature topic/x topic/y && + cd .. && + + git config remote.remote4.prefetchref "refs/heads/topic/* !refs/heads/topic/y" && + # git config --add remote.remote4.prefetchref "!refs/topic/y" && + cat .git/config && + fetchargs="--prefetch --prune --no-tags --no-write-fetch-head --recurse-submodules=no --quiet" && + GIT_TRACE2_EVENT="$(pwd)/prefetch-mixed.txt" git maintenance run --task=prefetch 2>/dev/null && + test_subcommand git fetch remote4 $fetchargs <prefetch-mixed.txt && + + test_must_fail git rev-parse refs/prefetch/remotes/remote4/feature && + test_must_fail git rev-parse refs/prefetch/remotes/remote4/topic/y && + git rev-parse refs/prefetch/remotes/remote4/topic/x + ) +' + test_expect_success 'loose-objects task' ' # Repack everything so we know the state of the object dir git repack -adk && base-commit: 2e7b89e038c0c888acf61f1b4ee5a43d4dd5e94c -- gitgitgadget ^ permalink raw reply related [flat|nested] 27+ messages in thread
* [PATCH v3] remote: introduce config to set prefetch refs 2024-09-15 14:08 ` [PATCH v2] " Shubham Kanodia via GitGitGadget @ 2024-09-19 10:23 ` Shubham Kanodia via GitGitGadget 2024-09-23 21:24 ` Junio C Hamano 2024-10-04 20:21 ` [PATCH v4] remote: allow specifying refs to prefetch Shubham Kanodia via GitGitGadget 0 siblings, 2 replies; 27+ messages in thread From: Shubham Kanodia via GitGitGadget @ 2024-09-19 10:23 UTC (permalink / raw) To: git Cc: Patrick Steinhardt [ ], Junio C Hamano [ ], Derrick Stolee [ ], Shubham Kanodia, Shubham Kanodia From: Shubham Kanodia <shubham.kanodia10@gmail.com> This commit introduces a new configuration option, remote.<name>.prefetchref, which allows users to specify specific ref patterns to be prefetched during a git fetch --prefetch operation. The new option accepts a space-separated list of ref patterns. When the --prefetch option is used with git fetch, only the refs matching these patterns will be prefetched, instead of the default behavior of prefetching all fetchable refs. Example usage in .git/config: [remote "origin"] prefetchref = "refs/heads/main refs/heads/feature/*" This change allows users to optimize their prefetch operations, potentially reducing network traffic and improving performance for large repositories with many refs. Signed-off-by: Shubham Kanodia <shubham.kanodia10@gmail.com> --- remote: introduce config to set prefetch refs Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-1782%2Fpastelsky%2Fsk%2Fremote-prefetchref-v3 Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-1782/pastelsky/sk/remote-prefetchref-v3 Pull-Request: https://github.com/gitgitgadget/git/pull/1782 Range-diff vs v2: 1: f9f9e637bfa ! 1: 717d5957c47 remote: introduce config to set prefetch refs @@ Documentation/config/remote.txt: remote.<name>.fetch:: +remote.<name>.prefetchref:: + Specify the refs to be prefetched when fetching from this remote. -+ The value is a space-separated list of ref patterns (e.g., "refs/heads/master refs/heads/develop*"). -+ These patterns are used as the source part of the refspecs for prefetching. ++ The value is a space-separated list of ref patterns (e.g., "refs/heads/master !refs/heads/develop*"). + This can be used to optimize fetch operations by specifying exactly which refs should be prefetched. + remote.<name>.push:: @@ t/t7900-maintenance.sh: test_expect_success 'prefetch multiple remotes' ' + cd .. && + + git config remote.remote4.prefetchref "refs/heads/topic/* !refs/heads/topic/y" && -+ # git config --add remote.remote4.prefetchref "!refs/topic/y" && -+ cat .git/config && + fetchargs="--prefetch --prune --no-tags --no-write-fetch-head --recurse-submodules=no --quiet" && + GIT_TRACE2_EVENT="$(pwd)/prefetch-mixed.txt" git maintenance run --task=prefetch 2>/dev/null && + test_subcommand git fetch remote4 $fetchargs <prefetch-mixed.txt && Documentation/config/remote.txt | 5 +++ builtin/fetch.c | 61 ++++++++++++++++++++++++++ remote.c | 8 ++++ remote.h | 3 ++ t/t7900-maintenance.sh | 78 +++++++++++++++++++++++++++++++++ 5 files changed, 155 insertions(+) diff --git a/Documentation/config/remote.txt b/Documentation/config/remote.txt index 8efc53e836d..b04ee0c4c22 100644 --- a/Documentation/config/remote.txt +++ b/Documentation/config/remote.txt @@ -33,6 +33,11 @@ remote.<name>.fetch:: The default set of "refspec" for linkgit:git-fetch[1]. See linkgit:git-fetch[1]. +remote.<name>.prefetchref:: + Specify the refs to be prefetched when fetching from this remote. + The value is a space-separated list of ref patterns (e.g., "refs/heads/master !refs/heads/develop*"). + This can be used to optimize fetch operations by specifying exactly which refs should be prefetched. + remote.<name>.push:: The default set of "refspec" for linkgit:git-push[1]. See linkgit:git-push[1]. diff --git a/builtin/fetch.c b/builtin/fetch.c index b2b5aee5bf2..16c8a31c2e1 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -38,6 +38,7 @@ #include "trace.h" #include "trace2.h" #include "bundle-uri.h" +#include "wildmatch.h" #define FORCED_UPDATES_DELAY_WARNING_IN_MS (10 * 1000) @@ -485,6 +486,49 @@ static void filter_prefetch_refspec(struct refspec *rs) } } +static int matches_prefetch_refs(const char *refname, const struct string_list *prefetch_refs) +{ + int i; + int has_positive = 0; + int matched_positive = 0; + int matched_negative = 0; + + for (i = 0; i < prefetch_refs->nr; i++) { + const char *pattern = prefetch_refs->items[i].string; + int is_negative = (*pattern == '!'); + + if (is_negative) + pattern++; + else + has_positive = 1; + + if (wildmatch(pattern, refname, 0) == 0) { + if (is_negative) + matched_negative = 1; + else + matched_positive = 1; + } + } + + if (!has_positive) + return !matched_negative; + + return matched_positive && !matched_negative; +} + + +static void ref_remove(struct ref **head, struct ref *to_remove) +{ + struct ref **pp, *p; + + for (pp = head; (p = *pp) != NULL; pp = &p->next) { + if (p == to_remove) { + *pp = p->next; + return; + } + } +} + static struct ref *get_ref_map(struct remote *remote, const struct ref *remote_refs, struct refspec *rs, @@ -502,6 +546,7 @@ static struct ref *get_ref_map(struct remote *remote, int existing_refs_populated = 0; filter_prefetch_refspec(rs); + if (remote) filter_prefetch_refspec(&remote->fetch); @@ -610,6 +655,22 @@ static struct ref *get_ref_map(struct remote *remote, else ref_map = apply_negative_refspecs(ref_map, &remote->fetch); + /** + * Filter out advertised refs that we don't want to fetch during + * prefetch if a prefetchref config is set + */ + if (prefetch && remote->prefetch_refs.nr) { + struct ref *ref, *next; + for (ref = ref_map; ref; ref = next) { + next = ref->next; + + if (!matches_prefetch_refs(ref->name, &remote->prefetch_refs)) { + ref_remove(&ref_map, ref); + free_one_ref(ref); + } + } + } + ref_map = ref_remove_duplicates(ref_map); for (rm = ref_map; rm; rm = rm->next) { diff --git a/remote.c b/remote.c index 8f3dee13186..b46d62b2c47 100644 --- a/remote.c +++ b/remote.c @@ -141,6 +141,7 @@ static struct remote *make_remote(struct remote_state *remote_state, ret->prune = -1; /* unspecified */ ret->prune_tags = -1; /* unspecified */ ret->name = xstrndup(name, len); + string_list_init_dup(&ret->prefetch_refs); refspec_init(&ret->push, REFSPEC_PUSH); refspec_init(&ret->fetch, REFSPEC_FETCH); @@ -166,6 +167,7 @@ static void remote_clear(struct remote *remote) free((char *)remote->uploadpack); FREE_AND_NULL(remote->http_proxy); FREE_AND_NULL(remote->http_proxy_authmethod); + string_list_clear(&remote->prefetch_refs, 0); } static void add_merge(struct branch *branch, const char *name) @@ -456,6 +458,12 @@ static int handle_config(const char *key, const char *value, remote->prune = git_config_bool(key, value); else if (!strcmp(subkey, "prunetags")) remote->prune_tags = git_config_bool(key, value); + else if (!strcmp(subkey, "prefetchref")) { + if (!value) + return config_error_nonbool(key); + string_list_split(&remote->prefetch_refs, value, ' ', -1); + return 0; + } else if (!strcmp(subkey, "url")) { if (!value) return config_error_nonbool(key); diff --git a/remote.h b/remote.h index b901b56746d..c18e68e0d8d 100644 --- a/remote.h +++ b/remote.h @@ -5,6 +5,7 @@ #include "hashmap.h" #include "refspec.h" #include "strvec.h" +#include "string-list.h" struct option; struct transport_ls_refs_options; @@ -77,6 +78,8 @@ struct remote { struct refspec fetch; + struct string_list prefetch_refs; + /* * The setting for whether to fetch tags (as a separate rule from the * configured refspecs); diff --git a/t/t7900-maintenance.sh b/t/t7900-maintenance.sh index abae7a97546..054f1f06f95 100755 --- a/t/t7900-maintenance.sh +++ b/t/t7900-maintenance.sh @@ -245,6 +245,84 @@ test_expect_success 'prefetch multiple remotes' ' test_subcommand git fetch remote2 $fetchargs <skip-remote1.txt ' +test_expect_success 'prefetch with positive prefetch ref patterns' ' + test_create_repo filter-prefetch-positive && + ( + cd filter-prefetch-positive && + test_commit initial && + git clone . clone2 && + git remote add remote2 "file://$(pwd)/clone2" && + + cd clone2 && + git checkout -b feature && test_commit feature-commit-2 && + git checkout -b wip/test && test_commit wip-test-commit-2 && + git checkout -b topic/x && test_commit topic-x-commit-2 && + git push -f origin feature wip/test topic/x&& + cd .. && + + git config remote.remote2.prefetchref "refs/heads/feature" && + fetchargs="--prefetch --prune --no-tags --no-write-fetch-head --recurse-submodules=no --quiet" && + GIT_TRACE2_EVENT="$(pwd)/prefetch-positive.txt" git maintenance run --task=prefetch 2>/dev/null && + test_subcommand git fetch remote2 $fetchargs <prefetch-positive.txt && + + git rev-parse refs/prefetch/remotes/remote2/feature && + test_must_fail git rev-parse refs/prefetch/remotes/remote2/wip/test && + test_must_fail git rev-parse refs/prefetch/remotes/remote2/topic/x + ) +' + +test_expect_success 'prefetch with negative prefetch ref patterns' ' + test_create_repo filter-prefetch-negative && + ( + cd filter-prefetch-negative && + test_commit initial && + git clone . clone3 && + git remote add remote3 "file://$(pwd)/clone3" && + cat .git/config && + + cd clone3 && + git checkout -b feature && test_commit feature-commit-3 && + git checkout -b wip/test && test_commit wip-test-commit-3 && + git checkout -b topic/x && test_commit topic-x-commit-3 && + git push -f origin feature wip/test topic/x && + cd .. && + + git config remote.remote3.prefetchref "!refs/heads/wip/*" && + fetchargs="--prefetch --prune --no-tags --no-write-fetch-head --recurse-submodules=no --quiet" && + GIT_TRACE2_EVENT="$(pwd)/prefetch-negative.txt" git maintenance run --task=prefetch 2>/dev/null && + test_subcommand git fetch remote3 $fetchargs <prefetch-negative.txt && + git rev-parse refs/prefetch/remotes/remote3/feature && + git rev-parse refs/prefetch/remotes/remote3/topic/x && + test_must_fail git rev-parse refs/prefetch/remotes/remote3/wip/test + ) +' + +test_expect_success 'prefetch with positive & negative prefetch ref patterns' ' + test_create_repo filter-prefetch-mixed && + ( + cd filter-prefetch-mixed && + test_commit initial && + git clone . clone4 && + git remote add remote4 "file://$(pwd)/clone4" && + + cd clone4 && + git checkout -b feature && test_commit feature-commit-4 && + git checkout -b topic/x && test_commit topic-x-commit-4 && + git checkout -b topic/y && test_commit topic-y-commit-4 && + git push -f origin feature topic/x topic/y && + cd .. && + + git config remote.remote4.prefetchref "refs/heads/topic/* !refs/heads/topic/y" && + fetchargs="--prefetch --prune --no-tags --no-write-fetch-head --recurse-submodules=no --quiet" && + GIT_TRACE2_EVENT="$(pwd)/prefetch-mixed.txt" git maintenance run --task=prefetch 2>/dev/null && + test_subcommand git fetch remote4 $fetchargs <prefetch-mixed.txt && + + test_must_fail git rev-parse refs/prefetch/remotes/remote4/feature && + test_must_fail git rev-parse refs/prefetch/remotes/remote4/topic/y && + git rev-parse refs/prefetch/remotes/remote4/topic/x + ) +' + test_expect_success 'loose-objects task' ' # Repack everything so we know the state of the object dir git repack -adk && base-commit: 2e7b89e038c0c888acf61f1b4ee5a43d4dd5e94c -- gitgitgadget ^ permalink raw reply related [flat|nested] 27+ messages in thread
* Re: [PATCH v3] remote: introduce config to set prefetch refs 2024-09-19 10:23 ` [PATCH v3] " Shubham Kanodia via GitGitGadget @ 2024-09-23 21:24 ` Junio C Hamano 2024-10-07 14:30 ` Shubham Kanodia 2024-10-04 20:21 ` [PATCH v4] remote: allow specifying refs to prefetch Shubham Kanodia via GitGitGadget 1 sibling, 1 reply; 27+ messages in thread From: Junio C Hamano @ 2024-09-23 21:24 UTC (permalink / raw) To: Shubham Kanodia via GitGitGadget Cc: git, Patrick Steinhardt [ ], Derrick Stolee [ ], Shubham Kanodia "Shubham Kanodia via GitGitGadget" <gitgitgadget@gmail.com> writes: > From: Shubham Kanodia <shubham.kanodia10@gmail.com> > > This commit introduces a new configuration option, ... The usual way to compose a log message of this project is to - Give an observation on how the current system work in the present tense (so no need to say "Currently X is Y", just "X is Y"), and discuss what you perceive as a problem in it. - Propose a solution (optional---often, problem description trivially leads to an obvious solution in reader's minds). - Give commands to the codebase to "become like so". in this order. To those who have been intimately following the discussion, it often is understandable without some of the above, but we are not writing for those who review the patches. We are primarily writing for future readers of "git log" who are not aware of the review discussion we have on list, so we should give something to prepare them by setting the stage and stating the objective first, before going into how the patch solved it. > diff --git a/Documentation/config/remote.txt b/Documentation/config/remote.txt > index 8efc53e836d..b04ee0c4c22 100644 > --- a/Documentation/config/remote.txt > +++ b/Documentation/config/remote.txt > @@ -33,6 +33,11 @@ remote.<name>.fetch:: > The default set of "refspec" for linkgit:git-fetch[1]. See > linkgit:git-fetch[1]. > > +remote.<name>.prefetchref:: > + Specify the refs to be prefetched when fetching from this remote. > + The value is a space-separated list of ref patterns (e.g., "refs/heads/master !refs/heads/develop*"). > + This can be used to optimize fetch operations by specifying exactly which refs should be prefetched. Overly long lines. > diff --git a/builtin/fetch.c b/builtin/fetch.c > index b2b5aee5bf2..16c8a31c2e1 100644 > --- a/builtin/fetch.c > +++ b/builtin/fetch.c > @@ -38,6 +38,7 @@ > #include "trace.h" > #include "trace2.h" > #include "bundle-uri.h" > +#include "wildmatch.h" The match done by the wildmatch function is a way too loose to be used in the context of parsing and matching the src half of an refspec_item (e.g. a globbing refspec element can have at most one asterisk in it, but wildmatch does not have any sanity check to ensure it). The refspec logic to accept a fetch refspec "refs/heads/*:refs/remotes/origin/*" and rewrite a ref the remote side advertised as refs/heads/foo into refs/remotes/origin/foo, the remote-tracking ref on the local side, is in remote.c:match_name_with_pattern(), and it does not use wildmatch. > +static int matches_prefetch_refs(const char *refname, const struct string_list *prefetch_refs) > +{ > + int i; > + int has_positive = 0; > + int matched_positive = 0; > + int matched_negative = 0; > + > + for (i = 0; i < prefetch_refs->nr; i++) { Just FYI, in modern Git codebase, loop control variable 'i' can be declared in the initialization part of the for() statement, i.e. for (int i = 0; i < ...) { which might make it easier to see that there are three variables of importance to determine the outcome of the computation (and 'i' is merely a loop counter). > + const char *pattern = prefetch_refs->items[i].string; > + int is_negative = (*pattern == '!'); > + > + if (is_negative) > + pattern++; > + else > + has_positive = 1; > + > + if (wildmatch(pattern, refname, 0) == 0) { > + if (is_negative) > + matched_negative = 1; > + else > + matched_positive = 1; > + } > + } I suspect that the function can be exposed to external callers as-is, possibly after renaming it to make it clear that the helper function is about refspec. Then you can call it from the code below in stead of wildmatch(), by passing the pattern as "key", and the refname as "name", with NULL for both "value" and "result", I think. > + > + if (!has_positive) > + return !matched_negative; > + > + return matched_positive && !matched_negative; > +} And the implementation of the logic looks correct. If the pattern says "!A !B !C" (I don't want any of these), we only care that none of these negative patterns trigger, but if the pattern has even one positive one "X Y !A !B !C" (it must match X or Y, but it must not match any of the A B C), we also make sure it matched at least one positive pattern. > +static void ref_remove(struct ref **head, struct ref *to_remove) > +{ > + struct ref **pp, *p; > + > + for (pp = head; (p = *pp) != NULL; pp = &p->next) { > + if (p == to_remove) { > + *pp = p->next; > + return; > + } > + } > +} We remote from a linked list of "struct ref" one such element to_remove. OK. > @@ -610,6 +655,22 @@ static struct ref *get_ref_map(struct remote *remote, > else > ref_map = apply_negative_refspecs(ref_map, &remote->fetch); > > + /** > + * Filter out advertised refs that we don't want to fetch during > + * prefetch if a prefetchref config is set > + */ > + if (prefetch && remote->prefetch_refs.nr) { > + struct ref *ref, *next; > + for (ref = ref_map; ref; ref = next) { > + next = ref->next; > + > + if (!matches_prefetch_refs(ref->name, &remote->prefetch_refs)) { > + ref_remove(&ref_map, ref); > + free_one_ref(ref); It is curious why such an overly deep indentation is used only in this if() statement. It is good that you made sure we do not leak the ref that we removed, but it looks verly inefficient to repeatedly call ref_remove (the function becomes more inefficient as the loop progresses as it is more costly to remove later elements on a linearly linked list). Wouldn't it be more efficient to iterate over the original list, sift each element into "surviving" and "discarded" bin as we go? Something along the lines of ... struct ref *new_list = NULL, **tail = &new_list; while (ref_map) { struct ref *ref = ref_map; ref_map = ref_map->next; ref->next = NULL; if (matches(...)) { *tail = ref; tail = &ref->next; } else { free_one_ref(ref); } } ref_map = new_list; ..., which I imitated code structure of ref_remove_duplicates(). > ref_map = ref_remove_duplicates(ref_map); > diff --git a/t/t7900-maintenance.sh b/t/t7900-maintenance.sh > index abae7a97546..054f1f06f95 100755 > --- a/t/t7900-maintenance.sh > +++ b/t/t7900-maintenance.sh Avoid overly long lines in the test script, too. In any case, I very much agree with you that get_ref_map().is the best place to add this logic to. Looking much better. Thanks. ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH v3] remote: introduce config to set prefetch refs 2024-09-23 21:24 ` Junio C Hamano @ 2024-10-07 14:30 ` Shubham Kanodia 0 siblings, 0 replies; 27+ messages in thread From: Shubham Kanodia @ 2024-10-07 14:30 UTC (permalink / raw) To: Junio C Hamano Cc: Shubham Kanodia via GitGitGadget, git, Patrick Steinhardt [ ], Derrick Stolee [ ] Sounds good. Submitted a patch at: https://lore.kernel.org/git/pull.1782.v4.git.1728073292874.gitgitgadget@gmail.com/ Which seems to be detached from this thread now — perhaps because I re-wrote the commit message. Or perhaps I'm reading the threads wrong. Apologies for the break. On Tue, Sep 24, 2024 at 2:54 AM Junio C Hamano <gitster@pobox.com> wrote: > > "Shubham Kanodia via GitGitGadget" <gitgitgadget@gmail.com> writes: > > > From: Shubham Kanodia <shubham.kanodia10@gmail.com> > > > > This commit introduces a new configuration option, ... > > The usual way to compose a log message of this project is to > > - Give an observation on how the current system work in the present > tense (so no need to say "Currently X is Y", just "X is Y"), and > discuss what you perceive as a problem in it. > > - Propose a solution (optional---often, problem description > trivially leads to an obvious solution in reader's minds). > > - Give commands to the codebase to "become like so". > > in this order. > > To those who have been intimately following the discussion, it often > is understandable without some of the above, but we are not writing > for those who review the patches. We are primarily writing for future > readers of "git log" who are not aware of the review discussion we > have on list, so we should give something to prepare them by setting > the stage and stating the objective first, before going into how the > patch solved it. > > > diff --git a/Documentation/config/remote.txt b/Documentation/config/remote.txt > > index 8efc53e836d..b04ee0c4c22 100644 > > --- a/Documentation/config/remote.txt > > +++ b/Documentation/config/remote.txt > > @@ -33,6 +33,11 @@ remote.<name>.fetch:: > > The default set of "refspec" for linkgit:git-fetch[1]. See > > linkgit:git-fetch[1]. > > > > +remote.<name>.prefetchref:: > > + Specify the refs to be prefetched when fetching from this remote. > > + The value is a space-separated list of ref patterns (e.g., "refs/heads/master !refs/heads/develop*"). > > + This can be used to optimize fetch operations by specifying exactly which refs should be prefetched. > > Overly long lines. > > > diff --git a/builtin/fetch.c b/builtin/fetch.c > > index b2b5aee5bf2..16c8a31c2e1 100644 > > --- a/builtin/fetch.c > > +++ b/builtin/fetch.c > > @@ -38,6 +38,7 @@ > > #include "trace.h" > > #include "trace2.h" > > #include "bundle-uri.h" > > +#include "wildmatch.h" > > The match done by the wildmatch function is a way too loose to be > used in the context of parsing and matching the src half of an > refspec_item (e.g. a globbing refspec element can have at most one > asterisk in it, but wildmatch does not have any sanity check to > ensure it). The refspec logic to accept a fetch refspec > "refs/heads/*:refs/remotes/origin/*" and rewrite a ref the remote > side advertised as refs/heads/foo into refs/remotes/origin/foo, the > remote-tracking ref on the local side, is in > remote.c:match_name_with_pattern(), and it does not use wildmatch. > > > +static int matches_prefetch_refs(const char *refname, const struct string_list *prefetch_refs) > > +{ > > + int i; > > + int has_positive = 0; > > + int matched_positive = 0; > > + int matched_negative = 0; > > + > > + for (i = 0; i < prefetch_refs->nr; i++) { > > Just FYI, in modern Git codebase, loop control variable 'i' can be > declared in the initialization part of the for() statement, i.e. > > for (int i = 0; i < ...) { > > which might make it easier to see that there are three variables of > importance to determine the outcome of the computation (and 'i' is > merely a loop counter). > > > + const char *pattern = prefetch_refs->items[i].string; > > + int is_negative = (*pattern == '!'); > > + > > + if (is_negative) > > + pattern++; > > + else > > + has_positive = 1; > > + > > + if (wildmatch(pattern, refname, 0) == 0) { > > + if (is_negative) > > + matched_negative = 1; > > + else > > + matched_positive = 1; > > + } > > + } > > I suspect that the function can be exposed to external callers as-is, > possibly after renaming it to make it clear that the helper function > is about refspec. Then you can call it from the code below in stead > of wildmatch(), by passing the pattern as "key", and the refname as > "name", with NULL for both "value" and "result", I think. > > > + > > + if (!has_positive) > > + return !matched_negative; > > + > > + return matched_positive && !matched_negative; > > +} > > And the implementation of the logic looks correct. If the pattern > says "!A !B !C" (I don't want any of these), we only care that none > of these negative patterns trigger, but if the pattern has even one > positive one "X Y !A !B !C" (it must match X or Y, but it must not > match any of the A B C), we also make sure it matched at least one > positive pattern. > > > +static void ref_remove(struct ref **head, struct ref *to_remove) > > +{ > > + struct ref **pp, *p; > > + > > + for (pp = head; (p = *pp) != NULL; pp = &p->next) { > > + if (p == to_remove) { > > + *pp = p->next; > > + return; > > + } > > + } > > +} > > We remote from a linked list of "struct ref" one such element > to_remove. OK. > > > @@ -610,6 +655,22 @@ static struct ref *get_ref_map(struct remote *remote, > > else > > ref_map = apply_negative_refspecs(ref_map, &remote->fetch); > > > > + /** > > + * Filter out advertised refs that we don't want to fetch during > > + * prefetch if a prefetchref config is set > > + */ > > + if (prefetch && remote->prefetch_refs.nr) { > > + struct ref *ref, *next; > > + for (ref = ref_map; ref; ref = next) { > > + next = ref->next; > > + > > + if (!matches_prefetch_refs(ref->name, &remote->prefetch_refs)) { > > + ref_remove(&ref_map, ref); > > + free_one_ref(ref); > > It is curious why such an overly deep indentation is used only in > this if() statement. > > It is good that you made sure we do not leak the ref that we > removed, but it looks verly inefficient to repeatedly call > ref_remove (the function becomes more inefficient as the loop > progresses as it is more costly to remove later elements on a > linearly linked list). > > Wouldn't it be more efficient to iterate over the original list, sift > each element into "surviving" and "discarded" bin as we go? Something > along the lines of ... > > struct ref *new_list = NULL, **tail = &new_list; > > while (ref_map) { > struct ref *ref = ref_map; > > ref_map = ref_map->next; > ref->next = NULL; > > if (matches(...)) { > *tail = ref; > tail = &ref->next; > } else { > free_one_ref(ref); > } > } > ref_map = new_list; > > ..., which I imitated code structure of ref_remove_duplicates(). > > > ref_map = ref_remove_duplicates(ref_map); > > > diff --git a/t/t7900-maintenance.sh b/t/t7900-maintenance.sh > > index abae7a97546..054f1f06f95 100755 > > --- a/t/t7900-maintenance.sh > > +++ b/t/t7900-maintenance.sh > > Avoid overly long lines in the test script, too. > > In any case, I very much agree with you that get_ref_map().is the > best place to add this logic to. Looking much better. > > Thanks. ^ permalink raw reply [flat|nested] 27+ messages in thread
* [PATCH v4] remote: allow specifying refs to prefetch 2024-09-19 10:23 ` [PATCH v3] " Shubham Kanodia via GitGitGadget 2024-09-23 21:24 ` Junio C Hamano @ 2024-10-04 20:21 ` Shubham Kanodia via GitGitGadget 2024-11-04 8:47 ` Shubham Kanodia ` (2 more replies) 1 sibling, 3 replies; 27+ messages in thread From: Shubham Kanodia via GitGitGadget @ 2024-10-04 20:21 UTC (permalink / raw) To: git Cc: Patrick Steinhardt [ ], Junio C Hamano [ ], Derrick Stolee [ ], Shubham Kanodia, Shubham Kanodia From: Shubham Kanodia <shubham.kanodia10@gmail.com> When using 'git fetch --prefetch', all fetchable refs are prefetched by default. In large repositories with many refs, this can lead to unnecessary network traffic and increased disk space use. Introduce a new configuration option 'remote.<name>.prefetchref' that allows users to specify specific ref patterns to be prefetched during a 'git fetch --prefetch' operation. The 'prefetchref' option accepts a space-separated list of ref patterns (e.g., 'refs/heads/main !refs/heads/feature/*'). When the '--prefetch' option is used with 'git fetch', only the refs matching these patterns will be prefetched. Signed-off-by: Shubham Kanodia <shubham.kanodia10@gmail.com> --- remote: introduce config to set prefetch refs Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-1782%2Fpastelsky%2Fsk%2Fremote-prefetchref-v4 Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-1782/pastelsky/sk/remote-prefetchref-v4 Pull-Request: https://github.com/gitgitgadget/git/pull/1782 Range-diff vs v3: 1: 717d5957c47 ! 1: e3d8aee1ea8 remote: introduce config to set prefetch refs @@ Metadata Author: Shubham Kanodia <shubham.kanodia10@gmail.com> ## Commit message ## - remote: introduce config to set prefetch refs + remote: allow specifying refs to prefetch - This commit introduces a new configuration option, - remote.<name>.prefetchref, which allows users to specify specific - ref patterns to be prefetched during a git fetch --prefetch - operation. + When using 'git fetch --prefetch', all fetchable refs are prefetched + by default. In large repositories with many refs, this can lead to + unnecessary network traffic and increased disk space use. - The new option accepts a space-separated list of ref patterns. - When the --prefetch option is used with git fetch, only the refs - matching these patterns will be prefetched, instead of the - default behavior of prefetching all fetchable refs. + Introduce a new configuration option 'remote.<name>.prefetchref' that + allows users to specify specific ref patterns to be prefetched during + a 'git fetch --prefetch' operation. - Example usage in .git/config: - [remote "origin"] - prefetchref = "refs/heads/main refs/heads/feature/*" - - This change allows users to optimize their prefetch operations, potentially - reducing network traffic and improving performance for large repositories - with many refs. + The 'prefetchref' option accepts a space-separated list of ref + patterns (e.g., 'refs/heads/main !refs/heads/feature/*'). When the + '--prefetch' option is used with 'git fetch', only the refs matching + these patterns will be prefetched. Signed-off-by: Shubham Kanodia <shubham.kanodia10@gmail.com> @@ Documentation/config/remote.txt: remote.<name>.fetch:: linkgit:git-fetch[1]. +remote.<name>.prefetchref:: -+ Specify the refs to be prefetched when fetching from this remote. -+ The value is a space-separated list of ref patterns (e.g., "refs/heads/master !refs/heads/develop*"). -+ This can be used to optimize fetch operations by specifying exactly which refs should be prefetched. ++ Specify the refs to be prefetched when fetching from this ++ remote. The value is a space-separated list of ref patterns ++ (e.g., "refs/heads/main !refs/heads/develop*"). This can be ++ used to optimize fetch operations by specifying exactly which ++ refs should be prefetched. + remote.<name>.push:: The default set of "refspec" for linkgit:git-push[1]. See linkgit:git-push[1]. ## builtin/fetch.c ## -@@ - #include "trace.h" - #include "trace2.h" - #include "bundle-uri.h" -+#include "wildmatch.h" - - #define FORCED_UPDATES_DELAY_WARNING_IN_MS (10 * 1000) - @@ builtin/fetch.c: static void filter_prefetch_refspec(struct refspec *rs) } } ++static int pattern_matches_ref(const char *pattern, const char *refname) ++{ ++ if (strchr(pattern, '*')) ++ return match_refspec_name_with_pattern(pattern, refname, NULL, NULL) != 0; ++ return strcmp(pattern, refname) == 0; ++} ++ +static int matches_prefetch_refs(const char *refname, const struct string_list *prefetch_refs) +{ -+ int i; -+ int has_positive = 0; -+ int matched_positive = 0; -+ int matched_negative = 0; ++ int has_positive = 0, matched_positive = 0, matched_negative = 0; + -+ for (i = 0; i < prefetch_refs->nr; i++) { ++ for (int i = 0; i < prefetch_refs->nr; i++) { + const char *pattern = prefetch_refs->items[i].string; + int is_negative = (*pattern == '!'); ++ if (is_negative) pattern++; ++ else has_positive = 1; + -+ if (is_negative) -+ pattern++; -+ else -+ has_positive = 1; -+ -+ if (wildmatch(pattern, refname, 0) == 0) { -+ if (is_negative) -+ matched_negative = 1; -+ else -+ matched_positive = 1; ++ if (pattern_matches_ref(pattern, refname)) { ++ if (is_negative) matched_negative = 1; ++ else matched_positive = 1; + } + } + -+ if (!has_positive) -+ return !matched_negative; -+ -+ return matched_positive && !matched_negative; -+} -+ -+ -+static void ref_remove(struct ref **head, struct ref *to_remove) -+{ -+ struct ref **pp, *p; -+ -+ for (pp = head; (p = *pp) != NULL; pp = &p->next) { -+ if (p == to_remove) { -+ *pp = p->next; -+ return; -+ } -+ } ++ return has_positive ? (matched_positive && !matched_negative) : !matched_negative; +} + static struct ref *get_ref_map(struct remote *remote, const struct ref *remote_refs, struct refspec *rs, @@ builtin/fetch.c: static struct ref *get_ref_map(struct remote *remote, + struct hashmap existing_refs; int existing_refs_populated = 0; ++ struct ref *prefetch_filtered_ref_map = NULL, **ref_map_tail = &prefetch_filtered_ref_map; ++ struct ref *next; ++ filter_prefetch_refspec(rs); + if (remote) @@ builtin/fetch.c: static struct ref *get_ref_map(struct remote *remote, + * Filter out advertised refs that we don't want to fetch during + * prefetch if a prefetchref config is set + */ ++ + if (prefetch && remote->prefetch_refs.nr) { -+ struct ref *ref, *next; -+ for (ref = ref_map; ref; ref = next) { -+ next = ref->next; ++ prefetch_filtered_ref_map = NULL; ++ ref_map_tail = &prefetch_filtered_ref_map; ++ ++ for (rm = ref_map; rm; rm = next) { ++ next = rm->next; ++ rm->next = NULL; + -+ if (!matches_prefetch_refs(ref->name, &remote->prefetch_refs)) { -+ ref_remove(&ref_map, ref); -+ free_one_ref(ref); ++ if (matches_prefetch_refs(rm->name, &remote->prefetch_refs)) { ++ *ref_map_tail = rm; ++ ref_map_tail = &rm->next; ++ } else { ++ free_one_ref(rm); + } + } ++ ref_map = prefetch_filtered_ref_map; + } + ref_map = ref_remove_duplicates(ref_map); @@ remote.c: static int handle_config(const char *key, const char *value, else if (!strcmp(subkey, "url")) { if (!value) return config_error_nonbool(key); +@@ remote.c: struct strvec *push_url_of_remote(struct remote *remote) + return remote->pushurl.nr ? &remote->pushurl : &remote->url; + } + +-static int match_name_with_pattern(const char *key, const char *name, ++int match_refspec_name_with_pattern(const char *key, const char *name, + const char *value, char **result) + { + const char *kstar = strchr(key, '*'); +@@ remote.c: static int refspec_match(const struct refspec_item *refspec, + const char *name) + { + if (refspec->pattern) +- return match_name_with_pattern(refspec->src, name, NULL, NULL); ++ return match_refspec_name_with_pattern(refspec->src, name, NULL, NULL); + + return !strcmp(refspec->src, name); + } +@@ remote.c: static int query_matches_negative_refspec(struct refspec *rs, struct refspec_ite + const char *key = refspec->dst ? refspec->dst : refspec->src; + const char *value = refspec->src; + +- if (match_name_with_pattern(key, needle, value, &expn_name)) ++ if (match_refspec_name_with_pattern(key, needle, value, &expn_name)) + string_list_append_nodup(&reversed, expn_name); + } else if (refspec->matching) { + /* For the special matching refspec, any query should match */ +@@ remote.c: static void query_refspecs_multiple(struct refspec *rs, + if (!refspec->dst || refspec->negative) + continue; + if (refspec->pattern) { +- if (match_name_with_pattern(key, needle, value, result)) ++ if (match_refspec_name_with_pattern(key, needle, value, result)) + string_list_append_nodup(results, *result); + } else if (!strcmp(needle, key)) { + string_list_append(results, value); +@@ remote.c: int query_refspecs(struct refspec *rs, struct refspec_item *query) + if (!refspec->dst || refspec->negative) + continue; + if (refspec->pattern) { +- if (match_name_with_pattern(key, needle, value, result)) { ++ if (match_refspec_name_with_pattern(key, needle, value, result)) { + query->force = refspec->force; + return 0; + } +@@ remote.c: static char *get_ref_match(const struct refspec *rs, const struct ref *ref, + const char *dst_side = item->dst ? item->dst : item->src; + int match; + if (direction == FROM_SRC) +- match = match_name_with_pattern(item->src, ref->name, dst_side, &name); ++ match = match_refspec_name_with_pattern(item->src, ref->name, dst_side, &name); + else +- match = match_name_with_pattern(dst_side, ref->name, item->src, &name); ++ match = match_refspec_name_with_pattern(dst_side, ref->name, item->src, &name); + if (match) { + matching_refs = i; + break; +@@ remote.c: static struct ref *get_expanded_map(const struct ref *remote_refs, + + if (strchr(ref->name, '^')) + continue; /* a dereference item */ +- if (match_name_with_pattern(refspec->src, ref->name, ++ if (match_refspec_name_with_pattern(refspec->src, ref->name, + refspec->dst, &expn_name) && + !ignore_symref_update(expn_name, &scratch)) { + struct ref *cpy = copy_ref(ref); ## remote.h ## @@ @@ remote.h: struct remote { /* * The setting for whether to fetch tags (as a separate rule from the * configured refspecs); +@@ remote.h: int count_refspec_match(const char *, struct ref *refs, struct ref **matched_ref + + int check_ref_type(const struct ref *ref, int flags); + ++int match_refspec_name_with_pattern(const char *key, const char *name, ++ const char *value, char **result); ++ + /* + * Free a single ref and its peer, or an entire list of refs and their peers, + * respectively. ## t/t7900-maintenance.sh ## @@ t/t7900-maintenance.sh: test_expect_success 'prefetch multiple remotes' ' @@ t/t7900-maintenance.sh: test_expect_success 'prefetch multiple remotes' ' + git checkout -b feature && test_commit feature-commit-2 && + git checkout -b wip/test && test_commit wip-test-commit-2 && + git checkout -b topic/x && test_commit topic-x-commit-2 && -+ git push -f origin feature wip/test topic/x&& ++ git push -f origin feature wip/test topic/x && + cd .. && + + git config remote.remote2.prefetchref "refs/heads/feature" && -+ fetchargs="--prefetch --prune --no-tags --no-write-fetch-head --recurse-submodules=no --quiet" && -+ GIT_TRACE2_EVENT="$(pwd)/prefetch-positive.txt" git maintenance run --task=prefetch 2>/dev/null && ++ fetchargs="--prefetch --prune --no-tags --no-write-fetch-head \ ++ --recurse-submodules=no --quiet" && ++ GIT_TRACE2_EVENT="$(pwd)/prefetch-positive.txt" \ ++ git maintenance run --task=prefetch 2>/dev/null && + test_subcommand git fetch remote2 $fetchargs <prefetch-positive.txt && + + git rev-parse refs/prefetch/remotes/remote2/feature && @@ t/t7900-maintenance.sh: test_expect_success 'prefetch multiple remotes' ' + cd .. && + + git config remote.remote3.prefetchref "!refs/heads/wip/*" && -+ fetchargs="--prefetch --prune --no-tags --no-write-fetch-head --recurse-submodules=no --quiet" && -+ GIT_TRACE2_EVENT="$(pwd)/prefetch-negative.txt" git maintenance run --task=prefetch 2>/dev/null && ++ fetchargs="--prefetch --prune --no-tags --no-write-fetch-head \ ++ --recurse-submodules=no --quiet" && ++ GIT_TRACE2_EVENT="$(pwd)/prefetch-negative.txt" \ ++ git maintenance run --task=prefetch 2>/dev/null && + test_subcommand git fetch remote3 $fetchargs <prefetch-negative.txt && + git rev-parse refs/prefetch/remotes/remote3/feature && + git rev-parse refs/prefetch/remotes/remote3/topic/x && @@ t/t7900-maintenance.sh: test_expect_success 'prefetch multiple remotes' ' + git push -f origin feature topic/x topic/y && + cd .. && + -+ git config remote.remote4.prefetchref "refs/heads/topic/* !refs/heads/topic/y" && -+ fetchargs="--prefetch --prune --no-tags --no-write-fetch-head --recurse-submodules=no --quiet" && -+ GIT_TRACE2_EVENT="$(pwd)/prefetch-mixed.txt" git maintenance run --task=prefetch 2>/dev/null && ++ git config remote.remote4.prefetchref \ ++ "refs/heads/topic/* !refs/heads/topic/y" && ++ fetchargs="--prefetch --prune --no-tags --no-write-fetch-head \ ++ --recurse-submodules=no --quiet" && ++ GIT_TRACE2_EVENT="$(pwd)/prefetch-mixed.txt" \ ++ git maintenance run --task=prefetch 2>/dev/null && + test_subcommand git fetch remote4 $fetchargs <prefetch-mixed.txt && + + test_must_fail git rev-parse refs/prefetch/remotes/remote4/feature && Documentation/config/remote.txt | 7 +++ builtin/fetch.c | 53 ++++++++++++++++++++ remote.c | 24 ++++++---- remote.h | 6 +++ t/t7900-maintenance.sh | 85 +++++++++++++++++++++++++++++++++ 5 files changed, 167 insertions(+), 8 deletions(-) diff --git a/Documentation/config/remote.txt b/Documentation/config/remote.txt index 8efc53e836d..186f439ed7b 100644 --- a/Documentation/config/remote.txt +++ b/Documentation/config/remote.txt @@ -33,6 +33,13 @@ remote.<name>.fetch:: The default set of "refspec" for linkgit:git-fetch[1]. See linkgit:git-fetch[1]. +remote.<name>.prefetchref:: + Specify the refs to be prefetched when fetching from this + remote. The value is a space-separated list of ref patterns + (e.g., "refs/heads/main !refs/heads/develop*"). This can be + used to optimize fetch operations by specifying exactly which + refs should be prefetched. + remote.<name>.push:: The default set of "refspec" for linkgit:git-push[1]. See linkgit:git-push[1]. diff --git a/builtin/fetch.c b/builtin/fetch.c index b2b5aee5bf2..74603cfabe0 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -485,6 +485,32 @@ static void filter_prefetch_refspec(struct refspec *rs) } } +static int pattern_matches_ref(const char *pattern, const char *refname) +{ + if (strchr(pattern, '*')) + return match_refspec_name_with_pattern(pattern, refname, NULL, NULL) != 0; + return strcmp(pattern, refname) == 0; +} + +static int matches_prefetch_refs(const char *refname, const struct string_list *prefetch_refs) +{ + int has_positive = 0, matched_positive = 0, matched_negative = 0; + + for (int i = 0; i < prefetch_refs->nr; i++) { + const char *pattern = prefetch_refs->items[i].string; + int is_negative = (*pattern == '!'); + if (is_negative) pattern++; + else has_positive = 1; + + if (pattern_matches_ref(pattern, refname)) { + if (is_negative) matched_negative = 1; + else matched_positive = 1; + } + } + + return has_positive ? (matched_positive && !matched_negative) : !matched_negative; +} + static struct ref *get_ref_map(struct remote *remote, const struct ref *remote_refs, struct refspec *rs, @@ -501,7 +527,11 @@ static struct ref *get_ref_map(struct remote *remote, struct hashmap existing_refs; int existing_refs_populated = 0; + struct ref *prefetch_filtered_ref_map = NULL, **ref_map_tail = &prefetch_filtered_ref_map; + struct ref *next; + filter_prefetch_refspec(rs); + if (remote) filter_prefetch_refspec(&remote->fetch); @@ -610,6 +640,29 @@ static struct ref *get_ref_map(struct remote *remote, else ref_map = apply_negative_refspecs(ref_map, &remote->fetch); + /** + * Filter out advertised refs that we don't want to fetch during + * prefetch if a prefetchref config is set + */ + + if (prefetch && remote->prefetch_refs.nr) { + prefetch_filtered_ref_map = NULL; + ref_map_tail = &prefetch_filtered_ref_map; + + for (rm = ref_map; rm; rm = next) { + next = rm->next; + rm->next = NULL; + + if (matches_prefetch_refs(rm->name, &remote->prefetch_refs)) { + *ref_map_tail = rm; + ref_map_tail = &rm->next; + } else { + free_one_ref(rm); + } + } + ref_map = prefetch_filtered_ref_map; + } + ref_map = ref_remove_duplicates(ref_map); for (rm = ref_map; rm; rm = rm->next) { diff --git a/remote.c b/remote.c index 8f3dee13186..6752c73370f 100644 --- a/remote.c +++ b/remote.c @@ -141,6 +141,7 @@ static struct remote *make_remote(struct remote_state *remote_state, ret->prune = -1; /* unspecified */ ret->prune_tags = -1; /* unspecified */ ret->name = xstrndup(name, len); + string_list_init_dup(&ret->prefetch_refs); refspec_init(&ret->push, REFSPEC_PUSH); refspec_init(&ret->fetch, REFSPEC_FETCH); @@ -166,6 +167,7 @@ static void remote_clear(struct remote *remote) free((char *)remote->uploadpack); FREE_AND_NULL(remote->http_proxy); FREE_AND_NULL(remote->http_proxy_authmethod); + string_list_clear(&remote->prefetch_refs, 0); } static void add_merge(struct branch *branch, const char *name) @@ -456,6 +458,12 @@ static int handle_config(const char *key, const char *value, remote->prune = git_config_bool(key, value); else if (!strcmp(subkey, "prunetags")) remote->prune_tags = git_config_bool(key, value); + else if (!strcmp(subkey, "prefetchref")) { + if (!value) + return config_error_nonbool(key); + string_list_split(&remote->prefetch_refs, value, ' ', -1); + return 0; + } else if (!strcmp(subkey, "url")) { if (!value) return config_error_nonbool(key); @@ -868,7 +876,7 @@ struct strvec *push_url_of_remote(struct remote *remote) return remote->pushurl.nr ? &remote->pushurl : &remote->url; } -static int match_name_with_pattern(const char *key, const char *name, +int match_refspec_name_with_pattern(const char *key, const char *name, const char *value, char **result) { const char *kstar = strchr(key, '*'); @@ -900,7 +908,7 @@ static int refspec_match(const struct refspec_item *refspec, const char *name) { if (refspec->pattern) - return match_name_with_pattern(refspec->src, name, NULL, NULL); + return match_refspec_name_with_pattern(refspec->src, name, NULL, NULL); return !strcmp(refspec->src, name); } @@ -969,7 +977,7 @@ static int query_matches_negative_refspec(struct refspec *rs, struct refspec_ite const char *key = refspec->dst ? refspec->dst : refspec->src; const char *value = refspec->src; - if (match_name_with_pattern(key, needle, value, &expn_name)) + if (match_refspec_name_with_pattern(key, needle, value, &expn_name)) string_list_append_nodup(&reversed, expn_name); } else if (refspec->matching) { /* For the special matching refspec, any query should match */ @@ -1014,7 +1022,7 @@ static void query_refspecs_multiple(struct refspec *rs, if (!refspec->dst || refspec->negative) continue; if (refspec->pattern) { - if (match_name_with_pattern(key, needle, value, result)) + if (match_refspec_name_with_pattern(key, needle, value, result)) string_list_append_nodup(results, *result); } else if (!strcmp(needle, key)) { string_list_append(results, value); @@ -1043,7 +1051,7 @@ int query_refspecs(struct refspec *rs, struct refspec_item *query) if (!refspec->dst || refspec->negative) continue; if (refspec->pattern) { - if (match_name_with_pattern(key, needle, value, result)) { + if (match_refspec_name_with_pattern(key, needle, value, result)) { query->force = refspec->force; return 0; } @@ -1456,9 +1464,9 @@ static char *get_ref_match(const struct refspec *rs, const struct ref *ref, const char *dst_side = item->dst ? item->dst : item->src; int match; if (direction == FROM_SRC) - match = match_name_with_pattern(item->src, ref->name, dst_side, &name); + match = match_refspec_name_with_pattern(item->src, ref->name, dst_side, &name); else - match = match_name_with_pattern(dst_side, ref->name, item->src, &name); + match = match_refspec_name_with_pattern(dst_side, ref->name, item->src, &name); if (match) { matching_refs = i; break; @@ -2076,7 +2084,7 @@ static struct ref *get_expanded_map(const struct ref *remote_refs, if (strchr(ref->name, '^')) continue; /* a dereference item */ - if (match_name_with_pattern(refspec->src, ref->name, + if (match_refspec_name_with_pattern(refspec->src, ref->name, refspec->dst, &expn_name) && !ignore_symref_update(expn_name, &scratch)) { struct ref *cpy = copy_ref(ref); diff --git a/remote.h b/remote.h index b901b56746d..9ffef206f23 100644 --- a/remote.h +++ b/remote.h @@ -5,6 +5,7 @@ #include "hashmap.h" #include "refspec.h" #include "strvec.h" +#include "string-list.h" struct option; struct transport_ls_refs_options; @@ -77,6 +78,8 @@ struct remote { struct refspec fetch; + struct string_list prefetch_refs; + /* * The setting for whether to fetch tags (as a separate rule from the * configured refspecs); @@ -207,6 +210,9 @@ int count_refspec_match(const char *, struct ref *refs, struct ref **matched_ref int check_ref_type(const struct ref *ref, int flags); +int match_refspec_name_with_pattern(const char *key, const char *name, + const char *value, char **result); + /* * Free a single ref and its peer, or an entire list of refs and their peers, * respectively. diff --git a/t/t7900-maintenance.sh b/t/t7900-maintenance.sh index abae7a97546..fc1b5d14e75 100755 --- a/t/t7900-maintenance.sh +++ b/t/t7900-maintenance.sh @@ -245,6 +245,91 @@ test_expect_success 'prefetch multiple remotes' ' test_subcommand git fetch remote2 $fetchargs <skip-remote1.txt ' +test_expect_success 'prefetch with positive prefetch ref patterns' ' + test_create_repo filter-prefetch-positive && + ( + cd filter-prefetch-positive && + test_commit initial && + git clone . clone2 && + git remote add remote2 "file://$(pwd)/clone2" && + + cd clone2 && + git checkout -b feature && test_commit feature-commit-2 && + git checkout -b wip/test && test_commit wip-test-commit-2 && + git checkout -b topic/x && test_commit topic-x-commit-2 && + git push -f origin feature wip/test topic/x && + cd .. && + + git config remote.remote2.prefetchref "refs/heads/feature" && + fetchargs="--prefetch --prune --no-tags --no-write-fetch-head \ + --recurse-submodules=no --quiet" && + GIT_TRACE2_EVENT="$(pwd)/prefetch-positive.txt" \ + git maintenance run --task=prefetch 2>/dev/null && + test_subcommand git fetch remote2 $fetchargs <prefetch-positive.txt && + + git rev-parse refs/prefetch/remotes/remote2/feature && + test_must_fail git rev-parse refs/prefetch/remotes/remote2/wip/test && + test_must_fail git rev-parse refs/prefetch/remotes/remote2/topic/x + ) +' + +test_expect_success 'prefetch with negative prefetch ref patterns' ' + test_create_repo filter-prefetch-negative && + ( + cd filter-prefetch-negative && + test_commit initial && + git clone . clone3 && + git remote add remote3 "file://$(pwd)/clone3" && + cat .git/config && + + cd clone3 && + git checkout -b feature && test_commit feature-commit-3 && + git checkout -b wip/test && test_commit wip-test-commit-3 && + git checkout -b topic/x && test_commit topic-x-commit-3 && + git push -f origin feature wip/test topic/x && + cd .. && + + git config remote.remote3.prefetchref "!refs/heads/wip/*" && + fetchargs="--prefetch --prune --no-tags --no-write-fetch-head \ + --recurse-submodules=no --quiet" && + GIT_TRACE2_EVENT="$(pwd)/prefetch-negative.txt" \ + git maintenance run --task=prefetch 2>/dev/null && + test_subcommand git fetch remote3 $fetchargs <prefetch-negative.txt && + git rev-parse refs/prefetch/remotes/remote3/feature && + git rev-parse refs/prefetch/remotes/remote3/topic/x && + test_must_fail git rev-parse refs/prefetch/remotes/remote3/wip/test + ) +' + +test_expect_success 'prefetch with positive & negative prefetch ref patterns' ' + test_create_repo filter-prefetch-mixed && + ( + cd filter-prefetch-mixed && + test_commit initial && + git clone . clone4 && + git remote add remote4 "file://$(pwd)/clone4" && + + cd clone4 && + git checkout -b feature && test_commit feature-commit-4 && + git checkout -b topic/x && test_commit topic-x-commit-4 && + git checkout -b topic/y && test_commit topic-y-commit-4 && + git push -f origin feature topic/x topic/y && + cd .. && + + git config remote.remote4.prefetchref \ + "refs/heads/topic/* !refs/heads/topic/y" && + fetchargs="--prefetch --prune --no-tags --no-write-fetch-head \ + --recurse-submodules=no --quiet" && + GIT_TRACE2_EVENT="$(pwd)/prefetch-mixed.txt" \ + git maintenance run --task=prefetch 2>/dev/null && + test_subcommand git fetch remote4 $fetchargs <prefetch-mixed.txt && + + test_must_fail git rev-parse refs/prefetch/remotes/remote4/feature && + test_must_fail git rev-parse refs/prefetch/remotes/remote4/topic/y && + git rev-parse refs/prefetch/remotes/remote4/topic/x + ) +' + test_expect_success 'loose-objects task' ' # Repack everything so we know the state of the object dir git repack -adk && base-commit: 2e7b89e038c0c888acf61f1b4ee5a43d4dd5e94c -- gitgitgadget ^ permalink raw reply related [flat|nested] 27+ messages in thread
* Re: [PATCH v4] remote: allow specifying refs to prefetch 2024-10-04 20:21 ` [PATCH v4] remote: allow specifying refs to prefetch Shubham Kanodia via GitGitGadget @ 2024-11-04 8:47 ` Shubham Kanodia 2024-11-05 6:45 ` Patrick Steinhardt 2024-11-05 14:45 ` Phillip Wood 2 siblings, 0 replies; 27+ messages in thread From: Shubham Kanodia @ 2024-11-04 8:47 UTC (permalink / raw) To: Shubham Kanodia via GitGitGadget Cc: git, Patrick Steinhardt [ ], Junio C Hamano [ ], Derrick Stolee [ ] On Sat, Oct 5, 2024 at 1:51 AM Shubham Kanodia via GitGitGadget <gitgitgadget@gmail.com> wrote: > > From: Shubham Kanodia <shubham.kanodia10@gmail.com> > > When using 'git fetch --prefetch', all fetchable refs are prefetched > by default. In large repositories with many refs, this can lead to > unnecessary network traffic and increased disk space use. > > Introduce a new configuration option 'remote.<name>.prefetchref' that > allows users to specify specific ref patterns to be prefetched during > a 'git fetch --prefetch' operation. > > The 'prefetchref' option accepts a space-separated list of ref > patterns (e.g., 'refs/heads/main !refs/heads/feature/*'). When the > '--prefetch' option is used with 'git fetch', only the refs matching > these patterns will be prefetched. > > Signed-off-by: Shubham Kanodia <shubham.kanodia10@gmail.com> > --- > remote: introduce config to set prefetch refs > > Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-1782%2Fpastelsky%2Fsk%2Fremote-prefetchref-v4 > Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-1782/pastelsky/sk/remote-prefetchref-v4 > Pull-Request: https://github.com/gitgitgadget/git/pull/1782 > > Range-diff vs v3: > > 1: 717d5957c47 ! 1: e3d8aee1ea8 remote: introduce config to set prefetch refs > @@ Metadata > Author: Shubham Kanodia <shubham.kanodia10@gmail.com> > > ## Commit message ## > - remote: introduce config to set prefetch refs > + remote: allow specifying refs to prefetch > > - This commit introduces a new configuration option, > - remote.<name>.prefetchref, which allows users to specify specific > - ref patterns to be prefetched during a git fetch --prefetch > - operation. > + When using 'git fetch --prefetch', all fetchable refs are prefetched > + by default. In large repositories with many refs, this can lead to > + unnecessary network traffic and increased disk space use. > > - The new option accepts a space-separated list of ref patterns. > - When the --prefetch option is used with git fetch, only the refs > - matching these patterns will be prefetched, instead of the > - default behavior of prefetching all fetchable refs. > + Introduce a new configuration option 'remote.<name>.prefetchref' that > + allows users to specify specific ref patterns to be prefetched during > + a 'git fetch --prefetch' operation. > > - Example usage in .git/config: > - [remote "origin"] > - prefetchref = "refs/heads/main refs/heads/feature/*" > - > - This change allows users to optimize their prefetch operations, potentially > - reducing network traffic and improving performance for large repositories > - with many refs. > + The 'prefetchref' option accepts a space-separated list of ref > + patterns (e.g., 'refs/heads/main !refs/heads/feature/*'). When the > + '--prefetch' option is used with 'git fetch', only the refs matching > + these patterns will be prefetched. > > Signed-off-by: Shubham Kanodia <shubham.kanodia10@gmail.com> > > @@ Documentation/config/remote.txt: remote.<name>.fetch:: > linkgit:git-fetch[1]. > > +remote.<name>.prefetchref:: > -+ Specify the refs to be prefetched when fetching from this remote. > -+ The value is a space-separated list of ref patterns (e.g., "refs/heads/master !refs/heads/develop*"). > -+ This can be used to optimize fetch operations by specifying exactly which refs should be prefetched. > ++ Specify the refs to be prefetched when fetching from this > ++ remote. The value is a space-separated list of ref patterns > ++ (e.g., "refs/heads/main !refs/heads/develop*"). This can be > ++ used to optimize fetch operations by specifying exactly which > ++ refs should be prefetched. > + > remote.<name>.push:: > The default set of "refspec" for linkgit:git-push[1]. See > linkgit:git-push[1]. > > ## builtin/fetch.c ## > -@@ > - #include "trace.h" > - #include "trace2.h" > - #include "bundle-uri.h" > -+#include "wildmatch.h" > - > - #define FORCED_UPDATES_DELAY_WARNING_IN_MS (10 * 1000) > - > @@ builtin/fetch.c: static void filter_prefetch_refspec(struct refspec *rs) > } > } > > ++static int pattern_matches_ref(const char *pattern, const char *refname) > ++{ > ++ if (strchr(pattern, '*')) > ++ return match_refspec_name_with_pattern(pattern, refname, NULL, NULL) != 0; > ++ return strcmp(pattern, refname) == 0; > ++} > ++ > +static int matches_prefetch_refs(const char *refname, const struct string_list *prefetch_refs) > +{ > -+ int i; > -+ int has_positive = 0; > -+ int matched_positive = 0; > -+ int matched_negative = 0; > ++ int has_positive = 0, matched_positive = 0, matched_negative = 0; > + > -+ for (i = 0; i < prefetch_refs->nr; i++) { > ++ for (int i = 0; i < prefetch_refs->nr; i++) { > + const char *pattern = prefetch_refs->items[i].string; > + int is_negative = (*pattern == '!'); > ++ if (is_negative) pattern++; > ++ else has_positive = 1; > + > -+ if (is_negative) > -+ pattern++; > -+ else > -+ has_positive = 1; > -+ > -+ if (wildmatch(pattern, refname, 0) == 0) { > -+ if (is_negative) > -+ matched_negative = 1; > -+ else > -+ matched_positive = 1; > ++ if (pattern_matches_ref(pattern, refname)) { > ++ if (is_negative) matched_negative = 1; > ++ else matched_positive = 1; > + } > + } > + > -+ if (!has_positive) > -+ return !matched_negative; > -+ > -+ return matched_positive && !matched_negative; > -+} > -+ > -+ > -+static void ref_remove(struct ref **head, struct ref *to_remove) > -+{ > -+ struct ref **pp, *p; > -+ > -+ for (pp = head; (p = *pp) != NULL; pp = &p->next) { > -+ if (p == to_remove) { > -+ *pp = p->next; > -+ return; > -+ } > -+ } > ++ return has_positive ? (matched_positive && !matched_negative) : !matched_negative; > +} > + > static struct ref *get_ref_map(struct remote *remote, > const struct ref *remote_refs, > struct refspec *rs, > @@ builtin/fetch.c: static struct ref *get_ref_map(struct remote *remote, > + struct hashmap existing_refs; > int existing_refs_populated = 0; > > ++ struct ref *prefetch_filtered_ref_map = NULL, **ref_map_tail = &prefetch_filtered_ref_map; > ++ struct ref *next; > ++ > filter_prefetch_refspec(rs); > + > if (remote) > @@ builtin/fetch.c: static struct ref *get_ref_map(struct remote *remote, > + * Filter out advertised refs that we don't want to fetch during > + * prefetch if a prefetchref config is set > + */ > ++ > + if (prefetch && remote->prefetch_refs.nr) { > -+ struct ref *ref, *next; > -+ for (ref = ref_map; ref; ref = next) { > -+ next = ref->next; > ++ prefetch_filtered_ref_map = NULL; > ++ ref_map_tail = &prefetch_filtered_ref_map; > ++ > ++ for (rm = ref_map; rm; rm = next) { > ++ next = rm->next; > ++ rm->next = NULL; > + > -+ if (!matches_prefetch_refs(ref->name, &remote->prefetch_refs)) { > -+ ref_remove(&ref_map, ref); > -+ free_one_ref(ref); > ++ if (matches_prefetch_refs(rm->name, &remote->prefetch_refs)) { > ++ *ref_map_tail = rm; > ++ ref_map_tail = &rm->next; > ++ } else { > ++ free_one_ref(rm); > + } > + } > ++ ref_map = prefetch_filtered_ref_map; > + } > + > ref_map = ref_remove_duplicates(ref_map); > @@ remote.c: static int handle_config(const char *key, const char *value, > else if (!strcmp(subkey, "url")) { > if (!value) > return config_error_nonbool(key); > +@@ remote.c: struct strvec *push_url_of_remote(struct remote *remote) > + return remote->pushurl.nr ? &remote->pushurl : &remote->url; > + } > + > +-static int match_name_with_pattern(const char *key, const char *name, > ++int match_refspec_name_with_pattern(const char *key, const char *name, > + const char *value, char **result) > + { > + const char *kstar = strchr(key, '*'); > +@@ remote.c: static int refspec_match(const struct refspec_item *refspec, > + const char *name) > + { > + if (refspec->pattern) > +- return match_name_with_pattern(refspec->src, name, NULL, NULL); > ++ return match_refspec_name_with_pattern(refspec->src, name, NULL, NULL); > + > + return !strcmp(refspec->src, name); > + } > +@@ remote.c: static int query_matches_negative_refspec(struct refspec *rs, struct refspec_ite > + const char *key = refspec->dst ? refspec->dst : refspec->src; > + const char *value = refspec->src; > + > +- if (match_name_with_pattern(key, needle, value, &expn_name)) > ++ if (match_refspec_name_with_pattern(key, needle, value, &expn_name)) > + string_list_append_nodup(&reversed, expn_name); > + } else if (refspec->matching) { > + /* For the special matching refspec, any query should match */ > +@@ remote.c: static void query_refspecs_multiple(struct refspec *rs, > + if (!refspec->dst || refspec->negative) > + continue; > + if (refspec->pattern) { > +- if (match_name_with_pattern(key, needle, value, result)) > ++ if (match_refspec_name_with_pattern(key, needle, value, result)) > + string_list_append_nodup(results, *result); > + } else if (!strcmp(needle, key)) { > + string_list_append(results, value); > +@@ remote.c: int query_refspecs(struct refspec *rs, struct refspec_item *query) > + if (!refspec->dst || refspec->negative) > + continue; > + if (refspec->pattern) { > +- if (match_name_with_pattern(key, needle, value, result)) { > ++ if (match_refspec_name_with_pattern(key, needle, value, result)) { > + query->force = refspec->force; > + return 0; > + } > +@@ remote.c: static char *get_ref_match(const struct refspec *rs, const struct ref *ref, > + const char *dst_side = item->dst ? item->dst : item->src; > + int match; > + if (direction == FROM_SRC) > +- match = match_name_with_pattern(item->src, ref->name, dst_side, &name); > ++ match = match_refspec_name_with_pattern(item->src, ref->name, dst_side, &name); > + else > +- match = match_name_with_pattern(dst_side, ref->name, item->src, &name); > ++ match = match_refspec_name_with_pattern(dst_side, ref->name, item->src, &name); > + if (match) { > + matching_refs = i; > + break; > +@@ remote.c: static struct ref *get_expanded_map(const struct ref *remote_refs, > + > + if (strchr(ref->name, '^')) > + continue; /* a dereference item */ > +- if (match_name_with_pattern(refspec->src, ref->name, > ++ if (match_refspec_name_with_pattern(refspec->src, ref->name, > + refspec->dst, &expn_name) && > + !ignore_symref_update(expn_name, &scratch)) { > + struct ref *cpy = copy_ref(ref); > > ## remote.h ## > @@ > @@ remote.h: struct remote { > /* > * The setting for whether to fetch tags (as a separate rule from the > * configured refspecs); > +@@ remote.h: int count_refspec_match(const char *, struct ref *refs, struct ref **matched_ref > + > + int check_ref_type(const struct ref *ref, int flags); > + > ++int match_refspec_name_with_pattern(const char *key, const char *name, > ++ const char *value, char **result); > ++ > + /* > + * Free a single ref and its peer, or an entire list of refs and their peers, > + * respectively. > > ## t/t7900-maintenance.sh ## > @@ t/t7900-maintenance.sh: test_expect_success 'prefetch multiple remotes' ' > @@ t/t7900-maintenance.sh: test_expect_success 'prefetch multiple remotes' ' > + git checkout -b feature && test_commit feature-commit-2 && > + git checkout -b wip/test && test_commit wip-test-commit-2 && > + git checkout -b topic/x && test_commit topic-x-commit-2 && > -+ git push -f origin feature wip/test topic/x&& > ++ git push -f origin feature wip/test topic/x && > + cd .. && > + > + git config remote.remote2.prefetchref "refs/heads/feature" && > -+ fetchargs="--prefetch --prune --no-tags --no-write-fetch-head --recurse-submodules=no --quiet" && > -+ GIT_TRACE2_EVENT="$(pwd)/prefetch-positive.txt" git maintenance run --task=prefetch 2>/dev/null && > ++ fetchargs="--prefetch --prune --no-tags --no-write-fetch-head \ > ++ --recurse-submodules=no --quiet" && > ++ GIT_TRACE2_EVENT="$(pwd)/prefetch-positive.txt" \ > ++ git maintenance run --task=prefetch 2>/dev/null && > + test_subcommand git fetch remote2 $fetchargs <prefetch-positive.txt && > + > + git rev-parse refs/prefetch/remotes/remote2/feature && > @@ t/t7900-maintenance.sh: test_expect_success 'prefetch multiple remotes' ' > + cd .. && > + > + git config remote.remote3.prefetchref "!refs/heads/wip/*" && > -+ fetchargs="--prefetch --prune --no-tags --no-write-fetch-head --recurse-submodules=no --quiet" && > -+ GIT_TRACE2_EVENT="$(pwd)/prefetch-negative.txt" git maintenance run --task=prefetch 2>/dev/null && > ++ fetchargs="--prefetch --prune --no-tags --no-write-fetch-head \ > ++ --recurse-submodules=no --quiet" && > ++ GIT_TRACE2_EVENT="$(pwd)/prefetch-negative.txt" \ > ++ git maintenance run --task=prefetch 2>/dev/null && > + test_subcommand git fetch remote3 $fetchargs <prefetch-negative.txt && > + git rev-parse refs/prefetch/remotes/remote3/feature && > + git rev-parse refs/prefetch/remotes/remote3/topic/x && > @@ t/t7900-maintenance.sh: test_expect_success 'prefetch multiple remotes' ' > + git push -f origin feature topic/x topic/y && > + cd .. && > + > -+ git config remote.remote4.prefetchref "refs/heads/topic/* !refs/heads/topic/y" && > -+ fetchargs="--prefetch --prune --no-tags --no-write-fetch-head --recurse-submodules=no --quiet" && > -+ GIT_TRACE2_EVENT="$(pwd)/prefetch-mixed.txt" git maintenance run --task=prefetch 2>/dev/null && > ++ git config remote.remote4.prefetchref \ > ++ "refs/heads/topic/* !refs/heads/topic/y" && > ++ fetchargs="--prefetch --prune --no-tags --no-write-fetch-head \ > ++ --recurse-submodules=no --quiet" && > ++ GIT_TRACE2_EVENT="$(pwd)/prefetch-mixed.txt" \ > ++ git maintenance run --task=prefetch 2>/dev/null && > + test_subcommand git fetch remote4 $fetchargs <prefetch-mixed.txt && > + > + test_must_fail git rev-parse refs/prefetch/remotes/remote4/feature && > > > Documentation/config/remote.txt | 7 +++ > builtin/fetch.c | 53 ++++++++++++++++++++ > remote.c | 24 ++++++---- > remote.h | 6 +++ > t/t7900-maintenance.sh | 85 +++++++++++++++++++++++++++++++++ > 5 files changed, 167 insertions(+), 8 deletions(-) > > diff --git a/Documentation/config/remote.txt b/Documentation/config/remote.txt > index 8efc53e836d..186f439ed7b 100644 > --- a/Documentation/config/remote.txt > +++ b/Documentation/config/remote.txt > @@ -33,6 +33,13 @@ remote.<name>.fetch:: > The default set of "refspec" for linkgit:git-fetch[1]. See > linkgit:git-fetch[1]. > > +remote.<name>.prefetchref:: > + Specify the refs to be prefetched when fetching from this > + remote. The value is a space-separated list of ref patterns > + (e.g., "refs/heads/main !refs/heads/develop*"). This can be > + used to optimize fetch operations by specifying exactly which > + refs should be prefetched. > + > remote.<name>.push:: > The default set of "refspec" for linkgit:git-push[1]. See > linkgit:git-push[1]. > diff --git a/builtin/fetch.c b/builtin/fetch.c > index b2b5aee5bf2..74603cfabe0 100644 > --- a/builtin/fetch.c > +++ b/builtin/fetch.c > @@ -485,6 +485,32 @@ static void filter_prefetch_refspec(struct refspec *rs) > } > } > > +static int pattern_matches_ref(const char *pattern, const char *refname) > +{ > + if (strchr(pattern, '*')) > + return match_refspec_name_with_pattern(pattern, refname, NULL, NULL) != 0; > + return strcmp(pattern, refname) == 0; > +} > + > +static int matches_prefetch_refs(const char *refname, const struct string_list *prefetch_refs) > +{ > + int has_positive = 0, matched_positive = 0, matched_negative = 0; > + > + for (int i = 0; i < prefetch_refs->nr; i++) { > + const char *pattern = prefetch_refs->items[i].string; > + int is_negative = (*pattern == '!'); > + if (is_negative) pattern++; > + else has_positive = 1; > + > + if (pattern_matches_ref(pattern, refname)) { > + if (is_negative) matched_negative = 1; > + else matched_positive = 1; > + } > + } > + > + return has_positive ? (matched_positive && !matched_negative) : !matched_negative; > +} > + > static struct ref *get_ref_map(struct remote *remote, > const struct ref *remote_refs, > struct refspec *rs, > @@ -501,7 +527,11 @@ static struct ref *get_ref_map(struct remote *remote, > struct hashmap existing_refs; > int existing_refs_populated = 0; > > + struct ref *prefetch_filtered_ref_map = NULL, **ref_map_tail = &prefetch_filtered_ref_map; > + struct ref *next; > + > filter_prefetch_refspec(rs); > + > if (remote) > filter_prefetch_refspec(&remote->fetch); > > @@ -610,6 +640,29 @@ static struct ref *get_ref_map(struct remote *remote, > else > ref_map = apply_negative_refspecs(ref_map, &remote->fetch); > > + /** > + * Filter out advertised refs that we don't want to fetch during > + * prefetch if a prefetchref config is set > + */ > + > + if (prefetch && remote->prefetch_refs.nr) { > + prefetch_filtered_ref_map = NULL; > + ref_map_tail = &prefetch_filtered_ref_map; > + > + for (rm = ref_map; rm; rm = next) { > + next = rm->next; > + rm->next = NULL; > + > + if (matches_prefetch_refs(rm->name, &remote->prefetch_refs)) { > + *ref_map_tail = rm; > + ref_map_tail = &rm->next; > + } else { > + free_one_ref(rm); > + } > + } > + ref_map = prefetch_filtered_ref_map; > + } > + > ref_map = ref_remove_duplicates(ref_map); > > for (rm = ref_map; rm; rm = rm->next) { > diff --git a/remote.c b/remote.c > index 8f3dee13186..6752c73370f 100644 > --- a/remote.c > +++ b/remote.c > @@ -141,6 +141,7 @@ static struct remote *make_remote(struct remote_state *remote_state, > ret->prune = -1; /* unspecified */ > ret->prune_tags = -1; /* unspecified */ > ret->name = xstrndup(name, len); > + string_list_init_dup(&ret->prefetch_refs); > refspec_init(&ret->push, REFSPEC_PUSH); > refspec_init(&ret->fetch, REFSPEC_FETCH); > > @@ -166,6 +167,7 @@ static void remote_clear(struct remote *remote) > free((char *)remote->uploadpack); > FREE_AND_NULL(remote->http_proxy); > FREE_AND_NULL(remote->http_proxy_authmethod); > + string_list_clear(&remote->prefetch_refs, 0); > } > > static void add_merge(struct branch *branch, const char *name) > @@ -456,6 +458,12 @@ static int handle_config(const char *key, const char *value, > remote->prune = git_config_bool(key, value); > else if (!strcmp(subkey, "prunetags")) > remote->prune_tags = git_config_bool(key, value); > + else if (!strcmp(subkey, "prefetchref")) { > + if (!value) > + return config_error_nonbool(key); > + string_list_split(&remote->prefetch_refs, value, ' ', -1); > + return 0; > + } > else if (!strcmp(subkey, "url")) { > if (!value) > return config_error_nonbool(key); > @@ -868,7 +876,7 @@ struct strvec *push_url_of_remote(struct remote *remote) > return remote->pushurl.nr ? &remote->pushurl : &remote->url; > } > > -static int match_name_with_pattern(const char *key, const char *name, > +int match_refspec_name_with_pattern(const char *key, const char *name, > const char *value, char **result) > { > const char *kstar = strchr(key, '*'); > @@ -900,7 +908,7 @@ static int refspec_match(const struct refspec_item *refspec, > const char *name) > { > if (refspec->pattern) > - return match_name_with_pattern(refspec->src, name, NULL, NULL); > + return match_refspec_name_with_pattern(refspec->src, name, NULL, NULL); > > return !strcmp(refspec->src, name); > } > @@ -969,7 +977,7 @@ static int query_matches_negative_refspec(struct refspec *rs, struct refspec_ite > const char *key = refspec->dst ? refspec->dst : refspec->src; > const char *value = refspec->src; > > - if (match_name_with_pattern(key, needle, value, &expn_name)) > + if (match_refspec_name_with_pattern(key, needle, value, &expn_name)) > string_list_append_nodup(&reversed, expn_name); > } else if (refspec->matching) { > /* For the special matching refspec, any query should match */ > @@ -1014,7 +1022,7 @@ static void query_refspecs_multiple(struct refspec *rs, > if (!refspec->dst || refspec->negative) > continue; > if (refspec->pattern) { > - if (match_name_with_pattern(key, needle, value, result)) > + if (match_refspec_name_with_pattern(key, needle, value, result)) > string_list_append_nodup(results, *result); > } else if (!strcmp(needle, key)) { > string_list_append(results, value); > @@ -1043,7 +1051,7 @@ int query_refspecs(struct refspec *rs, struct refspec_item *query) > if (!refspec->dst || refspec->negative) > continue; > if (refspec->pattern) { > - if (match_name_with_pattern(key, needle, value, result)) { > + if (match_refspec_name_with_pattern(key, needle, value, result)) { > query->force = refspec->force; > return 0; > } > @@ -1456,9 +1464,9 @@ static char *get_ref_match(const struct refspec *rs, const struct ref *ref, > const char *dst_side = item->dst ? item->dst : item->src; > int match; > if (direction == FROM_SRC) > - match = match_name_with_pattern(item->src, ref->name, dst_side, &name); > + match = match_refspec_name_with_pattern(item->src, ref->name, dst_side, &name); > else > - match = match_name_with_pattern(dst_side, ref->name, item->src, &name); > + match = match_refspec_name_with_pattern(dst_side, ref->name, item->src, &name); > if (match) { > matching_refs = i; > break; > @@ -2076,7 +2084,7 @@ static struct ref *get_expanded_map(const struct ref *remote_refs, > > if (strchr(ref->name, '^')) > continue; /* a dereference item */ > - if (match_name_with_pattern(refspec->src, ref->name, > + if (match_refspec_name_with_pattern(refspec->src, ref->name, > refspec->dst, &expn_name) && > !ignore_symref_update(expn_name, &scratch)) { > struct ref *cpy = copy_ref(ref); > diff --git a/remote.h b/remote.h > index b901b56746d..9ffef206f23 100644 > --- a/remote.h > +++ b/remote.h > @@ -5,6 +5,7 @@ > #include "hashmap.h" > #include "refspec.h" > #include "strvec.h" > +#include "string-list.h" > > struct option; > struct transport_ls_refs_options; > @@ -77,6 +78,8 @@ struct remote { > > struct refspec fetch; > > + struct string_list prefetch_refs; > + > /* > * The setting for whether to fetch tags (as a separate rule from the > * configured refspecs); > @@ -207,6 +210,9 @@ int count_refspec_match(const char *, struct ref *refs, struct ref **matched_ref > > int check_ref_type(const struct ref *ref, int flags); > > +int match_refspec_name_with_pattern(const char *key, const char *name, > + const char *value, char **result); > + > /* > * Free a single ref and its peer, or an entire list of refs and their peers, > * respectively. > diff --git a/t/t7900-maintenance.sh b/t/t7900-maintenance.sh > index abae7a97546..fc1b5d14e75 100755 > --- a/t/t7900-maintenance.sh > +++ b/t/t7900-maintenance.sh > @@ -245,6 +245,91 @@ test_expect_success 'prefetch multiple remotes' ' > test_subcommand git fetch remote2 $fetchargs <skip-remote1.txt > ' > > +test_expect_success 'prefetch with positive prefetch ref patterns' ' > + test_create_repo filter-prefetch-positive && > + ( > + cd filter-prefetch-positive && > + test_commit initial && > + git clone . clone2 && > + git remote add remote2 "file://$(pwd)/clone2" && > + > + cd clone2 && > + git checkout -b feature && test_commit feature-commit-2 && > + git checkout -b wip/test && test_commit wip-test-commit-2 && > + git checkout -b topic/x && test_commit topic-x-commit-2 && > + git push -f origin feature wip/test topic/x && > + cd .. && > + > + git config remote.remote2.prefetchref "refs/heads/feature" && > + fetchargs="--prefetch --prune --no-tags --no-write-fetch-head \ > + --recurse-submodules=no --quiet" && > + GIT_TRACE2_EVENT="$(pwd)/prefetch-positive.txt" \ > + git maintenance run --task=prefetch 2>/dev/null && > + test_subcommand git fetch remote2 $fetchargs <prefetch-positive.txt && > + > + git rev-parse refs/prefetch/remotes/remote2/feature && > + test_must_fail git rev-parse refs/prefetch/remotes/remote2/wip/test && > + test_must_fail git rev-parse refs/prefetch/remotes/remote2/topic/x > + ) > +' > + > +test_expect_success 'prefetch with negative prefetch ref patterns' ' > + test_create_repo filter-prefetch-negative && > + ( > + cd filter-prefetch-negative && > + test_commit initial && > + git clone . clone3 && > + git remote add remote3 "file://$(pwd)/clone3" && > + cat .git/config && > + > + cd clone3 && > + git checkout -b feature && test_commit feature-commit-3 && > + git checkout -b wip/test && test_commit wip-test-commit-3 && > + git checkout -b topic/x && test_commit topic-x-commit-3 && > + git push -f origin feature wip/test topic/x && > + cd .. && > + > + git config remote.remote3.prefetchref "!refs/heads/wip/*" && > + fetchargs="--prefetch --prune --no-tags --no-write-fetch-head \ > + --recurse-submodules=no --quiet" && > + GIT_TRACE2_EVENT="$(pwd)/prefetch-negative.txt" \ > + git maintenance run --task=prefetch 2>/dev/null && > + test_subcommand git fetch remote3 $fetchargs <prefetch-negative.txt && > + git rev-parse refs/prefetch/remotes/remote3/feature && > + git rev-parse refs/prefetch/remotes/remote3/topic/x && > + test_must_fail git rev-parse refs/prefetch/remotes/remote3/wip/test > + ) > +' > + > +test_expect_success 'prefetch with positive & negative prefetch ref patterns' ' > + test_create_repo filter-prefetch-mixed && > + ( > + cd filter-prefetch-mixed && > + test_commit initial && > + git clone . clone4 && > + git remote add remote4 "file://$(pwd)/clone4" && > + > + cd clone4 && > + git checkout -b feature && test_commit feature-commit-4 && > + git checkout -b topic/x && test_commit topic-x-commit-4 && > + git checkout -b topic/y && test_commit topic-y-commit-4 && > + git push -f origin feature topic/x topic/y && > + cd .. && > + > + git config remote.remote4.prefetchref \ > + "refs/heads/topic/* !refs/heads/topic/y" && > + fetchargs="--prefetch --prune --no-tags --no-write-fetch-head \ > + --recurse-submodules=no --quiet" && > + GIT_TRACE2_EVENT="$(pwd)/prefetch-mixed.txt" \ > + git maintenance run --task=prefetch 2>/dev/null && > + test_subcommand git fetch remote4 $fetchargs <prefetch-mixed.txt && > + > + test_must_fail git rev-parse refs/prefetch/remotes/remote4/feature && > + test_must_fail git rev-parse refs/prefetch/remotes/remote4/topic/y && > + git rev-parse refs/prefetch/remotes/remote4/topic/x > + ) > +' > + > test_expect_success 'loose-objects task' ' > # Repack everything so we know the state of the object dir > git repack -adk && > > base-commit: 2e7b89e038c0c888acf61f1b4ee5a43d4dd5e94c > -- > gitgitgadget A friendly bump to revive this thread to see if the code looks good to you Junio. ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH v4] remote: allow specifying refs to prefetch 2024-10-04 20:21 ` [PATCH v4] remote: allow specifying refs to prefetch Shubham Kanodia via GitGitGadget 2024-11-04 8:47 ` Shubham Kanodia @ 2024-11-05 6:45 ` Patrick Steinhardt 2024-11-05 14:47 ` Phillip Wood 2024-11-05 14:45 ` Phillip Wood 2 siblings, 1 reply; 27+ messages in thread From: Patrick Steinhardt @ 2024-11-05 6:45 UTC (permalink / raw) To: Shubham Kanodia via GitGitGadget Cc: git, Junio C Hamano [ ], Derrick Stolee [ ], Shubham Kanodia On Fri, Oct 04, 2024 at 08:21:32PM +0000, Shubham Kanodia via GitGitGadget wrote: I'm coming rather late to the party and simply want to review this so that the thread gets revived. So my context may be lacking, please forgive me if I am reopening things that were already discussed. > diff --git a/Documentation/config/remote.txt b/Documentation/config/remote.txt > index 8efc53e836d..186f439ed7b 100644 > --- a/Documentation/config/remote.txt > +++ b/Documentation/config/remote.txt > @@ -33,6 +33,13 @@ remote.<name>.fetch:: > The default set of "refspec" for linkgit:git-fetch[1]. See > linkgit:git-fetch[1]. > > +remote.<name>.prefetchref:: > + Specify the refs to be prefetched when fetching from this > + remote. The value is a space-separated list of ref patterns > + (e.g., "refs/heads/main !refs/heads/develop*"). This can be > + used to optimize fetch operations by specifying exactly which > + refs should be prefetched. I'm a bit surprised that we use a space-separated list here instead of accepting a multi-valued config like we do for "remote.<name>.fetch". Shouldn't we use the format here to make things more consistent? > remote.<name>.push:: > The default set of "refspec" for linkgit:git-push[1]. See > linkgit:git-push[1]. > diff --git a/builtin/fetch.c b/builtin/fetch.c > index b2b5aee5bf2..74603cfabe0 100644 > --- a/builtin/fetch.c > +++ b/builtin/fetch.c > @@ -485,6 +485,32 @@ static void filter_prefetch_refspec(struct refspec *rs) > } > } > > +static int pattern_matches_ref(const char *pattern, const char *refname) > +{ > + if (strchr(pattern, '*')) > + return match_refspec_name_with_pattern(pattern, refname, NULL, NULL) != 0; > + return strcmp(pattern, refname) == 0; > +} > + > +static int matches_prefetch_refs(const char *refname, const struct string_list *prefetch_refs) > +{ > + int has_positive = 0, matched_positive = 0, matched_negative = 0; > + > + for (int i = 0; i < prefetch_refs->nr; i++) { > + const char *pattern = prefetch_refs->items[i].string; > + int is_negative = (*pattern == '!'); > + if (is_negative) pattern++; > + else has_positive = 1; > + > + if (pattern_matches_ref(pattern, refname)) { > + if (is_negative) matched_negative = 1; > + else matched_positive = 1; > + } > + } > + > + return has_positive ? (matched_positive && !matched_negative) : !matched_negative; > +} This is essentially open-coding a bunch of logic around how we parse refspecs. I'd propose to instead use the APIs we already have in this area, namely those in "refspec.h". > static struct ref *get_ref_map(struct remote *remote, > const struct ref *remote_refs, > struct refspec *rs, > @@ -501,7 +527,11 @@ static struct ref *get_ref_map(struct remote *remote, > struct hashmap existing_refs; > int existing_refs_populated = 0; > > + struct ref *prefetch_filtered_ref_map = NULL, **ref_map_tail = &prefetch_filtered_ref_map; > + struct ref *next; > + We don't typically have empty lines between variable declarations. > filter_prefetch_refspec(rs); > + > if (remote) > filter_prefetch_refspec(&remote->fetch); > > @@ -610,6 +640,29 @@ static struct ref *get_ref_map(struct remote *remote, > else > ref_map = apply_negative_refspecs(ref_map, &remote->fetch); > > + /** > + * Filter out advertised refs that we don't want to fetch during > + * prefetch if a prefetchref config is set > + */ Our comments typically start with `/*`, not `/**`. > + if (prefetch && remote->prefetch_refs.nr) { > + prefetch_filtered_ref_map = NULL; > + ref_map_tail = &prefetch_filtered_ref_map; As far as I can see, both of these variables have already been initialized beforehand. > + for (rm = ref_map; rm; rm = next) { > + next = rm->next; > + rm->next = NULL; > + > + if (matches_prefetch_refs(rm->name, &remote->prefetch_refs)) { > + *ref_map_tail = rm; > + ref_map_tail = &rm->next; > + } else { > + free_one_ref(rm); > + } > + } > + ref_map = prefetch_filtered_ref_map; > + } > + > ref_map = ref_remove_duplicates(ref_map); > > for (rm = ref_map; rm; rm = rm->next) { > diff --git a/remote.c b/remote.c > index 8f3dee13186..6752c73370f 100644 > --- a/remote.c > +++ b/remote.c > @@ -141,6 +141,7 @@ static struct remote *make_remote(struct remote_state *remote_state, > ret->prune = -1; /* unspecified */ > ret->prune_tags = -1; /* unspecified */ > ret->name = xstrndup(name, len); > + string_list_init_dup(&ret->prefetch_refs); > refspec_init(&ret->push, REFSPEC_PUSH); > refspec_init(&ret->fetch, REFSPEC_FETCH); > > @@ -166,6 +167,7 @@ static void remote_clear(struct remote *remote) > free((char *)remote->uploadpack); > FREE_AND_NULL(remote->http_proxy); > FREE_AND_NULL(remote->http_proxy_authmethod); > + string_list_clear(&remote->prefetch_refs, 0); > } > > static void add_merge(struct branch *branch, const char *name) > @@ -456,6 +458,12 @@ static int handle_config(const char *key, const char *value, > remote->prune = git_config_bool(key, value); > else if (!strcmp(subkey, "prunetags")) > remote->prune_tags = git_config_bool(key, value); > + else if (!strcmp(subkey, "prefetchref")) { > + if (!value) > + return config_error_nonbool(key); > + string_list_split(&remote->prefetch_refs, value, ' ', -1); > + return 0; > + } > else if (!strcmp(subkey, "url")) { > if (!value) > return config_error_nonbool(key); > @@ -868,7 +876,7 @@ struct strvec *push_url_of_remote(struct remote *remote) > return remote->pushurl.nr ? &remote->pushurl : &remote->url; > } > > -static int match_name_with_pattern(const char *key, const char *name, > +int match_refspec_name_with_pattern(const char *key, const char *name, > const char *value, char **result) > { > const char *kstar = strchr(key, '*'); Is this rename really necessary? It is not mentioned in the commit message, so this is surprising to me. If it really was necessary it should be split out into a separate commit that also explains why you think that this is a good idea. > diff --git a/remote.h b/remote.h > index b901b56746d..9ffef206f23 100644 > --- a/remote.h > +++ b/remote.h > @@ -5,6 +5,7 @@ > #include "hashmap.h" > #include "refspec.h" > #include "strvec.h" > +#include "string-list.h" > > struct option; > struct transport_ls_refs_options; > @@ -77,6 +78,8 @@ struct remote { > > struct refspec fetch; > > + struct string_list prefetch_refs; > + > /* > * The setting for whether to fetch tags (as a separate rule from the > * configured refspecs); > @@ -207,6 +210,9 @@ int count_refspec_match(const char *, struct ref *refs, struct ref **matched_ref > > int check_ref_type(const struct ref *ref, int flags); > > +int match_refspec_name_with_pattern(const char *key, const char *name, > + const char *value, char **result); I think instead of exposing this function we should rather expose `refspec_match()`, which is at a higher level and knows to handle the cases for us where the refspec is a pattern and when it's not. Patrick ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH v4] remote: allow specifying refs to prefetch 2024-11-05 6:45 ` Patrick Steinhardt @ 2024-11-05 14:47 ` Phillip Wood 2024-11-05 16:26 ` Shubham Kanodia 0 siblings, 1 reply; 27+ messages in thread From: Phillip Wood @ 2024-11-05 14:47 UTC (permalink / raw) To: Patrick Steinhardt, Shubham Kanodia via GitGitGadget Cc: git, Junio C Hamano, Derrick Stolee, Shubham Kanodia On 05/11/2024 06:45, Patrick Steinhardt wrote: > On Fri, Oct 04, 2024 at 08:21:32PM +0000, Shubham Kanodia via GitGitGadget wrote: > >> diff --git a/Documentation/config/remote.txt b/Documentation/config/remote.txt >> index 8efc53e836d..186f439ed7b 100644 >> --- a/Documentation/config/remote.txt >> +++ b/Documentation/config/remote.txt >> @@ -33,6 +33,13 @@ remote.<name>.fetch:: >> The default set of "refspec" for linkgit:git-fetch[1]. See >> linkgit:git-fetch[1]. >> >> +remote.<name>.prefetchref:: >> + Specify the refs to be prefetched when fetching from this >> + remote. The value is a space-separated list of ref patterns >> + (e.g., "refs/heads/main !refs/heads/develop*"). This can be >> + used to optimize fetch operations by specifying exactly which >> + refs should be prefetched. > > I'm a bit surprised that we use a space-separated list here instead of > accepting a multi-valued config like we do for "remote.<name>.fetch". > Shouldn't we use the format here to make things more consistent? I agree that would be a good idea. I also think that it would be helpful to expand the documentation to explain exactly how the patterns are matched. I think "remote.<name>.prefetch" would better match the existing "remote.<name>.fetch" and "remote.<push>.name" config key names. Best Wishes Phillip ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH v4] remote: allow specifying refs to prefetch 2024-11-05 14:47 ` Phillip Wood @ 2024-11-05 16:26 ` Shubham Kanodia 2024-11-06 0:39 ` Junio C Hamano 2024-11-06 6:46 ` Patrick Steinhardt 0 siblings, 2 replies; 27+ messages in thread From: Shubham Kanodia @ 2024-11-05 16:26 UTC (permalink / raw) To: Phillip Wood Cc: Patrick Steinhardt, Shubham Kanodia via GitGitGadget, git, Junio C Hamano, Derrick Stolee On Tue, Nov 5, 2024 at 8:17 PM Phillip Wood <phillip.wood123@gmail.com> wrote: > > On 05/11/2024 06:45, Patrick Steinhardt wrote: > > On Fri, Oct 04, 2024 at 08:21:32PM +0000, Shubham Kanodia via GitGitGadget wrote: > > > >> diff --git a/Documentation/config/remote.txt b/Documentation/config/remote.txt > >> index 8efc53e836d..186f439ed7b 100644 > >> --- a/Documentation/config/remote.txt > >> +++ b/Documentation/config/remote.txt > >> @@ -33,6 +33,13 @@ remote.<name>.fetch:: > >> The default set of "refspec" for linkgit:git-fetch[1]. See > >> linkgit:git-fetch[1]. > >> > >> +remote.<name>.prefetchref:: > >> + Specify the refs to be prefetched when fetching from this > >> + remote. The value is a space-separated list of ref patterns > >> + (e.g., "refs/heads/main !refs/heads/develop*"). This can be > >> + used to optimize fetch operations by specifying exactly which > >> + refs should be prefetched. > > > > I'm a bit surprised that we use a space-separated list here instead of > > accepting a multi-valued config like we do for "remote.<name>.fetch". > > Shouldn't we use the format here to make things more consistent? > > I agree that would be a good idea. I also think that it would be helpful > to expand the documentation to explain exactly how the patterns are > matched. I think "remote.<name>.prefetch" would better match the > existing "remote.<name>.fetch" and "remote.<push>.name" config key names. > > Best Wishes > > Phillip > Thanks for taking the time to look at this! Let me add context here based on a few things that have already been discussed as a part of this thread or an earlier discussion on https://lore.kernel.org/git/CAG=Um+1wTbXn_RN+LOCrpZpSNR_QF582PszxNyhz5anVHtBp+w@mail.gmail.com/ > I'm coming rather late to the party and simply want to review this so > that the thread gets revived. So my context may be lacking, please > forgive me if I am reopening things that were already discussed. I don't have a particular preference here, and this was discussed in an earlier thread where Junio opined (https://lore.kernel.org/git/xmqq5xrcn2k1.fsf@gitster.g/— > I agree that it is the right place to configure this as attributes > to remotes. It would make it handy if we could give a catch-all > configuration, though. For example: > > [remote "origin"] > prefetch = true > prefetchref = refs/heads/* refs/tags/* > [remote "*"] > prefetch = false > > may toggle prefetch off for all remotes, except that the tags and > the local branches of the remote "origin" are prefetched. Instead > of a multi-value configuration variable (like remote.*.fetch) where > we need to worry about clearing convention, we can use a regular > "last one wins" variable that is whitespace separated patterns, as > such a pattern can never have a whitespace in it. which is what my implementation is based on. > This is essentially open-coding a bunch of logic around how we parse > refspecs. I'd propose to instead use the APIs we already have in this > area, namely those in "refspec.h". Is there any that you would suggest? I don't see anything in `refspec.h` that might cover a similar logic. I'd previously used `wildmatch.h`, but that is really only suitable for glob-style wildcards. > Is this rename really necessary? It is not mentioned in the commit > message, so this is surprising to me What was previously `match_name_with_pattern` is now exposed out from `remote.c` to be used outside. I thought given that its now applicable outside `remote.c` it might be clearer to rename it to be more explicit (match name of what? remote? refspec? etc) However I agree that the higher level `refspec_match()` is a better function to expose anyway, and am happy to make that change. ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH v4] remote: allow specifying refs to prefetch 2024-11-05 16:26 ` Shubham Kanodia @ 2024-11-06 0:39 ` Junio C Hamano 2024-11-06 6:52 ` Patrick Steinhardt 2024-11-06 6:46 ` Patrick Steinhardt 1 sibling, 1 reply; 27+ messages in thread From: Junio C Hamano @ 2024-11-06 0:39 UTC (permalink / raw) To: Shubham Kanodia Cc: Phillip Wood, Patrick Steinhardt, Shubham Kanodia via GitGitGadget, git, Derrick Stolee Shubham Kanodia <shubham.kanodia10@gmail.com> writes: > I don't have a particular preference here, and this was discussed in > an earlier thread > where Junio opined (https://lore.kernel.org/git/xmqq5xrcn2k1.fsf@gitster.g/— > >> I agree that it is the right place to configure this as attributes >> to remotes. It would make it handy if we could give a catch-all >> configuration, though. For example: >> >> [remote "origin"] >> prefetch = true >> prefetchref = refs/heads/* refs/tags/* >> [remote "*"] >> prefetch = false >> >> may toggle prefetch off for all remotes, except that the tags and >> the local branches of the remote "origin" are prefetched. Instead >> of a multi-value configuration variable (like remote.*.fetch) where >> we need to worry about clearing convention, we can use a regular >> "last one wins" variable that is whitespace separated patterns, as >> such a pattern can never have a whitespace in it. > which is what my implementation is based on. I am fine with space separated list or multi-valued variable. The only difference is that with multi-valued list, we'd need to worry about ensuring that we have a way to "clear" the values we have seen so far. It has plenty of precedence and is not a rocket science. The above, if I recall correctly, was solely about the need for "catch-all default" (aka "*" remote) and not about multi-value vs space separated last-one-wins value at all. IOW, the above could have been [remote "origin"] prefetch = true prefetchref = refs/heads/* prefetchref = refs/tags/* [remote "*"] prefetch = false and conveyed exactly what I wanted to say in the message you quoted. In any case, I somehow thought that we discarded the arrangement with "*" wildcard as unworkable. If I remember the discussion before I left correctly, didn't it turn out to be troublesome to have [remote "*"] section because existing code would need to enumerate configured remotes, and we do not want to see "*" listed? If we found a workable solution to that while I was away, that would be great, but I haven't looked at what this latest round of the series does to solve it (yet). Perhaps teaching "git remote" and "git fetch --all" to skip "*" while enumerating remotes was sufficient? I dunno. Thanks. ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH v4] remote: allow specifying refs to prefetch 2024-11-06 0:39 ` Junio C Hamano @ 2024-11-06 6:52 ` Patrick Steinhardt 2024-11-06 8:20 ` Junio C Hamano 0 siblings, 1 reply; 27+ messages in thread From: Patrick Steinhardt @ 2024-11-06 6:52 UTC (permalink / raw) To: Junio C Hamano Cc: Shubham Kanodia, Phillip Wood, Shubham Kanodia via GitGitGadget, git, Derrick Stolee On Tue, Nov 05, 2024 at 04:39:06PM -0800, Junio C Hamano wrote: > Shubham Kanodia <shubham.kanodia10@gmail.com> writes: > > > I don't have a particular preference here, and this was discussed in > > an earlier thread > > where Junio opined (https://lore.kernel.org/git/xmqq5xrcn2k1.fsf@gitster.g/— > > > >> I agree that it is the right place to configure this as attributes > >> to remotes. It would make it handy if we could give a catch-all > >> configuration, though. For example: > >> > >> [remote "origin"] > >> prefetch = true > >> prefetchref = refs/heads/* refs/tags/* > >> [remote "*"] > >> prefetch = false > >> > >> may toggle prefetch off for all remotes, except that the tags and > >> the local branches of the remote "origin" are prefetched. Instead > >> of a multi-value configuration variable (like remote.*.fetch) where > >> we need to worry about clearing convention, we can use a regular > >> "last one wins" variable that is whitespace separated patterns, as > >> such a pattern can never have a whitespace in it. > > which is what my implementation is based on. > > I am fine with space separated list or multi-valued variable. The > only difference is that with multi-valued list, we'd need to worry > about ensuring that we have a way to "clear" the values we have seen > so far. It has plenty of precedence and is not a rocket science. > The above, if I recall correctly, was solely about the need for > "catch-all default" (aka "*" remote) and not about multi-value vs > space separated last-one-wins value at all. IOW, the above could > have been > > [remote "origin"] > prefetch = true > prefetchref = refs/heads/* > prefetchref = refs/tags/* > [remote "*"] > prefetch = false > > and conveyed exactly what I wanted to say in the message you quoted. Ah, I missed your mail here. I replied to this bit in a parallel email. > In any case, I somehow thought that we discarded the arrangement > with "*" wildcard as unworkable. If I remember the discussion > before I left correctly, didn't it turn out to be troublesome to > have [remote "*"] section because existing code would need to > enumerate configured remotes, and we do not want to see "*" listed? I wouldn't say unworkable, but it certainly isn't as easy as just adding the new syntax. > If we found a workable solution to that while I was away, that would > be great, but I haven't looked at what this latest round of the > series does to solve it (yet). Perhaps teaching "git remote" and > "git fetch --all" to skip "*" while enumerating remotes was > sufficient? I dunno. So yes, we'd have to teach Git to ignore "*" remotes in many places. I would hope that it isn't all that involved and that we only need to adjust a couple of places to ignore "*". But the remote logic is somewhat outside of my area of expertise, so my hope might be misplaced. If so, we might think about using a different syntax to achieve the same thing. Patrick ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH v4] remote: allow specifying refs to prefetch 2024-11-06 6:52 ` Patrick Steinhardt @ 2024-11-06 8:20 ` Junio C Hamano 0 siblings, 0 replies; 27+ messages in thread From: Junio C Hamano @ 2024-11-06 8:20 UTC (permalink / raw) To: Patrick Steinhardt Cc: Shubham Kanodia, Phillip Wood, Shubham Kanodia via GitGitGadget, git, Derrick Stolee Patrick Steinhardt <ps@pks.im> writes: > So yes, we'd have to teach Git to ignore "*" remotes in many places. I > would hope that it isn't all that involved and that we only need to > adjust a couple of places to ignore "*". But the remote logic is > somewhat outside of my area of expertise, so my hope might be misplaced. I am hoping that it would not be too painful to exclude '*' from enumeration, and also, "git fetch '*'" and other things that expect the name of a remote do not say silly things like "ah, there is "remote.*.<something>" configuration defined, so '*' must be a name of a remote" ;-) It might take certain refactoring, but if it results in a cleaner codebase, that is also a welcome outcome. Thanks. ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH v4] remote: allow specifying refs to prefetch 2024-11-05 16:26 ` Shubham Kanodia 2024-11-06 0:39 ` Junio C Hamano @ 2024-11-06 6:46 ` Patrick Steinhardt 2024-11-06 11:04 ` Phillip Wood 1 sibling, 1 reply; 27+ messages in thread From: Patrick Steinhardt @ 2024-11-06 6:46 UTC (permalink / raw) To: Shubham Kanodia Cc: Phillip Wood, Shubham Kanodia via GitGitGadget, git, Junio C Hamano, Derrick Stolee On Tue, Nov 05, 2024 at 09:56:52PM +0530, Shubham Kanodia wrote: > On Tue, Nov 5, 2024 at 8:17 PM Phillip Wood <phillip.wood123@gmail.com> wrote: > > On 05/11/2024 06:45, Patrick Steinhardt wrote: > > > On Fri, Oct 04, 2024 at 08:21:32PM +0000, Shubham Kanodia via GitGitGadget wrote: > > >> diff --git a/Documentation/config/remote.txt b/Documentation/config/remote.txt > > >> index 8efc53e836d..186f439ed7b 100644 > > >> --- a/Documentation/config/remote.txt > > >> +++ b/Documentation/config/remote.txt > > >> @@ -33,6 +33,13 @@ remote.<name>.fetch:: > > >> The default set of "refspec" for linkgit:git-fetch[1]. See > > >> linkgit:git-fetch[1]. > > >> > > >> +remote.<name>.prefetchref:: > > >> + Specify the refs to be prefetched when fetching from this > > >> + remote. The value is a space-separated list of ref patterns > > >> + (e.g., "refs/heads/main !refs/heads/develop*"). This can be > > >> + used to optimize fetch operations by specifying exactly which > > >> + refs should be prefetched. > > > > > > I'm a bit surprised that we use a space-separated list here instead of > > > accepting a multi-valued config like we do for "remote.<name>.fetch". > > > Shouldn't we use the format here to make things more consistent? > > > > I agree that would be a good idea. I also think that it would be helpful > > to expand the documentation to explain exactly how the patterns are > > matched. I think "remote.<name>.prefetch" would better match the > > existing "remote.<name>.fetch" and "remote.<push>.name" config key names. > > > > Best Wishes > > > > Phillip > > > > Thanks for taking the time to look at this! > > Let me add context here based on a few things that have already been discussed > as a part of this thread or an earlier discussion on > https://lore.kernel.org/git/CAG=Um+1wTbXn_RN+LOCrpZpSNR_QF582PszxNyhz5anVHtBp+w@mail.gmail.com/ > > > I'm coming rather late to the party and simply want to review this so > > that the thread gets revived. So my context may be lacking, please > > forgive me if I am reopening things that were already discussed. > > I don't have a particular preference here, and this was discussed in > an earlier thread > where Junio opined (https://lore.kernel.org/git/xmqq5xrcn2k1.fsf@gitster.g/— Fair enough, thanks for the pointer! The reason stated by Junio is that having a space-separated list of refspecs makes it easier to reset the refspec again, as we can simply use a "last-one-wins" pattern. And while I understand that, I would claim that we already have a properly established way to reset multi-valued lists by first assigning an empty value: [remote "origin"] prefetchref = refs/heads/* prefetchref = refs/tags/* prefetchref = prefetchref = refs/heads/* The final value would be only "refs/heads/*" due to the reset. So overall I'm still leaning into the direction of making this a multi-valued list for the sake of consistency with other refspec configs. @Junio Let me know in case you disagree though. > > I agree that it is the right place to configure this as attributes > > to remotes. It would make it handy if we could give a catch-all > > configuration, though. For example: > > > > [remote "origin"] > > prefetch = true > > prefetchref = refs/heads/* refs/tags/* > > [remote "*"] > > prefetch = false > > > > may toggle prefetch off for all remotes, except that the tags and > > the local branches of the remote "origin" are prefetched. Instead > > of a multi-value configuration variable (like remote.*.fetch) where > > we need to worry about clearing convention, we can use a regular > > "last one wins" variable that is whitespace separated patterns, as > > such a pattern can never have a whitespace in it. > > which is what my implementation is based on. > > > This is essentially open-coding a bunch of logic around how we parse > > refspecs. I'd propose to instead use the APIs we already have in this > > area, namely those in "refspec.h". > > Is there any that you would suggest? I don't see anything in `refspec.h` that > might cover a similar logic. I'd previously used `wildmatch.h`, but > that is really > only suitable for glob-style wildcards. The logic around refspecs is somewhat split up between different headers indeed. "refspec.h" really only concerns parsing of refspecs, but does not provide the interfaces to also use them to translate or match refs. I think the closest to what you need is `omit_name_by_refspec()`: you give it a refspec and a refname, and it returns whether any of the refspec items matches that ref. This also takes into oaccount whether the refspec item is negated ("!" prefix") or whether it is a pattern (contains "*"). Ideally, we'd move interfaces that provide basic refspec functionality into "refspec.h" so that they are easier to discover. But I'm fine with making this a #leftoverbit, I don't want to push the burden on you. Patrick ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH v4] remote: allow specifying refs to prefetch 2024-11-06 6:46 ` Patrick Steinhardt @ 2024-11-06 11:04 ` Phillip Wood 0 siblings, 0 replies; 27+ messages in thread From: Phillip Wood @ 2024-11-06 11:04 UTC (permalink / raw) To: Patrick Steinhardt, Shubham Kanodia Cc: Shubham Kanodia via GitGitGadget, git, Junio C Hamano, Derrick Stolee On 06/11/2024 06:46, Patrick Steinhardt wrote: > On Tue, Nov 05, 2024 at 09:56:52PM +0530, Shubham Kanodia wrote: >> >> Let me add context here based on a few things that have already been discussed >> as a part of this thread or an earlier discussion on >> https://lore.kernel.org/git/CAG=Um+1wTbXn_RN+LOCrpZpSNR_QF582PszxNyhz5anVHtBp+w@mail.gmail.com/ >> >>> I'm coming rather late to the party and simply want to review this so >>> that the thread gets revived. So my context may be lacking, please >>> forgive me if I am reopening things that were already discussed. >> >> I don't have a particular preference here, and this was discussed in >> an earlier thread >> where Junio opined (https://lore.kernel.org/git/xmqq5xrcn2k1.fsf@gitster.g/— > > Fair enough, thanks for the pointer! The reason stated by Junio is that > having a space-separated list of refspecs makes it easier to reset the > refspec again, as we can simply use a "last-one-wins" pattern. And while > I understand that, I would claim that we already have a properly > established way to reset multi-valued lists by first assigning an empty > value: > > [remote "origin"] > prefetchref = refs/heads/* > prefetchref = refs/tags/* > prefetchref = > prefetchref = refs/heads/* > > The final value would be only "refs/heads/*" due to the reset. > > So overall I'm still leaning into the direction of making this a > multi-valued list for the sake of consistency with other refspec > configs. @Junio Let me know in case you disagree though. It is also easier to manipulate the list with "git config" when it is multivalued as one can add values and "git config --add" and remove specific entries with "git config --unset <key> <pattern>". With a single entry to append a new value one has to resort to something like git config <key> "$(echo $(git config <key>) <new-value>)" which is not very user friendly. Deleting a value is even less friendly. Best Wishes Phillip ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH v4] remote: allow specifying refs to prefetch 2024-10-04 20:21 ` [PATCH v4] remote: allow specifying refs to prefetch Shubham Kanodia via GitGitGadget 2024-11-04 8:47 ` Shubham Kanodia 2024-11-05 6:45 ` Patrick Steinhardt @ 2024-11-05 14:45 ` Phillip Wood 2 siblings, 0 replies; 27+ messages in thread From: Phillip Wood @ 2024-11-05 14:45 UTC (permalink / raw) To: Shubham Kanodia via GitGitGadget, git Cc: Patrick Steinhardt, Junio C Hamano, Derrick Stolee, Shubham Kanodia Hi Shubham On 04/10/2024 21:21, Shubham Kanodia via GitGitGadget wrote: > From: Shubham Kanodia <shubham.kanodia10@gmail.com> > I agree with the Patrick's comments on the implementation, I've left a couple of test comments below. > +test_expect_success 'prefetch with positive prefetch ref patterns' ' > + test_create_repo filter-prefetch-positive && > + ( > + cd filter-prefetch-positive && > + test_commit initial && > + git clone . clone2 && > + git remote add remote2 "file://$(pwd)/clone2" && > + > + cd clone2 && > + git checkout -b feature && test_commit feature-commit-2 && > + git checkout -b wip/test && test_commit wip-test-commit-2 && > + git checkout -b topic/x && test_commit topic-x-commit-2 && > + git push -f origin feature wip/test topic/x && > + cd .. && I think it would make sense to have a blank line before this rather than after it so "cd" is grouped with the commands executed in that directory. > + git config remote.remote2.prefetchref "refs/heads/feature" && > + fetchargs="--prefetch --prune --no-tags --no-write-fetch-head \ > + --recurse-submodules=no --quiet" && > + GIT_TRACE2_EVENT="$(pwd)/prefetch-positive.txt" \ > + git maintenance run --task=prefetch 2>/dev/null && > + test_subcommand git fetch remote2 $fetchargs <prefetch-positive.txt && This seems to be testing what "git maintenance" runs which is not really related to testing the prefetch ref pattern matching. I think just git maintenance run --task=prefetch && would be sufficient. We certainly should not be redirecting stderr to /dev/null as that hides any error messages that are helpful when debugging test failures. > + git rev-parse refs/prefetch/remotes/remote2/feature && > + test_must_fail git rev-parse refs/prefetch/remotes/remote2/wip/test && > + test_must_fail git rev-parse refs/prefetch/remotes/remote2/topic/x these are the important tests for checking the prefetch pattern matching. We should perhaps be using "git rev-parse --verify" The test coverage looks good Best Wishes Phillip > + ) > +' > + > +test_expect_success 'prefetch with negative prefetch ref patterns' ' > + test_create_repo filter-prefetch-negative && > + ( > + cd filter-prefetch-negative && > + test_commit initial && > + git clone . clone3 && > + git remote add remote3 "file://$(pwd)/clone3" && > + cat .git/config && > + > + cd clone3 && > + git checkout -b feature && test_commit feature-commit-3 && > + git checkout -b wip/test && test_commit wip-test-commit-3 && > + git checkout -b topic/x && test_commit topic-x-commit-3 && > + git push -f origin feature wip/test topic/x && > + cd .. && > + > + git config remote.remote3.prefetchref "!refs/heads/wip/*" && > + fetchargs="--prefetch --prune --no-tags --no-write-fetch-head \ > + --recurse-submodules=no --quiet" && > + GIT_TRACE2_EVENT="$(pwd)/prefetch-negative.txt" \ > + git maintenance run --task=prefetch 2>/dev/null && > + test_subcommand git fetch remote3 $fetchargs <prefetch-negative.txt && > + git rev-parse refs/prefetch/remotes/remote3/feature && > + git rev-parse refs/prefetch/remotes/remote3/topic/x && > + test_must_fail git rev-parse refs/prefetch/remotes/remote3/wip/test > + ) > +' > + > +test_expect_success 'prefetch with positive & negative prefetch ref patterns' ' > + test_create_repo filter-prefetch-mixed && > + ( > + cd filter-prefetch-mixed && > + test_commit initial && > + git clone . clone4 && > + git remote add remote4 "file://$(pwd)/clone4" && > + > + cd clone4 && > + git checkout -b feature && test_commit feature-commit-4 && > + git checkout -b topic/x && test_commit topic-x-commit-4 && > + git checkout -b topic/y && test_commit topic-y-commit-4 && > + git push -f origin feature topic/x topic/y && > + cd .. && > + > + git config remote.remote4.prefetchref \ > + "refs/heads/topic/* !refs/heads/topic/y" && > + fetchargs="--prefetch --prune --no-tags --no-write-fetch-head \ > + --recurse-submodules=no --quiet" && > + GIT_TRACE2_EVENT="$(pwd)/prefetch-mixed.txt" \ > + git maintenance run --task=prefetch 2>/dev/null && > + test_subcommand git fetch remote4 $fetchargs <prefetch-mixed.txt && > + > + test_must_fail git rev-parse refs/prefetch/remotes/remote4/feature && > + test_must_fail git rev-parse refs/prefetch/remotes/remote4/topic/y && > + git rev-parse refs/prefetch/remotes/remote4/topic/x > + ) > +' > + > test_expect_success 'loose-objects task' ' > # Repack everything so we know the state of the object dir > git repack -adk && > > base-commit: 2e7b89e038c0c888acf61f1b4ee5a43d4dd5e94c ^ permalink raw reply [flat|nested] 27+ messages in thread
end of thread, other threads:[~2024-11-06 11:04 UTC | newest] Thread overview: 27+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2024-09-09 9:47 [PATCH] remote: introduce config to set prefetch refs Shubham Kanodia via GitGitGadget 2024-09-09 9:51 ` Shubham Kanodia 2024-09-09 16:42 ` Junio C Hamano 2024-09-09 18:21 ` Shubham Kanodia 2024-09-09 18:33 ` Junio C Hamano 2024-09-13 6:16 ` Shubham Kanodia 2024-09-13 16:58 ` Junio C Hamano 2024-09-14 19:35 ` Shubham Kanodia 2024-09-14 20:11 ` Junio C Hamano 2024-09-15 14:06 ` Shubham Kanodia 2024-09-15 16:12 ` Junio C Hamano 2024-09-16 4:34 ` Shubham Kanodia 2024-09-15 14:08 ` [PATCH v2] " Shubham Kanodia via GitGitGadget 2024-09-19 10:23 ` [PATCH v3] " Shubham Kanodia via GitGitGadget 2024-09-23 21:24 ` Junio C Hamano 2024-10-07 14:30 ` Shubham Kanodia 2024-10-04 20:21 ` [PATCH v4] remote: allow specifying refs to prefetch Shubham Kanodia via GitGitGadget 2024-11-04 8:47 ` Shubham Kanodia 2024-11-05 6:45 ` Patrick Steinhardt 2024-11-05 14:47 ` Phillip Wood 2024-11-05 16:26 ` Shubham Kanodia 2024-11-06 0:39 ` Junio C Hamano 2024-11-06 6:52 ` Patrick Steinhardt 2024-11-06 8:20 ` Junio C Hamano 2024-11-06 6:46 ` Patrick Steinhardt 2024-11-06 11:04 ` Phillip Wood 2024-11-05 14:45 ` Phillip Wood
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).