From: Patrick Steinhardt <ps@pks.im>
To: Shubham Kanodia via GitGitGadget <gitgitgadget@gmail.com>
Cc: git@vger.kernel.org, "Junio C Hamano [ ]" <gitster@pobox.com>,
"Derrick Stolee [ ]" <stolee@gmail.com>,
Shubham Kanodia <shubham.kanodia10@gmail.com>
Subject: Re: [PATCH v4] remote: allow specifying refs to prefetch
Date: Tue, 5 Nov 2024 07:45:17 +0100 [thread overview]
Message-ID: <Zym--GVNJt_lsQEz@pks.im> (raw)
In-Reply-To: <pull.1782.v4.git.1728073292874.gitgitgadget@gmail.com>
On Fri, Oct 04, 2024 at 08:21:32PM +0000, Shubham Kanodia via GitGitGadget wrote:
I'm coming rather late to the party and simply want to review this so
that the thread gets revived. So my context may be lacking, please
forgive me if I am reopening things that were already discussed.
> diff --git a/Documentation/config/remote.txt b/Documentation/config/remote.txt
> index 8efc53e836d..186f439ed7b 100644
> --- a/Documentation/config/remote.txt
> +++ b/Documentation/config/remote.txt
> @@ -33,6 +33,13 @@ remote.<name>.fetch::
> The default set of "refspec" for linkgit:git-fetch[1]. See
> linkgit:git-fetch[1].
>
> +remote.<name>.prefetchref::
> + Specify the refs to be prefetched when fetching from this
> + remote. The value is a space-separated list of ref patterns
> + (e.g., "refs/heads/main !refs/heads/develop*"). This can be
> + used to optimize fetch operations by specifying exactly which
> + refs should be prefetched.
I'm a bit surprised that we use a space-separated list here instead of
accepting a multi-valued config like we do for "remote.<name>.fetch".
Shouldn't we use the format here to make things more consistent?
> remote.<name>.push::
> The default set of "refspec" for linkgit:git-push[1]. See
> linkgit:git-push[1].
> diff --git a/builtin/fetch.c b/builtin/fetch.c
> index b2b5aee5bf2..74603cfabe0 100644
> --- a/builtin/fetch.c
> +++ b/builtin/fetch.c
> @@ -485,6 +485,32 @@ static void filter_prefetch_refspec(struct refspec *rs)
> }
> }
>
> +static int pattern_matches_ref(const char *pattern, const char *refname)
> +{
> + if (strchr(pattern, '*'))
> + return match_refspec_name_with_pattern(pattern, refname, NULL, NULL) != 0;
> + return strcmp(pattern, refname) == 0;
> +}
> +
> +static int matches_prefetch_refs(const char *refname, const struct string_list *prefetch_refs)
> +{
> + int has_positive = 0, matched_positive = 0, matched_negative = 0;
> +
> + for (int i = 0; i < prefetch_refs->nr; i++) {
> + const char *pattern = prefetch_refs->items[i].string;
> + int is_negative = (*pattern == '!');
> + if (is_negative) pattern++;
> + else has_positive = 1;
> +
> + if (pattern_matches_ref(pattern, refname)) {
> + if (is_negative) matched_negative = 1;
> + else matched_positive = 1;
> + }
> + }
> +
> + return has_positive ? (matched_positive && !matched_negative) : !matched_negative;
> +}
This is essentially open-coding a bunch of logic around how we parse
refspecs. I'd propose to instead use the APIs we already have in this
area, namely those in "refspec.h".
> static struct ref *get_ref_map(struct remote *remote,
> const struct ref *remote_refs,
> struct refspec *rs,
> @@ -501,7 +527,11 @@ static struct ref *get_ref_map(struct remote *remote,
> struct hashmap existing_refs;
> int existing_refs_populated = 0;
>
> + struct ref *prefetch_filtered_ref_map = NULL, **ref_map_tail = &prefetch_filtered_ref_map;
> + struct ref *next;
> +
We don't typically have empty lines between variable declarations.
> filter_prefetch_refspec(rs);
> +
> if (remote)
> filter_prefetch_refspec(&remote->fetch);
>
> @@ -610,6 +640,29 @@ static struct ref *get_ref_map(struct remote *remote,
> else
> ref_map = apply_negative_refspecs(ref_map, &remote->fetch);
>
> + /**
> + * Filter out advertised refs that we don't want to fetch during
> + * prefetch if a prefetchref config is set
> + */
Our comments typically start with `/*`, not `/**`.
> + if (prefetch && remote->prefetch_refs.nr) {
> + prefetch_filtered_ref_map = NULL;
> + ref_map_tail = &prefetch_filtered_ref_map;
As far as I can see, both of these variables have already been
initialized beforehand.
> + for (rm = ref_map; rm; rm = next) {
> + next = rm->next;
> + rm->next = NULL;
> +
> + if (matches_prefetch_refs(rm->name, &remote->prefetch_refs)) {
> + *ref_map_tail = rm;
> + ref_map_tail = &rm->next;
> + } else {
> + free_one_ref(rm);
> + }
> + }
> + ref_map = prefetch_filtered_ref_map;
> + }
> +
> ref_map = ref_remove_duplicates(ref_map);
>
> for (rm = ref_map; rm; rm = rm->next) {
> diff --git a/remote.c b/remote.c
> index 8f3dee13186..6752c73370f 100644
> --- a/remote.c
> +++ b/remote.c
> @@ -141,6 +141,7 @@ static struct remote *make_remote(struct remote_state *remote_state,
> ret->prune = -1; /* unspecified */
> ret->prune_tags = -1; /* unspecified */
> ret->name = xstrndup(name, len);
> + string_list_init_dup(&ret->prefetch_refs);
> refspec_init(&ret->push, REFSPEC_PUSH);
> refspec_init(&ret->fetch, REFSPEC_FETCH);
>
> @@ -166,6 +167,7 @@ static void remote_clear(struct remote *remote)
> free((char *)remote->uploadpack);
> FREE_AND_NULL(remote->http_proxy);
> FREE_AND_NULL(remote->http_proxy_authmethod);
> + string_list_clear(&remote->prefetch_refs, 0);
> }
>
> static void add_merge(struct branch *branch, const char *name)
> @@ -456,6 +458,12 @@ static int handle_config(const char *key, const char *value,
> remote->prune = git_config_bool(key, value);
> else if (!strcmp(subkey, "prunetags"))
> remote->prune_tags = git_config_bool(key, value);
> + else if (!strcmp(subkey, "prefetchref")) {
> + if (!value)
> + return config_error_nonbool(key);
> + string_list_split(&remote->prefetch_refs, value, ' ', -1);
> + return 0;
> + }
> else if (!strcmp(subkey, "url")) {
> if (!value)
> return config_error_nonbool(key);
> @@ -868,7 +876,7 @@ struct strvec *push_url_of_remote(struct remote *remote)
> return remote->pushurl.nr ? &remote->pushurl : &remote->url;
> }
>
> -static int match_name_with_pattern(const char *key, const char *name,
> +int match_refspec_name_with_pattern(const char *key, const char *name,
> const char *value, char **result)
> {
> const char *kstar = strchr(key, '*');
Is this rename really necessary? It is not mentioned in the commit
message, so this is surprising to me. If it really was necessary it
should be split out into a separate commit that also explains why you
think that this is a good idea.
> diff --git a/remote.h b/remote.h
> index b901b56746d..9ffef206f23 100644
> --- a/remote.h
> +++ b/remote.h
> @@ -5,6 +5,7 @@
> #include "hashmap.h"
> #include "refspec.h"
> #include "strvec.h"
> +#include "string-list.h"
>
> struct option;
> struct transport_ls_refs_options;
> @@ -77,6 +78,8 @@ struct remote {
>
> struct refspec fetch;
>
> + struct string_list prefetch_refs;
> +
> /*
> * The setting for whether to fetch tags (as a separate rule from the
> * configured refspecs);
> @@ -207,6 +210,9 @@ int count_refspec_match(const char *, struct ref *refs, struct ref **matched_ref
>
> int check_ref_type(const struct ref *ref, int flags);
>
> +int match_refspec_name_with_pattern(const char *key, const char *name,
> + const char *value, char **result);
I think instead of exposing this function we should rather expose
`refspec_match()`, which is at a higher level and knows to handle the
cases for us where the refspec is a pattern and when it's not.
Patrick
next prev parent reply other threads:[~2024-11-05 6:45 UTC|newest]
Thread overview: 27+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-09-09 9:47 [PATCH] remote: introduce config to set prefetch refs Shubham Kanodia via GitGitGadget
2024-09-09 9:51 ` Shubham Kanodia
2024-09-09 16:42 ` Junio C Hamano
2024-09-09 18:21 ` Shubham Kanodia
2024-09-09 18:33 ` Junio C Hamano
2024-09-13 6:16 ` Shubham Kanodia
2024-09-13 16:58 ` Junio C Hamano
2024-09-14 19:35 ` Shubham Kanodia
2024-09-14 20:11 ` Junio C Hamano
2024-09-15 14:06 ` Shubham Kanodia
2024-09-15 16:12 ` Junio C Hamano
2024-09-16 4:34 ` Shubham Kanodia
2024-09-15 14:08 ` [PATCH v2] " Shubham Kanodia via GitGitGadget
2024-09-19 10:23 ` [PATCH v3] " Shubham Kanodia via GitGitGadget
2024-09-23 21:24 ` Junio C Hamano
2024-10-07 14:30 ` Shubham Kanodia
2024-10-04 20:21 ` [PATCH v4] remote: allow specifying refs to prefetch Shubham Kanodia via GitGitGadget
2024-11-04 8:47 ` Shubham Kanodia
2024-11-05 6:45 ` Patrick Steinhardt [this message]
2024-11-05 14:47 ` Phillip Wood
2024-11-05 16:26 ` Shubham Kanodia
2024-11-06 0:39 ` Junio C Hamano
2024-11-06 6:52 ` Patrick Steinhardt
2024-11-06 8:20 ` Junio C Hamano
2024-11-06 6:46 ` Patrick Steinhardt
2024-11-06 11:04 ` Phillip Wood
2024-11-05 14:45 ` Phillip Wood
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=Zym--GVNJt_lsQEz@pks.im \
--to=ps@pks.im \
--cc=git@vger.kernel.org \
--cc=gitgitgadget@gmail.com \
--cc=gitster@pobox.com \
--cc=shubham.kanodia10@gmail.com \
--cc=stolee@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).