All of lore.kernel.org
 help / color / mirror / Atom feed
From: Patrick Steinhardt <ps@pks.im>
To: Shubham Kanodia via GitGitGadget <gitgitgadget@gmail.com>
Cc: git@vger.kernel.org, "Junio C Hamano [ ]" <gitster@pobox.com>,
	"Derrick Stolee [ ]" <stolee@gmail.com>,
	Shubham Kanodia <shubham.kanodia10@gmail.com>
Subject: Re: [PATCH v4] remote: allow specifying refs to prefetch
Date: Tue, 5 Nov 2024 07:45:17 +0100	[thread overview]
Message-ID: <Zym--GVNJt_lsQEz@pks.im> (raw)
In-Reply-To: <pull.1782.v4.git.1728073292874.gitgitgadget@gmail.com>

On Fri, Oct 04, 2024 at 08:21:32PM +0000, Shubham Kanodia via GitGitGadget wrote:

I'm coming rather late to the party and simply want to review this so
that the thread gets revived. So my context may be lacking, please
forgive me if I am reopening things that were already discussed.

> diff --git a/Documentation/config/remote.txt b/Documentation/config/remote.txt
> index 8efc53e836d..186f439ed7b 100644
> --- a/Documentation/config/remote.txt
> +++ b/Documentation/config/remote.txt
> @@ -33,6 +33,13 @@ remote.<name>.fetch::
>  	The default set of "refspec" for linkgit:git-fetch[1]. See
>  	linkgit:git-fetch[1].
>  
> +remote.<name>.prefetchref::
> +	Specify the refs to be prefetched when fetching from this
> +	remote. The value is a space-separated list of ref patterns
> +	(e.g., "refs/heads/main !refs/heads/develop*"). This can be
> +	used to optimize fetch operations by specifying exactly which
> +	refs should be prefetched.

I'm a bit surprised that we use a space-separated list here instead of
accepting a multi-valued config like we do for "remote.<name>.fetch".
Shouldn't we use the format here to make things more consistent?

>  remote.<name>.push::
>  	The default set of "refspec" for linkgit:git-push[1]. See
>  	linkgit:git-push[1].
> diff --git a/builtin/fetch.c b/builtin/fetch.c
> index b2b5aee5bf2..74603cfabe0 100644
> --- a/builtin/fetch.c
> +++ b/builtin/fetch.c
> @@ -485,6 +485,32 @@ static void filter_prefetch_refspec(struct refspec *rs)
>  	}
>  }
>  
> +static int pattern_matches_ref(const char *pattern, const char *refname)
> +{
> +	if (strchr(pattern, '*'))
> +		return match_refspec_name_with_pattern(pattern, refname, NULL, NULL) != 0;
> +	return strcmp(pattern, refname) == 0;
> +}
> +
> +static int matches_prefetch_refs(const char *refname, const struct string_list *prefetch_refs)
> +{
> +	int has_positive = 0, matched_positive = 0, matched_negative = 0;
> +
> +	for (int i = 0; i < prefetch_refs->nr; i++) {
> +		const char *pattern = prefetch_refs->items[i].string;
> +		int is_negative = (*pattern == '!');
> +		if (is_negative) pattern++;
> +		else has_positive = 1;
> +
> +		if (pattern_matches_ref(pattern, refname)) {
> +			if (is_negative) matched_negative = 1;
> +			else matched_positive = 1;
> +		}
> +	}
> +
> +	return has_positive ? (matched_positive && !matched_negative) : !matched_negative;
> +}

This is essentially open-coding a bunch of logic around how we parse
refspecs. I'd propose to instead use the APIs we already have in this
area, namely those in "refspec.h".

>  static struct ref *get_ref_map(struct remote *remote,
>  			       const struct ref *remote_refs,
>  			       struct refspec *rs,
> @@ -501,7 +527,11 @@ static struct ref *get_ref_map(struct remote *remote,
>  	struct hashmap existing_refs;
>  	int existing_refs_populated = 0;
>  
> +	struct ref *prefetch_filtered_ref_map = NULL, **ref_map_tail = &prefetch_filtered_ref_map;
> +	struct ref *next;
> +

We don't typically have empty lines between variable declarations.

>  	filter_prefetch_refspec(rs);
> +
>  	if (remote)
>  		filter_prefetch_refspec(&remote->fetch);
>  
> @@ -610,6 +640,29 @@ static struct ref *get_ref_map(struct remote *remote,
>  	else
>  		ref_map = apply_negative_refspecs(ref_map, &remote->fetch);
>  
> +	/**
> +	 * Filter out advertised refs that we don't want to fetch during
> +	 * prefetch if a prefetchref config is set
> +	 */

Our comments typically start with `/*`, not `/**`.

> +	if (prefetch && remote->prefetch_refs.nr) {
> +		prefetch_filtered_ref_map = NULL;
> +		ref_map_tail = &prefetch_filtered_ref_map;

As far as I can see, both of these variables have already been
initialized beforehand.

> +		for (rm = ref_map; rm; rm = next) {
> +			next = rm->next;
> +			rm->next = NULL;
> +
> +			if (matches_prefetch_refs(rm->name, &remote->prefetch_refs)) {
> +				*ref_map_tail = rm;
> +				ref_map_tail = &rm->next;
> +			} else {
> +				free_one_ref(rm);
> +			}
> +		}
> +		ref_map = prefetch_filtered_ref_map;
> +	}
> +
>  	ref_map = ref_remove_duplicates(ref_map);
>  
>  	for (rm = ref_map; rm; rm = rm->next) {
> diff --git a/remote.c b/remote.c
> index 8f3dee13186..6752c73370f 100644
> --- a/remote.c
> +++ b/remote.c
> @@ -141,6 +141,7 @@ static struct remote *make_remote(struct remote_state *remote_state,
>  	ret->prune = -1;  /* unspecified */
>  	ret->prune_tags = -1;  /* unspecified */
>  	ret->name = xstrndup(name, len);
> +	string_list_init_dup(&ret->prefetch_refs);
>  	refspec_init(&ret->push, REFSPEC_PUSH);
>  	refspec_init(&ret->fetch, REFSPEC_FETCH);
>  
> @@ -166,6 +167,7 @@ static void remote_clear(struct remote *remote)
>  	free((char *)remote->uploadpack);
>  	FREE_AND_NULL(remote->http_proxy);
>  	FREE_AND_NULL(remote->http_proxy_authmethod);
> +	string_list_clear(&remote->prefetch_refs, 0);
>  }
>  
>  static void add_merge(struct branch *branch, const char *name)
> @@ -456,6 +458,12 @@ static int handle_config(const char *key, const char *value,
>  		remote->prune = git_config_bool(key, value);
>  	else if (!strcmp(subkey, "prunetags"))
>  		remote->prune_tags = git_config_bool(key, value);
> +	else if (!strcmp(subkey, "prefetchref")) {
> +		if (!value)
> +			return config_error_nonbool(key);
> +		string_list_split(&remote->prefetch_refs, value, ' ', -1);
> +		return 0;
> +	}
>  	else if (!strcmp(subkey, "url")) {
>  		if (!value)
>  			return config_error_nonbool(key);
> @@ -868,7 +876,7 @@ struct strvec *push_url_of_remote(struct remote *remote)
>  	return remote->pushurl.nr ? &remote->pushurl : &remote->url;
>  }
>  
> -static int match_name_with_pattern(const char *key, const char *name,
> +int match_refspec_name_with_pattern(const char *key, const char *name,
>  				   const char *value, char **result)
>  {
>  	const char *kstar = strchr(key, '*');

Is this rename really necessary? It is not mentioned in the commit
message, so this is surprising to me. If it really was necessary it
should be split out into a separate commit that also explains why you
think that this is a good idea.

> diff --git a/remote.h b/remote.h
> index b901b56746d..9ffef206f23 100644
> --- a/remote.h
> +++ b/remote.h
> @@ -5,6 +5,7 @@
>  #include "hashmap.h"
>  #include "refspec.h"
>  #include "strvec.h"
> +#include "string-list.h"
>  
>  struct option;
>  struct transport_ls_refs_options;
> @@ -77,6 +78,8 @@ struct remote {
>  
>  	struct refspec fetch;
>  
> +	struct string_list prefetch_refs;
> +
>  	/*
>  	 * The setting for whether to fetch tags (as a separate rule from the
>  	 * configured refspecs);
> @@ -207,6 +210,9 @@ int count_refspec_match(const char *, struct ref *refs, struct ref **matched_ref
>  
>  int check_ref_type(const struct ref *ref, int flags);
>  
> +int match_refspec_name_with_pattern(const char *key, const char *name,
> +					const char *value, char **result);

I think instead of exposing this function we should rather expose
`refspec_match()`, which is at a higher level and knows to handle the
cases for us where the refspec is a pattern and when it's not.

Patrick

  parent reply	other threads:[~2024-11-05  6:45 UTC|newest]

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-09-09  9:47 [PATCH] remote: introduce config to set prefetch refs Shubham Kanodia via GitGitGadget
2024-09-09  9:51 ` Shubham Kanodia
2024-09-09 16:42 ` Junio C Hamano
2024-09-09 18:21   ` Shubham Kanodia
2024-09-09 18:33     ` Junio C Hamano
2024-09-13  6:16       ` Shubham Kanodia
2024-09-13 16:58         ` Junio C Hamano
2024-09-14 19:35           ` Shubham Kanodia
2024-09-14 20:11             ` Junio C Hamano
2024-09-15 14:06               ` Shubham Kanodia
2024-09-15 16:12               ` Junio C Hamano
2024-09-16  4:34                 ` Shubham Kanodia
2024-09-15 14:08 ` [PATCH v2] " Shubham Kanodia via GitGitGadget
2024-09-19 10:23   ` [PATCH v3] " Shubham Kanodia via GitGitGadget
2024-09-23 21:24     ` Junio C Hamano
2024-10-07 14:30       ` Shubham Kanodia
2024-10-04 20:21     ` [PATCH v4] remote: allow specifying refs to prefetch Shubham Kanodia via GitGitGadget
2024-11-04  8:47       ` Shubham Kanodia
2024-11-05  6:45       ` Patrick Steinhardt [this message]
2024-11-05 14:47         ` Phillip Wood
2024-11-05 16:26           ` Shubham Kanodia
2024-11-06  0:39             ` Junio C Hamano
2024-11-06  6:52               ` Patrick Steinhardt
2024-11-06  8:20                 ` Junio C Hamano
2024-11-06  6:46             ` Patrick Steinhardt
2024-11-06 11:04               ` Phillip Wood
2024-11-05 14:45       ` Phillip Wood

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=Zym--GVNJt_lsQEz@pks.im \
    --to=ps@pks.im \
    --cc=git@vger.kernel.org \
    --cc=gitgitgadget@gmail.com \
    --cc=gitster@pobox.com \
    --cc=shubham.kanodia10@gmail.com \
    --cc=stolee@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.