git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Michael Haggerty <mhagger@alum.mit.edu>
To: Jeff King <peff@peff.net>
Cc: Philip Oakley <philipoakley@iee.org>,
	Junio C Hamano <gitster@pobox.com>,
	git@vger.kernel.org
Subject: Re: [PATCH 00/17] Clean up how fetch_pack() handles the heads list
Date: Sun, 02 Sep 2012 09:02:17 +0200	[thread overview]
Message-ID: <50430479.3060006@alum.mit.edu> (raw)
In-Reply-To: <20120823203152.GA1810@sigill.intra.peff.net>

On 08/23/2012 10:31 PM, Jeff King wrote:
> On Thu, Aug 23, 2012 at 03:56:48PM -0400, Jeff King wrote:
> 
>> This code blames back to:
>>
>>  commit 4bcb310c2539b66d535e87508d1b7a90fe29c083
>>  Author: Alexandre Julliard <julliard@winehq.org>
>>  Date:   Fri Nov 24 16:00:13 2006 +0100
>>
>>     fetch-pack: Do not fetch tags for shallow clones.
>>
>>     A better fix may be to only fetch tags that point to commits that we
>>     are downloading, but git-clone doesn't have support for following
>>     tags. This will happen automatically on the next git-fetch though.
>>
>> So it is about making sure that "git clone --depth=1" does not
>> accidentally pull a single commit from v1.0, v1.1, v1.2, and so forth,
>> losing the purpose of using --depth in the first place. These days it is
>> largely irrelevant, since this code path is not followed by clone, and
>> clone will automatically restrict its list of fetched refs to a single
>> branch if --depth is used.
> 
> I think part of the confusion of this code is that inside the loop over
> the refs it is sometimes checking aspects of the ref, and sometimes
> checking invariants of the loop (like args.fetch_all). Splitting it into
> separate loops makes it easier to see what is going on, like the patch
> below (on top of Michael's series).
> 
> I'm not sure if it ends up being more readable, since the generic "cut
> down this linked list" function has to operate through callbacks with a
> void pointer. On the other hand, that function could also be used
> elsewhere.
> 
> diff --git a/builtin/fetch-pack.c b/builtin/fetch-pack.c
> index 90683ca..877cf38 100644
> --- a/builtin/fetch-pack.c
> +++ b/builtin/fetch-pack.c
> @@ -521,51 +521,80 @@ static void mark_recent_complete_commits(unsigned long cutoff)
>  	}
>  }
>  
> -static void filter_refs(struct ref **refs, int *nr_heads, char **heads)
> +static void filter_refs_callback(struct ref **refs,
> +				 int (*want)(struct ref *, void *),
> +				 void *data)
>  {
> -	struct ref *newlist = NULL;
> -	struct ref **newtail = &newlist;
> +	struct ref **tail = refs;
>  	struct ref *ref, *next;
> -	int match_pos = 0, unmatched = 0;
>  
>  	for (ref = *refs; ref; ref = next) {
> -		int keep_ref = 0;
>  		next = ref->next;
> -		if (!memcmp(ref->name, "refs/", 5) &&
> -		    check_refname_format(ref->name, 0))
> -			; /* trash */
> -		else if (args.fetch_all &&
> -			   (!args.depth || prefixcmp(ref->name, "refs/tags/")))
> -			keep_ref = 1;
> -		else
> -			while (match_pos < *nr_heads) {
> -				int cmp = strcmp(ref->name, heads[match_pos]);
> -				if (cmp < 0) { /* definitely do not have it */
> -					break;
> -				} else if (cmp == 0) { /* definitely have it */
> -					free(heads[match_pos++]);
> -					keep_ref = 1;
> -					break;
> -				} else { /* might have it; keep looking */
> -					heads[unmatched++] = heads[match_pos++];
> -				}
> -			}
> -
> -		if (keep_ref) {
> -			*newtail = ref;
> -			ref->next = NULL;
> -			newtail = &ref->next;
> -		} else {
> +		if (want(ref, data))
> +			tail = &ref->next;
> +		else {
>  			free(ref);
> +			*tail = next;
>  		}
>  	}
> +}
>  
> -	/* copy any remaining unmatched heads: */
> -	while (match_pos < *nr_heads)
> -		heads[unmatched++] = heads[match_pos++];
> -	*nr_heads = unmatched;
> +static int ref_name_is_ok(struct ref *ref, void *data)
> +{
> +	return memcmp(ref->name, "refs/", 5) ||
> +		!check_refname_format(ref->name, 0);
> +}
> +
> +static int ref_ok_for_shallow(struct ref *ref, void *data)
> +{
> +	return prefixcmp(ref->name, "refs/tags/");
> +}
>  
> -	*refs = newlist;
> +struct filter_by_name_data {
> +	char **heads;
> +	int nr_heads;
> +	int match_pos;
> +	int unmatched;
> +};
> +
> +static int want_ref_name(struct ref *ref, void *data)
> +{
> +	struct filter_by_name_data *f = data;
> +
> +	while (f->match_pos < f->nr_heads) {
> +		int cmp = strcmp(ref->name, f->heads[f->match_pos]);
> +		if (cmp < 0) /* definitely do not have it */
> +			return 0;
> +		else if (cmp == 0) { /* definitely have it */
> +			free(f->heads[f->match_pos++]);
> +			return 1;
> +		} else /* might have it; keep looking */
> +			f->heads[f->unmatched++] = f->heads[f->match_pos++];
> +	}
> +	return 0;
> +}
> +
> +static void filter_refs(struct ref **refs, int *nr_heads, char **heads)
> +{
> +	struct filter_by_name_data f;
> +
> +	filter_refs_callback(refs, ref_name_is_ok, NULL);
> +
> +	if (args.fetch_all) {
> +		if (args.depth)
> +			filter_refs_callback(refs, ref_ok_for_shallow, NULL);
> +		return;
> +	}
> +
> +	memset(&f, 0, sizeof(f));
> +	f.heads = heads;
> +	f.nr_heads = *nr_heads;
> +	filter_refs_callback(refs, want_ref_name, &f);
> +
> +	/* copy any remaining unmatched heads: */
> +	while (f.match_pos < f.nr_heads)
> +		heads[f.unmatched++] = heads[f.match_pos++];
> +	*nr_heads = f.unmatched;
>  }
>  
>  static void mark_alternate_complete(const struct ref *ref, void *unused)
> 

I need a sanity check here.  I don't see that it is forbidden to call
"git fetch-pack --all --depth=N" and also specify some explicit
references.  (This usage would make it possible for the user to do a
shallow clone while also retrieving some specific tags.)

However, if I try to do this I get (before your change)

    $ git fetch-pack -v --all --depth=1 $URL refs/heads/master
    Server supports multi_ack_detailed
    Server supports side-band-64k
    Server supports ofs-delta
    want 9c9f2f4453a7260d0d60926c73811f025be98ded (HEAD)
    want 9c9f2f4453a7260d0d60926c73811f025be98ded (refs/heads/master)
    done
    remote: Counting objects: 6, done.
    remote: Compressing objects: 100% (4/4), done.
    remote: Total 6 (delta 0), reused 0 (delta 0)
    Unpacking objects: 100% (6/6), done.
    error: no such remote ref refs/heads/master
    9c9f2f4453a7260d0d60926c73811f025be98ded HEAD
    9c9f2f4453a7260d0d60926c73811f025be98ded refs/heads/master

Note the error message "no such remote ref refs/heads/master", which is
incorrect.  This happens because when the (fetch_all && (!depth ||
ref_non_tag(ref))) branch of the if statement succeeds, then the
look-in-heads-list branch is never executed, and therefore the requested
reference is never marked as having been found among the remote references.

On the other hand, if I try the same with an (existing) tag, before your
change I get

    $ git fetch-pack -v --all --depth=1 $URL refs/tags/c
    Server supports multi_ack_detailed
    Server supports side-band-64k
    Server supports ofs-delta
    fatal: The remote end hung up unexpectedly

Now this is obviously broken in git-fetch-pack, because when fetch_all
is specified, return_refs is never initialized, but the
look-in-heads-list branch of the if is executed nevertheless.

It is not hard to fix these problems in the old code: (1)
unconditionally initialize return_refs, and (2) exchange the
look-in-heads-list handling and the fetch_all handling.


After your change the first problem (spurious error when asking for a
head) remains unchanged.  The second problem is changed from a crash to
a misbehavior:

    $ git fetch-pack -v --all --depth=1 $URL refs/tags/c
    Initialized empty Git repository in /home/mhagger/tmp/cl/c/.git/
    Server supports multi_ack_detailed
    Server supports side-band-64k
    Server supports ofs-delta
    want 9c9f2f4453a7260d0d60926c73811f025be98ded (HEAD)
    want 9c9f2f4453a7260d0d60926c73811f025be98ded (refs/heads/master)
    done
    remote: Counting objects: 6, done.
    remote: Compressing objects: 100% (4/4), done.
    remote: Total 6 (delta 0), reused 0 (delta 0)
    Unpacking objects: 100% (6/6), done.
    error: no such remote ref refs/tags/c
    9c9f2f4453a7260d0d60926c73811f025be98ded HEAD
    9c9f2f4453a7260d0d60926c73811f025be98ded refs/heads/master

Note that "refs/tags/c" is not obtained from the remote even though it
was specifically requested.  The reason that your patch makes the crash
disappear is that when fetch_all is true, then the look-in-heads-list
branch is not executed at all -- but this is also why "refs/tags/c" is
not obtained.

So, assuming that we want to support

    git fetch-pack --all --depth=N $URL refs/tags/TAG

correctly, then your filter_refs_callback() refactoring won't work, at
least not the way that you have written it.  I will continue working on
this.

Michael

-- 
Michael Haggerty
mhagger@alum.mit.edu
http://softwareswirl.blogspot.com/

  parent reply	other threads:[~2012-09-02  7:03 UTC|newest]

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-08-23  8:10 [PATCH 00/17] Clean up how fetch_pack() handles the heads list mhagger
2012-08-23  8:10 ` [PATCH 01/17] t5500: add tests of error output for missing refs mhagger
2012-08-23  8:10 ` [PATCH 02/17] Rename static function fetch_pack() to http_fetch_pack() mhagger
2012-08-23  8:10 ` [PATCH 03/17] Fix formatting mhagger
2012-08-23  8:10 ` [PATCH 04/17] Name local variables more consistently mhagger
2012-08-23  8:39   ` Jeff King
2012-08-24  7:05     ` Michael Haggerty
2012-08-26 17:39     ` Junio C Hamano
2012-08-27  9:22       ` Michael Haggerty
2012-08-27  9:25         ` Jeff King
2012-08-27 16:55           ` Junio C Hamano
2012-08-27 16:50         ` Junio C Hamano
2012-08-23  8:10 ` [PATCH 05/17] Do not check the same match_pos twice mhagger
2012-08-23  8:42   ` Jeff King
2012-08-23  8:10 ` [PATCH 06/17] Let fetch_pack() inform caller about number of unique heads mhagger
2012-08-23  8:54   ` Jeff King
2012-08-25  5:05     ` Michael Haggerty
2012-08-23  8:10 ` [PATCH 07/17] Pass nr_heads to do_pack_ref() by reference mhagger
2012-08-23  8:10 ` [PATCH 08/17] Pass nr_heads to everything_local() " mhagger
2012-08-23  8:10 ` [PATCH 09/17] Pass nr_heads to filter_refs() " mhagger
2012-08-23  8:10 ` [PATCH 10/17] Remove ineffective optimization mhagger
2012-08-23  8:10 ` [PATCH 11/17] filter_refs(): do not leave gaps in return_refs mhagger
2012-08-23  8:10 ` [PATCH 12/17] filter_refs(): compress unmatched refs in heads array mhagger
2012-08-23  8:10 ` [PATCH 13/17] cmd_fetch_pack: return early if finish_connect() returns an error mhagger
2012-08-23  8:10 ` [PATCH 14/17] Report missing refs even if no existing refs were received mhagger
2012-08-23  8:10 ` [PATCH 15/17] cmd_fetch_pack(): simplify computation of return value mhagger
2012-08-23  8:10 ` [PATCH 16/17] fetch_pack(): free matching heads mhagger
2012-08-23  9:04   ` Jeff King
2012-08-23  8:10 ` [PATCH 17/17] fetch_refs(): simplify logic mhagger
2012-08-23  9:07   ` Jeff King
2012-08-25  6:37     ` Michael Haggerty
2012-08-23  9:26 ` [PATCH 00/17] Clean up how fetch_pack() handles the heads list Jeff King
2012-08-23 19:13   ` Philip Oakley
2012-08-23 19:56     ` Jeff King
2012-08-23 20:31       ` Jeff King
2012-08-25  7:05         ` Michael Haggerty
2012-09-02  7:02         ` Michael Haggerty [this message]
2012-08-23 22:09       ` Philip Oakley
2012-08-24  4:23       ` Junio C Hamano
2012-08-24 12:46         ` Philip Oakley

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=50430479.3060006@alum.mit.edu \
    --to=mhagger@alum.mit.edu \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=peff@peff.net \
    --cc=philipoakley@iee.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).