Git development
 help / color / mirror / Atom feed
* [PATCH 6/7] pickaxe: give diff_grep the same signature as has_changes
From: René Scharfe @ 2011-10-06 16:50 UTC (permalink / raw)
  To: Git Mailing List; +Cc: Junio C Hamano
In-Reply-To: <4E8DD065.3040607@lsrfire.ath.cx>

Change diff_grep() to match the signature of has_changes() as a
preparation for the next patch that will use function pointers to
the two.

Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
---
 diffcore-pickaxe.c |    7 ++++---
 1 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/diffcore-pickaxe.c b/diffcore-pickaxe.c
index 4d66ba9..226fa0c 100644
--- a/diffcore-pickaxe.c
+++ b/diffcore-pickaxe.c
@@ -45,7 +45,8 @@ static void fill_one(struct diff_filespec *one,
 	}
 }
 
-static int diff_grep(struct diff_filepair *p, regex_t *regexp, struct diff_options *o)
+static int diff_grep(struct diff_filepair *p, struct diff_options *o,
+		     regex_t *regexp, kwset_t kws)
 {
 	regmatch_t regmatch;
 	struct userdiff_driver *textconv_one = NULL;
@@ -114,7 +115,7 @@ static void diffcore_pickaxe_grep(struct diff_options *o)
 		/* Showing the whole changeset if needle exists */
 		for (i = 0; i < q->nr; i++) {
 			struct diff_filepair *p = q->queue[i];
-			if (diff_grep(p, &regex, o))
+			if (diff_grep(p, o, &regex, NULL))
 				goto out; /* do not munge the queue */
 		}
 
@@ -129,7 +130,7 @@ static void diffcore_pickaxe_grep(struct diff_options *o)
 		/* Showing only the filepairs that has the needle */
 		for (i = 0; i < q->nr; i++) {
 			struct diff_filepair *p = q->queue[i];
-			if (diff_grep(p, &regex, o))
+			if (diff_grep(p, o, &regex, NULL))
 				diff_q(&outq, p);
 			else
 				diff_free_filepair(p);
-- 
1.7.7

^ permalink raw reply related

* [PATCH 5/7] pickaxe: pass diff_options to contains and has_changes
From: René Scharfe @ 2011-10-06 16:50 UTC (permalink / raw)
  To: Git Mailing List; +Cc: Junio C Hamano
In-Reply-To: <4E8DD065.3040607@lsrfire.ath.cx>

Remove the unused parameter needle from contains() and has_changes().

Also replace the parameter len with a pointer to the diff_options.  We
can use its member pickaxe to check if the needle is an empty string
and use the kwsmatch structure to find out the length of the match
instead.

This change is done as a preparation to unify the signatures of
has_changes() and diff_grep(), which will be used in the patch after
the next one to factor out common code.

Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
---
 diffcore-pickaxe.c |   28 ++++++++++++++--------------
 1 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/diffcore-pickaxe.c b/diffcore-pickaxe.c
index 4347ec1..4d66ba9 100644
--- a/diffcore-pickaxe.c
+++ b/diffcore-pickaxe.c
@@ -144,14 +144,13 @@ static void diffcore_pickaxe_grep(struct diff_options *o)
 	return;
 }
 
-static unsigned int contains(struct diff_filespec *one,
-			     const char *needle, unsigned long len,
+static unsigned int contains(struct diff_filespec *one, struct diff_options *o,
 			     regex_t *regexp, kwset_t kws)
 {
 	unsigned int cnt;
 	unsigned long sz;
 	const char *data;
-	if (!len)
+	if (!o->pickaxe[0])
 		return 0;
 	if (diff_populate_filespec(one, 0))
 		return 0;
@@ -175,14 +174,15 @@ static unsigned int contains(struct diff_filespec *one,
 
 	} else { /* Classic exact string match */
 		while (sz) {
-			size_t offset = kwsexec(kws, data, sz, NULL);
+			struct kwsmatch kwsm;
+			size_t offset = kwsexec(kws, data, sz, &kwsm);
 			const char *found;
 			if (offset == -1)
 				break;
 			else
 				found = data + offset;
-			sz -= found - data + len;
-			data = found + len;
+			sz -= found - data + kwsm.size[0];
+			data = found + kwsm.size[0];
 			cnt++;
 		}
 	}
@@ -190,20 +190,20 @@ static unsigned int contains(struct diff_filespec *one,
 	return cnt;
 }
 
-static int has_changes(struct diff_filepair *p, const char *needle,
-		       unsigned long len, regex_t *regexp, kwset_t kws)
+static int has_changes(struct diff_filepair *p, struct diff_options *o,
+		       regex_t *regexp, kwset_t kws)
 {
 	if (!DIFF_FILE_VALID(p->one)) {
 		if (!DIFF_FILE_VALID(p->two))
 			return 0; /* ignore unmerged */
 		/* created */
-		return contains(p->two, needle, len, regexp, kws) != 0;
+		return contains(p->two, o, regexp, kws) != 0;
 	}
 	if (!DIFF_FILE_VALID(p->two))
-		return contains(p->one, needle, len, regexp, kws) != 0;
+		return contains(p->one, o, regexp, kws) != 0;
 	if (!diff_unmodified_pair(p)) {
-		return contains(p->one, needle, len, regexp, kws) !=
-		       contains(p->two, needle, len, regexp, kws);
+		return contains(p->one, o, regexp, kws) !=
+		       contains(p->two, o, regexp, kws);
 	}
 	return 0;
 }
@@ -241,7 +241,7 @@ static void diffcore_pickaxe_count(struct diff_options *o)
 		/* Showing the whole changeset if needle exists */
 		for (i = 0; i < q->nr; i++) {
 			struct diff_filepair *p = q->queue[i];
-			if (has_changes(p, needle, len, regexp, kws))
+			if (has_changes(p, o, regexp, kws))
 				goto out; /* do not munge the queue */
 		}
 
@@ -257,7 +257,7 @@ static void diffcore_pickaxe_count(struct diff_options *o)
 		/* Showing only the filepairs that has the needle */
 		for (i = 0; i < q->nr; i++) {
 			struct diff_filepair *p = q->queue[i];
-			if (has_changes(p, needle, len, regexp, kws))
+			if (has_changes(p, o, regexp, kws))
 				diff_q(&outq, p);
 			else
 				diff_free_filepair(p);
-- 
1.7.7

^ permalink raw reply related

* Re: Git Bug report
From: Phil Hord @ 2011-10-06 16:48 UTC (permalink / raw)
  To: SZEDER Gábor
  Cc: Junio C Hamano, Fredrik Gustafsson, Federico Lucifredi, git
In-Reply-To: <20111006010940.GR2208@goldbirke>

On 10/5/11, SZEDER Gábor <szeder@ira.uka.de> wrote:
> On Wed, Oct 05, 2011 at 05:44:54PM -0700, Junio C Hamano wrote:
>> SZEDER Gábor <szeder@ira.uka.de> writes:
>>
>> > And what about unreadable .git files?
>>
>> Having then inside a working tree is so sick that I do not think it
>> deserves consideration.
>
> I'm not sure why is this any different than having a .git directory
> that is not a repository inside a working tree.

What should happen here? Ignore it and keep searching? Or fail?

I just added some common gitfile detection code and I noticed that the
oddball case now is the one that dies on error rather than continuing
to search for alternate explanations. I left the oddball behavior
assuming it is desireable, but now you have me rethinking it.

Phil

^ permalink raw reply

* [PATCH 4/7] pickaxe: factor out has_changes
From: René Scharfe @ 2011-10-06 16:26 UTC (permalink / raw)
  To: Git Mailing List; +Cc: Junio C Hamano
In-Reply-To: <4E8DD065.3040607@lsrfire.ath.cx>

Move duplicate if/else construct into its own helper function.

Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
---
 diffcore-pickaxe.c |   57 +++++++++++++++++++--------------------------------
 1 files changed, 21 insertions(+), 36 deletions(-)

diff --git a/diffcore-pickaxe.c b/diffcore-pickaxe.c
index c367d8d..4347ec1 100644
--- a/diffcore-pickaxe.c
+++ b/diffcore-pickaxe.c
@@ -190,13 +190,31 @@ static unsigned int contains(struct diff_filespec *one,
 	return cnt;
 }
 
+static int has_changes(struct diff_filepair *p, const char *needle,
+		       unsigned long len, regex_t *regexp, kwset_t kws)
+{
+	if (!DIFF_FILE_VALID(p->one)) {
+		if (!DIFF_FILE_VALID(p->two))
+			return 0; /* ignore unmerged */
+		/* created */
+		return contains(p->two, needle, len, regexp, kws) != 0;
+	}
+	if (!DIFF_FILE_VALID(p->two))
+		return contains(p->one, needle, len, regexp, kws) != 0;
+	if (!diff_unmodified_pair(p)) {
+		return contains(p->one, needle, len, regexp, kws) !=
+		       contains(p->two, needle, len, regexp, kws);
+	}
+	return 0;
+}
+
 static void diffcore_pickaxe_count(struct diff_options *o)
 {
 	const char *needle = o->pickaxe;
 	int opts = o->pickaxe_opts;
 	struct diff_queue_struct *q = &diff_queued_diff;
 	unsigned long len = strlen(needle);
-	int i, has_changes;
+	int i;
 	regex_t regex, *regexp = NULL;
 	kwset_t kws = NULL;
 	struct diff_queue_struct outq;
@@ -223,22 +241,7 @@ static void diffcore_pickaxe_count(struct diff_options *o)
 		/* Showing the whole changeset if needle exists */
 		for (i = 0; i < q->nr; i++) {
 			struct diff_filepair *p = q->queue[i];
-			if (!DIFF_FILE_VALID(p->one)) {
-				if (!DIFF_FILE_VALID(p->two))
-					continue; /* ignore unmerged */
-				/* created */
-				if (contains(p->two, needle, len, regexp, kws))
-					has_changes++;
-			}
-			else if (!DIFF_FILE_VALID(p->two)) {
-				if (contains(p->one, needle, len, regexp, kws))
-					has_changes++;
-			}
-			else if (!diff_unmodified_pair(p) &&
-				 contains(p->one, needle, len, regexp, kws) !=
-				 contains(p->two, needle, len, regexp, kws))
-				has_changes++;
-			if (has_changes)
+			if (has_changes(p, needle, len, regexp, kws))
 				goto out; /* do not munge the queue */
 		}
 
@@ -254,25 +257,7 @@ static void diffcore_pickaxe_count(struct diff_options *o)
 		/* Showing only the filepairs that has the needle */
 		for (i = 0; i < q->nr; i++) {
 			struct diff_filepair *p = q->queue[i];
-			has_changes = 0;
-			if (!DIFF_FILE_VALID(p->one)) {
-				if (!DIFF_FILE_VALID(p->two))
-					; /* ignore unmerged */
-				/* created */
-				else if (contains(p->two, needle, len, regexp,
-						  kws))
-					has_changes = 1;
-			}
-			else if (!DIFF_FILE_VALID(p->two)) {
-				if (contains(p->one, needle, len, regexp, kws))
-					has_changes = 1;
-			}
-			else if (!diff_unmodified_pair(p) &&
-				 contains(p->one, needle, len, regexp, kws) !=
-				 contains(p->two, needle, len, regexp, kws))
-				has_changes = 1;
-
-			if (has_changes)
+			if (has_changes(p, needle, len, regexp, kws))
 				diff_q(&outq, p);
 			else
 				diff_free_filepair(p);
-- 
1.7.7

^ permalink raw reply related

* Re: Git Bug report
From: Matthieu Moy @ 2011-10-06 16:26 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Phil Hord, SZEDER Gábor, git, Fredrik Gustafsson,
	Federico Lucifredi
In-Reply-To: <7vy5wy145q.fsf@alter.siamese.dyndns.org>

Junio C Hamano <gitster@pobox.com> writes:

>  If we cannot tell if it is or is not
> a GIT_DIR, we should error out---the reason we cannot tell most likely is
> because we cannot read it, and such a file, if it is not a GIT_DIR, cannot
> be tracked in the real GIT_DIR at a higher level, and if it is a GIT_DIR,
> we cannot use it to record updates or inspect existing history.

Plus, the user may have removed the permission on the .git directory by
mistake, and it would be very surprising behavior if git ran without
complaining using a higher level GIT_DIR (i.e. a more or less arbitrary
repo as far as the user is concerned ...)

> How's that sound as a guideline?

Sounds reasonable, yes.

-- 
Matthieu Moy
http://www-verimag.imag.fr/~moy/

^ permalink raw reply

* [PATCH 3/7] pickaxe: plug regex/kws leak
From: René Scharfe @ 2011-10-06 16:23 UTC (permalink / raw)
  To: Git Mailing List; +Cc: Junio C Hamano
In-Reply-To: <4E8DD065.3040607@lsrfire.ath.cx>

With -S... --pickaxe-all, free the regex or the kws before returning
even if we found a match.  Also get rid of the variable has_changes,
as we can simply break out of the loop.

Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
---
 diffcore-pickaxe.c |   13 +++++++------
 1 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/diffcore-pickaxe.c b/diffcore-pickaxe.c
index 96f7ea6..c367d8d 100644
--- a/diffcore-pickaxe.c
+++ b/diffcore-pickaxe.c
@@ -221,7 +221,7 @@ static void diffcore_pickaxe_count(struct diff_options *o)
 
 	if (opts & DIFF_PICKAXE_ALL) {
 		/* Showing the whole changeset if needle exists */
-		for (i = has_changes = 0; !has_changes && i < q->nr; i++) {
+		for (i = 0; i < q->nr; i++) {
 			struct diff_filepair *p = q->queue[i];
 			if (!DIFF_FILE_VALID(p->one)) {
 				if (!DIFF_FILE_VALID(p->two))
@@ -238,9 +238,9 @@ static void diffcore_pickaxe_count(struct diff_options *o)
 				 contains(p->one, needle, len, regexp, kws) !=
 				 contains(p->two, needle, len, regexp, kws))
 				has_changes++;
+			if (has_changes)
+				goto out; /* do not munge the queue */
 		}
-		if (has_changes)
-			return; /* not munge the queue */
 
 		/* otherwise we will clear the whole queue
 		 * by copying the empty outq at the end of this
@@ -278,13 +278,14 @@ static void diffcore_pickaxe_count(struct diff_options *o)
 				diff_free_filepair(p);
 		}
 
+	free(q->queue);
+	*q = outq;
+
+ out:
 	if (opts & DIFF_PICKAXE_REGEX)
 		regfree(&regex);
 	else
 		kwsfree(kws);
-
-	free(q->queue);
-	*q = outq;
 	return;
 }
 
-- 
1.7.7

^ permalink raw reply related

* Re: Git Bug report
From: Junio C Hamano @ 2011-10-06 16:22 UTC (permalink / raw)
  To: Phil Hord; +Cc: SZEDER Gábor, git, Fredrik Gustafsson, Federico Lucifredi
In-Reply-To: <CABURp0qCsKG2oOxLw4BfU8UM=9V+pigd69ZK=TZVwetBPqjuiA@mail.gmail.com>

Phil Hord <phil.hord@gmail.com> writes:

> On Oct 5, 2011 9:14 PM, "SZEDER Gábor" <szeder@ira.uka.de> wrote:
>>
>> On Wed, Oct 05, 2011 at 05:44:54PM -0700, Junio C Hamano wrote:
>> > SZEDER Gábor <szeder@ira.uka.de> writes:
>> >
>> > > And what about unreadable .git files?
>> >
>> > Having then inside a working tree is so sick that I do not think it
>> > deserves consideration.
>>
>> I'm not sure why is this any different than having a .git directory
>> that is not a repository inside a working tree.
>
> What should happen here? Ignore it and keep searching? Or fail?
>
> I just added some common gitfile detection code and I noticed that the
> oddball case now is the one that dies on error rather than continuing to
> search for alternate explanations.  I left the oddball behavior assuming it
> is desireable, but now you have me rethinking it.

Yeah, after thinking about it a bit more, whenever we see ".git" during
the upward discovery process, we should always warn if we know it is _not_
a GIT_DIR before looking for another ".git" at higher levels, as anything
in that directory cannot be added. If we cannot tell if it is or is not
a GIT_DIR, we should error out---the reason we cannot tell most likely is
because we cannot read it, and such a file, if it is not a GIT_DIR, cannot
be tracked in the real GIT_DIR at a higher level, and if it is a GIT_DIR,
we cannot use it to record updates or inspect existing history.

How's that sound as a guideline?

^ permalink raw reply

* [PATCH 2/7] pickaxe: plug regex leak
From: René Scharfe @ 2011-10-06 16:14 UTC (permalink / raw)
  To: Git Mailing List; +Cc: Junio C Hamano
In-Reply-To: <4E8DD065.3040607@lsrfire.ath.cx>

With -G... --pickaxe-all, free the regex before returning even if we
found a match.  Also get rid of the variable has_changes, as we can
simply break out of the loop.

Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
---
 diffcore-pickaxe.c |   13 ++++++-------
 1 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/diffcore-pickaxe.c b/diffcore-pickaxe.c
index 0835a3b..96f7ea6 100644
--- a/diffcore-pickaxe.c
+++ b/diffcore-pickaxe.c
@@ -96,7 +96,7 @@ static int diff_grep(struct diff_filepair *p, regex_t *regexp, struct diff_optio
 static void diffcore_pickaxe_grep(struct diff_options *o)
 {
 	struct diff_queue_struct *q = &diff_queued_diff;
-	int i, has_changes, err;
+	int i, err;
 	regex_t regex;
 	struct diff_queue_struct outq;
 	outq.queue = NULL;
@@ -112,13 +112,11 @@ static void diffcore_pickaxe_grep(struct diff_options *o)
 
 	if (o->pickaxe_opts & DIFF_PICKAXE_ALL) {
 		/* Showing the whole changeset if needle exists */
-		for (i = has_changes = 0; !has_changes && i < q->nr; i++) {
+		for (i = 0; i < q->nr; i++) {
 			struct diff_filepair *p = q->queue[i];
 			if (diff_grep(p, &regex, o))
-				has_changes++;
+				goto out; /* do not munge the queue */
 		}
-		if (has_changes)
-			return; /* do not munge the queue */
 
 		/*
 		 * Otherwise we will clear the whole queue by copying
@@ -138,10 +136,11 @@ static void diffcore_pickaxe_grep(struct diff_options *o)
 		}
 	}
 
-	regfree(&regex);
-
 	free(q->queue);
 	*q = outq;
+
+ out:
+	regfree(&regex);
 	return;
 }
 
-- 
1.7.7

^ permalink raw reply related

* Re: [PATCH 2/4] cleanup: use internal memory allocation wrapper functions everywhere
From: Brandon Casey @ 2011-10-06 16:14 UTC (permalink / raw)
  To: Johannes Sixt
  Cc: peff@peff.net, git@vger.kernel.org, gitster@pobox.com,
	sunshine@sunshineco.com, bharrosh@panasas.com,
	trast@student.ethz.ch
In-Reply-To: <4E8D4812.9090102@viscovery.net>

[removed Alexey Shumkin from cc]

On Thu, Oct 6, 2011 at 1:17 AM, Johannes Sixt <j.sixt@viscovery.net> wrote:
> Am 10/6/2011 4:00, schrieb Brandon Casey:
>> [resend without html bits added by "gmail offline"]
>> On Wed, Oct 5, 2011 at 7:53 PM, Brandon Casey <drafnel@gmail.com> wrote:
>>> On Thursday, September 15, 2011, Brandon Casey wrote:
>>>>
>>>> On Thu, Sep 15, 2011 at 1:52 AM, Johannes Sixt <j.sixt@viscovery.net>
>>>>> There is a danger that the high-level die() routine (which is used by
>>>>> the
>>>>> x-wrappers) uses one of the low-level compat/ routines. IOW, in the case
>>>>> of errors, recursion might occur. Therefore, I would prefer that the
>>>>> compat/ routines do their own error reporting (preferably via return
>>>>> values and errno).
>>>>
>>>> Thanks.  Will do.
>>>
>>> Hi Johannes,
>>> I have taken a closer look at the possibility of recursion with respect to
>>> die() and the functions in compat/.  It appears the risk is only related to
>>> vsnprintf/snprintf at the moment.  So as long as we avoid calling xmalloc et
>>> al from within snprintf.c, I think we should be safe from recursion.
>>> I'm inclined to keep the additions to mingw.c and win32/syslog.c since they
>>> both already use the x-wrappers or strbuf, even though they could easily be
>>> worked around.  The other file that was touched is compat/qsort, which
>>> returns void and doesn't have a good alternative error handling path.  So,
>>> I'm inclined to keep that one too.
>
> I'm fine with keeping the change to mingw.c (getaddrinfo related) and
> qsort: both are unlikely to be called from die().
>
> But syslog() *is* called from die() in git-daemon, and it would be better
> to back out the other offenders instead of adding to them.

Ah.  Yes, you're right.  x-wrappers should not be used in syslog.c and
the use of strbuf's should be replaced.

Thanks,
-Brandon

^ permalink raw reply

* Re: git commit -a reports untracked files after a clone
From: Jeff King @ 2011-10-06 16:06 UTC (permalink / raw)
  To: Joerg Rosenkranz; +Cc: Junio C Hamano, git
In-Reply-To: <loom.20111005T161522-357@post.gmane.org>

On Wed, Oct 05, 2011 at 02:26:30PM +0000, Joerg Rosenkranz wrote:

> > On Fri, May 27, 2011 at 02:00:45PM -0400, Jeff King wrote:
> >   1. We load the index, and for each entry, insert it into the index's
> >      name_hash. In addition, if ignorecase is turned on, we make an
> >      entry in the name_hash for the directory (e.g., "contrib/"), which
> >      uses the following code from 5102c61's hash_index_entry_directories:
> 
> Sorry for reactivating this old thread.
> We are running in this problem too. The behavior is the same in msysgit and on 
> Linux. Your patch resolves that problem for us.
> 
> Is there any chance to drive this patch forward?

Thanks for prodding. I wrote a big analysis at the end of that thread,
but didn't get much response. I've been meaning to pick it up again.

So I spent a few minutes revisiting the topic today. I think it's
definitely a bug, and the fix is definitely correct. The patch is below,
with what I hope is a coherent analysis adapted from the previous
thread.

The only question is whether or not it's OK to add a few bytes to
"struct cache_entry" to cater to an uncommon case (see the comments at
the end of the commit message). Junio might have thoughts on that.

-- >8 --
Subject: [PATCH] fix phantom untracked files when core.ignorecase is set

When core.ignorecase is turned on and there are stale index
entries, "git commit" can sometimes report directories as
untracked, even though they contain tracked files.

You can see an example of this with:

    # make a case-insensitive repo
    git init repo && cd repo &&
    git config core.ignorecase true &&

    # with some tracked files in a subdir
    mkdir subdir &&
    > subdir/one &&
    > subdir/two &&
    git add . &&
    git commit -m base &&

    # now make the index entries stale
    touch subdir/* &&

    # and then ask commit to update those entries and show
    # us the status template
    git commit -a

which will report "subdir/"  as untracked, even though it
clearly contains two tracked files. What is happening in the
commit program is this:

  1. We load the index, and for each entry, insert it into the index's
     name_hash. In addition, if ignorecase is turned on, we make an
     entry in the name_hash for the directory (e.g., "contrib/"), which
     uses the following code from 5102c61's hash_index_entry_directories:

        hash = hash_name(ce->name, ptr - ce->name);
        if (!lookup_hash(hash, &istate->name_hash)) {
                pos = insert_hash(hash, &istate->name_hash);
		if (pos) {
			ce->next = *pos;
			*pos = ce;
		}
        }

     Note that we only add the directory entry if there is not already an
     entry.

  2. We run add_files_to_cache, which gets updated information for each
     cache entry. It helpfully inserts this information into the cache,
     which calls replace_index_entry. This in turn calls
     remove_name_hash() on the old entry, and add_name_hash() on the new
     one. But remove_name_hash doesn't actually remove from the hash, it
     only marks it as "no longer interesting" (from cache.h):

      /*
       * We don't actually *remove* it, we can just mark it invalid so that
       * we won't find it in lookups.
       *
       * Not only would we have to search the lists (simple enough), but
       * we'd also have to rehash other hash buckets in case this makes the
       * hash bucket empty (common). So it's much better to just mark
       * it.
       */
      static inline void remove_name_hash(struct cache_entry *ce)
      {
              ce->ce_flags |= CE_UNHASHED;
      }

     This is OK in the specific-file case, since the entries in the hash
     form a linked list, and we can just skip the "not here anymore"
     entries during lookup.

     But for the directory hash entry, we will _not_ write a new entry,
     because there is already one there: the old one that is actually no
     longer interesting!

  3. While traversing the directories, we end up in the
     directory_exists_in_index_icase function to see if a directory is
     interesting. This in turn checks index_name_exists, which will
     look up the directory in the index's name_hash. We see the old,
     deleted record, and assume there is nothing interesting. The
     directory gets marked as untracked, even though there are index
     entries in it.

The problem is in the code I showed above:

        hash = hash_name(ce->name, ptr - ce->name);
        if (!lookup_hash(hash, &istate->name_hash)) {
                pos = insert_hash(hash, &istate->name_hash);
		if (pos) {
			ce->next = *pos;
			*pos = ce;
		}
        }

Having a single cache entry that represents the directory is
not enough; that entry may go away if the index is changed.
It may be tempting to say that the problem is in our removal
method; if we removed the entry entirely instead of simply
marking it as "not here anymore", then we would know we need
to insert a new entry. But that only covers this particular
case of remove-replace. In the more general case, consider
something like this:

  1. We add "foo/bar" and "foo/baz" to the index. Each gets
     their own entry in name_hash, plus we make a "foo/"
     entry that points to "foo/bar".

  2. We remove the "foo/bar" entry from the index, and from
     the name_hash.

  3. We ask if "foo/" exists, and see no entry, even though
     "foo/baz" exists.

So we need that directory entry to have the list of _all_
cache entries that indicate that the directory is tracked.
So that implies making a linked list as we do for other
entries, like:

  hash = hash_name(ce->name, ptr - ce->name);
  pos = insert_hash(hash, &istate->name_hash);
  if (pos) {
	  ce->next = *pos;
	  *pos = ce;
  }

But that's not right either. In fact, it shows a second bug
in the current code, which is that the "ce->next" pointer is
supposed to be linking entries for a specific filename
entry, but here we are overwriting it for the directory
entry. So the same cache entry ends up in two linked lists,
but they share the same "next" pointer.

As it turns out, this second bug can't be triggered in the
current code. The "if (pos)" conditional is totally dead
code; pos will only be non-NULL if there was an existing
hash entry, and we already checked that there wasn't one
through our call to lookup_hash.

But fixing the first bug means taking out that call to
lookup_hash, which is going to activate the buggy dead code,
and we'll end up splicing the two linked lists together.

So we need to have a separate next pointer for the list in
the directory bucket, and we need to traverse that list in
index_name_exists when we are looking up a directory.

This bloats "struct cache_entry" by a few bytes. Which is
annoying, because it's only necessary when core.ignorecase
is enabled. There's not an easy way around it, short of
separating out the "next" pointers from cache_entry entirely
(i.e., having a separate "cache_entry_list" struct that gets
stored in the name_hash). In practice, it probably doesn't
matter; we have thousands of cache entries, compared to the
millions of objects (where adding 4 bytes to the struct
actually does impact performance).

Signed-off-by: Jeff King <peff@peff.net>
---
 cache.h     |    1 +
 name-hash.c |   15 ++++++++-------
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/cache.h b/cache.h
index 607c2ea..1334fbf 100644
--- a/cache.h
+++ b/cache.h
@@ -168,6 +168,7 @@ struct cache_entry {
 	unsigned int ce_flags;
 	unsigned char sha1[20];
 	struct cache_entry *next;
+	struct cache_entry *dir_next;
 	char name[FLEX_ARRAY]; /* more */
 };
 
diff --git a/name-hash.c b/name-hash.c
index c6b6a3f..225dd76 100644
--- a/name-hash.c
+++ b/name-hash.c
@@ -57,12 +57,10 @@ static void hash_index_entry_directories(struct index_state *istate, struct cach
 		if (*ptr == '/') {
 			++ptr;
 			hash = hash_name(ce->name, ptr - ce->name);
-			if (!lookup_hash(hash, &istate->name_hash)) {
-				pos = insert_hash(hash, ce, &istate->name_hash);
-				if (pos) {
-					ce->next = *pos;
-					*pos = ce;
-				}
+			pos = insert_hash(hash, ce, &istate->name_hash);
+			if (pos) {
+				ce->dir_next = *pos;
+				*pos = ce;
 			}
 		}
 	}
@@ -166,7 +164,10 @@ struct cache_entry *index_name_exists(struct index_state *istate, const char *na
 			if (same_name(ce, name, namelen, icase))
 				return ce;
 		}
-		ce = ce->next;
+		if (icase && name[namelen - 1] == '/')
+			ce = ce->dir_next;
+		else
+			ce = ce->next;
 	}
 
 	/*
-- 
1.7.7.37.g0e376

^ permalink raw reply related

* [PATCH 1/7] pickaxe: plug diff filespec leak with empty needle
From: René Scharfe @ 2011-10-06 16:03 UTC (permalink / raw)
  To: Git Mailing List; +Cc: Junio C Hamano
In-Reply-To: <4E8DD065.3040607@lsrfire.ath.cx>

Check first for the unlikely case of an empty needle string and only
then populate the filespec, lest we leak it.

Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
---
 diffcore-pickaxe.c |    4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/diffcore-pickaxe.c b/diffcore-pickaxe.c
index c3760cf..0835a3b 100644
--- a/diffcore-pickaxe.c
+++ b/diffcore-pickaxe.c
@@ -152,10 +152,10 @@ static unsigned int contains(struct diff_filespec *one,
 	unsigned int cnt;
 	unsigned long sz;
 	const char *data;
-	if (diff_populate_filespec(one, 0))
-		return 0;
 	if (!len)
 		return 0;
+	if (diff_populate_filespec(one, 0))
+		return 0;
 
 	sz = one->size;
 	data = one->data;
-- 
1.7.7

^ permalink raw reply related

* [PATCH 0/7] pickaxe: plug memory leaks, deduplicate code
From: René Scharfe @ 2011-10-06 15:59 UTC (permalink / raw)
  To: Git Mailing List; +Cc: Junio C Hamano

Hello,

this series aims to clean up the code for pickaxe (-S/-G).  The first
three patches plug minor to medium memory leaks.

	[PATCH 1/7] pickaxe: plug diff filespec leak with empty needle
	[PATCH 2/7] pickaxe: plug regex leak
	[PATCH 3/7] pickaxe: plug regex/kws leak

The next one moves a duplicate if/else cascade into its own helper
function:

	[PATCH 4/7] pickaxe: factor out has_changes

The remainder unifies the code for pickaxe (-S) and log grep (-G).

	[PATCH 5/7] pickaxe: pass diff_options to contains and has_changes
	[PATCH 6/7] pickaxe: give diff_grep the same signature as has_changes
	[PATCH 7/7] pickaxe: factor out pickaxe

As a result the code should be shorter and easier to maintain.

 diffcore-pickaxe.c |  178 ++++++++++++++++++++-------------------------------
 1 files changed, 70 insertions(+), 108 deletions(-)

^ permalink raw reply

* Re: What's cooking in git.git (Oct 2011, #01; Tue, 4)
From: Brad King @ 2011-10-06 15:55 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git
In-Reply-To: <7vvcs49ofl.fsf@alter.siamese.dyndns.org>

On 10/4/2011 10:12 PM, Junio C Hamano wrote:
> [Stalled]
>
> * hv/submodule-merge-search (2011-08-26) 5 commits
>   - submodule: Search for merges only at end of recursive merge
>   - allow multiple calls to submodule merge search for the same path
>   - submodule: Demonstrate known breakage during recursive merge

The above three commits are independent of the other two, no?
They form a complete topic demonstrating and fixing a merge bug
independent from pushing.  The other commits:

>   - push: Don't push a repository with unpushed submodules
>    (merged to 'next' on 2011-08-24 at 398e764)
>   + push: teach --recurse-submodules the on-demand option
>   (this branch is tangled with fg/submodule-auto-push.)
>
> The second from the bottom one needs to be replaced with a properly
> written commit log message.

are a separate topic.

-Brad

^ permalink raw reply

* Re: [PATCH] commit: teach --gpg-sign option
From: Shawn Pearce @ 2011-10-06 15:50 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git
In-Reply-To: <7vaa9f3pk8.fsf@alter.siamese.dyndns.org>

On Wed, Oct 5, 2011 at 17:56, Junio C Hamano <gitster@pobox.com> wrote:
> And this uses the gpg-interface.[ch] to allow signing the commit, i.e.
>
>    $ git commit --gpg-sign -m foo
>    You need a passphrase to unlock the secret key for
>    user: "Junio C Hamano <gitster@pobox.com>"
>    4096-bit RSA key, ID 96AFE6CB, created 2011-10-03 (main key ID 713660A7)
>
>    [master 8457d13] foo
>     1 files changed, 1 insertions(+), 0 deletions(-)
>
> The lines of GPG detached signature are placed in new header lines, after
> the standard tree/parent/author/committer headers, instead of tucking the
> signature block at the end of the commit log message text (similar to how
> signed tag is done), for multiple reasons:

I like this approach better than the prior "push certificate" idea.
The signature information is part of the history graph, and won't be
stripped or forgotten to be published as commits flow through
development trees, provided that the commit SHA-1 was preserved by not
doing a rewrite. There is no merge race on the server side to get a
branch updated.

The signature is automatically stripped by older Git tools like commit
--amend, format-patch or rebase, where the signature would be made
invalid anyway by changing the tree, parent or committer. That is a
nice bit of backward compatibility there. Of course there is some
concern this signature data will show up incorrectly on clients that
are so ancient they don't understand the "encoding" header, but those
clients are already very ancient, and already do not process encoding
correctly.

-- 
Shawn.

^ permalink raw reply

* git remote doesn't show remotes from .git/remotes
From: Kirill Likhodedov @ 2011-10-06 15:33 UTC (permalink / raw)
  To: git

Hello,

It seems that 'git remote' doesn't display remotes registered not in .git/config but in .git/remotes/.
Is it a bug?

# git version
git version 1.7.6
# git remote
origin
# ls .git/remotes
test
# git fetch test         // succeeds

Btw, are there advantages in using .git/remotes/ instead of .git/config ? 
If not, are there plans to remove .git/remotes/ support in future versions?

Thanks.

----------------------------------
Kirill Likhodedov
JetBrains, Inc
http://www.jetbrains.com
"Develop with pleasure!"

^ permalink raw reply

* Re: Merge seems to overwrite unstaged local changes
From: Jakub Narebski @ 2011-10-06 15:35 UTC (permalink / raw)
  To: Sebastian Schuberth; +Cc: Junio C Hamano, git
In-Reply-To: <4E8DACCC.2050302@gmail.com>

Sebastian Schuberth <sschuberth@gmail.com> writes:
> On 29.09.2011 15:07, Sebastian Schuberth wrote:
> 
>>> There recently have been quite a change in merge-recursive implementation
>>> and it would be really nice if you can try this again with the tip of
>>> 'master' before 1.7.7 final ships.
>>
>> The unstaged changes do not seem to get lost during the merge anymore
>> when using git version 1.7.7.rc3.4.g8d714 on Linux. I guess that
>> somewhat confirms that there's a bug in git<  1.7.7. I'll write a word
>> of warning to our in-house git users that they should always commit
>> before merging ...
> 
> It seems I'm not the only one who lost code due to this bug. For a
> more detailed analysis see this blog post:
> 
> http://benno.id.au/blog/2011/10/01/git-recursive-merge-broken
> 
> As it turns out, my use case also involves a rename of the file in
> which changes were lost. And just like for the blog's author it
> somewhat concerns me and shakes my confidence in Git for how long this
> severe bug slipped through undetected.

Khmmm... perhaps Mercurial was right in its transaction-based
atomicity (that allows to safely merge even if there are local
changes; not theough that I have doubts if it is sane behavior)
;-)

I wonder if it would be possible to get some Comp. Sci. student, or
graduate, or postdoc, to analyse formally the recursive merge
algorithm.  And to *prove* that it is correct (or not).

-- 
Jakub Narębski

^ permalink raw reply

* Re: What's cooking in git.git (Oct 2011, #01; Tue, 4)
From: Pascal Obry @ 2011-10-06 14:50 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git
In-Reply-To: <7vvcs49ofl.fsf@alter.siamese.dyndns.org>

Le 05/10/2011 04:12, Junio C Hamano a écrit :
> --------------------------------------------------
> [Stalled]

> * po/cygwin-backslash (2011-08-05) 2 commits
>   - On Cygwin support both UNIX and DOS style path-names
>   - git-compat-util: add generic find_last_dir_sep that respects is_dir_sep
>
> Incomplete with respect to backslash processing in prefix_filename(), and
> also loses the ability to escape glob specials.
> Will discard.

Sorry but this is letting best be enemy of good! But frankly I've lost 
all interest at making Cygwin/Git behave better.

Pascal.

-- 

--|------------------------------------------------------
--| Pascal Obry                           Team-Ada Member
--| 45, rue Gabriel Peri - 78114 Magny Les Hameaux FRANCE
--|------------------------------------------------------
--|    http://www.obry.net  -  http://v2p.fr.eu.org
--| "The best way to travel is by means of imagination"
--|
--| gpg --keyserver keys.gnupg.net --recv-key F949BD3B

^ permalink raw reply

* Re: [RFC/PATCH] Add multiple workdir support to branch/checkout
From: Jeff King @ 2011-10-06 14:42 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Jay Soffian, Nguyen Thai Ngoc Duy, git
In-Reply-To: <7vzkhf713u.fsf@alter.siamese.dyndns.org>

On Wed, Oct 05, 2011 at 11:19:17AM -0700, Junio C Hamano wrote:

> Jay Soffian <jaysoffian@gmail.com> writes:
> 
> > Git has survived w/o needing to lock branches till now.
> 
> Careful. Git has survived without your patch series till now, as people
> learned to be careful when they use separate workdirs and avoid certain
> things, to the point that they are not necessarily aware that they are
> avoiding them (one good practice is to keep the HEADs of non-primary
> workdirs detached).
> 
> Does that mean what your patch aims to do is unnecessary? I think not.

It seems to me that things like receive.denyCurrentBranch and
receive.denyDeleteCurrent are just special hand-rolled versions of the
same concept.

Could they be implemented using this kind of branch locking? Moreover, I
think they would need to be to cope with new-workdir, as the definition
of "current" stops being "referenced by HEAD", but becomes much larger.

-Peff

^ permalink raw reply

* Re: [PATCH WIP 0/3] git log --exclude
From: Jeff King @ 2011-10-06 14:34 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Nguyễn Thái Ngọc Duy, git, Jonathan Nieder
In-Reply-To: <7vhb3n8ie9.fsf@alter.siamese.dyndns.org>

On Wed, Oct 05, 2011 at 10:20:30AM -0700, Junio C Hamano wrote:

> The way I envisioned the narrow cloning would work like this [*1*]:
> 
>  * The narrowed set of paths is an attribute of the local repository. It
>    is not tied to the history nor the current working tree state, so the
>    information does not live in the index or in the history. A new file
>    $GIT_DIR/narrowed-paths specifies a list of pathspecs. We call a
>    repository with such a file "a narrowed repository".
> 
>  * The objects that live in a narrowed repository are subset of the
>    objects in an unnarrowed repository that records the same
>    history. Objects are not modified in any way when transferring into
>    a narrowed repository. E.g. if you clone git.git but limit the tree to
>    Documentation/ and builtin/, you will get _all_ commit objects, even
>    the ones that do _not_ touch these two directories, and the top level
>    tree objects. These top level tree objects _do_ record the object names
>    for paths outside the narrowed area. To facilitate local history
>    traversal, we may add either grafts or replace entries to "gather" away
>    commits that do not touch the narrowed area, but this is not essential.

I'm really just a bystander on this topic, and haven't given it too much
thought. But one stumbling block I see for narrow clone is how narrow
repositories will interact with object transfer from other repositories.

For example, if I have a narrow git.git that omits Documentation, and I
do a "git fetch" from a non-narrow repository, then how do we tell the
non-narrow remote that we don't have blobs in Documentation, and that
they should not be used as delta bases for any objects that are sent?

The current protocol relies on certain repository properties on the
remote end that narrow clone will violate. I don't see a way around that
without a protocol extension to communicate the narrowness. What will
that extension look like?

-Peff

^ permalink raw reply

* [PATCH] revert.c: defer writing CHERRY_PICK_HEAD till it is safe to do so
From: Jay Soffian @ 2011-10-06 14:34 UTC (permalink / raw)
  To: git, Junio C Hamano; +Cc: Jay Soffian, nicolas.dichtel, Jeff King

do_pick_commit() writes out CHERRY_PICK_HEAD before invoking merge (either
via do_recursive_merge() or try_merge_command()) on the assumption that if
the merge fails it is due to conflict. However, if the tree is dirty, the
merge may not even start, aborting before do_pick_commit() can remove
CHERRY_PICK_HEAD.

Instead, defer writing CHERRY_PICK_HEAD till after merge has returned.
At this point we know the merge has either succeeded or failed due
to conflict. In either case, we want CHERRY_PICK_HEAD to be written
so that it may be picked up by the subsequent invocation of commit.

Signed-off-by: Jay Soffian <jaysoffian@gmail.com>
---
 builtin/revert.c                |    6 +++++-
 t/t3507-cherry-pick-conflict.sh |    7 +++++++
 2 files changed, 12 insertions(+), 1 deletions(-)

diff --git a/builtin/revert.c b/builtin/revert.c
index 3117776c2c..48526e1ef1 100644
--- a/builtin/revert.c
+++ b/builtin/revert.c
@@ -384,6 +384,7 @@ static int do_pick_commit(void)
 	char *defmsg = NULL;
 	struct strbuf msgbuf = STRBUF_INIT;
 	int res;
+	int record_cherry_pick_head = 0;
 
 	if (no_commit) {
 		/*
@@ -477,7 +478,7 @@ static int do_pick_commit(void)
 			strbuf_addstr(&msgbuf, ")\n");
 		}
 		if (!no_commit)
-			write_cherry_pick_head();
+			record_cherry_pick_head = 1;
 	}
 
 	if (!strategy || !strcmp(strategy, "recursive") || action == REVERT) {
@@ -498,6 +499,9 @@ static int do_pick_commit(void)
 		free_commit_list(remotes);
 	}
 
+	if (record_cherry_pick_head)
+		write_cherry_pick_head();
+
 	if (res) {
 		error(action == REVERT
 		      ? _("could not revert %s... %s")
diff --git a/t/t3507-cherry-pick-conflict.sh b/t/t3507-cherry-pick-conflict.sh
index 212ec54aaf..29890bf5ac 100755
--- a/t/t3507-cherry-pick-conflict.sh
+++ b/t/t3507-cherry-pick-conflict.sh
@@ -77,6 +77,13 @@ test_expect_success 'cherry-pick --no-commit does not set CHERRY_PICK_HEAD' '
 	test_must_fail git rev-parse --verify CHERRY_PICK_HEAD
 '
 
+test_expect_success 'cherry-pick w/dirty tree does not set CHERRY_PICK_HEAD' '
+	pristine_detach initial &&
+	echo foo > foo &&
+	test_must_fail git cherry-pick base &&
+	test_must_fail git rev-parse --verify CHERRY_PICK_HEAD
+'
+
 test_expect_success 'GIT_CHERRY_PICK_HELP suppresses CHERRY_PICK_HEAD' '
 	pristine_detach initial &&
 	(
-- 
1.7.7.6.g25c34

^ permalink raw reply related

* Re: List of directories containing only ignored files
From: Marcin Wiśnicki @ 2011-10-06 14:32 UTC (permalink / raw)
  To: git
In-Reply-To: <4E8DB548.5030108@op5.se>

2011/10/6 Andreas Ericsson <ae@op5.se>:
> On 10/06/2011 03:56 PM, Marcin Wiśnicki wrote:
>>
>> Alternatively: how to remove such leftovers when switching branches ?
>
> git clean -X -f
>

That also would delete /project1/obj/foo.o which is not what I wanted.

Anyway I think I have found the answer:

 git ls-files -o --directory --exclude-standard

^ permalink raw reply

* Re: List of directories containing only ignored files
From: Andreas Ericsson @ 2011-10-06 14:03 UTC (permalink / raw)
  To: Marcin Wiśnicki; +Cc: git
In-Reply-To: <CAC9GOO__nN9W1vvoMxq2LKnn=YoFPjTE6jKPbQ2h7im3JtujQA@mail.gmail.com>

On 10/06/2011 03:56 PM, Marcin Wiśnicki wrote:
>
> Alternatively: how to remove such leftovers when switching branches ?

git clean -X -f

-- 
Andreas Ericsson                   andreas.ericsson@op5.se
OP5 AB                             www.op5.se
Tel: +46 8-230225                  Fax: +46 8-230231

Considering the successes of the wars on alcohol, poverty, drugs and
terror, I think we should give some serious thought to declaring war
on peace.

^ permalink raw reply

* List of directories containing only ignored files
From: Marcin Wiśnicki @ 2011-10-06 13:56 UTC (permalink / raw)
  To: git

What's the easiest way to find all directories that contain only
ignored files but are not ignored themselves.

For example, given following .gitignore:

  obj/

and following files:

  /project1/foo.c
  /project1/obj/foo.o
  /project2/obj/bar.o

Where /project2 contains leftovers from different branch that are not
cleared on switch because they are ignored.

I would like to see:

  /project2


Alternatively: how to remove such leftovers when switching branches ?

^ permalink raw reply

* Re: git-cherry-pick and git-commit --amend in version 1.7.6.4
From: Jay Soffian @ 2011-10-06 13:44 UTC (permalink / raw)
  To: nicolas.dichtel; +Cc: git, Junio C Hamano, Jeff King
In-Reply-To: <4E8DABB0.4090206@6wind.com>

On Thu, Oct 6, 2011 at 9:22 AM, Nicolas Dichtel
<nicolas.dichtel@6wind.com> wrote:
> Here is the output:
> # GIT_TRACE=1 git cherry-pick 3f78d1f210ff89af77f042ab7f4a8fee39feb1c9
> trace: built-in: git 'cherry-pick'
> '3f78d1f210ff89af77f042ab7f4a8fee39feb1c9'
> trace: run_command: 'commit' '-n' '-F' '.git/MERGE_MSG'
> trace: exec: 'git' 'commit' '-n' '-F' '.git/MERGE_MSG'
> setup: git_dir: .git
> setup: worktree: /home/dichtel/DEV/linux-2.6
> setup: cwd: /home/dichtel/DEV/linux-2.6
> setup: prefix: (null)
> trace: built-in: git 'commit' '-n' '-F' '.git/MERGE_MSG'

I have a theory that determine_whence() inside commit.c isn't finding
.git/CHERRY_PICK_HEAD:

	else if (file_exists(git_path("CHERRY_PICK_HEAD")))
		whence = FROM_CHERRY_PICK;

That would cause the mis-attributed cherry-picked commit. commit.c is
also responsible for removing CHERRY_PICK_HEAD, which is not happening
correctly:

	unlink(git_path("CHERRY_PICK_HEAD"));

Maybe git_path("CHERRY_PICK_HEAD") is returning something unexpected.
But the trace output looks fine.

Aside, I'm a little confused by the "setup:" output appearing above.
In 1.7.5 and later, it requires setting GIT_TRACE_SETUP=1 to appear,
but you reported you're having this problem with 1.7.6.4.

j.

^ permalink raw reply

* Re: [RFC/PATCH] remote-curl: Obey passed URL
From: Jeff King @ 2011-10-06 13:37 UTC (permalink / raw)
  To: Michael J Gruber; +Cc: git, Shawn O. Pearce, Tay Ray Chuan
In-Reply-To: <20111006132500.GA1792@sigill.intra.peff.net>

On Thu, Oct 06, 2011 at 09:25:00AM -0400, Jeff King wrote:

> Your analysis is correct, but tweaking the remote object seems kind
> of ugly. I think a nicer solution would be to pass the URL in
> separately to http_init. Of the three existing callers:

Here's what that patch looks like. It's definitely an improvement and
fixes a real bug, so it may be worth applying. But I'm still going to
look into pushing the url examination closer to the point of use.

-- >8 --
Subject: [PATCH] http_init: accept separate URL parameter

The http_init function takes a "struct remote". Part of its
initialization procedure is to look at the remote's url and
grab some auth-related parameters. However, using the url
included in the remote is:

  - wrong; the remote-curl helper may have a separate,
    unrelated URL (e.g., from remote.*.pushurl). Looking at
    the remote's configured url is incorrect.

  - incomplete; http-fetch doesn't have a remote, so passes
    NULL. So http_init never gets to see the URL we are
    actually going to use.

  - cumbersome; http-push has a similar problem to
    http-fetch, but actually builds a fake remote just to
    pass in the URL.

Instead, let's just add a separate URL parameter to
http_init, and all three callsites can pass in the
appropriate information.

Signed-off-by: Jeff King <peff@peff.net>
---
 http-fetch.c  |    2 +-
 http-push.c   |   10 +---------
 http.c        |    8 ++++----
 http.h        |    2 +-
 remote-curl.c |    2 +-
 5 files changed, 8 insertions(+), 16 deletions(-)

diff --git a/http-fetch.c b/http-fetch.c
index 3af4c71..e341872 100644
--- a/http-fetch.c
+++ b/http-fetch.c
@@ -63,7 +63,7 @@ int main(int argc, const char **argv)
 
 	git_config(git_default_config, NULL);
 
-	http_init(NULL);
+	http_init(NULL, url);
 	walker = get_http_walker(url);
 	walker->get_tree = get_tree;
 	walker->get_history = get_history;
diff --git a/http-push.c b/http-push.c
index 376331a..215b640 100644
--- a/http-push.c
+++ b/http-push.c
@@ -1747,7 +1747,6 @@ int main(int argc, char **argv)
 	int i;
 	int new_refs;
 	struct ref *ref, *local_refs;
-	struct remote *remote;
 
 	git_extract_argv0_path(argv[0]);
 
@@ -1821,14 +1820,7 @@ int main(int argc, char **argv)
 
 	memset(remote_dir_exists, -1, 256);
 
-	/*
-	 * Create a minimum remote by hand to give to http_init(),
-	 * primarily to allow it to look at the URL.
-	 */
-	remote = xcalloc(sizeof(*remote), 1);
-	ALLOC_GROW(remote->url, remote->url_nr + 1, remote->url_alloc);
-	remote->url[remote->url_nr++] = repo->url;
-	http_init(remote);
+	http_init(NULL, repo->url);
 
 #ifdef USE_CURL_MULTI
 	is_running_queue = 0;
diff --git a/http.c b/http.c
index b2ae8de..d9f9938 100644
--- a/http.c
+++ b/http.c
@@ -357,7 +357,7 @@ static void set_from_env(const char **var, const char *envname)
 		*var = val;
 }
 
-void http_init(struct remote *remote)
+void http_init(struct remote *remote, const char *url)
 {
 	char *low_speed_limit;
 	char *low_speed_time;
@@ -421,11 +421,11 @@ void http_init(struct remote *remote)
 	if (getenv("GIT_CURL_FTP_NO_EPSV"))
 		curl_ftp_no_epsv = 1;
 
-	if (remote && remote->url && remote->url[0]) {
-		http_auth_init(remote->url[0]);
+	if (url) {
+		http_auth_init(url);
 		if (!ssl_cert_password_required &&
 		    getenv("GIT_SSL_CERT_PASSWORD_PROTECTED") &&
-		    !prefixcmp(remote->url[0], "https://"))
+		    !prefixcmp(url, "https://"))
 			ssl_cert_password_required = 1;
 	}
 
diff --git a/http.h b/http.h
index 0bf8592..3c332a9 100644
--- a/http.h
+++ b/http.h
@@ -86,7 +86,7 @@ struct buffer {
 extern void step_active_slots(void);
 #endif
 
-extern void http_init(struct remote *remote);
+extern void http_init(struct remote *remote, const char *url);
 extern void http_cleanup(void);
 
 extern int data_received;
diff --git a/remote-curl.c b/remote-curl.c
index 69831e9..33d3d8c 100644
--- a/remote-curl.c
+++ b/remote-curl.c
@@ -852,7 +852,7 @@ int main(int argc, const char **argv)
 
 	url = strbuf_detach(&buf, NULL);
 
-	http_init(remote);
+	http_init(remote, url);
 
 	do {
 		if (strbuf_getline(&buf, stdin, '\n') == EOF)
-- 
1.7.7.37.g0e376

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox