Git development
 help / color / mirror / Atom feed
* [PATCH 2/2] bundle: add parse_bundle_header() helper function
From: Junio C Hamano @ 2011-10-13 22:35 UTC (permalink / raw)
  To: git; +Cc: Shawn O. Pearce, Phil Hord
In-Reply-To: <7vpqi034l0.fsf@alter.siamese.dyndns.org>

Move most of the code from read_bundle_header() to parse_bundle_header()
that takes a file descriptor that is already opened for reading, and make
the former responsible only for opening the file and noticing errors.

As a logical consequence of this, is_bundle() helper function can be
implemented as a non-complaining variant of read_bundle_header() that
does not return an open file descriptor, and can be used to tighten
the check used to decide the use of bundle transport in transport.c

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---

 * It generally is a bad practice to base a non-RFC patch on an RFC one,
   but in any case here is how I would do the is_bundle() helper.

 bundle.c    |   39 +++++++++++++++++++++++++++++++--------
 bundle.h    |    1 +
 transport.c |    2 +-
 3 files changed, 33 insertions(+), 9 deletions(-)

diff --git a/bundle.c b/bundle.c
index 3aa715c..105b005 100644
--- a/bundle.c
+++ b/bundle.c
@@ -40,19 +40,18 @@ static int strbuf_readline_fd(struct strbuf *sb, int fd)
 	return 0;
 }
 
-int read_bundle_header(const char *path, struct bundle_header *header)
+static int parse_bundle_header(int fd, struct bundle_header *header,
+			       const char *report_path)
 {
 	struct strbuf buf = STRBUF_INIT;
-	int fd = open(path, O_RDONLY);
 	int status = 0;
 
-	if (fd < 0)
-		return error("could not open '%s'", path);
-
 	/* The bundle header begins with the signature */
 	if (strbuf_readline_fd(&buf, fd) ||
 	    strcmp(buf.buf, bundle_signature)) {
-		error("'%s' does not look like a v2 bundle file", path);
+		if (report_path)
+			error("'%s' does not look like a v2 bundle file",
+			      report_path);
 		status = -1;
 		goto abort;
 	}
@@ -77,8 +76,9 @@ int read_bundle_header(const char *path, struct bundle_header *header)
 		if (get_sha1_hex(buf.buf, sha1) ||
 		    (40 <= buf.len && !isspace(buf.buf[40])) ||
 		    (!is_prereq && buf.len <= 40)) {
-			error("unrecognized header: %s%s (%d)",
-			      (is_prereq ? "-" : ""), buf.buf, (int)buf.len);
+			if (report_path)
+				error("unrecognized header: %s%s (%d)",
+				      (is_prereq ? "-" : ""), buf.buf, (int)buf.len);
 			status = -1;
 			break;
 		} else {
@@ -98,6 +98,29 @@ int read_bundle_header(const char *path, struct bundle_header *header)
 	return fd;
 }
 
+int read_bundle_header(const char *path, struct bundle_header *header)
+{
+	int fd = open(path, O_RDONLY);
+
+	if (fd < 0)
+		return error("could not open '%s'", path);
+	return parse_bundle_header(fd, header, path);
+}
+
+int is_bundle(const char *path, int quiet)
+{
+	struct bundle_header header;
+	int fd = open(path, O_RDONLY);
+
+	if (fd < 0)
+		return 0;
+	memset(&header, 0, sizeof(header));
+	fd = parse_bundle_header(fd, &header, quiet ? NULL : path);
+	if (fd >= 0)
+		close(fd);
+	return (fd >= 0);
+}
+
 static int list_refs(struct ref_list *r, int argc, const char **argv)
 {
 	int i;
diff --git a/bundle.h b/bundle.h
index e2aedd6..c6228e2 100644
--- a/bundle.h
+++ b/bundle.h
@@ -14,6 +14,7 @@ struct bundle_header {
 	struct ref_list references;
 };
 
+int is_bundle(const char *path, int quiet);
 int read_bundle_header(const char *path, struct bundle_header *header);
 int create_bundle(struct bundle_header *header, const char *path,
 		int argc, const char **argv);
diff --git a/transport.c b/transport.c
index c9c8056..84d6480 100644
--- a/transport.c
+++ b/transport.c
@@ -913,7 +913,7 @@ struct transport *transport_get(struct remote *remote, const char *url)
 		ret->fetch = fetch_objs_via_rsync;
 		ret->push = rsync_transport_push;
 		ret->smart_options = NULL;
-	} else if (is_local(url) && is_file(url)) {
+	} else if (is_local(url) && is_file(url) && is_bundle(url, 1)) {
 		struct bundle_transport_data *data = xcalloc(1, sizeof(*data));
 		ret->data = data;
 		ret->get_refs_list = get_refs_from_bundle;
-- 
1.7.7.289.gd0d4bb

^ permalink raw reply related

* Re: [PATCH] t1402-check-ref-format: skip tests of refs beginning with slash on Windows
From: Junio C Hamano @ 2011-10-13 23:00 UTC (permalink / raw)
  To: Johannes Sixt
  Cc: mhagger, git, Jeff King, Drew Northup, Jakub Narebski,
	Heiko Voigt, Johan Herland, Julian Phillips
In-Reply-To: <4E969BFC.50706@viscovery.net>

Johannes Sixt <j.sixt@viscovery.net> writes:

> This patch is needed on top of mh/check-ref-format-3, or it could be
> inserted in front of this batch (which probably amounts to the same
> thing :-)

How about applying directly on 'master' then?

> +invalid_ref NOT_MINGW '/'
> ...
> @@ -155,21 +155,21 @@ test_expect_success 'check-ref-format --branch from subdir' '
>  '
>  
>  valid_ref_normalized() {
> -	test_expect_success "ref name '$1' simplifies to '$2'" "
> +	test_expect_success $3 "ref name '$1' simplifies to '$2'" "
>  		refname=\$(git check-ref-format --normalize '$1') &&
>  		test \"\$refname\" = '$2'"
>  }
>  invalid_ref_normalized() {
> -	test_expect_success "check-ref-format --normalize rejects '$1'" "
> +	test_expect_success $2 "check-ref-format --normalize rejects '$1'" "
>  		test_must_fail git check-ref-format --normalize '$1'"
>  }
> ...
> +valid_ref_normalized '/heads/foo' 'heads/foo' NOT_MINGW

The inconsistencies strikes me a bit.

Perhaps update to something like this?

	valid_ref_normalized () {
		if test $# = 3
                then
			prereq=$1
                        shift
		fi
                test_expect_success $prereq "ref name '$1' simplifies to '$2'"
		...
	}

^ permalink raw reply

* Re: [PATCH] t1402-check-ref-format: skip tests of refs beginning with slash on Windows
From: Junio C Hamano @ 2011-10-13 23:07 UTC (permalink / raw)
  To: Johannes Sixt
  Cc: mhagger, git, Jeff King, Drew Northup, Jakub Narebski,
	Heiko Voigt, Johan Herland, Julian Phillips
In-Reply-To: <7vfwiw33bf.fsf@alter.siamese.dyndns.org>

Bash on Windows converts program arguments that look like absolute POSIX
paths to their Windows form, i.e., drive-letter-colon format. For this
reason, those tests in t1402 that check refs that begin with a slash do not
work as expected on Windows: valid_ref tests are doomed to fail, and
invalid_ref tests fail for the wrong reason (that there is a colon rather
than that they begin with a slash).

Skip these tests.

Signed-off-by: Johannes Sixt <j6t@kdbg.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---

Junio C Hamano <gitster@pobox.com> writes:

>> +invalid_ref NOT_MINGW '/'
>> ...
>> +valid_ref_normalized '/heads/foo' 'heads/foo' NOT_MINGW
>
> The inconsistencies strikes me a bit.

Here is what I tentatively will queue.

The interdiff from yours looks like this:

    diff --git a/t/t1402-check-ref-format.sh b/t/t1402-check-ref-format.sh
    index dba5e97..1ae4d87 100755
    --- a/t/t1402-check-ref-format.sh
    +++ b/t/t1402-check-ref-format.sh
    @@ -11,8 +11,9 @@ valid_ref() {
                    prereq=$1
                    shift
            esac
    -	test_expect_success $prereq "ref name '$1' is valid${2:+ with options $2}" \
    -			"git check-ref-format $2 '$1'"
    +	test_expect_success $prereq "ref name '$1' is valid${2:+ with options $2}" "
    +		git check-ref-format $2 '$1'
    +	"
     }
     invalid_ref() {
            prereq=
    @@ -21,8 +22,9 @@ invalid_ref() {
                    prereq=$1
                    shift
            esac
    -	test_expect_success $prereq "ref name '$1' is invalid${2:+ with options $2}" \
    -			"test_must_fail git check-ref-format $2 '$1'"
    +	test_expect_success $prereq "ref name '$1' is invalid${2:+ with options $2}" "
    +		test_must_fail git check-ref-format $2 '$1'
    +	"
     }

     invalid_ref ''
    @@ -155,21 +157,35 @@ test_expect_success 'check-ref-format --branch from subdir' '
     '

     valid_ref_normalized() {
    -	test_expect_success $3 "ref name '$1' simplifies to '$2'" "
    +	prereq=
    +	case $1 in
    +	[A-Z]*)
    +		prereq=$1
    +		shift
    +	esac
    +	test_expect_success $prereq "ref name '$1' simplifies to '$2'" "
                    refname=\$(git check-ref-format --normalize '$1') &&
    -		test \"\$refname\" = '$2'"
    +		test \"\$refname\" = '$2'
    +	"
     }
     invalid_ref_normalized() {
    -	test_expect_success $2 "check-ref-format --normalize rejects '$1'" "
    -		test_must_fail git check-ref-format --normalize '$1'"
    +	prereq=
    +	case $1 in
    +	[A-Z]*)
    +		prereq=$1
    +		shift
    +	esac
    +	test_expect_success $prereq "check-ref-format --normalize rejects '$1'" "
    +		test_must_fail git check-ref-format --normalize '$1'
    +	"
     }

     valid_ref_normalized 'heads/foo' 'heads/foo'
     valid_ref_normalized 'refs///heads/foo' 'refs/heads/foo'
    -valid_ref_normalized '/heads/foo' 'heads/foo' NOT_MINGW
    +valid_ref_normalized NOT_MINGW '/heads/foo' 'heads/foo'
     valid_ref_normalized '///heads/foo' 'heads/foo'
     invalid_ref_normalized 'foo'
    -invalid_ref_normalized '/foo' NOT_MINGW
    +invalid_ref_normalized NOT_MINGW '/foo'
     invalid_ref_normalized 'heads/foo/../bar'
     invalid_ref_normalized 'heads/./foo'
     invalid_ref_normalized 'heads\foo'

Thanks.

 t/t1402-check-ref-format.sh |   88 +++++++++++++++++++++++++-----------------
 1 files changed, 52 insertions(+), 36 deletions(-)

diff --git a/t/t1402-check-ref-format.sh b/t/t1402-check-ref-format.sh
index 710fcca..1ae4d87 100755
--- a/t/t1402-check-ref-format.sh
+++ b/t/t1402-check-ref-format.sh
@@ -5,38 +5,40 @@ test_description='Test git check-ref-format'
 . ./test-lib.sh
 
 valid_ref() {
-	if test "$#" = 1
-	then
-		test_expect_success "ref name '$1' is valid" \
-			"git check-ref-format '$1'"
-	else
-		test_expect_success "ref name '$1' is valid with options $2" \
-			"git check-ref-format $2 '$1'"
-	fi
+	prereq=
+	case $1 in
+	[A-Z]*)
+		prereq=$1
+		shift
+	esac
+	test_expect_success $prereq "ref name '$1' is valid${2:+ with options $2}" "
+		git check-ref-format $2 '$1'
+	"
 }
 invalid_ref() {
-	if test "$#" = 1
-	then
-		test_expect_success "ref name '$1' is invalid" \
-			"test_must_fail git check-ref-format '$1'"
-	else
-		test_expect_success "ref name '$1' is invalid with options $2" \
-			"test_must_fail git check-ref-format $2 '$1'"
-	fi
+	prereq=
+	case $1 in
+	[A-Z]*)
+		prereq=$1
+		shift
+	esac
+	test_expect_success $prereq "ref name '$1' is invalid${2:+ with options $2}" "
+		test_must_fail git check-ref-format $2 '$1'
+	"
 }
 
 invalid_ref ''
-invalid_ref '/'
-invalid_ref '/' --allow-onelevel
-invalid_ref '/' --normalize
-invalid_ref '/' '--allow-onelevel --normalize'
+invalid_ref NOT_MINGW '/'
+invalid_ref NOT_MINGW '/' --allow-onelevel
+invalid_ref NOT_MINGW '/' --normalize
+invalid_ref NOT_MINGW '/' '--allow-onelevel --normalize'
 valid_ref 'foo/bar/baz'
 valid_ref 'foo/bar/baz' --normalize
 invalid_ref 'refs///heads/foo'
 valid_ref 'refs///heads/foo' --normalize
 invalid_ref 'heads/foo/'
-invalid_ref '/heads/foo'
-valid_ref '/heads/foo' --normalize
+invalid_ref NOT_MINGW '/heads/foo'
+valid_ref NOT_MINGW '/heads/foo' --normalize
 invalid_ref '///heads/foo'
 valid_ref '///heads/foo' --normalize
 invalid_ref './foo'
@@ -115,14 +117,14 @@ invalid_ref "$ref" --refspec-pattern
 invalid_ref "$ref" '--refspec-pattern --allow-onelevel'
 
 ref='/foo'
-invalid_ref "$ref"
-invalid_ref "$ref" --allow-onelevel
-invalid_ref "$ref" --refspec-pattern
-invalid_ref "$ref" '--refspec-pattern --allow-onelevel'
-invalid_ref "$ref" --normalize
-valid_ref "$ref" '--allow-onelevel --normalize'
-invalid_ref "$ref" '--refspec-pattern --normalize'
-valid_ref "$ref" '--refspec-pattern --allow-onelevel --normalize'
+invalid_ref NOT_MINGW "$ref"
+invalid_ref NOT_MINGW "$ref" --allow-onelevel
+invalid_ref NOT_MINGW "$ref" --refspec-pattern
+invalid_ref NOT_MINGW "$ref" '--refspec-pattern --allow-onelevel'
+invalid_ref NOT_MINGW "$ref" --normalize
+valid_ref NOT_MINGW "$ref" '--allow-onelevel --normalize'
+invalid_ref NOT_MINGW "$ref" '--refspec-pattern --normalize'
+valid_ref NOT_MINGW "$ref" '--refspec-pattern --allow-onelevel --normalize'
 
 test_expect_success "check-ref-format --branch @{-1}" '
 	T=$(git write-tree) &&
@@ -155,21 +157,35 @@ test_expect_success 'check-ref-format --branch from subdir' '
 '
 
 valid_ref_normalized() {
-	test_expect_success "ref name '$1' simplifies to '$2'" "
+	prereq=
+	case $1 in
+	[A-Z]*)
+		prereq=$1
+		shift
+	esac
+	test_expect_success $prereq "ref name '$1' simplifies to '$2'" "
 		refname=\$(git check-ref-format --normalize '$1') &&
-		test \"\$refname\" = '$2'"
+		test \"\$refname\" = '$2'
+	"
 }
 invalid_ref_normalized() {
-	test_expect_success "check-ref-format --normalize rejects '$1'" "
-		test_must_fail git check-ref-format --normalize '$1'"
+	prereq=
+	case $1 in
+	[A-Z]*)
+		prereq=$1
+		shift
+	esac
+	test_expect_success $prereq "check-ref-format --normalize rejects '$1'" "
+		test_must_fail git check-ref-format --normalize '$1'
+	"
 }
 
 valid_ref_normalized 'heads/foo' 'heads/foo'
 valid_ref_normalized 'refs///heads/foo' 'refs/heads/foo'
-valid_ref_normalized '/heads/foo' 'heads/foo'
+valid_ref_normalized NOT_MINGW '/heads/foo' 'heads/foo'
 valid_ref_normalized '///heads/foo' 'heads/foo'
 invalid_ref_normalized 'foo'
-invalid_ref_normalized '/foo'
+invalid_ref_normalized NOT_MINGW '/foo'
 invalid_ref_normalized 'heads/foo/../bar'
 invalid_ref_normalized 'heads/./foo'
 invalid_ref_normalized 'heads\foo'
-- 
1.7.7.289.gd0d4bb

^ permalink raw reply related

* [PATCH] pack-objects: protect against disappearing packs
From: Jeff King @ 2011-10-14  1:23 UTC (permalink / raw)
  To: git; +Cc: git-dev, Shawn O. Pearce, Nicolas Pitre

It's possible that while pack-objects is running, a
simultaneously running prune process might delete a pack
that we are interested in. Because we load the pack indices
early on, we know that the pack contains our item, but by
the time we try to open and map it, it is gone.

Since c715f78, we already protect against this in the normal
object access code path, but pack-objects accesses the packs
at a lower level.  In the normal access path, we call
find_pack_entry, which will call find_pack_entry_one on each
pack index, which does the actual lookup. If it gets a hit,
we will actually open and verify the validity of the
matching packfile (using c715f78's is_pack_valid). If we
can't open it, we'll issue a warning and pretend that we
didn't find it, causing us to go on to the next pack (or on
to loose objects).

Furthermore, we will cache the descriptor to the opened
packfile. Which means that later, when we actually try to
access the object, we are likely to still have that packfile
opened, and won't care if it has been unlinked from the
filesystem.

Notice the "likely" above. If there is another pack access
in the interim, and we run out of descriptors, we could
close the pack. And then a later attempt to access the
closed pack could fail (we'll try to re-open it, of course,
but it may have been deleted). In practice, this doesn't
happen because we tend to look up items and then access them
immediately.

Pack-objects does not follow this code path. Instead, it
accesses the packs at a much lower level, using
find_pack_entry_one directly. This means we skip the
is_pack_valid check, and may end up with the name of a
packfile, but no open descriptor.

We can add the same is_pack_valid check here. Unfortunately,
the access patterns of pack-objects are not quite as nice
for keeping lookup and object access together. We look up
each object as we find out about it, and the only later when
writing the packfile do we necessarily access it. Which
means that the opened packfile may be closed in the interim.

In practice, however, adding this check still has value, for
three reasons.

  1. If you have a reasonable number of packs and/or a
     reasonable file descriptor limit, you can keep all of
     your packs open simultaneously. If this is the case,
     then the race is impossible to trigger.

  2. Even if you can't keep all packs open at once, you
     may end up keeping the deleted one open (i.e., you may
     get lucky).

  3. The race window is shortened. You may notice early that
     the pack is gone, and not try to access it. Triggering
     the problem without this check means deleting the pack
     any time after we read the list of index files, but
     before we access the looked-up objects.  Triggering it
     with this check means deleting the pack means deleting
     the pack after we do a lookup (and successfully access
     the packfile), but before we access the object. Which
     is a smaller window.
---
We're seeing this at GitHub because we prune pretty
aggressively. We let pushes go into individual repositories,
but then we kick off a job to move the resulting objects
into the repository's "network" repo, which is basically a
big alternates repository for related repos.

You can reproduce locally with:

    push() {
      (cd child &&
       echo content >>file &&
       git add file &&
       git commit -q -m "change `wc -l <file`" &&
       git push -q origin HEAD
      ) &&
      (cd network.git &&
       git fetch -q ../parent refs/*:refs/* &&
       git gc --auto
      ) &&
      (cd parent.git &&
       git repack -qadl
      )
    }

    fetch() {
      rm -rf clone$1
      git.compile clone -q file://$PWD/parent.git clone$1 2>>output$1 ||
      {
        echo >&2 FAIL FAIL FAIL
        exit 1
      }
    }

    git init --bare network.git &&
    git --git-dir=network.git config transfer.unpacklimit 1 &&
    git init --bare parent.git &&
    git --git-dir=parent.git config transfer.unpacklimit 1 &&
    echo "$PWD/network.git/objects" >parent.git/objects/info/alternates
    git clone parent.git child &&
    (for i in `seq 1 1000`; do push; done) &
    (for i in `seq 1 1000`; do fetch 1; done) &
    (for i in `seq 1 1000`; do fetch 2; done) &
    wait

One process repeatedly pushes new packs, then migrates the
objects over to the network repo. Simultaneously, two
processes are constantly cloning. After 10-20 seconds, I'll
usually get unlucky and a clone will fail (because the
remote pack-objects complains about the inaccessible
packfile).

Another option would be to open the pack at point of use, and if that
fails, try to find the object elsewhere. This throws off the rest of the
code a bit, though, as we were expecting to get it from a pack, which
went into our decision about what we should delta.

Signed-off-by: Jeff King <peff@peff.net>
---
 builtin/pack-objects.c |    4 ++++
 cache.h                |    1 +
 sha1_file.c            |    2 +-
 3 files changed, 6 insertions(+), 1 deletions(-)

diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 2b18de5..8681ccd 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -804,6 +804,10 @@ static int add_object_entry(const unsigned char *sha1, enum object_type type,
 		off_t offset = find_pack_entry_one(sha1, p);
 		if (offset) {
 			if (!found_pack) {
+				if (!is_pack_valid(p)) {
+					error("packfile %s cannot be accessed", p->pack_name);
+					continue;
+				}
 				found_offset = offset;
 				found_pack = p;
 			}
diff --git a/cache.h b/cache.h
index e39e160..495b468 100644
--- a/cache.h
+++ b/cache.h
@@ -1055,6 +1055,7 @@ struct extra_have_objects {
 extern const unsigned char *nth_packed_object_sha1(struct packed_git *, uint32_t);
 extern off_t nth_packed_object_offset(const struct packed_git *, uint32_t);
 extern off_t find_pack_entry_one(const unsigned char *, struct packed_git *);
+extern int is_pack_valid(struct packed_git *);
 extern void *unpack_entry(struct packed_git *, off_t, enum object_type *, unsigned long *);
 extern unsigned long unpack_object_header_buffer(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep);
 extern unsigned long get_size_from_delta(struct packed_git *, struct pack_window **, off_t);
diff --git a/sha1_file.c b/sha1_file.c
index 3401301..a22c5b4 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -1987,7 +1987,7 @@ off_t find_pack_entry_one(const unsigned char *sha1,
 	return 0;
 }
 
-static int is_pack_valid(struct packed_git *p)
+int is_pack_valid(struct packed_git *p)
 {
 	/* An already open pack is known to be valid. */
 	if (p->pack_fd != -1)
-- 
1.7.6.4.37.g43b58b

^ permalink raw reply related

* Re: [PATCH] pack-objects: protect against disappearing packs
From: Jeff King @ 2011-10-14  1:31 UTC (permalink / raw)
  To: git; +Cc: git-dev, Shawn O. Pearce, Nicolas Pitre
In-Reply-To: <20111014012320.GA4395@sigill.intra.peff.net>

On Thu, Oct 13, 2011 at 09:23:20PM -0400, Jeff King wrote:

> diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
> index 2b18de5..8681ccd 100644
> --- a/builtin/pack-objects.c
> +++ b/builtin/pack-objects.c
> @@ -804,6 +804,10 @@ static int add_object_entry(const unsigned char *sha1, enum object_type type,
>  		off_t offset = find_pack_entry_one(sha1, p);
>  		if (offset) {
>  			if (!found_pack) {
> +				if (!is_pack_valid(p)) {
> +					error("packfile %s cannot be accessed", p->pack_name);
> +					continue;
> +				}

This message is modeled after the one in find_pack_entry. However,
they're not really errors, since we will try to find the object
elsewhere (and generally succeed). So the messages could just go away.
Though they can also alert you to something fishy going on (like a
packfile with bad permissions). But perhaps we should downgrade them
like this:

-- >8 --
Subject: [PATCH] downgrade "packfile cannot be accessed" errors to warnings

These can happen if another process simultaneously prunes a
pack. But that is not usually an error condition, because a
properly-running prune should have repacked the object into
a new pack. So we will notice that the pack has disappeared
unexpectedly, print a message, try other packs (possibly
after re-scanning the list of packs), and find it in the new
pack.

Signed-off-by: Jeff King <peff@peff.net>
---
 builtin/pack-objects.c |    2 +-
 sha1_file.c            |    2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 8681ccd..ba3705d 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -805,7 +805,7 @@ static int add_object_entry(const unsigned char *sha1, enum object_type type,
 		if (offset) {
 			if (!found_pack) {
 				if (!is_pack_valid(p)) {
-					error("packfile %s cannot be accessed", p->pack_name);
+					warning("packfile %s cannot be accessed", p->pack_name);
 					continue;
 				}
 				found_offset = offset;
diff --git a/sha1_file.c b/sha1_file.c
index a22c5b4..27f3b9b 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -2038,7 +2038,7 @@ static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e)
 			 * was loaded!
 			 */
 			if (!is_pack_valid(p)) {
-				error("packfile %s cannot be accessed", p->pack_name);
+				warning("packfile %s cannot be accessed", p->pack_name);
 				goto next;
 			}
 			e->offset = offset;
-- 
1.7.6.4.37.g43b58b

^ permalink raw reply related

* Re: [BUG] git checkout <branch> allowed with uncommitted changes
From: Jeff King @ 2011-10-14  1:38 UTC (permalink / raw)
  To: arQon; +Cc: git
In-Reply-To: <loom.20111013T203610-130@post.gmane.org>

On Thu, Oct 13, 2011 at 06:56:14PM +0000, arQon wrote:

> I'll give a shot, though I don't know how good it'll be. Off the top of my
> head, I don't see any good way to explain the inconsistency with LOCAL CHANGES
> sometimes preventing switches and sometimes not, based on what is to the user
> an arbitrary set of rules that has nothing to do with the *current state* of
> the worktree, but rather the state of those files in prior commits.

The rules are fairly straightforward.  You are moving from branch A to
branch B. If path X is not changed going from A to B, git will not touch
it, whether or not you have local changes. If path X is changed going
from A to B, then git will refuse the checkout, and you have the option
of:

  1. checkout -f: overwrite your local changes with what's in B

  2. checkout -m: merge your changes with what's in B (using A as a
                  common ancestor)

> But sure, I'll see if I can come up with something. If nothing else,
> having the manpage at least explain what "M" means; that it can be
> potentially disastrous; and what you need to do to avoid it, would be
> a definite plus.

You keep saying things like "disastrous". Git's rules are specifically
designed to be as flexible as possible without allowing the checkout
command to cause data loss.

Do you actually have a case that causes irrecoverable data loss?

Note that I don't count "these changes were based on A, now they are
based on B" as data loss. Your file content is still completely intact,
and if you want to make them based on "A" again, you just need to
"git checkout A" again.

-Peff

^ permalink raw reply

* fatal: Not a git repository
From: T @ 2011-10-14  1:57 UTC (permalink / raw)
  To: git@vger.kernel.org

I am on RedHat Linux Server 5.7, installed git and all worked great.

Performed a "git add ." and sat back and watched it chew up GB fo data and create the .git repository, which I was going to study.  But then I decided to delete the .git repository before the commit, and redo the work after I added additional files to the filesystem.

The git application no longer works.  I have done a complete deletion of all files that I know of and a complete reinstallation and yet every time that I attempt to get git to reread the directory and recreate the repository or ANY repository on the system even while root the error message is:



[root@localhost wizdom]# git add .
fatal: Not a git repository (or any of the parent directories): .git




Any idea on how to fix this problem so that I can get on with using git?

Wizdom is a dir of management information systems related files, 29.4GB in size, containing 264380 files, for systems administration and when git had complete the add command it had create a dir /home/Object/wizdon/.git that was 26.1GB in size and with 212774 files in 256 sub-folders.


============================================
[root@localhost ~]# mkdir /tmp/git_source
[root@localhost ~]# cp /home/Object/Desktop/100511/git/git-1.7.7.tar.gz /tmp/git_source/.
[root@localhost ~]# openssl dgst -sha1 /tmp/git_source/git-1.7.7.tar.gz
SHA1(/tmp/git_source/git-1.7.7.tar.gz)= bbf85bd767ca6b7e9caa1489bb4ba7ec64e0ab35
[root@localhost ~]# cd /tmp/git_source/
[root@localhost git_source]# tar -xzf git-1.7.7.tar.gz
[root@localhost git_source]# ls
git-1.7.7  git-1.7.7.tar.gz
[root@localhost git_source]# ./configure
[root@localhost git-1.7.7]# make
[root@localhost git-1.7.7]# make install
[root@localhost git-1.7.7]# updatedb
[root@localhost git-1.7.7]# which git
/usr/local/bin/git
[root@localhost ~]# ls -al /usr/local/bin/gi*
-rwxr-xr-x 109 root root 5113079 Oct 13 13:14 /usr/local/bin/git
-rwxr-xr-x   2 root root  120939 Oct 13 13:14 /usr/local/bin/git-cvsserver
-rwxr-xr-x   1 root root  324172 Oct 13 13:14 /usr/local/bin/gitk
-rwxr-xr-x 109 root root 5113079 Oct 13 13:14 /usr/local/bin/git-receive-pack
-rwxr-xr-x   2 root root 2144914 Oct 13 13:14 /usr/local/bin/git-shell
-rwxr-xr-x 109 root root 5113079 Oct 13 13:14 /usr/local/bin/git-upload-archive
-rwxr-xr-x   2 root root 2195752 Oct 13 13:14 /usr/local/bin/git-upload-pack



Just to see what it would do, or what it does, I ran git against my systems administration collection called wizdom on a Dell T-410 with one socket in use.


[root@localhost git-1.7.7]# git --version
git version 1.7.7
[root@localhost wizdom]# git init
Initialized empty Git repository in /home/Object/Desktop/wizdom/.git/
[root@localhost wizdom]# git add .
[root@localhost wizdom]#



And in another window while "git add ." ran, I execute the top command to see what the processor had to say:

[root@localhost ~]# top
top - 16:12:43 up 1 day, 21:19,  4 users,  load average: 1.35, 0.73, 0.32
Tasks: 232 total,   2 running, 230 sleeping,   0 stopped,   0 zombie
Cpu0  :  0.0%us,  0.0%sy,  0.0%ni,100.0%id,  0.0%wa,  0.0%hi,  0.0%si,  0.0%st
Cpu1  :  0.0%us,  0.0%sy,  0.0%ni,100.0%id,  0.0%wa,  0.0%hi,  0.0%si,  0.0%st
Cpu2  :  0.7%us,  0.0%sy,  0.0%ni, 99.3%id,  0.0%wa,  0.0%hi,  0.0%si,  0.0%st
Cpu3  :  0.0%us,  2.0%sy,  0.0%ni, 46.7%id, 50.7%wa,  0.3%hi,  0.3%si,  0.0%st
Cpu4  :  0.0%us,  1.0%sy,  0.0%ni, 99.0%id,  0.0%wa,  0.0%hi,  0.0%si,  0.0%st
Cpu5  :  0.0%us,  0.0%sy,  0.0%ni,100.0%id,  0.0%wa,  0.0%hi,  0.0%si,  0.0%st
Cpu6  :  0.0%us,  0.0%sy,  0.0%ni,100.0%id,  0.0%wa,  0.0%hi,  0.0%si,  0.0%st
Cpu7  : 93.7%us,  2.7%sy,  0.0%ni,  3.3%id,  0.3%wa,  0.0%hi,  0.0%si,  0.0%st
Mem:   5968036k total,  5932372k used,    35664k free,   191688k buffers
Swap:  8159224k total,        0k used,  8159224k free,  4938104k cached

  PID USER      PR  NI  VIRT  RES  SHR S %CPU %MEM    TIME+  COMMAND
16250 root      25   0 17344 1428  792 D 96.2  0.0   1:09.41 git-fast-import
16233 root      18   0 44348  29m 1036 S  1.3  0.5   0:53.89 git
...



Thanks!

^ permalink raw reply

* Re: fatal: Not a git repository
From: BlackSwan @ 2011-10-14  2:14 UTC (permalink / raw)
  To: git
In-Reply-To: <1318557443.92591.YahooMailNeo@web114209.mail.gq1.yahoo.com>

Sorry.... I figured it out, and it was right in front of me.

[root@localhost wizdom]# git init


Thanks all!

--
View this message in context: http://git.661346.n2.nabble.com/fatal-Not-a-git-repository-tp6891065p6891093.html
Sent from the git mailing list archive at Nabble.com.

^ permalink raw reply

* Re: [PATCH] pack-objects: protect against disappearing packs
From: Nicolas Pitre @ 2011-10-14  2:42 UTC (permalink / raw)
  To: Jeff King; +Cc: git, git-dev, Shawn O. Pearce
In-Reply-To: <20111014012320.GA4395@sigill.intra.peff.net>

On Thu, 13 Oct 2011, Jeff King wrote:

> It's possible that while pack-objects is running, a
> simultaneously running prune process might delete a pack
> that we are interested in. Because we load the pack indices
> early on, we know that the pack contains our item, but by
> the time we try to open and map it, it is gone.
> 
> Since c715f78, we already protect against this in the normal
> object access code path, but pack-objects accesses the packs
> at a lower level.  In the normal access path, we call
> find_pack_entry, which will call find_pack_entry_one on each
> pack index, which does the actual lookup. If it gets a hit,
> we will actually open and verify the validity of the
> matching packfile (using c715f78's is_pack_valid). If we
> can't open it, we'll issue a warning and pretend that we
> didn't find it, causing us to go on to the next pack (or on
> to loose objects).
> 
> Furthermore, we will cache the descriptor to the opened
> packfile. Which means that later, when we actually try to
> access the object, we are likely to still have that packfile
> opened, and won't care if it has been unlinked from the
> filesystem.
> 
> Notice the "likely" above. If there is another pack access
> in the interim, and we run out of descriptors, we could
> close the pack. And then a later attempt to access the
> closed pack could fail (we'll try to re-open it, of course,
> but it may have been deleted). In practice, this doesn't
> happen because we tend to look up items and then access them
> immediately.
> 
> Pack-objects does not follow this code path. Instead, it
> accesses the packs at a much lower level, using
> find_pack_entry_one directly. This means we skip the
> is_pack_valid check, and may end up with the name of a
> packfile, but no open descriptor.
> 
> We can add the same is_pack_valid check here. Unfortunately,
> the access patterns of pack-objects are not quite as nice
> for keeping lookup and object access together. We look up
> each object as we find out about it, and the only later when
> writing the packfile do we necessarily access it. Which
> means that the opened packfile may be closed in the interim.
> 
> In practice, however, adding this check still has value, for
> three reasons.
> 
>   1. If you have a reasonable number of packs and/or a
>      reasonable file descriptor limit, you can keep all of
>      your packs open simultaneously. If this is the case,
>      then the race is impossible to trigger.
> 
>   2. Even if you can't keep all packs open at once, you
>      may end up keeping the deleted one open (i.e., you may
>      get lucky).
> 
>   3. The race window is shortened. You may notice early that
>      the pack is gone, and not try to access it. Triggering
>      the problem without this check means deleting the pack
>      any time after we read the list of index files, but
>      before we access the looked-up objects.  Triggering it
>      with this check means deleting the pack means deleting
>      the pack after we do a lookup (and successfully access
>      the packfile), but before we access the object. Which
>      is a smaller window.
> ---

you should put your SOB above that line I would think.

Acked-by: Nicolas Pitre <nico@fluxnic.net>

> We're seeing this at GitHub because we prune pretty
> aggressively. We let pushes go into individual repositories,
> but then we kick off a job to move the resulting objects
> into the repository's "network" repo, which is basically a
> big alternates repository for related repos.

While this patch certainly has value, it doesn't provide 100% 
reliability for that use case.  Maybe the github infrastructure should 
simply skip any auto-repack on push if some other object maintenance 
operation is ongoing, possibly via the pre-auto-gc hook.


Nicolas

^ permalink raw reply

* Re: [PATCH] pack-objects: protect against disappearing packs
From: Nicolas Pitre @ 2011-10-14  2:43 UTC (permalink / raw)
  To: Jeff King; +Cc: git, git-dev, Shawn O. Pearce
In-Reply-To: <20111014013130.GA7163@sigill.intra.peff.net>

On Thu, 13 Oct 2011, Jeff King wrote:

> On Thu, Oct 13, 2011 at 09:23:20PM -0400, Jeff King wrote:
> 
> > diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
> > index 2b18de5..8681ccd 100644
> > --- a/builtin/pack-objects.c
> > +++ b/builtin/pack-objects.c
> > @@ -804,6 +804,10 @@ static int add_object_entry(const unsigned char *sha1, enum object_type type,
> >  		off_t offset = find_pack_entry_one(sha1, p);
> >  		if (offset) {
> >  			if (!found_pack) {
> > +				if (!is_pack_valid(p)) {
> > +					error("packfile %s cannot be accessed", p->pack_name);
> > +					continue;
> > +				}
> 
> This message is modeled after the one in find_pack_entry. However,
> they're not really errors, since we will try to find the object
> elsewhere (and generally succeed). So the messages could just go away.
> Though they can also alert you to something fishy going on (like a
> packfile with bad permissions). But perhaps we should downgrade them
> like this:
> 
> -- >8 --
> Subject: [PATCH] downgrade "packfile cannot be accessed" errors to warnings
> 
> These can happen if another process simultaneously prunes a
> pack. But that is not usually an error condition, because a
> properly-running prune should have repacked the object into
> a new pack. So we will notice that the pack has disappeared
> unexpectedly, print a message, try other packs (possibly
> after re-scanning the list of packs), and find it in the new
> pack.
> 
> Signed-off-by: Jeff King <peff@peff.net>

Acked-by: Nicolas Pitre <nico@fluxnic.net>


> ---
>  builtin/pack-objects.c |    2 +-
>  sha1_file.c            |    2 +-
>  2 files changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
> index 8681ccd..ba3705d 100644
> --- a/builtin/pack-objects.c
> +++ b/builtin/pack-objects.c
> @@ -805,7 +805,7 @@ static int add_object_entry(const unsigned char *sha1, enum object_type type,
>  		if (offset) {
>  			if (!found_pack) {
>  				if (!is_pack_valid(p)) {
> -					error("packfile %s cannot be accessed", p->pack_name);
> +					warning("packfile %s cannot be accessed", p->pack_name);
>  					continue;
>  				}
>  				found_offset = offset;
> diff --git a/sha1_file.c b/sha1_file.c
> index a22c5b4..27f3b9b 100644
> --- a/sha1_file.c
> +++ b/sha1_file.c
> @@ -2038,7 +2038,7 @@ static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e)
>  			 * was loaded!
>  			 */
>  			if (!is_pack_valid(p)) {
> -				error("packfile %s cannot be accessed", p->pack_name);
> +				warning("packfile %s cannot be accessed", p->pack_name);
>  				goto next;
>  			}
>  			e->offset = offset;
> -- 
> 1.7.6.4.37.g43b58b
> 

^ permalink raw reply

* Re: [PATCH] daemon: return "access denied" if a service is not allowed
From: Junio C Hamano @ 2011-10-14  5:01 UTC (permalink / raw)
  To: Jeff King
  Cc: Nguyen Thai Ngoc Duy, git, Ilari Liusvaara, Johannes Sixt,
	Jonathan Nieder
In-Reply-To: <20111013182816.GA17573@sigill.intra.peff.net>

Jeff King <peff@peff.net> writes:

> So if we want to do anything, I would think it would be a hook. Except
> that we may or may not have a repo, so it would not be a hook in
> $GIT_DIR/hooks, but rather some script to be run passed on the command
> line, like:
>
>   git daemon --informative-errors=/path/to/hook

I don't think it is necessarily good to have such a variation across
hosting sites. Your "something like this" patch looked like it was giving
a reasonable level of detail, IMO.

^ permalink raw reply

* interrupting "git rebase" (Re: git rebase +)
From: Jonathan Nieder @ 2011-10-14  5:26 UTC (permalink / raw)
  To: Johannes Sixt; +Cc: Adam Piatyszek, git, Martin von Zweigbergk
In-Reply-To: <4E96E5E1.7010103@viscovery.net>

Johannes Sixt wrote:

> Hitting Ctrl-C during git-rebase results undefined behavior. git-rebase is
> a shell script and was never designed to operate in any form of atomicity.
>
> You should have let it run until it stopped.

Wait, really?  That's bad, and unlike most git commands.

> Then you could have said 'git
> rebase --abort' (if it didn't complete) or 'git reset --hard ORIG_HEAD'
> (if it completed).

If interrupting the rebase leaves the repository in a state that

	rm -fr .git/rebase-apply
	git reset --hard <appropriate commit name>

cannot recover from, I'd consider it a serious problem.

By the way, what happened to the "git rebase --abort-softly" synonym
for "rm -fr .git/rebase-apply" discussed a while ago?

^ permalink raw reply

* Re: interrupting "git rebase" (Re: git rebase +)
From: Jonathan Nieder @ 2011-10-14  5:29 UTC (permalink / raw)
  To: Johannes Sixt; +Cc: Adam Piatyszek, git, Martin von Zweigbergk
In-Reply-To: <20111014052653.GA5052@elie.hsd1.il.comcast.net>

Jonathan Nieder wrote:

> By the way, what happened to the "git rebase --abort-softly" synonym
> for "rm -fr .git/rebase-apply" discussed a while ago?

(Here's a pointer I should have included[1].  Sorry for the noise.)

[1] http://thread.gmane.org/gmane.comp.version-control.git/174655

^ permalink raw reply

* Re: [PATCH 07/14] is_refname_available(): remove the "quiet" argument
From: Michael Haggerty @ 2011-10-14  5:35 UTC (permalink / raw)
  To: Drew Northup
  Cc: Junio C Hamano, git, Jeff King, Jakub Narebski, Heiko Voigt,
	Johan Herland, Julian Phillips
In-Reply-To: <1318509685.7231.6.camel@drew-northup.unet.maine.edu>

On 10/13/2011 02:41 PM, Drew Northup wrote:
> 
> On Thu, 2011-10-13 at 09:58 +0200, mhagger@alum.mit.edu wrote:
>> From: Michael Haggerty <mhagger@alum.mit.edu>
>>
>> quiet was always set to 0, so get rid of it.  Add a function docstring
>> for good measure.
> 
> I would like to know if perhaps it was an unfinished project somewhere
> to propagate the "quiet" option down to this level before removing the
> function argument. Comments?

The is_refname_available() function, including the quiet option, was
added in c976d415e53 (coincidentally the same commit that added
RENAME-REF).  I am unable to find any use of the function with quiet=1
anywhere in history.

>> +/*
>> + * Return true iff a reference named refname could be created without
> 
> Did you really mean "iff" (as in "if and only if") or just plain "if"
> here?

I did indeed mean "if and only if".  Are you asking because you think
that abbreviation is too obscure, or because you think that "if and only
if" is logically incorrect here and I should have used "if"?

Michael

-- 
Michael Haggerty
mhagger@alum.mit.edu
http://softwareswirl.blogspot.com/

^ permalink raw reply

* Re: [PATCH] t1402-check-ref-format: skip tests of refs beginning with slash on Windows
From: Johannes Sixt @ 2011-10-14  6:40 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: mhagger, git, Jeff King, Drew Northup, Jakub Narebski,
	Heiko Voigt, Johan Herland, Julian Phillips
In-Reply-To: <7vbotk32zm.fsf@alter.siamese.dyndns.org>

Am 10/14/2011 1:07, schrieb Junio C Hamano:
> Junio C Hamano <gitster@pobox.com> writes:
> 
>>> +invalid_ref NOT_MINGW '/'
>>> ...
>>> +valid_ref_normalized '/heads/foo' 'heads/foo' NOT_MINGW
>>
>> The inconsistencies strikes me a bit.
> 
> Here is what I tentatively will queue.

The patch looks good, and the test cases pass as expected.

Thank you very much,
-- Hannes

^ permalink raw reply

* Re: [BUG] git checkout <branch> allowed with uncommitted changes
From: Alexey Shumkin @ 2011-10-14  6:51 UTC (permalink / raw)
  To: arQon; +Cc: git
In-Reply-To: <20111013201711.3d55c693@ashu.dyn.rarus.ru>

> > Lucky you. :P  The most likely reason for me is, I'm working on
> > something and I get interrupted and have to switch. Since the code
> > may well not even compile at this point, the last thing I want to do
> > is commit it. 
> "git stash" helps here
> With Git you can/have_to/must change your SVN-based habits.
> DO NOT BE AFRAID OF FREQUENT COMMITS!
> There are local until you push them.
> 
> >git's ability for that commit to be local is half the
> > reason I'm trying to switch to it.
> You always have a chance to modify/reedit you commits
> see "git commit --amend" and "git rebase [-i]"
> 
> I'm telling you it as an ex-SVN user.
> >(I'm not particularly keen on
> > having to commit broken code to even a local repo, but that's still
> > a hell of a lot better than having it pushed upstream as well).
> 
> Again, do not be afraid to commit your changes. Be afraid of losing
> your changes. Git makes everything (as other discussion participants
> already described) to keep your changes within workflow when you
> switch between branches often.
> 
> Read some books which are describe Git's usual (and effective)
> workflow, ProGit - http://progit.org/book/
> Version Contol by Example (there is a chapter about Git) -
> http://git-scm.com/course/svn.html
oops,
wrong url

fixed link
Version Contol by Example (there is a chapter about Git) -
http://www.ericsink.com/vcbe/

^ permalink raw reply

* Re: [PATCH] pack-objects: protect against disappearing packs
From: Johannes Sixt @ 2011-10-14  7:06 UTC (permalink / raw)
  To: Jeff King; +Cc: git, git-dev, Shawn O. Pearce, Nicolas Pitre
In-Reply-To: <20111014012320.GA4395@sigill.intra.peff.net>

Am 10/14/2011 3:23, schrieb Jeff King:
> In practice, however, adding this check still has value, for
> three reasons.
> 
>   1. If you have a reasonable number of packs and/or a
>      reasonable file descriptor limit, you can keep all of
>      your packs open simultaneously. If this is the case,
>      then the race is impossible to trigger.

On Windows, we cannot remove files that are open. If I understand
correctly, this patch keeps more files open for a longer time. Is there
any chance that packfiles remain now open until an unlink() call?

I am not worried about parallel processes (we already have a problem
there), but that this can now happen within a single process, i.e., that a
single git-repack -a -d -f would now try to unlink a pack file that it
opened itself and did not close timely.

I'll test your patch later this weekend to see whether the test suite
finds something. But perhaps you know the answer already?

-- Hannes

^ permalink raw reply

* defined behaviour for multiple urls for a remote
From: Sitaram Chamarty @ 2011-10-14  7:07 UTC (permalink / raw)
  To: Git Mailing List

Hi,

What's the defined behaviour if I do this:

[remote "both"]
	url = https://code.google.com/p/gitolite/
        url = git@github.com:sitaramc/gitolite.git

I know what I'm seeing (a fetch only goes to the first URL, and does a
HEAD->FETCH_HEAD because I didn't provide a refspec line, while a push
seems to push all to both), but I was curious what the official
position is, because I couldn't find it in the docs.

-- 
Sitaram

^ permalink raw reply

* Re: [BUG] git checkout <branch> allowed with uncommitted changes
From: Victor Engmark @ 2011-10-14  7:16 UTC (permalink / raw)
  To: arQon; +Cc: git
In-Reply-To: <loom.20111013T175500-495@post.gmane.org>

On Thu, Oct 13, 2011 at 04:17:51PM +0000, arQon wrote:
> Victor Engmark <victor.engmark <at> terreactive.ch> writes:
> > 1. `checkout master` and commit the fix there, then shift back and
> > continue working
> 
> I absolutely agree. And it's far more common than any of us would like.
> My point is, you *can't* do this in git without first staging your current branch
> via either commit or stash, or you risk changes bleeding between the branches
> and/or work being lost irretrievably.

It's been pointed out enough times already that work is *not* lost (at
least unless you --force it to) *nor* the branches corrupted, so I'll
just advice to look at more Git documentation. I used CVS 2004-2006 or
so, Subversion 2006-2009, and Git since then. It's vastly superior to
either, and it's really difficult to lose any work unless getting into
the habit of forcing every change.

> This is not something that you would
> expect, and as you say:
> 
> > The second most important thing a VCS should do is not destroy any of your
> uncommitted work unless you tell it to
> 
> ... which is exactly what git does, and why I have a problem with it.

No, it does not. Others have explained this better already.

> But the response here is uniformly "that's just how git is", so obviously it's
> something you learn to become aware of over time, and avoid. It's not going to
> get "fixed", because people who are used to git don't see it as a bug, so I just
> have to decide whether I can live with it or not.

It took a while to get my head around just how broken the Subversion
model was, and why my expectations from using that model kept resulting
in "weird" state (although don't get me started on `svn clean`). Don't
worry, you'll see the light :)

-- 
terreActive AG
Kasinostrasse 30
CH-5001 Aarau
Tel: +41 62 834 00 55
Fax: +41 62 823 93 56
www.terreactive.ch

Wir sichern Ihren Erfolg - seit 15 Jahren

^ permalink raw reply

* [PATCH 0/6] http-auth-early
From: Michael J Gruber @ 2011-10-14  7:40 UTC (permalink / raw)
  To: git; +Cc: Jeff King, Junio C Hamano
In-Reply-To: <4E9692BF.8040705@drmicha.warpmail.net>

Here are the early parts of Jeff's http-auth-keyring series.
It contains only parts which are not using the credential API (which
is still under discussion), so that this can go in (and help users)
and alleviates the pressure on the credential discussion:

Early bits with cleanups to http.c.
Cherry-picked bit for improved prompts ("Username for ..." etc.)
Cherry-pickes bit for using configured pushurls.

I tried to pick/resolve in a way which should help rebasing Jeff's series
on top of this.

Jeff King (5):
  url: decode buffers that are not NUL-terminated
  improve httpd auth tests
  remote-curl: don't retry auth failures with dumb protocol
  http: retry authentication failures for all http requests
  http_init: accept separate URL parameter

Michael J Gruber (1):
  http: use hostname in credential description

 http-fetch.c          |    2 +-
 http-push.c           |   10 +-----
 http.c                |   91 +++++++++++++++++++++++++++---------------------
 http.h                |    2 +-
 remote-curl.c         |    4 +-
 t/lib-httpd.sh        |   10 +++--
 t/t5550-http-fetch.sh |   51 +++++++++++++++++++++++++--
 url.c                 |   26 ++++++++++----
 url.h                 |    1 +
 9 files changed, 128 insertions(+), 69 deletions(-)

-- 
1.7.7.338.g0156b

^ permalink raw reply

* [PATCH 1/6] url: decode buffers that are not NUL-terminated
From: Michael J Gruber @ 2011-10-14  7:40 UTC (permalink / raw)
  To: git; +Cc: Jeff King, Junio C Hamano
In-Reply-To: <cover.1318577792.git.git@drmicha.warpmail.net>

From: Jeff King <peff@peff.net>

The url_decode function needs only minor tweaks to handle
arbitrary buffers. Let's do those tweaks, which cleans up an
unreadable mess of temporary strings in http.c.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 http.c |   27 ++++-----------------------
 url.c  |   26 ++++++++++++++++++--------
 url.h  |    1 +
 3 files changed, 23 insertions(+), 31 deletions(-)

diff --git a/http.c b/http.c
index a1ea3db..c93716c 100644
--- a/http.c
+++ b/http.c
@@ -307,8 +307,7 @@ static CURL *get_curl_handle(void)
 
 static void http_auth_init(const char *url)
 {
-	char *at, *colon, *cp, *slash, *decoded;
-	int len;
+	char *at, *colon, *cp, *slash;
 
 	cp = strstr(url, "://");
 	if (!cp)
@@ -328,29 +327,11 @@ static void http_auth_init(const char *url)
 		return; /* No credentials */
 	if (!colon || at <= colon) {
 		/* Only username */
-		len = at - cp;
-		user_name = xmalloc(len + 1);
-		memcpy(user_name, cp, len);
-		user_name[len] = '\0';
-		decoded = url_decode(user_name);
-		free(user_name);
-		user_name = decoded;
+		user_name = url_decode_mem(cp, at - cp);
 		user_pass = NULL;
 	} else {
-		len = colon - cp;
-		user_name = xmalloc(len + 1);
-		memcpy(user_name, cp, len);
-		user_name[len] = '\0';
-		decoded = url_decode(user_name);
-		free(user_name);
-		user_name = decoded;
-		len = at - (colon + 1);
-		user_pass = xmalloc(len + 1);
-		memcpy(user_pass, colon + 1, len);
-		user_pass[len] = '\0';
-		decoded = url_decode(user_pass);
-		free(user_pass);
-		user_pass = decoded;
+		user_name = url_decode_mem(cp, colon - cp);
+		user_pass = url_decode_mem(colon + 1, at - (colon + 1));
 	}
 }
 
diff --git a/url.c b/url.c
index 3e06fd3..389d9da 100644
--- a/url.c
+++ b/url.c
@@ -68,18 +68,20 @@ static int url_decode_char(const char *q)
 	return val;
 }
 
-static char *url_decode_internal(const char **query, const char *stop_at,
-				 struct strbuf *out, int decode_plus)
+static char *url_decode_internal(const char **query, int len,
+				 const char *stop_at, struct strbuf *out,
+				 int decode_plus)
 {
 	const char *q = *query;
 
-	do {
+	while (len) {
 		unsigned char c = *q;
 
 		if (!c)
 			break;
 		if (stop_at && strchr(stop_at, c)) {
 			q++;
+			len--;
 			break;
 		}
 
@@ -88,6 +90,7 @@ static char *url_decode_internal(const char **query, const char *stop_at,
 			if (0 <= val) {
 				strbuf_addch(out, val);
 				q += 3;
+				len -= 3;
 				continue;
 			}
 		}
@@ -97,34 +100,41 @@ static char *url_decode_internal(const char **query, const char *stop_at,
 		else
 			strbuf_addch(out, c);
 		q++;
-	} while (1);
+		len--;
+	}
 	*query = q;
 	return strbuf_detach(out, NULL);
 }
 
 char *url_decode(const char *url)
 {
+	return url_decode_mem(url, strlen(url));
+}
+
+char *url_decode_mem(const char *url, int len)
+{
 	struct strbuf out = STRBUF_INIT;
-	const char *colon = strchr(url, ':');
+	const char *colon = memchr(url, ':', len);
 
 	/* Skip protocol part if present */
 	if (colon && url < colon) {
 		strbuf_add(&out, url, colon - url);
+		len -= colon - url;
 		url = colon;
 	}
-	return url_decode_internal(&url, NULL, &out, 0);
+	return url_decode_internal(&url, len, NULL, &out, 0);
 }
 
 char *url_decode_parameter_name(const char **query)
 {
 	struct strbuf out = STRBUF_INIT;
-	return url_decode_internal(query, "&=", &out, 1);
+	return url_decode_internal(query, -1, "&=", &out, 1);
 }
 
 char *url_decode_parameter_value(const char **query)
 {
 	struct strbuf out = STRBUF_INIT;
-	return url_decode_internal(query, "&", &out, 1);
+	return url_decode_internal(query, -1, "&", &out, 1);
 }
 
 void end_url_with_slash(struct strbuf *buf, const char *url)
diff --git a/url.h b/url.h
index 7100e32..abdaf6f 100644
--- a/url.h
+++ b/url.h
@@ -4,6 +4,7 @@
 extern int is_url(const char *url);
 extern int is_urlschemechar(int first_flag, int ch);
 extern char *url_decode(const char *url);
+extern char *url_decode_mem(const char *url, int len);
 extern char *url_decode_parameter_name(const char **query);
 extern char *url_decode_parameter_value(const char **query);
 
-- 
1.7.7.338.g0156b

^ permalink raw reply related

* [PATCH 2/6] improve httpd auth tests
From: Michael J Gruber @ 2011-10-14  7:40 UTC (permalink / raw)
  To: git; +Cc: Jeff King, Junio C Hamano
In-Reply-To: <cover.1318577792.git.git@drmicha.warpmail.net>

From: Jeff King <peff@peff.net>

These just checked that we could clone a repository when the
username and password were given in the URL; we should also
check that git will prompt when no or partial credentials
are given.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/lib-httpd.sh        |   10 +++++---
 t/t5550-http-fetch.sh |   51 +++++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 53 insertions(+), 8 deletions(-)

diff --git a/t/lib-httpd.sh b/t/lib-httpd.sh
index b8996a3..f7dc078 100644
--- a/t/lib-httpd.sh
+++ b/t/lib-httpd.sh
@@ -81,8 +81,7 @@ prepare_httpd() {
 
 	if test -n "$LIB_HTTPD_SSL"
 	then
-		HTTPD_URL=https://127.0.0.1:$LIB_HTTPD_PORT
-		AUTH_HTTPD_URL=https://user%40host:user%40host@127.0.0.1:$LIB_HTTPD_PORT
+		HTTPD_PROTO=https
 
 		RANDFILE_PATH="$HTTPD_ROOT_PATH"/.rnd openssl req \
 			-config "$TEST_PATH/ssl.cnf" \
@@ -93,9 +92,12 @@ prepare_httpd() {
 		export GIT_SSL_NO_VERIFY
 		HTTPD_PARA="$HTTPD_PARA -DSSL"
 	else
-		HTTPD_URL=http://127.0.0.1:$LIB_HTTPD_PORT
-		AUTH_HTTPD_URL=http://user%40host:user%40host@127.0.0.1:$LIB_HTTPD_PORT
+		HTTPD_PROTO=http
 	fi
+	HTTPD_DEST=127.0.0.1:$LIB_HTTPD_PORT
+	HTTPD_URL=$HTTPD_PROTO://$HTTPD_DEST
+	HTTPD_URL_USER=$HTTPD_PROTO://user%40host@$HTTPD_DEST
+	HTTPD_URL_USER_PASS=$HTTPD_PROTO://user%40host:user%40host@$HTTPD_DEST
 
 	if test -n "$LIB_HTTPD_DAV" -o -n "$LIB_HTTPD_SVN"
 	then
diff --git a/t/t5550-http-fetch.sh b/t/t5550-http-fetch.sh
index a1883ca..ed4db09 100755
--- a/t/t5550-http-fetch.sh
+++ b/t/t5550-http-fetch.sh
@@ -35,11 +35,54 @@ test_expect_success 'clone http repository' '
 	test_cmp file clone/file
 '
 
-test_expect_success 'clone http repository with authentication' '
+test_expect_success 'create password-protected repository' '
 	mkdir "$HTTPD_DOCUMENT_ROOT_PATH/auth/" &&
-	cp -Rf "$HTTPD_DOCUMENT_ROOT_PATH/repo.git" "$HTTPD_DOCUMENT_ROOT_PATH/auth/repo.git" &&
-	git clone $AUTH_HTTPD_URL/auth/repo.git clone-auth &&
-	test_cmp file clone-auth/file
+	cp -Rf "$HTTPD_DOCUMENT_ROOT_PATH/repo.git" \
+	       "$HTTPD_DOCUMENT_ROOT_PATH/auth/repo.git"
+'
+
+test_expect_success 'setup askpass helpers' '
+	cat >askpass <<-EOF &&
+	#!/bin/sh
+	echo >>"$PWD/askpass-query" "askpass: \$*" &&
+	cat "$PWD/askpass-response"
+	EOF
+	chmod +x askpass &&
+	GIT_ASKPASS="$PWD/askpass" &&
+	export GIT_ASKPASS &&
+	>askpass-expect-none &&
+	echo "askpass: Password: " >askpass-expect-pass &&
+	{ echo "askpass: Username: " &&
+	  cat askpass-expect-pass
+	} >askpass-expect-both
+'
+
+test_expect_success 'cloning password-protected repository can fail' '
+	>askpass-query &&
+	echo wrong >askpass-response &&
+	test_must_fail git clone "$HTTPD_URL/auth/repo.git" clone-auth-fail &&
+	test_cmp askpass-expect-both askpass-query
+'
+
+test_expect_success 'http auth can use user/pass in URL' '
+	>askpass-query &&
+	echo wrong >askpass-reponse &&
+	git clone "$HTTPD_URL_USER_PASS/auth/repo.git" clone-auth-none &&
+	test_cmp askpass-expect-none askpass-query
+'
+
+test_expect_success 'http auth can use just user in URL' '
+	>askpass-query &&
+	echo user@host >askpass-response &&
+	git clone "$HTTPD_URL_USER/auth/repo.git" clone-auth-pass &&
+	test_cmp askpass-expect-pass askpass-query
+'
+
+test_expect_success 'http auth can request both user and pass' '
+	>askpass-query &&
+	echo user@host >askpass-response &&
+	git clone "$HTTPD_URL/auth/repo.git" clone-auth-both &&
+	test_cmp askpass-expect-both askpass-query
 '
 
 test_expect_success 'fetch changes via http' '
-- 
1.7.7.338.g0156b

^ permalink raw reply related

* [PATCH 3/6] remote-curl: don't retry auth failures with dumb protocol
From: Michael J Gruber @ 2011-10-14  7:40 UTC (permalink / raw)
  To: git; +Cc: Jeff King, Junio C Hamano
In-Reply-To: <cover.1318577792.git.git@drmicha.warpmail.net>

From: Jeff King <peff@peff.net>

When fetching an http URL, we first try fetching info/refs
with an extra "service" parameter. This will work for a
smart-http server, or a dumb server which ignores extra
parameters when fetching files. If that fails, we retry
without the extra parameter to remain compatible with dumb
servers which didn't like our first request.

If the server returned a "401 Unauthorized", indicating that
the credentials we provided were not good, there is not much
point in retrying. With the current code, we just waste an
extra round trip to the HTTP server before failing.

But as the http code becomes smarter about throwing away
rejected credentials and re-prompting the user for new ones
(which it will later in this series), this will become more
confusing. At some point we will stop asking for credentials
to retry smart http, and will be asking for credentials to
retry dumb http. So now we're not only wasting an extra HTTP
round trip for something that is unlikely to work, but we're
making the user re-type their password for it.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 remote-curl.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/remote-curl.c b/remote-curl.c
index faaeda4..6c24ab1 100644
--- a/remote-curl.c
+++ b/remote-curl.c
@@ -115,7 +115,7 @@ static struct discovery* discover_refs(const char *service)
 	http_ret = http_get_strbuf(refs_url, &buffer, HTTP_NO_CACHE);
 
 	/* try again with "plain" url (no ? or & appended) */
-	if (http_ret != HTTP_OK) {
+	if (http_ret != HTTP_OK && http_ret != HTTP_NOAUTH) {
 		free(refs_url);
 		strbuf_reset(&buffer);
 
-- 
1.7.7.338.g0156b

^ permalink raw reply related

* [PATCH 4/6] http: retry authentication failures for all http requests
From: Michael J Gruber @ 2011-10-14  7:40 UTC (permalink / raw)
  To: git; +Cc: Jeff King, Junio C Hamano
In-Reply-To: <cover.1318577792.git.git@drmicha.warpmail.net>

From: Jeff King <peff@peff.net>

Commit 42653c0 (Prompt for a username when an HTTP request
401s, 2010-04-01) changed http_get_strbuf to prompt for
credentials when we receive a 401, but didn't touch
http_get_file. The latter is called only for dumb http;
while it's usually the case that people don't use
authentication on top of dumb http, there is no reason not
to allow both types of requests to use this feature.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 http.c |   17 +++++++++++------
 1 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/http.c b/http.c
index c93716c..89e3cf4 100644
--- a/http.c
+++ b/http.c
@@ -846,13 +846,18 @@ static int http_request(const char *url, void *result, int target, int options)
 	return ret;
 }
 
+static int http_request_reauth(const char *url, void *result, int target,
+			       int options)
+{
+	int ret = http_request(url, result, target, options);
+	if (ret != HTTP_REAUTH)
+		return ret;
+	return http_request(url, result, target, options);
+}
+
 int http_get_strbuf(const char *url, struct strbuf *result, int options)
 {
-	int http_ret = http_request(url, result, HTTP_REQUEST_STRBUF, options);
-	if (http_ret == HTTP_REAUTH) {
-		http_ret = http_request(url, result, HTTP_REQUEST_STRBUF, options);
-	}
-	return http_ret;
+	return http_request_reauth(url, result, HTTP_REQUEST_STRBUF, options);
 }
 
 /*
@@ -875,7 +880,7 @@ static int http_get_file(const char *url, const char *filename, int options)
 		goto cleanup;
 	}
 
-	ret = http_request(url, result, HTTP_REQUEST_FILE, options);
+	ret = http_request_reauth(url, result, HTTP_REQUEST_FILE, options);
 	fclose(result);
 
 	if ((ret == HTTP_OK) && move_temp_to_file(tmpfile.buf, filename))
-- 
1.7.7.338.g0156b

^ permalink raw reply related

* [PATCH 5/6] http: use hostname in credential description
From: Michael J Gruber @ 2011-10-14  7:40 UTC (permalink / raw)
  To: git; +Cc: Jeff King, Junio C Hamano
In-Reply-To: <cover.1318577792.git.git@drmicha.warpmail.net>

Until now, a request for an http password looked like:

  Username:
  Password:

Now it will look like:

  Username for 'example.com':
  Password for 'example.com':

Picked-from: Jeff King <peff@peff.net>
Signed-off-by: Michael J Gruber <git@drmicha.warpmail.net>
---
 http.c                |   41 +++++++++++++++++++++++++++++++++--------
 t/t5550-http-fetch.sh |    4 ++--
 2 files changed, 35 insertions(+), 10 deletions(-)

diff --git a/http.c b/http.c
index 89e3cf4..149e116 100644
--- a/http.c
+++ b/http.c
@@ -42,7 +42,7 @@ static long curl_low_speed_time = -1;
 static int curl_ftp_no_epsv;
 static const char *curl_http_proxy;
 static const char *curl_cookie_file;
-static char *user_name, *user_pass;
+static char *user_name, *user_pass, *description;
 static const char *user_agent;
 
 #if LIBCURL_VERSION_NUM >= 0x071700
@@ -139,6 +139,25 @@ static void process_curl_messages(void)
 }
 #endif
 
+static char *git_getpass_one(const char *what, const char *desc)
+{
+	struct strbuf prompt = STRBUF_INIT;
+	char *r;
+
+	if (desc)
+		strbuf_addf(&prompt, "%s for '%s': ", what, desc);
+	else
+		strbuf_addf(&prompt, "%s: ", what);
+	/* FIXME: for usernames, we should do something less magical that
+	* actually echoes the characters. However, we need to read from
+	* /dev/tty and not stdio, which is not portable (but getpass will do
+	* it for us). http.c uses the same workaround. */
+	r = git_getpass(prompt.buf);
+
+	strbuf_release(&prompt);
+	return xstrdup(r);
+}
+
 static int http_options(const char *var, const char *value, void *cb)
 {
 	if (!strcmp("http.sslverify", var)) {
@@ -214,7 +233,7 @@ static void init_curl_http_auth(CURL *result)
 	if (user_name) {
 		struct strbuf up = STRBUF_INIT;
 		if (!user_pass)
-			user_pass = xstrdup(git_getpass("Password: "));
+			user_pass = xstrdup(git_getpass_one("Password", description));
 		strbuf_addf(&up, "%s:%s", user_name, user_pass);
 		curl_easy_setopt(result, CURLOPT_USERPWD,
 				 strbuf_detach(&up, NULL));
@@ -229,7 +248,7 @@ static int has_cert_password(void)
 		return 0;
 	/* Only prompt the user once. */
 	ssl_cert_password_required = -1;
-	ssl_cert_password = git_getpass("Certificate Password: ");
+	ssl_cert_password = git_getpass_one("Certificate Password", description);
 	if (ssl_cert_password != NULL) {
 		ssl_cert_password = xstrdup(ssl_cert_password);
 		return 1;
@@ -307,7 +326,7 @@ static CURL *get_curl_handle(void)
 
 static void http_auth_init(const char *url)
 {
-	char *at, *colon, *cp, *slash;
+	const char *at, *colon, *cp, *slash, *host;
 
 	cp = strstr(url, "://");
 	if (!cp)
@@ -323,16 +342,22 @@ static void http_auth_init(const char *url)
 	at = strchr(cp, '@');
 	colon = strchr(cp, ':');
 	slash = strchrnul(cp, '/');
-	if (!at || slash <= at)
-		return; /* No credentials */
-	if (!colon || at <= colon) {
+	if (!at || slash <= at) {
+		/* No credentials, but we may have to ask for some later */
+		host = cp;
+	}
+	else if (!colon || at <= colon) {
 		/* Only username */
 		user_name = url_decode_mem(cp, at - cp);
 		user_pass = NULL;
+		host = at + 1;
 	} else {
 		user_name = url_decode_mem(cp, colon - cp);
 		user_pass = url_decode_mem(colon + 1, at - (colon + 1));
+		host = at + 1;
 	}
+
+	description = url_decode_mem(host, slash - host);
 }
 
 static void set_from_env(const char **var, const char *envname)
@@ -828,7 +853,7 @@ static int http_request(const char *url, void *result, int target, int options)
 				 * but that is non-portable.  Using git_getpass() can at least be stubbed
 				 * on other platforms with a different implementation if/when necessary.
 				 */
-				user_name = xstrdup(git_getpass("Username: "));
+				user_name = xstrdup(git_getpass_one("Username", description));
 				init_curl_http_auth(slot->curl);
 				ret = HTTP_REAUTH;
 			}
diff --git a/t/t5550-http-fetch.sh b/t/t5550-http-fetch.sh
index ed4db09..d1ab4d0 100755
--- a/t/t5550-http-fetch.sh
+++ b/t/t5550-http-fetch.sh
@@ -51,8 +51,8 @@ test_expect_success 'setup askpass helpers' '
 	GIT_ASKPASS="$PWD/askpass" &&
 	export GIT_ASKPASS &&
 	>askpass-expect-none &&
-	echo "askpass: Password: " >askpass-expect-pass &&
-	{ echo "askpass: Username: " &&
+	echo "askpass: Password for '\''$HTTPD_DEST'\'': " >askpass-expect-pass &&
+	{ echo "askpass: Username for '\''$HTTPD_DEST'\'': " &&
 	  cat askpass-expect-pass
 	} >askpass-expect-both
 '
-- 
1.7.7.338.g0156b

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox