Git development
 help / color / mirror / Atom feed
* [PATCH] handle concurrent pruning of packed objects
@ 2006-05-30 15:56 Jeff King
  0 siblings, 0 replies; 8+ messages in thread
From: Jeff King @ 2006-05-30 15:56 UTC (permalink / raw)
  To: junkio; +Cc: git

This patch causes read_sha1_file and sha1_object_info to re-examine the
list of packs if an object cannot be found. It works by re-running
prepare_packed_git() after an object fails to be found.

It does not attempt to clean up the old pack list. Old packs which are in
use can continue to be used (until unused by lru selection).  New packs
are placed at the front of the list and will thus be examined before old
packs.

Signed-off-by: Jeff King <peff@peff.net>
---

I tested this by making a simple repo with three commits: a, b, and c.
I ran git diff-tree --stdin, and then did the following:
  1. fed 'a b' to diff-tree
  2. ran git repack -a -d
  3. fed 'b c' to diff-tree
Vanilla git complains about the lack of object, whereas this version
provides the correct diff-tree output.

 sha1_file.c |   24 ++++++++++++++++++------
 1 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/sha1_file.c b/sha1_file.c
index f77c189..696e53f 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -626,12 +626,12 @@ static void prepare_packed_git_one(char 
 	closedir(dir);
 }
 
+static int prepare_packed_git_run_once = 0;
 void prepare_packed_git(void)
 {
-	static int run_once = 0;
 	struct alternate_object_database *alt;
 
-	if (run_once)
+	if (prepare_packed_git_run_once)
 		return;
 	prepare_packed_git_one(get_object_directory(), 1);
 	prepare_alt_odb();
@@ -640,7 +640,13 @@ void prepare_packed_git(void)
 		prepare_packed_git_one(alt->base, 0);
 		alt->name[-1] = '/';
 	}
-	run_once = 1;
+	prepare_packed_git_run_once = 1;
+}
+
+static void reprepare_packed_git(void)
+{
+	prepare_packed_git_run_once = 0;
+	prepare_packed_git();
 }
 
 int check_sha1_signature(const unsigned char *sha1, void *map, unsigned long size, const char *type)
@@ -1212,9 +1218,12 @@ int sha1_object_info(const unsigned char
 	if (!map) {
 		struct pack_entry e;
 
-		if (!find_pack_entry(sha1, &e))
-			return error("unable to find %s", sha1_to_hex(sha1));
-		return packed_object_info(&e, type, sizep);
+		if (find_pack_entry(sha1, &e))
+			return packed_object_info(&e, type, sizep);
+		reprepare_packed_git();
+		if (find_pack_entry(sha1, &e))
+			return packed_object_info(&e, type, sizep);
+		return error("unable to find %s", sha1_to_hex(sha1));
 	}
 	if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0)
 		status = error("unable to unpack %s header",
@@ -1256,6 +1265,9 @@ void * read_sha1_file(const unsigned cha
 		munmap(map, mapsize);
 		return buf;
 	}
+	reprepare_packed_git();
+	if (find_pack_entry(sha1, &e))
+		return read_packed_sha1(sha1, type, size);
 	return NULL;
 }
 
-- 
1.3.3.g331f

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH] handle concurrent pruning of packed objects
@ 2006-06-02 15:32 Jeff King
  2006-06-02 15:53 ` Junio C Hamano
  0 siblings, 1 reply; 8+ messages in thread
From: Jeff King @ 2006-06-02 15:32 UTC (permalink / raw)
  To: git

This patch causes read_sha1_file and sha1_object_info to re-examine the
list of packs if an object cannot be found. It works by re-running
prepare_packed_git() after an object fails to be found.

It does not attempt to clean up the old pack list. Old packs which are in
use can continue to be used (until unused by lru selection).  New packs
are placed at the front of the list and will thus be examined before old
packs.

Signed-off-by: Jeff King <peff@peff.net>
---

This is a repost, since there was no response last time. Linus
indicated this approach was reasonable; see:
  <Pine.LNX.4.64.0605300752430.5623@g5.osdl.org>

I tested this by making a simple repo with three commits: a, b, and c.
I ran git diff-tree --stdin, and then did the following:
  1. fed 'a b' to diff-tree
  2. ran git repack -a -d
  3. fed 'b c' to diff-tree
Vanilla git complains about the lack of object, whereas this version
provides the correct diff-tree output.

 sha1_file.c |   24 ++++++++++++++++++------
 1 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/sha1_file.c b/sha1_file.c
index f77c189..696e53f 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -626,12 +626,12 @@ static void prepare_packed_git_one(char 
 	closedir(dir);
 }
 
+static int prepare_packed_git_run_once = 0;
 void prepare_packed_git(void)
 {
-	static int run_once = 0;
 	struct alternate_object_database *alt;
 
-	if (run_once)
+	if (prepare_packed_git_run_once)
 		return;
 	prepare_packed_git_one(get_object_directory(), 1);
 	prepare_alt_odb();
@@ -640,7 +640,13 @@ void prepare_packed_git(void)
 		prepare_packed_git_one(alt->base, 0);
 		alt->name[-1] = '/';
 	}
-	run_once = 1;
+	prepare_packed_git_run_once = 1;
+}
+
+static void reprepare_packed_git(void)
+{
+	prepare_packed_git_run_once = 0;
+	prepare_packed_git();
 }
 
 int check_sha1_signature(const unsigned char *sha1, void *map, unsigned long size, const char *type)
@@ -1212,9 +1218,12 @@ int sha1_object_info(const unsigned char
 	if (!map) {
 		struct pack_entry e;
 
-		if (!find_pack_entry(sha1, &e))
-			return error("unable to find %s", sha1_to_hex(sha1));
-		return packed_object_info(&e, type, sizep);
+		if (find_pack_entry(sha1, &e))
+			return packed_object_info(&e, type, sizep);
+		reprepare_packed_git();
+		if (find_pack_entry(sha1, &e))
+			return packed_object_info(&e, type, sizep);
+		return error("unable to find %s", sha1_to_hex(sha1));
 	}
 	if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0)
 		status = error("unable to unpack %s header",
@@ -1256,6 +1265,9 @@ void * read_sha1_file(const unsigned cha
 		munmap(map, mapsize);
 		return buf;
 	}
+	reprepare_packed_git();
+	if (find_pack_entry(sha1, &e))
+		return read_packed_sha1(sha1, type, size);
 	return NULL;
 }
 
-- 
1.3.3.g331f

-
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

----- End forwarded message -----

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH] handle concurrent pruning of packed objects
  2006-06-02 15:32 [PATCH] handle concurrent pruning of packed objects Jeff King
@ 2006-06-02 15:53 ` Junio C Hamano
  2006-06-02 16:03   ` Junio C Hamano
  2006-06-02 16:04   ` Jeff King
  0 siblings, 2 replies; 8+ messages in thread
From: Junio C Hamano @ 2006-06-02 15:53 UTC (permalink / raw)
  To: Jeff King; +Cc: git

Jeff King <peff@peff.net> writes:

> This is a repost, since there was no response last time. Linus
> indicated this approach was reasonable; see:
>   <Pine.LNX.4.64.0605300752430.5623@g5.osdl.org>

I haven't forgotten about it, but I have been sick.

I am uncertain about not re-examining the packs it originally
thought it had.  By prepending the new ones (and the same old
surviving ones) at the beginning you are effectively hiding the
old packs, which sounds reasonable in the usual case.

Also I suspect this might have funny interaction with the case
where there are hand-added packs (see how verify-pack does it).
We do not silently "fix" missing object problems we discover
there.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] handle concurrent pruning of packed objects
  2006-06-02 15:53 ` Junio C Hamano
@ 2006-06-02 16:03   ` Junio C Hamano
  2006-06-02 16:04   ` Jeff King
  1 sibling, 0 replies; 8+ messages in thread
From: Junio C Hamano @ 2006-06-02 16:03 UTC (permalink / raw)
  To: Jeff King; +Cc: git

Junio C Hamano <junkio@cox.net> writes:

> Also I suspect this might have funny interaction with the case
> where there are hand-added packs (see how verify-pack does it).
> We do not silently "fix" missing object problems we discover
> there.

The last sentence should read 'We do not want to silently...'.
Sorry.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] handle concurrent pruning of packed objects
  2006-06-02 15:53 ` Junio C Hamano
  2006-06-02 16:03   ` Junio C Hamano
@ 2006-06-02 16:04   ` Jeff King
  2006-06-02 16:10     ` Junio C Hamano
  1 sibling, 1 reply; 8+ messages in thread
From: Jeff King @ 2006-06-02 16:04 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

On Fri, Jun 02, 2006 at 08:53:52AM -0700, Junio C Hamano wrote:

> I am uncertain about not re-examining the packs it originally
> thought it had.  By prepending the new ones (and the same old
> surviving ones) at the beginning you are effectively hiding the
> old packs, which sounds reasonable in the usual case.

That shouldn't make a difference for correctness, even if the old packs
are still there. If you have an object in two packs, then it doesn't
matter which one you pull it from. The main impacts I can think of are:
  1. The old pack may already be mapped, and it would be more efficient
     to use it. However, the new pack will be mapped on first use, so it
     will be used from then on.
  2. The pack list can grow without bound. However, for this to matter,
     you'd have to do many prunes during the course of a single git
     command.

> Also I suspect this might have funny interaction with the case
> where there are hand-added packs (see how verify-pack does it).
> We do not silently "fix" missing object problems we discover
> there.

I will take a look at this.

-Peff

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] handle concurrent pruning of packed objects
  2006-06-02 16:04   ` Jeff King
@ 2006-06-02 16:10     ` Junio C Hamano
  2006-06-02 16:49       ` [PATCH] sha1_file: avoid re-preparing duplicate packs Jeff King
  0 siblings, 1 reply; 8+ messages in thread
From: Junio C Hamano @ 2006-06-02 16:10 UTC (permalink / raw)
  To: Jeff King; +Cc: git

Jeff King <peff@peff.net> writes:

> That shouldn't make a difference for correctness, even if the old packs
> are still there. If you have an object in two packs, then it doesn't
> matter which one you pull it from. The main impacts I can think of are:
>   1. The old pack may already be mapped, and it would be more efficient
>      to use it. However, the new pack will be mapped on first use, so it
>      will be used from then on.
>   2. The pack list can grow without bound. However, for this to matter,
>      you'd have to do many prunes during the course of a single git
>      command.

I agree 100% on "shouldn't" part.  What I wonder is if everybody
works correctly if we mmap the same file (all available .idx are
mapped all the time, and we map .pack LRU) twice.  But I realize
we have NO_MMAP configuration for unfortunate platforms to work
it around so that wouldn't be a big deal.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH] sha1_file: avoid re-preparing duplicate packs
  2006-06-02 16:10     ` Junio C Hamano
@ 2006-06-02 16:49       ` Jeff King
  2006-06-02 17:47         ` Linus Torvalds
  0 siblings, 1 reply; 8+ messages in thread
From: Jeff King @ 2006-06-02 16:49 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

When adding packs, skip the pack if we already have it in the packed_git
list. This might happen if we are re-preparing our packs because of a
missing object.
---

On Fri, Jun 02, 2006 at 09:10:24AM -0700, Junio C Hamano wrote:

> I agree 100% on "shouldn't" part.  What I wonder is if everybody
> works correctly if we mmap the same file (all available .idx are

This patch avoids duplicates in the packed_git list. It's not necessary
under Linux, at least, but it just seems cleaner, and it's simple to do.
The list might still have packs that are now gone. I didn't want to
purge anything from the packed_git list since I'm not clear on whether
other code might have pointers into the mmap'd portion.

 sha1_file.c |    6 ++++++
 1 files changed, 6 insertions(+), 0 deletions(-)

diff --git a/sha1_file.c b/sha1_file.c
index 696e53f..aea0f40 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -617,6 +617,12 @@ static void prepare_packed_git_one(char 
 
 		/* we have .idx.  Is it a file we can map? */
 		strcpy(path + len, de->d_name);
+		for (p = packed_git; p; p = p->next) {
+			if (!memcmp(path, p->pack_name, len + namelen - 4))
+				break;
+		}
+		if (p)
+			continue;
 		p = add_packed_git(path, len + namelen, local);
 		if (!p)
 			continue;
-- 
1.3.3.gfb825

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH] sha1_file: avoid re-preparing duplicate packs
  2006-06-02 16:49       ` [PATCH] sha1_file: avoid re-preparing duplicate packs Jeff King
@ 2006-06-02 17:47         ` Linus Torvalds
  0 siblings, 0 replies; 8+ messages in thread
From: Linus Torvalds @ 2006-06-02 17:47 UTC (permalink / raw)
  To: Jeff King; +Cc: Junio C Hamano, git



On Fri, 2 Jun 2006, Jeff King wrote:
> 
> This patch avoids duplicates in the packed_git list. It's not necessary
> under Linux, at least, but it just seems cleaner, and it's simple to do.

I think it's a good idea even under Linux, since on 32-bit machines, one 
of the constraints we have may simply be virtual memory space to map 
things into. With big packs, avoiding mapping them twice is a good idea.

		Linus

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2006-06-02 17:47 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-06-02 15:32 [PATCH] handle concurrent pruning of packed objects Jeff King
2006-06-02 15:53 ` Junio C Hamano
2006-06-02 16:03   ` Junio C Hamano
2006-06-02 16:04   ` Jeff King
2006-06-02 16:10     ` Junio C Hamano
2006-06-02 16:49       ` [PATCH] sha1_file: avoid re-preparing duplicate packs Jeff King
2006-06-02 17:47         ` Linus Torvalds
  -- strict thread matches above, loose matches on Subject: below --
2006-05-30 15:56 [PATCH] handle concurrent pruning of packed objects Jeff King

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox