Git development

Git development
 help / color / mirror / Atom feed

* [PATCH 2/6] Read the base offset or name of delta objects
From: Hervé Cauwelier @ 2009-10-03 18:09 UTC (permalink / raw)
  To: git; +Cc: Hervé Cauwelier
In-Reply-To: <1254593401-18801-2-git-send-email-herve@itaapy.com>

Signed-off-by: Hervé Cauwelier <herve@itaapy.com>
---
 src/cc-compat.h |    3 +++
 src/odb.c       |   28 ++++++++++++++++++++++++++--
 2 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/src/cc-compat.h b/src/cc-compat.h
index 8997caa..8dd6774 100644
--- a/src/cc-compat.h
+++ b/src/cc-compat.h
@@ -30,6 +30,9 @@
 # define GIT_TYPEOF(x)
 #endif
 
+#define bitsizeof(x)  (CHAR_BIT * sizeof(x))
+#define MSB(x, bits) ((x) & GIT_TYPEOF(x)(~0ULL << (bitsizeof(x) - (bits))))
+
 /*
  * Does our compiler/platform support the C99 <inttypes.h> and
  * <stdint.h> header files. (C99 requires that <inttypes.h>
diff --git a/src/odb.c b/src/odb.c
index a562a19..3cfc932 100644
--- a/src/odb.c
+++ b/src/odb.c
@@ -97,8 +97,10 @@ struct git_odb {
 };
 
 typedef struct {  /* object header data */
-	git_otype type;  /* object type */
-	size_t    size;  /* object size */
+	git_otype type;     /* object type */
+	size_t    size;     /* object size */
+	off_t base_offset;  /* delta base offset (GIT_OBJ_OFS_DELTA) */
+	git_oid base_name;  /* delta base name (GIT_OBJ_REF_DELTA) */
 } obj_hdr;
 
 static struct {
@@ -238,6 +240,7 @@ static size_t get_binary_object_header(obj_hdr *hdr, gitfo_buf *obj)
 	unsigned char c;
 	unsigned char *data = obj->data;
 	size_t shift, size, used = 0;
+	off_t base_offset;
 
 	if (obj->len == 0)
 		return 0;
@@ -258,6 +261,27 @@ static size_t get_binary_object_header(obj_hdr *hdr, gitfo_buf *obj)
 	}
 	hdr->size = size;
 
+	hdr->base_offset = 0;
+	hdr->base_name.id[0] = '\0';
+
+	if (hdr->type == GIT_OBJ_OFS_DELTA) {
+		c = data[used++];
+		base_offset = c & 127;
+		while (c & 128) {
+			base_offset++;
+			if (!base_offset || MSB(base_offset, 7))
+				return 0;  /* overflow */
+			c = data[used++];
+			base_offset = (base_offset << 7) + (c & 127);
+		}
+		assert(base_offset > 0);
+		hdr->base_offset = base_offset;
+	}
+	else if (hdr->type == GIT_OBJ_REF_DELTA) {
+		git_oid_mkraw(&hdr->base_name, data + used);
+		used += 20;
+	}
+
 	return used;
 }
 
-- 
1.6.4.4

^ permalink raw reply related

* [PATCH 1/6] Open the pack file and keep a map on it.
From: Hervé Cauwelier @ 2009-10-03 18:09 UTC (permalink / raw)
  To: git; +Cc: Hervé Cauwelier
In-Reply-To: <1254593401-18801-1-git-send-email-herve@itaapy.com>

On the same model than the idx file.

Signed-off-by: Hervé Cauwelier <herve@itaapy.com>
---
 src/odb.c |   67 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 src/odb.h |    5 ++-
 2 files changed, 68 insertions(+), 4 deletions(-)

diff --git a/src/odb.c b/src/odb.c
index 6d646a4..a562a19 100644
--- a/src/odb.c
+++ b/src/odb.c
@@ -64,6 +64,10 @@ struct git_pack {
 
 	/** Name of the pack file(s), without extension ("pack-abc"). */
 	char pack_name[GIT_PACK_NAME_MAX];
+
+	/** The .pack file, mapped into memory. */
+	git_file pack_fd;
+	git_map pack_map;
 };
 typedef struct git_pack git_pack;
 
@@ -782,7 +786,7 @@ static int pack_openidx(git_pack *p)
 			goto invalid_fail;
 		data = p->idx_map.data;
 
-		if (decode32(&data[0]) == PACK_TOC) {
+		if (decode32(&data[0]) == IDX_TOC) {
 			switch (decode32(&data[1])) {
 			case 2:
 				if (pack_openidx_v2(p))
@@ -809,6 +813,59 @@ unlock_fail:
 	return GIT_ERROR;
 }
 
+static int pack_openpack_map(git_pack *p)
+{
+	char pb[GIT_PATH_MAX];
+	off_t len;
+
+	if (git__fmt(pb, sizeof(pb), "%s/pack/%s.pack",
+			p->db->objects_dir,
+			p->pack_name) < 0)
+		return GIT_ERROR;
+
+	if ((p->pack_fd = gitfo_open(pb, O_RDONLY)) < 0)
+		return GIT_ERROR;
+
+	if ((len = gitfo_size(p->pack_fd)) < 0
+			|| !git__is_sizet(len)
+			|| gitfo_map_ro(&p->pack_map, p->pack_fd, 0, (size_t)len)) {
+		gitfo_close(p->pack_fd);
+		return GIT_ERROR;
+	}
+
+	return GIT_SUCCESS;
+}
+
+static int pack_openpack(git_pack *p)
+{
+	gitlck_lock(&p->lock);
+	if (p->invalid)
+		goto unlock_fail;
+	if (p->pack_fd < 0) {
+		uint32_t *data;
+
+		if (pack_openpack_map(p))
+			goto invalid_fail;
+		data = p->pack_map.data;
+
+		if (decode32(&data[0]) != PACK_TOC)
+			goto unmap_fail;
+	}
+	gitlck_unlock(&p->lock);
+	return GIT_SUCCESS;
+
+unmap_fail:
+	gitfo_free_map(&p->pack_map);
+
+invalid_fail:
+	p->invalid = 1;
+	p->pack_fd = -1;
+
+unlock_fail:
+	gitlck_unlock(&p->lock);
+	return GIT_ERROR;
+}
+
 static void pack_decidx(git_pack *p)
 {
 	gitlck_lock(&p->lock);
@@ -830,6 +887,11 @@ static void pack_dec(git_pack *p)
 			gitfo_close(p->idx_fd);
 			free(p->im_fanout);
 		}
+		if (p->pack_fd >= 0) {
+			gitfo_free_map(&p->pack_map);
+			gitfo_close(p->pack_fd);
+			p->pack_fd = -1;
+		}
 
 		gitlck_free(&p->lock);
 		free(p);
@@ -861,6 +923,7 @@ static git_pack *alloc_pack(const char *pack_name)
 	gitlck_init(&p->lock);
 	strcpy(p->pack_name, pack_name);
 	p->refcnt = 1;
+	p->pack_fd = -1;
 	return p;
 }
 
@@ -895,7 +958,7 @@ static int scan_one_pack(void *state, char *name)
 
 	r->next = *ret;
 	*ret = r;
-	return 0;
+	return GIT_SUCCESS;
 }
 
 static git_packlist* scan_packs(git_odb *db)
diff --git a/src/odb.h b/src/odb.h
index 2f205b2..0311d78 100644
--- a/src/odb.h
+++ b/src/odb.h
@@ -11,9 +11,10 @@
  *   uint32_t *fanout = ... the file data at offset 0 ...
  *   ntohl(fanout[0]) < ntohl(fanout[1])
  *
- * The value chosen here for PACK_TOC is such that the above
+ * The value chosen here for IDX_TOC is such that the above
  * cannot be true for an idx v1 file.
  */
-#define PACK_TOC 0xff744f63 /* -1tOc */
+#define IDX_TOC 0xff744f63 /* -1tOc */
+#define PACK_TOC 0x5041434b /* PACK */
 
 #endif
-- 
1.6.4.4

^ permalink raw reply related

* Patches for libgit2
From: Hervé Cauwelier @ 2009-10-03 18:09 UTC (permalink / raw)
  To: git

Hi,

Please find patches for libgit2. They allowed me to read the history of a
repository with only 75 patches but hacked by an history shortening.

Commits were both in a 5 GB pack file and loose objects. So I had to add
support for pack files. Hopefully, reading idx files was already there so I
followed the same model.

Patches can also be cloned at "git://git.hforge.org/libgit2.git".

There is a Python wrapper on the next side, forking from the existing
libgit2-python [1]. They bring the same functionnalities but the patches are
not slick yet.

[1] http://code.istique.net/gitweb/?p=libgit2-python.git;a=summary

One thing that would miss is unit tests, but I lack experience on testing C
code, and it would require having a small but exhaustive pack file with
different types of object, including ofs and ref deltas. Help on this part is
welcome.

Next step would be decoding raw objects to commits, trees, blobs and tags.
There is already the start of a commit structure. I'll be starting from that.

Regards,

Hervé

^ permalink raw reply

* [PATCH] Reserve a slot for argv[0] in default_arg.
From: Petter Urkedal @ 2009-10-03 13:29 UTC (permalink / raw)
  To: git; +Cc: Petter Urkedal

Setting "av" to one slot before the allocated "default_arg" array causes
glibc abort with "free(): invalid next size (normal)" in some
configurations (Gentoo, glibc-2.9_p20081201-r2, gcc-5.3.2 with PIE).
---
 builtin-show-branch.c |    7 +++++--
 1 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/builtin-show-branch.c b/builtin-show-branch.c
index 3510a86..3ab72b7 100644
--- a/builtin-show-branch.c
+++ b/builtin-show-branch.c
@@ -568,6 +568,9 @@ static int git_show_branch_config(const char *var, const char *value, void *cb)
 		if (default_alloc <= default_num + 1) {
 			default_alloc = default_alloc * 3 / 2 + 20;
 			default_arg = xrealloc(default_arg, sizeof *default_arg * default_alloc);
+			if (!default_num)
+			    /* One unused position for argv[0]. */
+			    default_arg[default_num++] = NULL;
 		}
 		default_arg[default_num++] = xstrdup(value);
 		default_arg[default_num] = NULL;
@@ -692,8 +695,8 @@ int cmd_show_branch(int ac, const char **av, const char *prefix)
 
 	/* If nothing is specified, try the default first */
 	if (ac == 1 && default_num) {
-		ac = default_num + 1;
-		av = default_arg - 1; /* ick; we would not address av[0] */
+		ac = default_num;
+		av = default_arg;
 	}
 
 	ac = parse_options(ac, av, prefix, builtin_show_branch_options,
-- 
1.6.4.4

^ permalink raw reply related

* Re: [PATCH] Fix '--relative-date'
From: Johan Sageryd @ 2009-10-03 11:18 UTC (permalink / raw)
  To: Jeff King; +Cc: git
In-Reply-To: <20091003100615.GC17873@coredump.intra.peff.net>

> I've picked up your patch in my tree with the following test squashed
> in:
> 
> diff --git a/t/t0006-date.sh b/t/t0006-date.sh
> index a4d8fa8..75b02af 100755
> --- a/t/t0006-date.sh
> +++ b/t/t0006-date.sh
> @@ -24,6 +24,7 @@ check_show 13000000 '5 months ago'
>  check_show 37500000 '1 year, 2 months ago'
>  check_show 55188000 '1 year, 9 months ago'
>  check_show 630000000 '20 years ago'
> +check_show 31449600 '12 months ago'
>  
>  check_parse() {
>  	echo "$1 -> $2" >expect

Thank you!

/Johan

^ permalink raw reply

* Automatic merge of local modifications in submodules?
From: Josef Wolf @ 2009-10-03 11:30 UTC (permalink / raw)
  To: git

Hello,

I am trying to explore gti's submodule functionality. Since I'm used to
svn:externals, I am missing the automatic merge of local modifications.
So I'd like to do something like:

  (cd submodule; git stash)
  git submodule update
  (cd submodule; git stash pop)

The problem with this command sequence is that it is not atomic. So if it
is automatically executed from a script (e.g cron or post-update-hook), it
will cause damage.

Any ideas how to properly auto-merge local modifications on submodule
update? git-submodule don't seem to have an option to do that.

^ permalink raw reply

* Re: [PATCH] Fix '--relative-date'
From: Jeff King @ 2009-10-03 10:06 UTC (permalink / raw)
  To: Johan Sageryd; +Cc: git
In-Reply-To: <1254543618-3772-1-git-send-email-j416@1616.se>

On Sat, Oct 03, 2009 at 01:20:18PM +0900, Johan Sageryd wrote:

> This fixes '--relative-date' so that it does not give '0 year, 12
> months', for the interval 360 <= diff < 365.

Thanks. I think this was a regression introduced recently when we fixed
the rounding on how years are printed (it used to just say "1 year"
because we were close, but now we always round down, so our boundary for
"1 year" must match exactly).

I've picked up your patch in my tree with the following test squashed
in:

diff --git a/t/t0006-date.sh b/t/t0006-date.sh
index a4d8fa8..75b02af 100755
--- a/t/t0006-date.sh
+++ b/t/t0006-date.sh
@@ -24,6 +24,7 @@ check_show 13000000 '5 months ago'
 check_show 37500000 '1 year, 2 months ago'
 check_show 55188000 '1 year, 9 months ago'
 check_show 630000000 '20 years ago'
+check_show 31449600 '12 months ago'
 
 check_parse() {
 	echo "$1 -> $2" >expect

-Peff

^ permalink raw reply related

* Re: [PATCH/RFC 5/7] imap-send: provide fall-back random-source
From: Jeff King @ 2009-10-03  9:58 UTC (permalink / raw)
  To: Erik Faye-Lund; +Cc: msysgit, git
In-Reply-To: <1254530385-2824-5-git-send-email-kusmabite@gmail.com>

On Sat, Oct 03, 2009 at 12:39:43AM +0000, Erik Faye-Lund wrote:

> Since some systems (at least Windows) does not have
> /dev/random nor friends, we need another random-source.
> 
> This patch uses the C-runtime's rand()-function as a
> poor-mans random-source.

Hmm. It looks like this arc4 RNG is used just for generating a unique
"X-TUID" header. Which seems to be used in isync (from which imap-send
is derived) to be to avoid duplicates in synchronization. But imap-send
doesn't actually use it for anything, as it just blindly pushes the
messages.

In other words, should all of this TUID code (and the arc4 code) simply
be ripped out?

-Peff

^ permalink raw reply

* Re: [PATCH/RFC 1/7] imap-send: use separate read and write fds
From: Jeff King @ 2009-10-03  9:40 UTC (permalink / raw)
  To: Erik Faye-Lund; +Cc: msysgit, git
In-Reply-To: <1254530385-2824-1-git-send-email-kusmabite@gmail.com>

On Sat, Oct 03, 2009 at 12:39:39AM +0000, Erik Faye-Lund wrote:

> Signed-off-by: Erik Faye-Lund <kusmabite@gmail.com>

Why? Given its presence in this series, I can only assume it has to do
with windows portability, but it would be helpful to give a little bit
of the reasoning in the commit message.

-Peff

^ permalink raw reply

* Re: [PATCH 2/2] allow mangling short options which take integer arguments
From: Clemens Buchacher @ 2009-10-03  9:23 UTC (permalink / raw)
  To: Jeff King; +Cc: Johannes Schindelin, Shawn O. Pearce, git
In-Reply-To: <20091002075012.GB27664@coredump.intra.peff.net>

On Fri, Oct 02, 2009 at 03:50:12AM -0400, Jeff King wrote:

> On Fri, Oct 02, 2009 at 09:43:17AM +0200, Clemens Buchacher wrote:
> 
> > On Thu, Oct 01, 2009 at 11:55:03PM +0200, Johannes Schindelin wrote:
> > 
> > > And this patch looks even more straight-forward than 1/2, _but_... what 
> > > about cases where there are short options that are digits?
> > 
> > Could you point me to one of those? I did not find any during my
> > non-exhaustive search. We should be able to handle them easily by adding
> > PARSE_OPT_MANY.
> 
> The one that comes readily to mind is "git log -1", but that is actually
> parsed by the revision options parser, which doesn't use parseopt. But
> there are a few done by parseopt:
> 
>   $ git grep "OPT_.*'[0-9]'"
>   archive.c:              OPT__COMPR('1', &compression_level, "compress faster", 1),
>   archive.c:              OPT__COMPR_HIDDEN('2', &compression_level, 2),
>   archive.c:              OPT__COMPR_HIDDEN('3', &compression_level, 3),
>   archive.c:              OPT__COMPR_HIDDEN('4', &compression_level, 4),
>   archive.c:              OPT__COMPR_HIDDEN('5', &compression_level, 5),
>   archive.c:              OPT__COMPR_HIDDEN('6', &compression_level, 6),
>   archive.c:              OPT__COMPR_HIDDEN('7', &compression_level, 7),
>   archive.c:              OPT__COMPR_HIDDEN('8', &compression_level, 8),
>   archive.c:              OPT__COMPR('9', &compression_level, "compress better", 9),
>   builtin-checkout.c:             OPT_SET_INT('2', "ours", &opts.writeout_stage, "stage",
>   builtin-checkout.c:             OPT_SET_INT('3', "theirs", &opts.writeout_stage, "stage",

Those are not affected by this patch series. They can be cuddled with other
short options just like before, since they don't take arguments.

^ permalink raw reply

* [PATCH] Fix '--relative-date'
From: Johan Sageryd @ 2009-10-03  4:20 UTC (permalink / raw)
  To: git; +Cc: Johan Sageryd

This fixes '--relative-date' so that it does not give '0 year, 12 months', for the interval 360 <= diff < 365.

Signed-off-by: Johan Sageryd <j416@1616.se>
---
 date.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/date.c b/date.c
index e9ee4aa..5d05ef6 100644
--- a/date.c
+++ b/date.c
@@ -123,7 +123,7 @@ const char *show_date_relative(unsigned long time, int tz,
 		return timebuf;
 	}
 	/* Say months for the past 12 months or so */
-	if (diff < 360) {
+	if (diff < 365) {
 		snprintf(timebuf, timebuf_size, "%lu months ago", (diff + 15) / 30);
 		return timebuf;
 	}
-- 
1.6.4.4

^ permalink raw reply related

* [PATCH/RFC 7/7] mingw: enable OpenSSL
From: Erik Faye-Lund @ 2009-10-03  0:39 UTC (permalink / raw)
  To: msysgit; +Cc: git
In-Reply-To: <1254530385-2824-6-git-send-email-kusmabite@gmail.com>

Since we have OpenSSL in msysgit now, enable it to support SSL
encryption for imap-send.

Signed-off-by: Erik Faye-Lund <kusmabite@gmail.com>
---
 Makefile |    1 -
 1 files changed, 0 insertions(+), 1 deletions(-)

diff --git a/Makefile b/Makefile
index 8ba789a..8818f0f 100644
--- a/Makefile
+++ b/Makefile
@@ -933,7 +933,6 @@ else
 ifneq (,$(findstring MINGW,$(uname_S)))
 	pathsep = ;
 	NO_PREAD = YesPlease
-	NO_OPENSSL = YesPlease
 	NO_LIBGEN_H = YesPlease
 	NO_SYMLINK_HEAD = YesPlease
 	NO_IPV6 = YesPlease
-- 
1.6.5.rc2.7.g4f8d3

^ permalink raw reply related

* [PATCH/RFC 6/7] mingw: wrap SSL_set_(w|r)fd to call _get_osfhandle
From: Erik Faye-Lund @ 2009-10-03  0:39 UTC (permalink / raw)
  To: msysgit; +Cc: git
In-Reply-To: <1254530385-2824-5-git-send-email-kusmabite@gmail.com>

SSL_set_fd (and friends) expects a OS file handle on Windows, not
a file descriptor as on UNIX(-ish).

This patch makes the Windows version of SSL_set_fd behave like the
UNIX versions, by calling _get_osfhandle on it's input.

Signed-off-by: Erik Faye-Lund <kusmabite@gmail.com>
---
 compat/mingw.h |   21 +++++++++++++++++++++
 1 files changed, 21 insertions(+), 0 deletions(-)

diff --git a/compat/mingw.h b/compat/mingw.h
index 5b5258b..6907345 100644
--- a/compat/mingw.h
+++ b/compat/mingw.h
@@ -124,6 +124,27 @@ static inline int waitpid(pid_t pid, int *status, unsigned options)
 	return -1;
 }
 
+#ifndef NO_OPENSSL
+#include <openssl/ssl.h>
+static inline int mingw_SSL_set_fd(SSL *ssl, int fd)
+{
+	return SSL_set_fd(ssl, _get_osfhandle(fd));
+}
+#define SSL_set_fd mingw_SSL_set_fd
+
+static inline int mingw_SSL_set_rfd(SSL *ssl, int fd)
+{
+	return SSL_set_rfd(ssl, _get_osfhandle(fd));
+}
+#define SSL_set_rfd mingw_SSL_set_rfd
+
+static inline int mingw_SSL_set_wfd(SSL *ssl, int fd)
+{
+	return SSL_set_wfd(ssl, _get_osfhandle(fd));
+}
+#define SSL_set_wfd mingw_SSL_set_wfd
+#endif
+
 /*
  * implementations of missing functions
  */
-- 
1.6.5.rc2.7.g4f8d3

^ permalink raw reply related

* [PATCH/RFC 5/7] imap-send: provide fall-back random-source
From: Erik Faye-Lund @ 2009-10-03  0:39 UTC (permalink / raw)
  To: msysgit; +Cc: git
In-Reply-To: <1254530385-2824-4-git-send-email-kusmabite@gmail.com>

Since some systems (at least Windows) does not have
/dev/random nor friends, we need another random-source.

This patch uses the C-runtime's rand()-function as a
poor-mans random-source.

Signed-off-by: Erik Faye-Lund <kusmabite@gmail.com>
---
 imap-send.c |   17 ++++++++++-------
 1 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/imap-send.c b/imap-send.c
index 8338717..dda7b7f 100644
--- a/imap-send.c
+++ b/imap-send.c
@@ -511,14 +511,17 @@ static void arc4_init(void)
 	unsigned char j, si, dat[128];
 
 	if ((fd = open("/dev/urandom", O_RDONLY)) < 0 && (fd = open("/dev/random", O_RDONLY)) < 0) {
-		fprintf(stderr, "Fatal: no random number source available.\n");
-		exit(3);
-	}
-	if (read_in_full(fd, dat, 128) != 128) {
-		fprintf(stderr, "Fatal: cannot read random number source.\n");
-		exit(3);
+		/* poor-mans random-source */
+		srand(clock());
+		for (i = 0; i < 128; ++i)
+			dat[i] = rand() & 0xFF;
+	} else {
+		if (read_in_full(fd, dat, 128) != 128) {
+			fprintf(stderr, "Fatal: cannot read random number source.\n");
+			exit(3);
+		}
+		close(fd);
 	}
-	close(fd);
 
 	for (i = 0; i < 256; i++)
 		rs.s[i] = i;
-- 
1.6.5.rc2.7.g4f8d3

^ permalink raw reply related

* [PATCH/RFC 4/7] imap-send: build imap-send on Windows
From: Erik Faye-Lund @ 2009-10-03  0:39 UTC (permalink / raw)
  To: msysgit; +Cc: git
In-Reply-To: <1254530385-2824-3-git-send-email-kusmabite@gmail.com>

Since the POSIX-specific tunneling code has been replaced
by the run-command API (and a compile-error has been
cleaned away), we can now enable imap-send on Windows
builds.

Signed-off-by: Erik Faye-Lund <kusmabite@gmail.com>
---
 Makefile |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/Makefile b/Makefile
index 12defd4..8ba789a 100644
--- a/Makefile
+++ b/Makefile
@@ -361,6 +361,7 @@ PROGRAMS += git-show-index$X
 PROGRAMS += git-unpack-file$X
 PROGRAMS += git-upload-pack$X
 PROGRAMS += git-var$X
+PROGRAMS += git-imap-send$X
 
 # List built-in command $C whose implementation cmd_$C() is not in
 # builtin-$C.o but is linked in as part of some other command.
@@ -1056,7 +1057,6 @@ EXTLIBS += -lz
 
 ifndef NO_POSIX_ONLY_PROGRAMS
 	PROGRAMS += git-daemon$X
-	PROGRAMS += git-imap-send$X
 endif
 ifndef NO_OPENSSL
 	OPENSSL_LIBSSL = -lssl
-- 
1.6.5.rc2.7.g4f8d3

^ permalink raw reply related

* [PATCH/RFC 3/7] imap-send: fix compilation-error on Windows
From: Erik Faye-Lund @ 2009-10-03  0:39 UTC (permalink / raw)
  To: msysgit; +Cc: git
In-Reply-To: <1254530385-2824-2-git-send-email-kusmabite@gmail.com>

mmsystem.h (included from windows.h) defines DRV_OK to 1. To avoid
an error due to DRV_OK redefenition, this patch undefines the old
definition (i.e the one from mmsystem.h) before defining DRV_OK.

Signed-off-by: Erik Faye-Lund <kusmabite@gmail.com>
---
 imap-send.c |    4 ++++
 1 files changed, 4 insertions(+), 0 deletions(-)

diff --git a/imap-send.c b/imap-send.c
index 55a663a..8338717 100644
--- a/imap-send.c
+++ b/imap-send.c
@@ -94,6 +94,10 @@ struct msg_data {
 	unsigned int crlf:1;
 };
 
+#ifdef DRV_OK
+#undef DRV_OK
+#endif
+
 #define DRV_OK          0
 #define DRV_MSG_BAD     -1
 #define DRV_BOX_BAD     -2
-- 
1.6.5.rc2.7.g4f8d3

^ permalink raw reply related

* [PATCH/RFC 2/7] imap-send: use run-command API for tunneling
From: Erik Faye-Lund @ 2009-10-03  0:39 UTC (permalink / raw)
  To: msysgit; +Cc: git
In-Reply-To: <1254530385-2824-1-git-send-email-kusmabite@gmail.com>

Signed-off-by: Erik Faye-Lund <kusmabite@gmail.com>
---
 imap-send.c |   37 ++++++++++++++++---------------------
 1 files changed, 16 insertions(+), 21 deletions(-)

diff --git a/imap-send.c b/imap-send.c
index ba50961..55a663a 100644
--- a/imap-send.c
+++ b/imap-send.c
@@ -24,6 +24,7 @@
 
 #include "cache.h"
 #include "exec_cmd.h"
+#include "run-command.h"
 #ifdef NO_OPENSSL
 typedef void *SSL;
 #endif
@@ -989,8 +990,7 @@ static struct store *imap_open_store(struct imap_server_conf *srvc)
 	struct imap_store *ctx;
 	struct imap *imap;
 	char *arg, *rsp;
-	int s = -1, a[2], preauth;
-	pid_t pid;
+	int s = -1, preauth;
 
 	ctx = xcalloc(sizeof(*ctx), 1);
 
@@ -1001,29 +1001,24 @@ static struct store *imap_open_store(struct imap_server_conf *srvc)
 	/* open connection to IMAP server */
 
 	if (srvc->tunnel) {
-		imap_info("Starting tunnel '%s'... ", srvc->tunnel);
+		const char *argv[4];
+		struct child_process tunnel = {0};
 
-		if (socketpair(PF_UNIX, SOCK_STREAM, 0, a)) {
-			perror("socketpair");
-			exit(1);
-		}
+		imap_info("Starting tunnel '%s'... ", srvc->tunnel);
 
-		pid = fork();
-		if (pid < 0)
-			_exit(127);
-		if (!pid) {
-			if (dup2(a[0], 0) == -1 || dup2(a[0], 1) == -1)
-				_exit(127);
-			close(a[0]);
-			close(a[1]);
-			execl("/bin/sh", "sh", "-c", srvc->tunnel, NULL);
-			_exit(127);
-		}
+		argv[0] = "/bin/sh";
+		argv[1] = "-c";
+		argv[2] = srvc->tunnel;
+		argv[3] = NULL;
 
-		close(a[0]);
+		tunnel.argv = argv;
+		tunnel.in = -1;
+		tunnel.out = -1;
+		if (start_command(&tunnel))
+			die("cannot start proxy %s", argv[0]);
 
-		imap->buf.sock.fd[0] = a[1];
-		imap->buf.sock.fd[1] = dup(a[1]);
+		imap->buf.sock.fd[0] = tunnel.out;
+		imap->buf.sock.fd[1] = tunnel.in;
 
 		imap_info("ok\n");
 	} else {
-- 
1.6.5.rc2.7.g4f8d3

^ permalink raw reply related

* [PATCH/RFC 1/7] imap-send: use separate read and write fds
From: Erik Faye-Lund @ 2009-10-03  0:39 UTC (permalink / raw)
  To: msysgit; +Cc: git

Signed-off-by: Erik Faye-Lund <kusmabite@gmail.com>
---
 imap-send.c |   37 +++++++++++++++++++++++--------------
 1 files changed, 23 insertions(+), 14 deletions(-)

diff --git a/imap-send.c b/imap-send.c
index 3847fd1..ba50961 100644
--- a/imap-send.c
+++ b/imap-send.c
@@ -154,7 +154,7 @@ struct imap_list {
 };
 
 struct imap_socket {
-	int fd;
+	int fd[2];
 	SSL *ssl;
 };
 
@@ -304,8 +304,12 @@ static int ssl_socket_connect(struct imap_socket *sock, int use_tls_only, int ve
 		ssl_socket_perror("SSL_new");
 		return -1;
 	}
-	if (!SSL_set_fd(sock->ssl, sock->fd)) {
-		ssl_socket_perror("SSL_set_fd");
+	if (!SSL_set_rfd(sock->ssl, sock->fd[0])) {
+		ssl_socket_perror("SSL_set_rfd");
+		return -1;
+	}
+	if (!SSL_set_wfd(sock->ssl, sock->fd[1])) {
+		ssl_socket_perror("SSL_set_wfd");
 		return -1;
 	}
 
@@ -327,11 +331,12 @@ static int socket_read(struct imap_socket *sock, char *buf, int len)
 		n = SSL_read(sock->ssl, buf, len);
 	else
 #endif
-		n = xread(sock->fd, buf, len);
+		n = xread(sock->fd[0], buf, len);
 	if (n <= 0) {
 		socket_perror("read", sock, n);
-		close(sock->fd);
-		sock->fd = -1;
+		close(sock->fd[0]);
+		close(sock->fd[1]);
+		sock->fd[0] = sock->fd[1] = -1;
 	}
 	return n;
 }
@@ -344,11 +349,12 @@ static int socket_write(struct imap_socket *sock, const char *buf, int len)
 		n = SSL_write(sock->ssl, buf, len);
 	else
 #endif
-		n = write_in_full(sock->fd, buf, len);
+		n = write_in_full(sock->fd[1], buf, len);
 	if (n != len) {
 		socket_perror("write", sock, n);
-		close(sock->fd);
-		sock->fd = -1;
+		close(sock->fd[0]);
+		close(sock->fd[1]);
+		sock->fd[0] = sock->fd[1] = -1;
 	}
 	return n;
 }
@@ -361,7 +367,8 @@ static void socket_shutdown(struct imap_socket *sock)
 		SSL_free(sock->ssl);
 	}
 #endif
-	close(sock->fd);
+	close(sock->fd[0]);
+	close(sock->fd[1]);
 }
 
 /* simple line buffering */
@@ -960,7 +967,7 @@ static void imap_close_server(struct imap_store *ictx)
 {
 	struct imap *imap = ictx->imap;
 
-	if (imap->buf.sock.fd != -1) {
+	if (imap->buf.sock.fd[0] != -1) {
 		imap_exec(ictx, NULL, "LOGOUT");
 		socket_shutdown(&imap->buf.sock);
 	}
@@ -988,7 +995,7 @@ static struct store *imap_open_store(struct imap_server_conf *srvc)
 	ctx = xcalloc(sizeof(*ctx), 1);
 
 	ctx->imap = imap = xcalloc(sizeof(*imap), 1);
-	imap->buf.sock.fd = -1;
+	imap->buf.sock.fd[0] = imap->buf.sock.fd[1] = -1;
 	imap->in_progress_append = &imap->in_progress;
 
 	/* open connection to IMAP server */
@@ -1015,7 +1022,8 @@ static struct store *imap_open_store(struct imap_server_conf *srvc)
 
 		close(a[0]);
 
-		imap->buf.sock.fd = a[1];
+		imap->buf.sock.fd[0] = a[1];
+		imap->buf.sock.fd[1] = dup(a[1]);
 
 		imap_info("ok\n");
 	} else {
@@ -1092,7 +1100,8 @@ static struct store *imap_open_store(struct imap_server_conf *srvc)
 			goto bail;
 		}
 
-		imap->buf.sock.fd = s;
+		imap->buf.sock.fd[0] = s;
+		imap->buf.sock.fd[1] = dup(s);
 
 		if (srvc->use_ssl &&
 		    ssl_socket_connect(&imap->buf.sock, 0, srvc->ssl_verify)) {
-- 
1.6.5.rc2.7.g4f8d3

^ permalink raw reply related

* Re: [PATCH] MSVC: fix build warnings
From: Michael Wookey @ 2009-10-02 23:28 UTC (permalink / raw)
  To: Junio C Hamano, git
In-Reply-To: <7v7hvd4flb.fsf@alter.siamese.dyndns.org>

2009/10/3 Junio C Hamano <gitster@pobox.com>:
> Michael Wookey <michaelwookey@gmail.com> writes:
>
>> diff --git a/builtin-branch.c b/builtin-branch.c
>> index 9f57992..cf6a9ca 100644
>> --- a/builtin-branch.c
>> +++ b/builtin-branch.c
>> @@ -93,7 +93,7 @@ static const char *branch_get_color(enum color_branch ix)
>>
>>  static int delete_branches(int argc, const char **argv, int force, int kinds)
>>  {
>> -     struct commit *rev, *head_rev = head_rev;
>
> I haven't tried, but the patch may break build with "gcc -Werror".
>
> This is a common and unfortunate idiom to tell the readers of the code
> that this initialization is unnecessary, gcc is not clever enough to
> notice and gives warnings, and we are squelching it, knowing what we are
> doing.

I can't build with -Werror on Ubuntu 9.04 (gcc 4.3.3) because of the following:

  http://article.gmane.org/gmane.comp.version-control.git/127477

With the current git.rc2, I also get the following warnings:

  builtin-mailinfo.c: In function 'handle_commit_msg':
  builtin-mailinfo.c:789: warning: ignoring return value of
'ftruncate', declared with attribute warn_unused_result

It would be nice to get those warnings removed.

I just tried my patch with gcc 4.2.1 (Mac OSX 10.6) and there are a
few warnings that are generated because some of the variables have had
their initial values removed. I can send a V2 if you like, however
these variable were initialised that way for a reason and it might not
be sensible to clean them up in the way I was proposing.

What would be a good method of fixing these warnings now that we have
the ability to compile with MSVC? Explicitly initialising the
variables (to something sane) or should we start to introduce compiler
specific pragmas (ugly...) that aim to clean the various build
warnings? I just want to reduce (and eventually remove) all the build
noise when building using MSVC.

From what I have seen so far, building with MSVC spews out a lot of
warnings. I am building with MSVC in both the IDE and from a build
console via:

    devenv git.sln /useenv /build "Debug|Win32"

If you compile using gcc with "-Wextra" you will see a similar amount
of build noise that gets generated. See the following for some
previous discussion:

  http://article.gmane.org/gmane.comp.version-control.git/128967

^ permalink raw reply

* Re: "Not currently on any branch"
From: Sean Estabrooks @ 2009-10-02 23:15 UTC (permalink / raw)
  To: Alex Riesen; +Cc: Tim, git
In-Reply-To: <81b0412b0910021446nb07e7e9l465f588168297fe9@mail.gmail.com>

On Fri, 2 Oct 2009 23:46:53 +0200
Alex Riesen <raa.lkml@gmail.com> wrote:

> On Fri, Oct 2, 2009 at 22:08, Tim <timothyjwashington@yahoo.ca> wrote:
> > What's the most straightforward & cleanest way to merge my changes in the
> > headless branch to my 'ui-integration' branch?
> 
> Assuming you use a Bourne shell:
> 
> $ prev=$(git rev-parse HEAD)
> $ git checkout ui-integration && git merge $prev
> 
> 

You could also rely on the reflog to avoid the need to store off the
previous HEAD value, so just:

$ git checkout ui-integration && git merge HEAD@{1}


Sean

^ permalink raw reply

* Re: [PATCH 1/6 (v4)] man page and technical discussion for rev-cache
From: Nick Edelen @ 2009-10-02 22:12 UTC (permalink / raw)
  To: Nick Edelen, Junio C Hamano, Nicolas Pitre, Johannes Schindelin,
	Sam Vilain
In-Reply-To: <op.uyuwkmv0tdk399@sirnot.private>

Before any code is introduced the full documentation is put forth.  This
provides a man page for the porcelain, and a technical doc in technical/.  The
latter describes the API, and discusses rev-cache's design, file format and
mechanics.

Signed-off-by: Nick Edelen <sirnot@gmail.com>

---
clean resend of patches.  this set fixes a small bug in graft handling, and
tweaks test for compatability.

  Documentation/git-rev-cache.txt       |  190 ++++++++++
  Documentation/technical/rev-cache.txt |  634 +++++++++++++++++++++++++++++++++
  2 files changed, 824 insertions(+), 0 deletions(-)

diff --git a/Documentation/git-rev-cache.txt b/Documentation/git-rev-cache.txt
new file mode 100644
index 0000000..5a713ad
--- /dev/null
+++ b/Documentation/git-rev-cache.txt
@@ -0,0 +1,190 @@
+git-rev-cache(1)
+================
+
+NAME
+----
+git-rev-cache - Add, walk and maintain revision cache slices
+
+SYNOPSIS
+--------
+'git-rev-cache' COMMAND [options] [<commit>...]
+
+DESCRIPTION
+-----------
+The revision cache ('rev-cache') provides a mechanism for significantly
+speeding up revision traversals.  It does this by creating an efficient
+database (cache) of commits, their related objects and topological relations.
+Independant of packs and the object store, this database is composed of
+rev-cache "slices" -- each a different file storing a given segment of commit
+history.  To map commits to their respective slices, a single index file is
+kept for the rev-cache.
+
+'git-rev-cache' provides a front-end for the rev-cache mechanism, intended for
+updating and maintaining rev-cache slices in the current repository.  New cache
+slice files can be 'add'ed, to keep the cache up-to-date; individual slices can
+be traversed; smaller slices can be 'fuse'd into a larger slice; and the
+rev-cache index can be regenerated.
+
+COMMANDS
+--------
+
+add
+~~~
+Add revisions to the cache by creating a new cache slice.  Reads a revision
+list from the command line, formatted as: `START START ... \--not END END ...`
+
+Options
+^^^^^^^
+
+\--all::
+	Include all refs in the new cache slice, like the \--all option in
+	'rev-list'.
+
+\--fresh/\--incremental::
+	Exclude everything already in the revision cache, analogous to
+	\--incremental in 'pack-objects'.
+
+\--stdin::
+	Read newline-seperated revisions from the standard input.  Use \--not
+	to exclude commits, as on the command line.
+
+\--legs::
+	Ensure newly-generated cache slice has no partial ends.  This means that
+	no commit has partially cached parents, in that all its parents are
+	cached or none of them are.  99.9% of users can ignore this command.
++
+\--legs will cause 'rev-cache' to expand potential slice end-points (creating
+"legs") until this condition is met, simplifying the cache slice structure.
+'rev-cache' itself does not care if a slice has legs or not, but the condition
+may reduce the required complexity of other applications that might use the
+revision cache.
+
+\--no-objects::
+	Non-commit objects are normally included along with the commit with
+	which they were introduced.  This is obviously very benificial, but can
+	take longer in cache slice generation.  Using this option will disable
+	non-commit object caching.
++
+\--no-objects is mainly intended for debugging or development purposes, but may
+find use in special situations (e.g. common traversal of only commits).
+
+Output
+^^^^^^
+
+On `stderr` 'add' outputs general information about the generated slice,
+including the number of objects and paths, and the start/end commits (prefix S
+indicates start, E an end).  Through `stdout` it emits only the SHA-1 of the
+slice.
+
+walk
+~~~~
+Analogous to a slice-oriented 'rev-list', 'walk' will traverse a region in a
+particular cache slice.  Interesting and uninteresting (delimited, as with
+'rev-list', with \--not) are specified on the command line, and output is the
+same as vanilla 'rev-list'.
+
+Options
+^^^^^^^
+
+\--objects::
+	Like 'rev-list', 'walk' will normally only list commits.  Use this
+	option to list non-commit objects as well, if they are present in the
+	cache slice.
+
+Output
+^^^^^^
+
+'walk' will simply dump the contents of the output commit list, work list, and
+pending object array.  The headers are outputed on `stderr`, the object hashes
+and names on `stdout`.
+
+fuse
+~~~~
+Merge several cache slices into a single large slice, like 'repack' for
+'rev-cache'.  On each invocation of 'add' a new file ("slice") is added to the
+revision cache directory, and after several additions the directory may become
+populated with many, relatively small slices.  Numerous smaller slices will
+yield poorer performance than a one or two large ones, because of the overhead
+of loading new slices into memory.
+
+Running 'fuse' every once in a while will solve this problem by coalescing all
+the cache slices into one larger slice.  For very large projects, using
+\--ignore-size is advisable to prevent overly large cache slices.  This can be
+set to run on garbage collection; see 'Automation' for more info.
+
+Note that 'fuse' uses the internal revision walker, so the options used in
+fusion override those of the cache slices upon which it operates.  For example,
+if some slices were generated with \--no-objects, yet 'fuse' was performed with
+non-commit objects, the resulting slice would still contain objects but would
+take longer to generate.
+
+Options
+^^^^^^^
+
+\--all::
+	Normally fuse will only include everything that's already in the
+	revision cache.  \--all tells it to start walking from the branch
+	heads, effectively a `add --all --fresh; fuse`
+	(pseudo-revcache-command).
+
+\--no-objects::
+	As in 'add', this option disables inclusion of non-commit objects.  If
+	some cache slices do contain such objects, the information will be lost.
+
+\--ignore-size[=N]::
+	Do not merge cache slices of size >=N (be aware that slices must be
+	mapped to memory).  N can have a suffix of "k" or "m", denoting N as
+	kilobytes and megabytes, respectively.  If N is not provided 'fuse'
+	will default to a size specified in `revcache.ignoresize`, or ~25MB if
+	the config var is not set.
+
+Output
+^^^^^^
+
+This command prints the SHA-1 of the new slice on `stdout`, and information
+about its work on `stderr` -- specifically which files it's removing.
+
+Automation
+^^^^^^^^^^
+
+Set the git configuration variable `gc.revcache` to run 'fuse' on garbage
+collection.  The arguments passed are `fuse \--all \--ignore-size`; i.e. 'gc'
+will keep everything cached into size-regulated slices.
+
+index
+~~~~~
+Regenerate the revision cache index.  If the rev-cache index file associating
+objects with cache slices gets corrupted, lost, or otherwise becomes unusable,
+'index' will quickly regenerate the file.  It's most likely that this won't be
+needed in every day use, as it is targeted towards debugging and development.
+
+alt
+~~~
+Create a cache slice pointer to another slice, identified by its full path:
+`fuse path/to/other/slice`
+
+This command is useful if you have several repositories sharing a common
+history.  Although space requirements for rev-cache are slim anyway, you can in
+this situation reduce it further by using slice pointers, pointing to relavant
+slices in other repositories.  Note that only one level of redirection is
+allowed, and the slice pointer will break if the original slice is removed.
+'fuse' will not touch slice pointers.
+
+NOTES
+-----
+In certain circumstances there may be some inconsistencies with object names
+between cached and non-cached walks.  Specifically, if two objects in a commit
+tree have the same content (= same SHA-1); or if objects of the same SHA-1 are
+introduced independantly in parallel branches.
+
+In the first case rev-cache will use the name of the youngest file, while
+vanilla rev-list will return the name of the entry first encountered in walking
+the tree.  The latter case is a result of rev-cache's internal topological
+ordering: the difference is the same between sorted and unsorted revision walks.
+
+See 'Discussion' for the underlying reasons for the discrepencies.
+
+DISCUSSION
+----------
+For an explanation of the API and its inner workings, see
+link:technical/rev-cache.txt[technical info on rev-cache].
diff --git a/Documentation/technical/rev-cache.txt b/Documentation/technical/rev-cache.txt
new file mode 100644
index 0000000..91fce8b
--- /dev/null
+++ b/Documentation/technical/rev-cache.txt
@@ -0,0 +1,634 @@
+rev-cache
+=========
+
+The revision cache API ('rev-cache') provides a method for efficiently storing
+and accessing commit branch sections.  Such branch slices are defined by a
+series of start/top (interesting) and end/bottom (uninteresting) commits.  Each
+slice contains information on commits in topological order.  Recorded with each
+commit is:
+
+* All intra-slice topological relations, encoded into path "channels" (see
+  'Mechanics' for full explanation).
+* Object meta-data: type, SHA-1, size, date (for commits).
+* Objects introduced by that commit, not present in the its cached parents.
+
+In addition to the API, basic structures are exported for the possibility of
+direct access.
+
+The API
+-------
+You can find the function prototypes in `revision.h`.
+
+Data Structures
+~~~~~~~~~~~~~~~
+The `rev_cache_info` struct holds all the options and flags for the API.
+
+----
+struct rev_cache_info {
+	/* generation flags */
+	unsigned objects : 1,
+		legs : 1,
+		make_index : 1,
+		fuse_me : 1;
+
+	/* index inclusion */
+	unsigned overwrite_all : 1;
+
+	/* traversal flags */
+	unsigned add_to_pending : 1;
+
+	/* fuse options */
+	unsigned int ignore_size;
+
+	/* reserved */
+	struct rev_cache_slice_map *maps,
+		*last_map;
+};
+----
+
+The fields:
+
+`objects`::
+	Add non-commit objects to slice.
+
+`legs`::
+	Ensure end/bottom commits have no children.
+
+`make_index`::
+	Integrate newly-made slice into index.
+
+`fuse_me`::
+	This is specified if a fuse is occuring, and slices are to be reused.
+	This option requires `maps` and `last_maps` to be initialized.
+
+`overwrite_all`::
+	When a cache slice is added to the index, sometimes overlap occures
+	between it and other slices.  Normally, original index entries are kept
+	unless the new entry represents a start commit (older entries are more
+	likely to lead to greater in-slice traversals).  This options overrides
+	that, and updates all entries of the new slice.
+
+`add_to_pending`::
+	Append unique non-commit objects to the `pending` object list in the
+	passed `rev_info` instance.
+
+`add_names`::
+	Include non-commit object names in the pending object entries if
+	`add_to_pending` is set.
+
+`ignore_size`::
+	If non-zero, ignore slices with size greater or equal to this during
+fusion.
+
+`maps`/`last_map`::
+	An array of slice mappings, indexed by their id in the slice index
+	header, to be re-used with `fuse_me`.  `last_map` points to the last
+	mapping used, and should be initialized to 0.
+
+Functions
+~~~~~~~~~
+
+init_rev_cache
+^^^^^^^^^^^^^^
+----
+void init_rev_cache_info(
+	struct rev_cache_info *rci OUT
+)
+----
+
+Initialize `rci` to default options.
+
+make_cache_slice
+^^^^^^^^^^^^^^^^
+----
+int make_cache_slice(
+	struct rev_cache_info *rci IN,
+	struct rev_info *revs IN,
+	struct commit_list **starts IN/OUT,
+	struct commit_list **ends IN/OUT,
+	unsigned char *cache_sha1 OUT
+)
+----
+
+Create a cache slice based on either `revs` (if non-NULL) *or* the `starts` and
+`ends` lists.  The actual list of start and end commits of the slice may be
+different from the parameters, based on what defines the branch segment, and
+this actual list is passed back through `starts` and `ends`.
+
+The cache slice is identified via a SHA-1 generated from the actual start/end
+commit lists.  `cache_sha1`, if non-NULL, can recieve the cache slice name.
+`rci` is used to specify generation options, but can be NULL if you want
+`make_cache_slice` to fall back on defaults.  Returns 0 on success, non-zero on
+failure.
+
+make_cache_index
+^^^^^^^^^^^^^^^^
+----
+int make_cache_index(
+	struct rev_cache_info *rci IN,
+	unsigned char *cache_sha1 IN,
+	int fd IN,
+	unsigned int size IN
+)
+----
+
+Add a slice to the rev-cache index.  `cache_sha1` is the identity hash of the
+cache slice; `fd` is a file descriptor of the cache slice opened with
+read/write privileges (the slice is not actually modified); `size` is the size
+of the cache slice.  Although there are currently no options for index
+updating, `rci` is a placeholder in case of future options.  Note that this
+function is normally called by `make_cache_slice`.  Returns 0 on success,
+non-zero on failure.
+
+open_cache_slice
+^^^^^^^^^^^^^^^^
+----
+int open_cache_slice(
+	unsigned char *sha1 IN,
+	int flags IN
+)
+----
+
+Returns a file descriptor to a cache slice described by `sha1` hash, using
+`flags` as the access mode.  This will follow cache slice pointers to one level
+of indirection.
+
+get_cache_slice
+^^^^^^^^^^^^^^^
+----
+unsigned char *get_cache_slice(
+	struct commit *commit IN
+)
+----
+
+Given a commit object `get_cache_slice` will search the revision cache index
+and return, if found, the cache slice SHA-1.
+
+traverse_cache_slice
+^^^^^^^^^^^^^^^^^^^^
+----
+int traverse_cache_slice(
+	struct rev_info *revs IN/OUT,
+	unsigned char *cache_sha1 IN,
+	struct commit *commit IN,
+	unsigned long *date_so_far IN/OUT,
+	int *slop_so_far IN/OUT,
+	struct commit_list ***queue OUT,
+	struct commit_list **work IN/OUT
+)
+----
+
+Traverse a specified cache slice.  An explanation of the each field:
+
+`revs`::
+	The revision walk instance.  `traverse_cache_slice` uses this for
+	general options (e.g. which objects are included) and slice traversal
+	options (in the `rev_cache_info` field).  If the `add_to_pending`
+	option is specified, non-commit objects are appended to the `pending`
+	object list field.
+
+`cache_sha1`::
+	SHA-1 identifying the cache slice to use.  This can be taken directly
+	from `get_cache_slice`.
+
+`commit`::
+	The current commit object in the revision walk, i.e. the commit which
+	inspired this slice traversal.  Although theoretically redundant in
+	view of the `work` list, this simplifies interaction with normal
+	revision walks, which pop commits from `work` before analyzing them.
+
+`date_so_far`::
+	The date of the oldest encountered interesting commit.  Passing NULL
+	will let `traverse_cache_slice` use defaults.
+
+`slop_so_far`::
+	The `slop` value, a la revision.c.  This is a counter used to determine
+	when to stop traversing, based on how many extra uninteresting commits
+	should be encountered.  NULL will enable defaults, as above.
+
+`queue`::
+	Refers to a pointer to the head of a FIFO commit list, recieving the
+	commits we've seen and added.
+
+`work`::
+	A date-ordered list of commits that have yet to be processed (i.e. seen
+	but not added).  Commits from here present in the slice are removed
+	(and, obviously, used as starting places for traversal), and any end
+	commits encountered are inserted.
+
+starts_from_slices
+^^^^^^^^^^^^^^^^^^
+----
+void starts_from_slices(
+	struct rev_info *revs OUT,
+	unsigned int flags IN,
+	unsigned char *which IN,
+	int n IN
+)
+----
+
+Will mark start-commits in certain rev-cache slices with `flag`, and added them
+to the pending list of `revs`.  If `n` is zero, `starts_from_slices` will use
+all slices.  Otherwise `which` will specify an *unseperated* list of cache
+SHA-1s to use (20 bytes each), and `n` will contain the number of slices (i.e.
+20 * `n` = size of `which`).
+
+fuse_cache_slices
+^^^^^^^^^^^^^^^^^
+----
+int fuse_cache_slices(
+	struct rev_cache_info *rci IN,
+	struct rev_info *revs IN
+)
+----
+
+Generate a slice based on `revs`, replacing all encountered slices with one
+(larger) slice.  The `ignore_size` field in `rci`, if non-zero, will dictate
+which cache slice sizes to ignore in both traversal and replacement.
+
+regenerate_cache_index
+^^^^^^^^^^^^^^^^^^^^^^
+----
+int regenerate_cache_index(
+	struct rev_cache_info *rci IN
+)
+----
+
+Remake the revision cache index, including all the slices.  Currently no
+options in `rci` exist for index (re)generation, but some may develop in the
+future.
+
+to/from_disked_rc_object/index_entry
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+----
+struct rc_object/index_entry *from_disked_rc_object/index_entry(
+	struct rc_object/index_entry_ondisk *src IN,
+	struct rc_object/index_entry *dst OUT
+)
+
+struct rc_object/index_entry_ondisk *to_disked_rc_object/index_entry(
+	struct rc_object/index_entry *src IN,
+	struct rc_object/index_entry_ondisk *dst OUT
+)
+----
+
+Functions to convert between the internal and storage (`_ondisk`) versions of
+object and index entry structures.  These are necessary for direct access to
+the cache slices.  If NULL is provided for `dst` a statically allocated
+structure is used, and a pointer to the struct is returned.  Otherwise the
+functions return `dst`.
+
+Example Usage
+-------------
+
+A few examples to demonstrate usage:
+
+.Creating a slice
+----
+/* pretend you're a porcelain for rev-cache reading from the command line */
+struct rev_info revs;
+struct rev_cache_info rci;
+
+init_revisions(&revs, 0);
+init_rci(&rci);
+
+flags = 0;
+for (i = 1; i < argc; i++) {
+        if (!strcmp(argv[i], "--not"))
+                flags ^= UNINTERESTING;
+        else if(!strcmp(argv[i], "--fresh"))
+                starts_from_slices(&revs, UNINTERESTING, 0, 0);
+        else
+                handle_revision_arg(argv[i], &revs, flags, 1);
+}
+
+/* we want to explicitly set certain options */
+rci.objects = 0;
+
+if (!make_cache_slice(&rci, &revs, 0, 0, cache_sha1))
+        printf("made slice!  it's called %s\n", sha1_to_hex(cache_sha1));
+----
+
+.Traversing a slice
+----
+/* let's say you're walking the tree with a 'work' list of current heads and a
+ * FILO output list 'out' */
+out = 0;
+outp = &out;
+
+while (work) {
+        struct commit *commit = pop_commit(&work);
+        struct object *object = &commit->object;
+        unsigned char *cache_sha1;
+
+        if (cache_sha1 = get_cache_slice(object->sha1)) {
+                /* note that this will instatiate any topo-relations
+                 * as it goes */
+                if (traverse_cache_slice(&revs, cache_sha1,
+                        commit, 0, 0, /* use defaults */
+                        &outp, &work) < 0)
+                        die("I'm overreacting to a non-fatal cache error");
+        } else {
+                struct commit_list *parents = commit->parents;
+
+                while (parents) {
+                        struct commit *p = parents->item;
+                        struct object *po = &p->object;
+
+                        parents = parents->next;
+                        if (po->flags & UNINTERESTING)
+                                continue;
+
+                        if (object->flags & UNINTERESTING)
+                                po->flags |= UNINTERESTING;
+                        else if (po->flags & SEEN)
+                                continue;
+
+                        if (!po->parsed)
+                                parse_commit(p);
+                        insert_by_date(p, &work);
+                }
+
+                if (object->flags & (SEEN | UNINTERESTING) == 0)
+                        outp = &commit_list_insert(commit, outp)->next;
+                object->flags |= SEEN;
+        }
+}
+----
+
+Some Internals
+--------------
+For more advanced usage, the slice and index file(s) may be accessed directly.
+Relavant structures are availabe in `rev-cache.h`.
+
+File Formats
+~~~~~~~~~~~~
+
+Cache Slices
+^^^^^^^^^^^^
+A slice has a basic fixed-size header, followed by a certain number of object
+entries, then a NULL-seperated list of object names.  Commits are sorted in
+topo-order, and each commit entry is followed by the objects added in that
+commit.
+
+----
+         -- +--------------------------------+
+header      | object number, etc...          |
+         -- +--------------------------------+
+commit      | commit info                    |
+entry       | path data                      |
+            +--------------------------------+
+            | tree/blob info                 |
+            +--------------------------------+
+            | tree/blob info                 |
+            +--------------------------------+
+            | ...                            |
+         -- +--------------------------------+
+commit      | commit info                    |
+entry       | path data                      |
+            +--------------------------------+
+            | tree/blob info                 |
+            +--------------------------------+
+            | ...                            |
+         -- +--------------------------------+
+...         ...
+         -- +--------------------------------+
+name list   | \0some_file_name\0             |
+(note       +--------------------------------+
+preceeding  | another_file\0                 |
+null)       ...                              |
+            +--------------------------------+
+----
+
+Here is the header:
+
+----
+struct rc_cache_slice_header {
+	char signature[8]; /* REVCACHE */
+	unsigned char version;
+	uint32_t ofs_objects;
+
+	uint32_t object_nr;
+	uint16_t path_nr;
+	uint32_t size;
+
+	unsigned char sha1[20];
+
+	uint32_t names_size;
+};
+----
+
+Explanations:
+
+`signature`::
+	The identifying signature of cache slice file.  Always "REVCACHE".
+`version`::
+	The version number, currently 1.
+`ofs_objects`::
+	The byte offset at which the commit/object listing starts.  Always
+	present at the 10th byte, regardless of file version.
+`object_nr`::
+	The total number of objects (commit + non-commit objects) present in
+	the slice.
+`path_nr`::
+	The total number of paths/channels used in encoding the topological
+	data.  Note that paths are reused (see 'Mechanics'), so there will
+	never be more than a few hundred paths (if that) used.
+`size`::
+	The size of the slice *excluding* the name list.  In other words, the
+	size of the portion mapped to memory.
+`sha1`::
+	The cache slice SHA-1.
+`names_size`::
+	The size of the name list.  `size` + `names_size` = size of slice
+
+Revision Cache Index
+^^^^^^^^^^^^^^^^^^^^
+The index is a single file that associates SHA-1s with cache slices and file
+positions.  It is somewhat similar to pack-file indexes, containing a fanout
+table and a list of index entries sorted by hash.
+
+----
+         -- +--------------------------------+
+header      | object #, cache #, etc.        |
+         -- +--------------------------------+
+sha1s of    | SHA-1                          |
+slices      | ...                            |
+         -- +--------------------------------+
+fanout      | fanout[0x00]                   |
+table       ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+            | fanout[0xff]                   |
+         -- +--------------------------------+
+index       | SHA-1 of object                |
+entries     | index of cache slice SHA-1     |
+            | position in cache slice        |
+            +--------------------------------+
+            |                                |
+            ...
+            +--------------------------------+
+----
+
+The header:
+
+----
+struct rc_index_header {
+	char signature[8]; /* REVINDEX */
+	unsigned char version;
+	uint32_t ofs_objects;
+
+	uint32_t object_nr;
+	unsigned char cache_nr;
+
+	uint32_t max_date;
+};
+----
+
+Explanations:
+
+`signature`::
+	Always "REVINDEX".
+`version`::
+	Version number, currently 1.
+`ofs_objects`::
+	Offset at which the entry objects begin.  This is more obviously useful
+	in the index because the list of slice SHA-1s is variably-sized.
+`object_nr`::
+	Number of index entry objects present.
+`cache_nr`::
+	Number of cache slices to which the index maps, and hence the number of
+slice SHA-1s listed.
+`max_date`::
+	The oldest commit represented in the index.  This is used to help speed
+up lookup times by knowing what range of commits we definitely don't have
+cached.  Normal usage of 'rev-cache' would leave no "holes" in its coverage of
+commit history -- once a commit is cached, everything reachable from it should
+be cached as well.  Most of the time refs are added to rev-cache simultaneous
+as well.  This means that in most situations almost everything <= `max_date`
+will be cached.
+
+Mechanics
+~~~~~~~~~
+
+The most important part of rev-cache is its method of encoding topological
+relations.  To ensure fluid traversal and reconstruction, commits are related
+through high-level "streams"/"channels" rather than individual
+interconnections.  Intuitively, rev-cache stores history the way gitk shows it:
+commits strung up on lines, which interconnect at merges and branches.
+
+Each commit is associated to a given channel/path via a 'path id', and
+variable-length fields govern which paths (if any) are closed or opened at that
+object.  This means that topo-data can be preserved in only a few bytes extra
+per object entry.  Other information stored per entry is the sha-1 hash, type,
+date, size, name, and status in cache slice.  Here is format of an object
+entry, both on-disk and in-memory:
+
+----
+struct object_entry {
+        unsigned type : 3;
+        unsigned is_end : 1;
+        unsigned is_start : 1;
+        unsigned uninteresting : 1;
+        unsigned include : 1;
+        unsigned flags : 1;
+        unsigned char sha1[20];
+
+        unsigned char merge_nr;
+        unsigned char split_nr;
+        unsigned size_size : 3;
+        unsigned name_size : 3;
+
+        uint32_t date;
+        uint16_t path;
+
+        /* merge paths */
+        /* split paths */
+        /* size */
+        /* name index */
+};
+----
+
+An explanation of each field:
+
+`type`::
+	Object type
+`is_end`::
+	The commit has some parents outside the cache slice (all if slice has
+	legs)
+`is_start`::
+	The commit has no children in cache slice
+`uninteresting`::
+	Run-time flag, used in traversal
+`include`::
+	Run-time flag, used in traversal (initialization)
+`flags`::
+	Currently unused, extra bit
+`sha1`::
+	Object SHA-1 hash
+
+`merge_nr`::
+	The number of paths the current channel diverges into; the current path
+	ends upon any merge.
+`split_nr`::
+	The number of paths this commit ends; used on both merging and
+	branching.
+`size_size`::
+	Number of bytes the object size takes up.
+`name_size`::
+	Number of bytes the name index takes up.
+
+`date`::
+	The date of the commit.
+`path`::
+	The path ID of the channel with which this commit is associated.
+
+merge paths::
+	The path IDs (16-bit) that are to be created.  Overflow is not a
+	problem as path IDs are reused, leaving even complicated projects to
+	consume no more than a few hundred IDs.
+split paths::
+	The path IDs (16-bit) that are to be ended.
+size::
+	The size split into the minimum number of bytes.  That is, 1-8 bytes
+	representing the size, least-significant byte first.
+name index::
+	An offset for the null-seperated, object name list at the end of the
+	cache slice.  Also split into the minimum number of bytes.
+
+Each path ID refers to an index in a 'path array', which stores the current
+status (eg. active, interestingness) of each channel.
+
+Due to topo-relations and boundary tracking, all of a commit's parents must be
+encountered before the path is reallocated.  This is achieved by using a
+counter system per merge: starting at the parent number, the counter is
+decremented as each parent is encountered (dictated by 'split paths'); at 0 the
+path is cleared.
+
+Boundary tracking is necessary because non-commits are stored relative to the
+commit in which they were introduced.  If a series of commits is not included
+in the output, the last interesting commit must be parsed manually to ensure
+all objects are accounted for.
+
+To prevent list-objects from recursing into trees that we've already taken care
+of, the flag `FACE_VALUE` is introduced.  An object with this flag is not
+explored (= "taken at face value"), significantly reducing I/O and processing
+time.
+
+Notes
+~~~~~
+
+Due to rev-cache's internal storage format, walking may lead to some
+discrepencies between cached and uncached repositories.  Although noticeable to
+users directly calling rev-list, these are unused or corner cases and
+internally a non-issue.
+
+First note that rev-cache records commits in topological order.  Large portions
+of commit history will already be sorted topologically in the revision walk,
+yielding a different output for unsorted calls to rev-list.  A more obscure
+consquence occurs when two objects of the same SHA-1, but different name, are
+introduced seperately in parallel branches: different names might be shown for
+that object depending on which object entry was encountered first.
+
+A similar disparity arises when two objects of same SHA-1/different name are
+present in the same tree structure.  rev-cache, walking objects as they were
+introduced, lists the youngest file's name; rev-list, walking the full trees
+each commit, shows the first file encountered.
-- 
tg: (5bbb081..) t/revcache/docs (depends on: t/revcache/integration)

^ permalink raw reply related

* Re: [PATCH 4/6 (v4)] administrative functions for rev-cache, start of integration into git
From: Nick Edelen @ 2009-10-02 22:12 UTC (permalink / raw)
  To: Nick Edelen, Junio C Hamano, Nicolas Pitre, Johannes Schindelin,
	Sam Vilain
In-Reply-To: <op.uyuwkstatdk399@sirnot.private>

This patch, fourth, contains miscellaneous (maintenance) features:
  - support for cache slice fusion, index regeneration and object size caching
  - non-commit object generation refactored
  - porcelain updated to support feature additions

The beginnings of integration into git are present in this patch, mainly
centered on caching object size; the object generation is refactored to more
elegantly exploit this.  Fusion allows smaller (incremental) slices to be
coagulated into a larger slice, reducing overhead, while index regeneration
enables repair or cleaning of the cache index.

Note that tests for these features are included in the following patch, as they
take advantage of the rev-cache's integration into the revision walker.

Signed-off-by: Nick Edelen <sirnot@gmail.com>

---
  builtin-gc.c        |    9 +
  builtin-rev-cache.c |   77 ++++++-
  rev-cache.c         |  719 +++++++++++++++++++++++++++++++++++++++++++++------
  rev-cache.h         |    9 +-
  revision.h          |   16 +-
  5 files changed, 750 insertions(+), 80 deletions(-)

diff --git a/builtin-gc.c b/builtin-gc.c
index 7d3e9cc..c92511a 100644
--- a/builtin-gc.c
+++ b/builtin-gc.c
@@ -22,6 +22,7 @@ static const char * const builtin_gc_usage[] = {
  	NULL
  };

+static char do_rev_cache = 0;
  static int pack_refs = 1;
  static int aggressive_window = 250;
  static int gc_auto_threshold = 6700;
@@ -34,9 +35,14 @@ static const char *argv_reflog[] = {"reflog", "expire", "--all", NULL};
  static const char *argv_repack[MAX_ADD] = {"repack", "-d", "-l", NULL};
  static const char *argv_prune[] = {"prune", "--expire", NULL, NULL};
  static const char *argv_rerere[] = {"rerere", "gc", NULL};
+static const char *argv_rev_cache[] = {"rev-cache", "fuse", "--all", "--ignore-size", NULL};

  static int gc_config(const char *var, const char *value, void *cb)
  {
+	if (!strcmp(var, "gc.revcache")) {
+		do_rev_cache = 1;
+		return 0;
+	}
  	if (!strcmp(var, "gc.packrefs")) {
  		if (value && !strcmp(value, "notbare"))
  			pack_refs = -1;
@@ -244,6 +250,9 @@ int cmd_gc(int argc, const char **argv, const char *prefix)
  	if (run_command_v_opt(argv_rerere, RUN_GIT_CMD))
  		return error(FAILED_RUN, argv_rerere[0]);

+	if (do_rev_cache && run_command_v_opt(argv_rev_cache, RUN_GIT_CMD))
+		return error(FAILED_RUN, argv_rev_cache[0]);
+
  	if (auto_gc && too_many_loose_objects())
  		warning("There are too many unreachable loose objects; "
  			"run 'git prune' to remove them.");
diff --git a/builtin-rev-cache.c b/builtin-rev-cache.c
index 6eb7065..b894c54 100644
--- a/builtin-rev-cache.c
+++ b/builtin-rev-cache.c
@@ -5,6 +5,8 @@
  #include "revision.h"
  #include "rev-cache.h"

+unsigned long default_ignore_size = 50 * 1024 * 1024; /* 50mb */
+
  /* porcelain for rev-cache.c */
  static int handle_add(int argc, const char *argv[]) /* args beyond this command */
  {
@@ -24,7 +26,7 @@ static int handle_add(int argc, const char *argv[]) /* args beyond this command
  		if (!strcmp(argv[i], "--stdin"))
  			dostdin = 1;
  		else if (!strcmp(argv[i], "--fresh") || !strcmp(argv[i], "--incremental"))
-			starts_from_slices(&revs, UNINTERESTING);
+			starts_from_slices(&revs, UNINTERESTING, 0, 0);
  		else if (!strcmp(argv[i], "--not"))
  			flags ^= UNINTERESTING;
  		else if (!strcmp(argv[i], "--legs"))
@@ -150,6 +152,57 @@ static int handle_walk(int argc, const char *argv[])
  	return 0;
  }

+static int handle_fuse(int argc, const char *argv[])
+{
+	struct rev_info revs;
+	struct rev_cache_info rci;
+	const char *args[5];
+	int i, argn = 0;
+	char add_all = 0;
+
+	init_revisions(&revs, 0);
+	init_rev_cache_info(&rci);
+	args[argn++] = "rev-list";
+
+	for (i = 0; i < argc; i++) {
+		if (!strcmp(argv[i], "--all")) {
+			args[argn++] = "--all";
+			setup_revisions(argn, args, &revs, 0);
+			add_all = 1;
+		} else if (!strcmp(argv[i], "--no-objects"))
+			rci.objects = 0;
+		else if (!strncmp(argv[i], "--ignore-size", 13)) {
+			unsigned long sz;
+
+			if (argv[i][13] == '=')
+				git_parse_ulong(argv[i] + 14, &sz);
+			else
+				sz = default_ignore_size;
+
+			rci.ignore_size = sz;
+		} else
+			continue;
+	}
+
+	if (!add_all)
+		starts_from_slices(&revs, 0, 0, 0);
+
+	return fuse_cache_slices(&rci, &revs);
+}
+
+static int handle_index(int argc, const char *argv[])
+{
+	return regenerate_cache_index(0);
+}
+
+static int handle_alt(int argc, const char *argv[])
+{
+	if (argc < 1)
+		return -1;
+
+	return make_cache_slice_pointer(0, argv[0]);
+}
+
  static int handle_help(void)
  {
  	char *usage = "\
@@ -180,12 +233,28 @@ commands:\n\
  	return 0;
  }

+static int rev_cache_config(const char *k, const char *v, void *cb)
+{
+	/* this could potentially be related to pack.windowmemory, but we want a max around 50mb,
+	 * and .windowmemory is often >700mb, with *large* variations */
+	if (!strcmp(k, "revcache.ignoresize")) {
+		int t;
+
+		t = git_config_ulong(k, v);
+		if (t)
+			default_ignore_size = t;
+	}
+
+	return 0;
+}
+
  int cmd_rev_cache(int argc, const char *argv[], const char *prefix)
  {
  	const char *arg;
  	int r;

  	git_config(git_default_config, NULL);
+	git_config(rev_cache_config, NULL);

  	if (argc > 1)
  		arg = argv[1];
@@ -196,8 +265,14 @@ int cmd_rev_cache(int argc, const char *argv[], const char *prefix)
  	argv += 2;
  	if (!strcmp(arg, "add"))
  		r = handle_add(argc, argv);
+	else if (!strcmp(arg, "fuse"))
+		r = handle_fuse(argc, argv);
  	else if (!strcmp(arg, "walk"))
  		r = handle_walk(argc, argv);
+	else if (!strcmp(arg, "index"))
+		r = handle_index(argc, argv);
+	else if (!strcmp(arg, "alt"))
+		r = handle_alt(argc, argv);
  	else
  		return handle_help();

diff --git a/rev-cache.c b/rev-cache.c
index ef6b58a..e401978 100644
--- a/rev-cache.c
+++ b/rev-cache.c
@@ -9,6 +9,13 @@
  #include "revision.h"
  #include "rev-cache.h"
  #include "run-command.h"
+#include "string-list.h"
+
+struct cache_slice_pointer {
+	char signature[8]; /* REVCOPTR */
+	char version;
+	char path[PATH_MAX + 1];
+};

  /* list resembles pack index format */
  static uint32_t fanout[0xff + 2];
@@ -259,27 +266,45 @@ unsigned char *get_cache_slice(struct commit *commit)

  /* traversal */

-static void handle_noncommit(struct rev_info *revs, unsigned char *ptr, struct rc_object_entry *entry)
+static unsigned long decode_size(unsigned char *str, int len);
+
+static void handle_noncommit(struct rev_info *revs, struct commit *commit, unsigned char *ptr, struct rc_object_entry *entry)
  {
-	struct object *obj = 0;
+	struct blob *blob;
+	struct tree *tree;
+	struct object *obj;
+	unsigned long size;

+	size = decode_size(ptr + RC_ENTRY_SIZE_OFFSET(entry), entry->size_size);
  	switch (entry->type) {
  	case OBJ_TREE:
-		if (revs->tree_objects)
-			obj = (struct object *)lookup_tree(entry->sha1);
+		if (!revs->tree_objects)
+			return;
+
+		tree = lookup_tree(entry->sha1);
+		if (!tree)
+			return;
+
+		tree->size = size;
+		commit->tree = tree;
+		obj = (struct object *)tree;
  		break;
+
  	case OBJ_BLOB:
-		if (revs->blob_objects)
-			obj = (struct object *)lookup_blob(entry->sha1);
-		break;
-	case OBJ_TAG:
-		if (revs->tag_objects)
-			obj = (struct object *)lookup_tag(entry->sha1);
+		if (!revs->blob_objects)
+			return;
+
+		blob = lookup_blob(entry->sha1);
+		if (!blob)
+			return;
+
+		obj = (struct object *)blob;
  		break;
-	}

-	if (!obj)
+	default:
+		/* tag objects aren't really supposed to be here */
  		return;
+	}

  	obj->flags |= FACE_VALUE;
  	add_pending_object(revs, obj, "");
@@ -375,7 +400,7 @@ static int traverse_cache_slice_1(struct rc_slice_header *head, unsigned char *m
  		/* add extra objects if necessary */
  		if (entry->type != OBJ_COMMIT) {
  			if (consume_children)
-				handle_noncommit(revs, map + index, entry);
+				handle_noncommit(revs, co, map + index, entry);

  			continue;
  		} else
@@ -409,6 +434,8 @@ static int traverse_cache_slice_1(struct rc_slice_header *head, unsigned char *m
  			if (last_objects[path]) {
  				parse_commit(last_objects[path]);

+				/* we needn't worry about the unique field; that will be valid as
+				 * long as we're not a end entry */
  				last_objects[path]->object.flags &= ~FACE_VALUE;
  				last_objects[path] = 0;
  			}
@@ -541,6 +568,48 @@ static int get_cache_slice_header(unsigned char *cache_sha1, unsigned char *map,
  	return 0;
  }

+int open_cache_slice(unsigned char *sha1, int flags)
+{
+	int fd;
+	char signature[8];
+
+	fd = open(git_path("rev-cache/%s", sha1_to_hex(sha1)), flags);
+	if (fd <= 0)
+		goto end;
+
+	if (read(fd, signature, 8) != 8)
+		goto end;
+
+	/* a normal revision slice */
+	if (!memcmp(signature, "REVCACHE", 8)) {
+		lseek(fd, 0, SEEK_SET);
+		return fd;
+	}
+
+	/* slice pointer */
+	if (!memcmp(signature, "REVCOPTR", 8)) {
+		struct cache_slice_pointer ptr;
+
+		lseek(fd, 0, SEEK_SET);
+		if (read_in_full(fd, &ptr, sizeof(ptr)) != sizeof(ptr))
+			goto end;
+
+		if (ptr.version != SUPPORTED_REVCOPTR_VERSION)
+			goto end;
+
+		close(fd);
+		fd = open(ptr.path, flags);
+
+		return fd;
+	}
+
+end:
+	if (fd > 0)
+		close(fd);
+
+	return -1;
+}
+
  int traverse_cache_slice(struct rev_info *revs,
  	unsigned char *cache_sha1, struct commit *commit,
  	unsigned long *date_so_far, int *slop_so_far,
@@ -564,7 +633,7 @@ int traverse_cache_slice(struct rev_info *revs,

  	memset(&head, 0, sizeof(struct rc_slice_header));

-	fd = open(git_path("rev-cache/%s", sha1_to_hex(cache_sha1)), O_RDONLY);
+	fd = open_cache_slice(cache_sha1, O_RDONLY);
  	if (fd == -1)
  		goto end;
  	if (fstat(fd, &fi) || fi.st_size < sizeof(struct rc_slice_header))
@@ -591,6 +660,68 @@ end:

  /* generation */

+static int is_endpoint(struct commit *commit)
+{
+	struct commit_list *list = commit->parents;
+
+	while (list) {
+		if (!(list->item->object.flags & UNINTERESTING))
+			return 0;
+
+		list = list->next;
+	}
+
+	return 1;
+}
+
+/* ensures branch is self-contained: parents are either all interesting or all uninteresting */
+static void make_legs(struct rev_info *revs)
+{
+	struct commit_list *list, **plist;
+	int total = 0;
+
+	/* attach plist to end of commits list */
+	list = revs->commits;
+	while (list && list->next)
+		list = list->next;
+
+	if (list)
+		plist = &list->next;
+	else
+		return;
+
+	/* duplicates don't matter, as get_revision() ignores them */
+	for (list = revs->commits; list; list = list->next) {
+		struct commit *item = list->item;
+		struct commit_list *parents = item->parents;
+
+		if (item->object.flags & UNINTERESTING)
+			continue;
+		if (is_endpoint(item))
+			continue;
+
+		while (parents) {
+			struct commit *p = parents->item;
+			parents = parents->next;
+
+			if (!(p->object.flags & UNINTERESTING))
+				continue;
+
+			p->object.flags &= ~UNINTERESTING;
+			parse_commit(p);
+			plist = &commit_list_insert(p, plist)->next;
+
+			if (!(p->object.flags & SEEN))
+				total++;
+		}
+	}
+
+	if (total)
+		sort_in_topological_order(&revs->commits, 1);
+
+}
+
+
  struct path_track {
  	struct commit *commit;
  	int path; /* for keeping track of children */
@@ -680,7 +811,7 @@ static void handle_paths(struct commit *commit, struct rc_object_entry *object,
  	int child_nr, parent_nr, open_parent_nr, this_path;
  	struct commit_list *list;
  	struct commit *first_parent;
-	struct path_track **ppt, *pt;
+	struct pa\th_track **ppt, *pt;

  	/* we can only re-use a closed path once all it's children have been encountered,
  	 * as we need to keep track of commit boundaries */
@@ -779,31 +910,76 @@ static void handle_paths(struct commit *commit, struct rc_object_entry *object,
  }


-static void add_object_entry(const unsigned char *sha1, int type, struct rc_object_entry *nothisone,
+static int encode_size(unsigned long size, unsigned char *out)
+{
+	int len = 0;
+
+	while (size) {
+		*out++ = (unsigned char)(size & 0xff);
+		size >>= 8;
+		len++;
+	}
+
+	return len;
+}
+
+static unsigned long decode_size(unsigned char *str, int len)
+{
+	unsigned long size = 0;
+	int shift = 0;
+
+	while (len--) {
+		size |= (unsigned long)*str << shift;
+		shift += 8;
+		str++;
+	}
+
+	return size;
+}
+
+static void add_object_entry(const unsigned char *sha1, struct rc_object_entry *entryp,
  	struct strbuf *merge_str, struct strbuf *split_str)
  {
-	struct rc_object_entry object;
+	struct rc_object_entry entry;
+	unsigned char size_str[7];
+	unsigned long size;
+	enum object_type type;
+	void *data;

-	if (!nothisone) {
-		memset(&object, 0, sizeof(object));
-		object.sha1 = (unsigned char *)sha1;
-		object.type = type;
+	if (entryp)
+		sha1 = entryp->sha1;
+
+	/* retrieve size data */
+	data = read_sha1_file(sha1, &type, &size);
+
+	if (data)
+		free(data);
+
+	/* initialize! */
+	if (!entryp) {
+		memset(&entry, 0, sizeof(entry));
+		entry.sha1 = (unsigned char *)sha1;
+		entry.type = type;

  		if (merge_str)
-			object.merge_nr = merge_str->len / sizeof(uint16_t);
+			entry.merge_nr = merge_str->len / sizeof(uint16_t);
  		if (split_str)
-			object.split_nr = split_str->len / sizeof(uint16_t);
+			entry.split_nr = split_str->len / sizeof(uint16_t);

-		nothisone = &object;
+		entryp = &entry;
  	}

-	strbuf_add(acc_buffer, to_disked_rc_object_entry(nothisone, 0), sizeof(struct rc_object_entry_ondisk));
+	entryp->size_size = encode_size(size, size_str);

-	if (merge_str && merge_str->len)
+	/* write the muvabitch */
+	strbuf_add(acc_buffer, to_disked_rc_object_entry(entryp, 0), sizeof(struct rc_object_entry_ondisk));
+
+	if (merge_str)
  		strbuf_add(acc_buffer, merge_str->buf, merge_str->len);
-	if (split_str && split_str->len)
+	if (split_str)
  		strbuf_add(acc_buffer, split_str->buf, split_str->len);

+	strbuf_add(acc_buffer, size_str, entryp->size_size);
  }

  /* returns non-zero to continue parsing, 0 to skip */
@@ -847,12 +1023,7 @@ continue_loop:

  static int dump_tree_callback(const unsigned char *sha1, const char *path, unsigned int mode)
  {
-	unsigned char data[21];
-
-	hashcpy(data, sha1);
-	data[20] = !!S_ISDIR(mode);
-
-	strbuf_add(acc_buffer, data, 21);
+	strbuf_add(acc_buffer, sha1, 20);

  	return 1;
  }
@@ -862,15 +1033,10 @@ static void tree_addremove(struct diff_options *options,
  	const unsigned char *sha1,
  	const char *concatpath)
  {
-	unsigned char data[21];
-
  	if (whatnow != '+')
  		return;

-	hashcpy(data, sha1);
-	data[20] = !!S_ISDIR(mode);
-
-	strbuf_add(acc_buffer, data, 21);
+	strbuf_add(acc_buffer, sha1, 20);
  }

  static void tree_change(struct diff_options *options,
@@ -879,26 +1045,10 @@ static void tree_change(struct diff_options *options,
  	const unsigned char *new_sha1,
  	const char *concatpath)
  {
-	unsigned char data[21];
-
  	if (!hashcmp(old_sha1, new_sha1))
  		return;

-	hashcpy(data, new_sha1);
-	data[20] = !!S_ISDIR(new_mode);
-
-	strbuf_add(acc_buffer, data, 21);
-}
-
-static int sort_type_hash(const void *a, const void *b)
-{
-	const unsigned char *sa = (const unsigned char *)a,
-		*sb = (const unsigned char *)b;
-
-	if (sa[20] == sb[20])
-		return hashcmp(sa, sb);
-
-	return sa[20] > sb[20] ? -1 : 1;
+	strbuf_add(acc_buffer, new_sha1, 20);
  }

  static int add_unique_objects(struct commit *commit)
@@ -909,6 +1059,7 @@ static int add_unique_objects(struct commit *commit)
  	int i, j, next;
  	char is_first = 1;

+	/* ...no, calculate unique objects */
  	strbuf_init(&os, 0);
  	strbuf_init(&ost, 0);
  	orig_buf = acc_buffer;
@@ -928,20 +1079,20 @@ static int add_unique_objects(struct commit *commit)

  		strbuf_setlen(acc_buffer, 0);
  		diff_tree_sha1(list->item->tree->object.sha1, commit->tree->object.sha1, "", &opts);
-		qsort(acc_buffer->buf, acc_buffer->len / 21, 21, (int (*)(const void *, const void *))hashcmp);
+		qsort(acc_buffer->buf, acc_buffer->len / 20, 20, (int (*)(const void *, const void *))hashcmp);

  		/* take intersection */
  		if (!is_first) {
-			for (next = i = j = 0; i < os.len; i += 21) {
+			for (next = i = j = 0; i < os.len; i += 20) {
  				while (j < ost.len && hashcmp((unsigned char *)(ost.buf + j), (unsigned char *)(os.buf + i)) < 0)
-					j += 21;
+					j += 20;

  				if (j >= ost.len || hashcmp((unsigned char *)(ost.buf + j), (unsigned char *)(os.buf + i)))
  					continue;

  				if (next != i)
-					memcpy(os.buf + next, os.buf + i, 21);
-				next += 21;
+					memcpy(os.buf + next, os.buf + i, 20);
+				next += 20;
  			}

  			if (next != i)
@@ -950,25 +1101,102 @@ static int add_unique_objects(struct commit *commit)
  			is_first = 0;
  	}

+	/* no parents (!) */
  	if (is_first) {
  		acc_buffer = &os;
  		dump_tree(commit->tree, dump_tree_callback);
  	}

-	if (os.len)
-		qsort(os.buf, os.len / 21, 21, sort_type_hash);
-
+	/* the ordering of non-commit objects dosn't really matter, so we're not gonna bother */
  	acc_buffer = orig_buf;
-	for (i = 0; i < os.len; i += 21)
-		add_object_entry((unsigned char *)(os.buf + i), os.buf[i + 20] ? OBJ_TREE : OBJ_BLOB, 0, 0, 0);
+	for (i = 0; i < os.len; i += 20)
+		add_object_entry((unsigned char *)(os.buf + i), 0, 0, 0);

  	/* last but not least, the main tree */
-	add_object_entry(commit->tree->object.sha1, OBJ_TREE, 0, 0, 0);
+	add_object_entry(commit->tree->object.sha1, 0, 0, 0);
+
+	return i / 20 + 1;
+}
+
+static int add_objects_verbatim_1(struct rev_cache_slice_map *mapping, int *index)
+{
+	unsigned char *map = mapping->map;
+	int i = *index, object_nr = 0;
+	struct rc_object_entry *entry = RC_OBTAIN_OBJECT_ENTRY(map + i);
+
+	i += RC_ACTUAL_OBJECT_ENTRY_SIZE(entry);
+	while (i < mapping->size) {
+		int pos = i;

-	strbuf_release(&ost);
-	strbuf_release(&os);
+		entry = RC_OBTAIN_OBJECT_ENTRY(map + i;
+		i += RC_ACTUAL_OBJECT_ENTRY_SIZE(entry);
+
+		if (entry->type == OBJ_COMMIT) {
+			*index = pos;
+			return object_nr;
+		}

-	return i / 21 + 1;
+		strbuf_add(acc_buffer, map + pos, i - pos);
+		object_nr++;
+	}
+
+	*index = 0;
+	return object_nr;
+}
+
+static int add_objects_verbatim(struct rev_cache_info *rci, struct commit *commit)
+{
+	struct rev_cache_slice_map *map;
+	char found = 0;
+	struct rc_index_entry *ie;
+	struct rc_object_entry *entry;
+	int object_nr, i;
+
+	if (!rci->maps)
+		return -1;
+
+	/* check if we can continue where we left off */
+	map = rci->last_map;
+	if (!map)
+		goto search_me;
+
+	i = map->last_index;
+	entry = RC_OBTAIN_OBJECT_ENTRY(map->map + i);
+	if (hashcmp(entry->sha1, commit->object.sha1))
+		goto search_me;
+
+	found = 1;
+
+search_me:
+	if (!found) {
+		ie = search_index(commit->object.sha1);
+		if (!ie || ie->cache_index >= idx_head.cache_nr)
+			return -2;
+
+		map = rci->maps + ie->cache_index;
+		if (!map->size)
+			return -3;
+
+		i = ie->pos;
+		entry = RC_OBTAIN_OBJECT_ENTRY(map->map + i);
+		if (entry->type != OBJ_COMMIT || hashcmp(entry->sha1, commit->object.sha1))
+			return -4;
+	}
+
+	/* can't handle end commits */
+	if (entry->is_end)
+		return -5;
+
+	object_nr = add_objects_verbatim_1(map, &i);
+
+	/* remember this */
+	if (i) {
+		rci->last_map = map;
+		map->last_index = i;
+	} else
+		rci->last_map = 0;
+
+	return object_nr;
  }

  static void init_revcache_directory(void)
@@ -983,9 +1211,14 @@ static void init_revcache_directory(void)

  void init_rev_cache_info(struct rev_cache_info *rci)
  {
+	memset(rci, 0, sizeof(struct rev_cache_info));
+
  	rci->objects = 1;
  	rci->legs = 0;
  	rci->make_index = 1;
+	rci->fuse_me = 0;
+
+	rci->overwrite_all = 0;

  	rci->add_to_pending = 1;

@@ -1065,9 +1298,13 @@ int make_cache_slice(struct rev_cache_info *rci,
  	if (prepare_revision_walk(revs))
  		die("died preparing revision walk");

+	if (rci->legs)
+		make_legs(revs);
+
  	object_nr = total_sz = 0;
  	while ((commit = get_revision(revs)) != 0) {
  		struct rc_object_entry object;
+		int t;

  		strbuf_setlen(&merge_paths, 0);
  		strbuf_setlen(&split_paths, 0);
@@ -1093,12 +1330,17 @@ int make_cache_slice(struct rev_cache_info *rci,

  		commit->indegree = 0;

-		add_object_entry(0, 0, &object, &merge_paths, &split_paths);
+		add_object_entry(0, &object, &merge_paths, &split_paths);
  		object_nr++;

-		/* add all unique children for this commit */
-		if (rci->objects && !object.is_end)
-			object_nr += add_unique_objects(commit);
+		if (rci->objects && !object.is_end) {
+			if (rci->fuse_me && (t = add_objects_verbatim(rci, commit)) >= 0)
+				/* yay!  we did it! */
+				object_nr += t;
+			else
+				/* add all unique children for this commit */
+				object_nr += add_unique_objects(commit);
+		}

  		/* print every ~1MB or so */
  		if (buffer.len > 1000000) {
@@ -1223,6 +1465,8 @@ int make_cache_index(struct rev_cache_info *rci, unsigned char *cache_sha1,
  	unsigned char *map;
  	unsigned long max_date;

+	maybe_fill_with_defaults(rci);
+
  	if (!idx_map)
  		init_index();

@@ -1287,7 +1531,7 @@ int make_cache_index(struct rev_cache_info *rci, unsigned char *cache_sha1,
  		} else
  			disked_entry = search_index_1(object_entry->sha1);

-		if (disked_entry && !object_entry->is_start)
+		if (disked_entry && !object_entry->is_start && !rci->overwrite_all)
  			continue;
  		else if (disked_entry) {
  			/* mmm, pointer arithmetic... tasty */  /* (entry - idx_map = offset, so cast is valid) */
@@ -1341,8 +1585,7 @@ int make_cache_index(struct rev_cache_info *rci, unsigned char *cache_sha1,
  }


-/* add start-commits from each cache slice (uninterestingness will be propogated) */
-void starts_from_slices(struct rev_info *revs, unsigned int flags)
+void starts_from_slices(struct rev_info *revs, unsigned int flags, unsigned char *which, int n)
  {
  	struct commit *commit;
  	int i;
@@ -1358,6 +1601,18 @@ void starts_from_slices(struct rev_info *revs, unsigned int flags)
  		if (!entry->is_start)
  			continue;

+		/* only include entries in 'which' slices */
+		if (n) {
+			int j;
+
+			for (j = 0; j < n; j++)
+				if (!hashcmp(idx_caches + entry->cache_index * 20, which + j * 20))
+					break;
+
+			if (j == n)
+				continue;
+		}
+
  		commit = lookup_commit(entry->sha1);
  		if (!commit)
  			continue;
@@ -1367,3 +1622,313 @@ void starts_from_slices(struct rev_info *revs, unsigned int flags)
  	}

  }
+
+
+struct slice_fd_time {
+	unsigned char sha1[20];
+	int fd;
+	struct stat fi;
+};
+
+int slice_time_sort(const void *a, const void *b)
+{
+	unsigned long at, bt;
+
+	at = ((struct slice_fd_time *)a)->fi.st_ctime;
+	bt = ((struct slice_fd_time *)b)->fi.st_ctime;
+
+	if (at == bt)
+		return 0;
+
+	return at > bt ? 1 : -1;
+}
+
+int regenerate_cache_index(struct rev_cache_info *rci)
+{
+	DIR *dirh;
+	int i;
+	struct slice_fd_time info;
+	struct strbuf slices;
+
+	/* first remove old index if it exists */
+	unlink_or_warn(git_path("rev-cache/index"));
+
+	strbuf_init(&slices, 0);
+
+	dirh = opendir(git_path("rev-cache"));
+	if (dirh) {
+		struct dirent *de;
+		struct stat fi;
+		int fd;
+		unsigned char sha1[20];
+
+		while ((de = readdir(dirh))) {
+			if (de->d_name[0] == '.')
+				continue;
+
+			if (get_sha1_hex(de->d_name, sha1))
+				continue;
+
+			/* open with RDWR because of mmap call in make_cache_index() */
+			fd = open_cache_slice(sha1, O_RDONLY);
+			if (fd < 0 || fstat(fd, &fi)) {
+				warning("bad cache found [%s]; fuse recommended", de->d_name);
+				if (fd > 0)
+					close(fd);
+				continue;
+			}
+
+			hashcpy(info.sha1, sha1);
+			info.fd = fd;
+			memcpy(&info.fi, &fi, sizeof(struct stat));
+
+			strbuf_add(&slices, &info, sizeof(info));
+		}
+
+		closedir(dirh);
+	}
+
+	/* we want oldest first -> upon overlap, older slices are more likely to have a larger section,
+	 * as of the overlapped commit */
+	qsort(slices.buf, slices.len / sizeof(info), sizeof(info), slice_time_sort);
+
+	for (i = 0; i < slices.len; i += sizeof(info)) {
+		struct slice_fd_time *infop = (struct slice_fd_time *)(slices.buf + i);
+		struct stat *fip = &infop->fi;
+		int fd = infop->fd;
+
+		if (make_cache_index(rci, infop->sha1, fd, fip->st_size) < 0)
+			die("error writing cache");
+
+		close(fd);
+	}
+
+	strbuf_release(&slices);
+
+	return 0;
+}
+
+static int add_slices_for_fuse(struct rev_cache_info *rci, struct string_list *files, struct strbuf *ignore)
+{
+	unsigned char sha1[20];
+	char base[PATH_MAX];
+	int baselen, i, slice_nr = 0;
+	struct stat fi;
+	DIR *dirh;
+	struct dirent *de;
+
+	strncpy(base, git_path("rev-cache"), sizeof(base));
+	baselen = strlen(base);
+
+	dirh = opendir(base);
+	if (!dirh)
+		return 0;
+
+	while ((de = readdir(dirh))) {
+		if (de->d_name[0] == '.')
+			continue;
+
+		base[baselen] = '/';
+		strncpy(base + baselen + 1, de->d_name, sizeof(base) - baselen - 1);
+
+		if (get_sha1_hex(de->d_name, sha1)) {
+			/* whatever it is, we don't need it... */
+			string_list_insert(base, files);
+			continue;
+		}
+
+		/* _theoretically_ it is possible a slice < ignore_size to map objects not covered by, yet reachable from,
+		 * a slice >= ignore_size, meaning that we could potentially delete an 'unfused' slice; but if that
+		 * ever *did* happen their cache structure'd be so fucked up they might as well refuse the entire thing.
+		 * and at any rate the worst it'd do is make rev-list revert to standard walking in that (small) bit.
+		 */
+		if (rci->ignore_size) {
+			if (stat(base, &fi))
+				warning("can't query file %s\n", base);
+			else if (fi.st_size >= rci->ignore_size) {
+				strbuf_add(ignore, sha1, 20);
+				continue;
+			}
+		} else {
+			/* check if a pointer */
+			struct cache_slice_pointer ptr;
+			int fd = open(base, O_RDONLY);
+
+			if (fd < 0)
+				goto dont_save;
+			if (sizeof(ptr) != read_in_full(fd, &ptr, sizeof(ptr)))
+				goto dont_save;
+
+			close(fd);
+			if (!strcmp(ptr.signature, "REVCOPTR")) {
+				strbuf_add(ignore, sha1, 20);
+				continue;
+			}
+		}
+
+dont_save:
+		for (i = idx_head.cache_nr - 1; i >= 0; i--) {
+			if (!hashcmp(idx_caches + i * 20, sha1))
+				break;
+		}
+
+		if (i >= 0)
+			rci->maps[i].size = 1;
+
+		string_list_insert(base, files);
+		slice_nr++;
+	}
+
+	closedir(dirh);
+
+	return slice_nr;
+}
+
+/* the most work-intensive attributes in the cache are the unique objects and size, both
+ * of which can be re-used.  although path structures will be isomorphic, path generation is
+ * not particularly expensive, and at any rate we need to re-sort the commits */
+int fuse_cache_slices(struct rev_cache_info *rci, struct rev_info *revs)
+{
+	unsigned char cache_sha1[20];
+	struct string_list files = {0, 0, 0, 1}; /* dup */
+	struct strbuf ignore;
+	int i;
+
+	maybe_fill_with_defaults(rci);
+
+	if (!idx_map)
+		init_index();
+	if (!idx_map)
+		return -1;
+
+	strbuf_init(&ignore, 0);
+	rci->maps = xcalloc(idx_head.cache_nr, sizeof(struct rev_cache_slice_map));
+	if (add_slices_for_fuse(rci, &files, &ignore) <= 1) {
+		printf("nothing to fuse\n");
+		return 1;
+	}
+
+	if (ignore.len) {
+		starts_from_slices(revs, UNINTERESTING, (unsigned char *)ignore.buf, ignore.len / 20);
+		strbuf_release(&ignore);
+	}
+
+	/* initialize mappings */
+	for (i = idx_head.cache_nr - 1; i >= 0; i--) {
+		struct rev_cache_slice_map *map = rci->maps + i;
+		struct stat fi;
+		int fd;
+
+		if (!map->size)
+			continue;
+		map->size = 0;
+
+		/* pointers are never fused, so we can use open directly */
+		fd = open(git_path("rev-cache/%s", sha1_to_hex(idx_caches + i * 20)), O_RDONLY);
+		if (fd <= 0 || fstat(fd, &fi))
+			continue;
+		if (fi.st_size < sizeof(struct rc_slice_header))
+			continue;
+
+		map->map = xmmap(0, fi.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+		if (map->map == MAP_FAILED)
+			continue;
+
+		close(fd);
+		map->size = fi.st_size;
+	}
+
+	rci->make_index = 0;
+	rci->fuse_me = 1;
+	if (make_cache_slice(rci, revs, 0, 0, cache_sha1) < 0)
+		die("can't make cache slice");
+
+	printf("%s\n", sha1_to_hex(cache_sha1));
+
+	/* clean up time! */
+	for (i = idx_head.cache_nr - 1; i >= 0; i--) {
+		struct rev_cache_slice_map *map = rci->maps + i;
+
+		if (!map->size)
+			continue;
+
+		munmap(map->map, map->size);
+	}
+	free(rci->maps);
+	cleanup_cache_slices();
+
+	for (i = 0; i < files.nr; i++) {
+		char *name = files.items[i].string;
+
+		fprintf(stderr, "removing %s\n", name);
+		unlink_or_warn(name);
+	}
+
+	string_list_clear(&files, 0);
+
+	return regenerate_cache_index(rci);
+}
+
+static int verify_cache_slice(const char *slice_path, unsigned char *sha1)
+{
+	struct rc_slice_header head;
+	int fd, len, retval = -1;
+	unsigned char *map = MAP_FAILED;
+	struct stat fi;
+
+	len = strlen(slice_path);
+	if (len < 40)
+		return -2;
+	if (get_sha1_hex(slice_path + len - 40, sha1))
+		return -3;
+
+	fd = open(slice_path, O_RDONLY);
+	if (fd == -1)
+		goto end;
+	if (fstat(fd, &fi) || fi.st_size < sizeof(head))
+		goto end;
+
+	map = xmmap(0, sizeof(head), PROT_READ, MAP_PRIVATE, fd, 0);
+	if (map == MAP_FAILED)
+		goto end;
+	if (get_cache_slice_header(sha1, map, fi.st_size, &head))
+		goto end;
+
+	retval = 0;
+
+end:
+	if (map != MAP_FAILED)
+		munmap(map, sizeof(head));
+	if (fd > 0)
+		close(fd);
+
+	return retval;
+}
+
+int make_cache_slice_pointer(struct rev_cache_info *rci, const char *slice_path)
+{
+	struct cache_slice_pointer ptr;
+	int fd;
+	unsigned char sha1[20];
+
+	maybe_fill_with_defaults(rci);
+	rci->overwrite_all = 1;
+
+	if (verify_cache_slice(slice_path, sha1) < 0)
+		return -1;
+
+	strcpy(ptr.signature, "REVCOPTR");
+	ptr.version = SUPPORTED_REVCOPTR_VERSION;
+	strcpy(ptr.path, make_nonrelative_path(slice_path));
+
+	fd = open(git_path("rev-cache/%s", sha1_to_hex(sha1)), O_RDWR | O_CREAT | O_TRUNC, 0666);
+	if (fd < 0)
+		return -2;
+
+	write_in_full(fd, &ptr, sizeof(ptr));
+	make_cache_index(rci, sha1, fd, sizeof(ptr));
+
+	close(fd);
+
+	return 0;
+}
diff --git a/rev-cache.h b/rev-cache.h
index a76dc53..a1af337 100644
--- a/rev-cache.h
+++ b/rev-cache.h
@@ -3,6 +3,7 @@

  #define SUPPORTED_REVCACHE_VERSION 		1
  #define SUPPORTED_REVINDEX_VERSION		1
+#define SUPPORTED_REVCOPTR_VERSION		1

  #define RC_PATH_SIZE(x)	(sizeof(uint16_t) * (x))

@@ -10,6 +11,7 @@
  #define RC_OBTAIN_INDEX_ENTRY(p)			from_disked_rc_index_entry((struct rc_index_entry_ondisk *)(p), 0)

  #define RC_ACTUAL_OBJECT_ENTRY_SIZE(e)		(sizeof(struct rc_object_entry_ondisk) + RC_PATH_SIZE((e)->merge_nr + (e)->split_nr) + (e)->size_size)
+#define RC_ENTRY_SIZE_OFFSET(e)				(RC_ACTUAL_OBJECT_ENTRY_SIZE(e) - (e)->size_size)

  /* single index maps objects to cache files */
  struct rc_index_header {
@@ -90,6 +92,7 @@ struct rc_object_entry *from_disked_rc_object_entry(struct rc_object_entry_ondis
  struct rc_object_entry_ondisk *to_disked_rc_object_entry(struct rc_object_entry *src, struct rc_object_entry_ondisk *dst);

  extern unsigned char *get_cache_slice(struct commit *commit);
+extern int open_cache_slice(unsigned char *sha1, int flags);
  extern int traverse_cache_slice(struct rev_info *revs,
  	unsigned char *cache_sha1, struct commit *commit,
  	unsigned long *date_so_far, int *slop_so_far,
@@ -102,6 +105,10 @@ extern int make_cache_slice(struct rev_cache_info *rci,
  extern int make_cache_index(struct rev_cache_info *rci, unsigned char *cache_sha1,
  	int fd, unsigned int size);

-extern void starts_from_slices(struct rev_info *revs, unsigned int flags);
+extern void starts_from_slices(struct rev_info *revs, unsigned int flags, unsigned char *which, int n);
+extern int fuse_cache_slices(struct rev_cache_info *rci, struct rev_info *revs);
+extern int regenerate_cache_index(struct rev_cache_info *rci);
+extern int make_cache_slice_pointer(struct rev_cache_info *rci, const char *slice_path);

  #endif
+
diff --git a/revision.h b/revision.h
index 7db4b9e..cc5c259 100644
--- a/revision.h
+++ b/revision.h
@@ -22,17 +22,31 @@
  struct rev_info;
  struct log_info;

+struct rev_cache_slice_map {
+	unsigned char *map;
+	int size;
+	int last_index;
+};
+
  struct rev_cache_info {
  	/* generation flags */
  	unsigned objects : 1,
  		legs : 1,
-		make_index : 1;
+		make_index : 1,
+		fuse_me : 1;
+
+	/* index inclusion */
+	unsigned overwrite_all : 1;

  	/* traversal flags */
  	unsigned add_to_pending : 1;

  	/* fuse options */
  	unsigned int ignore_size;
+
+	/* reserved */
+	struct rev_cache_slice_map *maps,
+		*last_map;
  };

  struct rev_info {
-- 
tg: (5021136..) t/revcache/misc (depends on: t/revcache/objects)

^ permalink raw reply related

* Re: [PATCH 6/6 (v4)] support for path name caching in rev-cache
From: Nick Edelen @ 2009-10-02 22:12 UTC (permalink / raw)
  To: Nick Edelen, Junio C Hamano, Nicolas Pitre, Johannes Schindelin,
	Sam Vilain
In-Reply-To: <op.uyuwkwjotdk399@sirnot.private>

An update to caching mechanism, allowing path names to be cached for blob and
tree objects.  A list of names appearing in each cache slice is appended to the
end of the slice, which is referenced by variable-sized indexes per entry.
This allows pack-objects to more intelligently schedule unpacked/poorly packed
object, and enables proper duplication of rev-list's behaivor.

Signed-off-by: Nick Edelen <sirnot@gmail.com>

---
  builtin-rev-cache.c       |    3 +-
  rev-cache.c               |  332 +++++++++++++++++++++++++++++++++++++--------
  rev-cache.h               |   16 ++-
  revision.h                |    6 +-
  t/t6017-rev-cache-list.sh |    8 +-
  5 files changed, 300 insertions(+), 65 deletions(-)

diff --git a/builtin-rev-cache.c b/builtin-rev-cache.c
index 8f41123..4c1766d 100644
--- a/builtin-rev-cache.c
+++ b/builtin-rev-cache.c
@@ -177,13 +177,14 @@ static int handle_walk(int argc, const char *argv[])
  	fprintf(stderr, "pending:\n");
  	for (i = 0; i < revs.pending.nr; i++) {
  		struct object *obj = revs.pending.objects[i].item;
+		const char *name = revs.pending.objects[i].name;

  		/* unfortunately, despite our careful generation, object duplication *is* a possibility...
  		 * (eg. same object introduced into two different branches) */
  		if (obj->flags & SEEN)
  			continue;

-		printf("%s\n", sha1_to_hex(revs.pending.objects[i].item->sha1));
+		printf("%s %s\n", sha1_to_hex(revs.pending.objects[i].item->sha1), name);
  		obj->flags |= SEEN;
  	}

diff --git a/rev-cache.c b/rev-cache.c
index 4ef5287..6c96297 100644
--- a/rev-cache.c
+++ b/rev-cache.c
@@ -17,6 +17,14 @@ struct bad_slice {
  	struct bad_slice *next;
  };

+struct name_list {
+	unsigned char sha1[20];
+	unsigned int len;
+	struct name_list *next;
+
+	char buf[FLEX_ARRAY];
+};
+
  struct cache_slice_pointer {
  	char signature[8]; /* REVCOPTR */
  	char version;
@@ -29,10 +37,13 @@ static uint32_t fanout[0xff + 2];
  static unsigned char *idx_map;
  static int idx_size;
  static struct rc_index_header idx_head;
-static char no_idx, add_to_pending;
-static struct bad_slice *bad_slices;
+static char no_idx, add_to_pending, add_names;
  static unsigned char *idx_caches;

+static struct bad_slice *bad_slices;
+static struct name_list *name_lists, *cur_name_list;
+
+static struct strbuf *acc_name_buffer;
  static struct strbuf *acc_buffer;

  #define SLOP			5
@@ -79,7 +90,7 @@ struct rc_object_entry *from_disked_rc_object_entry(struct rc_object_entry_ondis
  	if (!dst)
  		dst = &entry[cur++ & 0x3];

-	dst->type = src->flags >> 5;
+	dst->type = src->flags >> 5 & 0x03;
  	dst->is_end = !!(src->flags & 0x10);
  	dst->is_start = !!(src->flags & 0x08);
  	dst->uninteresting = !!(src->flags & 0x04);
@@ -90,8 +101,9 @@ struct rc_object_entry *from_disked_rc_object_entry(struct rc_object_entry_ondis
  	dst->merge_nr = src->merge_nr;
  	dst->split_nr = src->split_nr;

-	dst->size_size = src->sizes >> 5;
-	dst->padding = src->sizes & 0x1f;
+	dst->size_size = src->sizes >> 5 & 0x03;
+	dst->name_size = src->sizes >> 2 & 0x03;
+	dst->padding = src->sizes & 0x02;

  	dst->date = ntohl(src->date);
  	dst->path = ntohs(src->path);
@@ -120,6 +132,7 @@ struct rc_object_entry_ondisk *to_disked_rc_object_entry(struct rc_object_entry
  	dst->split_nr = src->split_nr;

  	dst->sizes  = (unsigned char)src->size_size << 5;
+	dst->sizes |= (unsigned char)src->name_size << 2;
  	dst->sizes |= (unsigned char)src->padding;

  	dst->date = htonl(src->date);
@@ -192,6 +205,12 @@ static void cleanup_cache_slices(void)
  		idx_map = 0;
  	}

+	while (name_lists) {
+		struct name_list *nl = name_lists->next;
+		free(name_lists);
+		name_lists = nl;
+	}
+
  }

  static int init_index(void)
@@ -324,7 +343,7 @@ static void handle_noncommit(struct rev_info *revs, struct commit *commit, unsig
  	struct blob *blob;
  	struct tree *tree;
  	struct object *obj;
-	unsigned long size;
+	unsigned long size, name_index;

  	size = decode_size(ptr + RC_ENTRY_SIZE_OFFSET(entry), entry->size_size);
  	switch (entry->type) {
@@ -357,9 +376,23 @@ static void handle_noncommit(struct rev_info *revs, struct commit *commit, unsig
  		return;
  	}

+	if (add_names && cur_name_list) {
+		name_index = decode_size(ptr + RC_ENTRY_NAME_OFFSET(entry), entry->name_size);
+
+		if (name_index >= cur_name_list->len)
+			name_index = 0;
+	} else
+		name_index = 0;
+
  	obj->flags |= FACE_VALUE;
-	if (add_to_pending)
-		add_pending_object(revs, obj, "");
+	if (add_to_pending) {
+		char *name = "";
+
+		if (name_index)
+			name = cur_name_list->buf + name_index;
+
+		add_pending_object(revs, obj, name);
+	}
  }

  static int setup_traversal(struct rc_slice_header *head, unsigned char *map, struct commit *commit, struct commit_list **work,
@@ -713,15 +746,44 @@ end:
  	return retval;
  }

-static int get_cache_slice_header(unsigned char *cache_sha1, unsigned char *map, int len, struct rc_slice_header *head)
+static struct name_list *get_cache_slice_name_list(struct rc_slice_header *head, int fd)
+{
+	struct name_list *nl = name_lists;
+
+	while (nl) {
+		if (!hashcmp(nl->sha1, head->sha1))
+			break;
+		nl = nl->next;
+	}
+
+	if (nl)
+		return nl;
+
+	nl = xcalloc(1, sizeof(struct name_list) + head->name_size);
+	nl->len = head->name_size;
+	hashcpy(nl->sha1, head->sha1);
+
+	lseek(fd, head->size, SEEK_SET);
+	read_in_full(fd, nl->buf, head->name_size);
+
+	nl->next = name_lists;
+	name_lists = nl;
+
+	return nl;
+}
+
+static int get_cache_slice_header(int fd, unsigned char *cache_sha1, int len, struct rc_slice_header *head)
  {
  	int t;

-	memcpy(head, map, sizeof(struct rc_slice_header));
+	if (xread(fd, head, sizeof(struct rc_slice_header)) != sizeof(struct rc_slice_header))
+		return -1;
+
  	head->ofs_objects = ntohl(head->ofs_objects);
  	head->object_nr = ntohl(head->object_nr);
  	head->size = ntohl(head->size);
  	head->path_nr = ntohs(head->path_nr);
+	head->name_size = ntohl(head->name_size);

  	if (memcmp(head->signature, "REVCACHE", 8))
  		return -1;
@@ -730,10 +792,10 @@ static int get_cache_slice_header(unsigned char *cache_sha1, unsigned char *map,
  	if (hashcmp(head->sha1, cache_sha1))
  		return -3;
  	t = sizeof(struct rc_slice_header);
-	if (t != head->ofs_objects || t >= len)
+	if (t != head->ofs_objects)
  		return -4;
-
-	head->size = len;
+	if (head->size + head->name_size != len)
+		return -5;

  	return 0;
  }
@@ -785,7 +847,7 @@ int traverse_cache_slice(struct rev_info *revs,
  	unsigned long *date_so_far, int *slop_so_far,
  	struct commit_list ***queue, struct commit_list **work)
  {
-	int fd = -1, retval = -3;
+	int fd = -1, t, retval;
  	struct stat fi;
  	struct rc_slice_header head;
  	struct rev_cache_info *rci;
@@ -801,26 +863,31 @@ int traverse_cache_slice(struct rev_info *revs,
  	/* load options */
  	rci = &revs->rev_cache_info;
  	add_to_pending = rci->add_to_pending;
+	add_names = rci->add_names;

  	memset(&head, 0, sizeof(struct rc_slice_header));
+#	define ERROR(x)		do { retval = (x); goto end; } while (0);

  	fd = open_cache_slice(cache_sha1, O_RDONLY);
  	if (fd == -1)
-		goto end;
+		ERROR(-1);
  	if (fstat(fd, &fi) || fi.st_size < sizeof(struct rc_slice_header))
-		goto end;
+		ERROR(-2);
+
+	if ((t = get_cache_slice_header(fd, cache_sha1, fi.st_size, &head)) < 0)
+		ERROR(-t);
+	if (add_names)
+		cur_name_list = get_cache_slice_name_list(&head, fd);

-	map = xmmap(0, fi.st_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+	map = xmmap(0, head.size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
  	if (map == MAP_FAILED)
-		goto end;
-	if (get_cache_slice_header(cache_sha1, map, fi.st_size, &head))
-		goto end;
+		ERROR(-3);

  	retval = traverse_cache_slice_1(&head, map, revs, commit, date_so_far, slop_so_far, queue, work);

  end:
  	if (map != MAP_FAILED)
-		munmap(map, fi.st_size);
+		munmap(map, head.size);
  	if (fd != -1)
  		close(fd);

@@ -828,6 +895,7 @@ end:
  	if (retval)
  		mark_bad_slice(cache_sha1);

+#	undef ERROR
  	return retval;
  }

@@ -1112,23 +1180,110 @@ static unsigned long decode_size(unsigned char *str, int len)
  	return size;
  }

+
+#define NL_HASH_TABLE_SIZE		(0xffff + 1)
+#define NL_HASH_NUMBER			(NL_HASH_TABLE_SIZE >> 3)
+
+struct name_list_hash {
+	int ind;
+	struct name_list_hash *next;
+};
+
+static struct name_list_hash **nl_hash_table;
+static unsigned char *nl_hashes;
+
+/* FNV-1a hash */
+static unsigned int hash_name(const char *name)
+{
+	unsigned int hash = 2166136261ul;
+	const char *p = name;
+
+	while (*p) {
+		hash ^= *p++;
+		hash *= 16777619ul;
+	}
+
+	return hash & 0xffff;
+}
+
+static int name_in_list(const char *name)
+{
+	unsigned int h = hash_name(name);
+	struct name_list_hash *entry = nl_hash_table[h];
+
+	while (entry && strcmp(acc_name_buffer->buf + entry->ind, name))
+		entry = entry->next;
+
+	if (entry)
+		return entry->ind;
+
+	/* add name to buffer and create hash reference */
+	entry = xcalloc(1, sizeof(struct name_list_hash));
+	entry->ind = acc_name_buffer->len;
+	strbuf_add(acc_name_buffer, name, strlen(name) + 1);
+
+	entry->next = nl_hash_table[h];
+	nl_hash_table[h] = entry;
+
+	nl_hashes[h / 8] |= h % 8;
+
+	return entry->ind;
+}
+
+static void init_name_list_hash(void)
+{
+	nl_hash_table = xcalloc(NL_HASH_TABLE_SIZE, sizeof(struct name_list_hash));
+	nl_hashes = xcalloc(NL_HASH_NUMBER, 1);
+}
+
+static void cleanup_name_list_hash(void)
+{
+	int i;
+
+	for (i = 0; i < NL_HASH_NUMBER; i++) {
+		int j, ind = nl_hashes[i];
+
+		if (!ind)
+			continue;
+
+		for (j = 0; j < 8; j++) {
+			struct name_list_hash **entryp;
+
+			if (!(ind & 1 << j))
+				continue;
+
+			entryp = &nl_hash_table[i * 8 + j];
+			while (*entryp) {
+				struct name_list_hash *t = (*entryp)->next;
+
+				free(*entryp);
+				*entryp = t;
+			}
+		}
+	} /* code overhang! */
+
+	free(nl_hashes);
+	free(nl_hash_table);
+}
+
  static void add_object_entry(const unsigned char *sha1, struct rc_object_entry *entryp,
-	struct strbuf *merge_str, struct strbuf *split_str)
+	struct strbuf *merge_str, struct strbuf *split_str, char *name, unsigned long size)
  {
  	struct rc_object_entry entry;
-	unsigned char size_str[7];
-	unsigned long size;
+	unsigned char size_str[7], name_str[7];
  	enum object_type type;
  	void *data;

  	if (entryp)
  		sha1 = entryp->sha1;

-	/* retrieve size data */
-	data = read_sha1_file(sha1, &type, &size);
+	if (!size) {
+		/* retrieve size data */
+		data = read_sha1_file(sha1, &type, &size);

-	if (data)
-		free(data);
+		if (data)
+			free(data);
+	}

  	/* initialize! */
  	if (!entryp) {
@@ -1146,6 +1301,9 @@ static void add_object_entry(const unsigned char *sha1, struct rc_object_entry *

  	entryp->size_size = encode_size(size, size_str);

+	if (name)
+		entryp->name_size = encode_size(name_in_list(name), name_str);
+
  	/* write the muvabitch */
  	strbuf_add(acc_buffer, to_disked_rc_object_entry(entryp, 0), sizeof(struct rc_object_entry_ondisk));

@@ -1155,25 +1313,36 @@ static void add_object_entry(const unsigned char *sha1, struct rc_object_entry *
  		strbuf_add(acc_buffer, split_str->buf, split_str->len);

  	strbuf_add(acc_buffer, size_str, entryp->size_size);
+
+	if (name)
+		strbuf_add(acc_buffer, name_str, entryp->name_size);
  }

  /* returns non-zero to continue parsing, 0 to skip */
  typedef int (*dump_tree_fn)(const unsigned char *, const char *, unsigned int); /* sha1, path, mode */

  /* we need to walk the trees by hash, so unfortunately we can't use traverse_trees in tree-walk.c */
-static int dump_tree(struct tree *tree, dump_tree_fn fn)
+static int dump_tree(struct tree *tree, dump_tree_fn fn, char *base)
  {
  	struct tree_desc desc;
  	struct name_entry entry;
  	struct tree *subtree;
-	int r;
+	char concatpath[PATH_MAX];
+	int r, baselen;

  	if (parse_tree(tree))
  		return -1;

+	baselen = strlen(base);
+	strcpy(concatpath, base);
+
  	init_tree_desc(&desc, tree->buffer, tree->size);
  	while (tree_entry(&desc, &entry)) {
-		switch (fn(entry.sha1, entry.path, entry.mode)) {
+		if (baselen + strlen(entry.path) + 1 >= PATH_MAX)
+			die("we have a problem: %s%s is too big for me to handle", base, entry.path);
+		strcpy(concatpath + baselen, entry.path);
+
+		switch (fn(entry.sha1, concatpath, entry.mode)) {
  		case 0:
  			goto continue_loop;
  		default:
@@ -1185,7 +1354,8 @@ static int dump_tree(struct tree *tree, dump_tree_fn fn)
  			if (!subtree)
  				return -2;

-			if ((r = dump_tree(subtree, fn)) < 0)
+			strcat(concatpath, "/");
+			if ((r = dump_tree(subtree, fn, concatpath)) < 0)
  				return r;
  		}

@@ -1199,6 +1369,9 @@ continue_loop:
  static int dump_tree_callback(const unsigned char *sha1, const char *path, unsigned int mode)
  {
  	strbuf_add(acc_buffer, sha1, 20);
+	strbuf_add(acc_buffer, (char *)&acc_name_buffer->len, sizeof(size_t));
+
+	strbuf_add(acc_name_buffer, path, strlen(path) + 1);

  	return 1;
  }
@@ -1212,6 +1385,9 @@ static void tree_addremove(struct diff_options *options,
  		return;

  	strbuf_add(acc_buffer, sha1, 20);
+	strbuf_add(acc_buffer, (char *)&acc_name_buffer->len, sizeof(size_t));
+
+	strbuf_add(acc_name_buffer, concatpath, strlen(concatpath) + 1);
  }

  static void tree_change(struct diff_options *options,
@@ -1224,12 +1400,15 @@ static void tree_change(struct diff_options *options,
  		return;

  	strbuf_add(acc_buffer, new_sha1, 20);
+	strbuf_add(acc_buffer, (char *)&acc_name_buffer->len, sizeof(size_t));
+
+	strbuf_add(acc_name_buffer, concatpath, strlen(concatpath) + 1);
  }

  static int add_unique_objects(struct commit *commit)
  {
  	struct commit_list *list;
-	struct strbuf os, ost, *orig_buf;
+	struct strbuf os, ost, names, *orig_name_buf, *orig_buf;
  	struct diff_options opts;
  	int i, j, next;
  	char is_first = 1;
@@ -1237,13 +1416,17 @@ static int add_unique_objects(struct commit *commit)
  	/* ...no, calculate unique objects */
  	strbuf_init(&os, 0);
  	strbuf_init(&ost, 0);
+	strbuf_init(&names, 0);
  	orig_buf = acc_buffer;
+	orig_name_buf = acc_name_buffer;
+	acc_name_buffer = &names;

  	diff_setup(&opts);
  	DIFF_OPT_SET(&opts, RECURSIVE);
  	DIFF_OPT_SET(&opts, TREE_IN_RECURSIVE);
  	opts.change = tree_change;
  	opts.add_remove = tree_addremove;
+#	define ENTRY_SIZE (20 + sizeof(size_t))

  	/* this is only called for non-ends (ie. all parents interesting) */
  	for (list = commit->parents; list; list = list->next) {
@@ -1254,20 +1437,20 @@ static int add_unique_objects(struct commit *commit)

  		strbuf_setlen(acc_buffer, 0);
  		diff_tree_sha1(list->item->tree->object.sha1, commit->tree->object.sha1, "", &opts);
-		qsort(acc_buffer->buf, acc_buffer->len / 20, 20, (int (*)(const void *, const void *))hashcmp);
+		qsort(acc_buffer->buf, acc_buffer->len / ENTRY_SIZE, ENTRY_SIZE, (int (*)(const void *, const void *))hashcmp);

  		/* take intersection */
  		if (!is_first) {
-			for (next = i = j = 0; i < os.len; i += 20) {
+			for (next = i = j = 0; i < os.len; i += ENTRY_SIZE) {
  				while (j < ost.len && hashcmp((unsigned char *)(ost.buf + j), (unsigned char *)(os.buf + i)) < 0)
-					j += 20;
+					j += ENTRY_SIZE;

  				if (j >= ost.len || hashcmp((unsigned char *)(ost.buf + j), (unsigned char *)(os.buf + i)))
  					continue;

  				if (next != i)
-					memcpy(os.buf + next, os.buf + i, 20);
-				next += 20;
+					memcpy(os.buf + next, os.buf + i, ENTRY_SIZE);
+				next += ENTRY_SIZE;
  			}

  			if (next != i)
@@ -1279,29 +1462,37 @@ static int add_unique_objects(struct commit *commit)
  	/* no parents (!) */
  	if (is_first) {
  		acc_buffer = &os;
-		dump_tree(commit->tree, dump_tree_callback);
+		dump_tree(commit->tree, dump_tree_callback, "");
  	}

  	/* the ordering of non-commit objects dosn't really matter, so we're not gonna bother */
  	acc_buffer = orig_buf;
-	for (i = 0; i < os.len; i += 20)
-		add_object_entry((unsigned char *)(os.buf + i), 0, 0, 0);
+	acc_name_buffer = orig_name_buf;
+	for (i = 0; i < os.len; i += ENTRY_SIZE)
+		add_object_entry((unsigned char *)(os.buf + i), 0, 0, 0, names.buf + *(size_t *)(os.buf + i + 20), 0);

  	/* last but not least, the main tree */
-	add_object_entry(commit->tree->object.sha1, 0, 0, 0);
+	add_object_entry(commit->tree->object.sha1, 0, 0, 0, 0, 0);

-	return i / 20 + 1;
+	strbuf_release(&ost);
+	strbuf_release(&os);
+	strbuf_release(&names);
+
+	return i / ENTRY_SIZE + 1;
+#	undef ENTRY_SIZE
  }

  static int add_objects_verbatim_1(struct rev_cache_slice_map *mapping, int *index)
  {
-	unsigned char *map = mapping->map;
  	int i = *index, object_nr = 0;
+	unsigned char *map = mapping->map;
  	struct rc_object_entry *entry = RC_OBTAIN_OBJECT_ENTRY(map + i);
+	unsigned long size;

  	i += RC_ACTUAL_OBJECT_ENTRY_SIZE(entry);
  	while (i < mapping->size) {
-		int pos = i;
+		char *name;
+		int name_index, pos = i;

  		entry = RC_OBTAIN_OBJECT_ENTRY(map + i);
  		i += RC_ACTUAL_OBJECT_ENTRY_SIZE(entry);
@@ -1311,7 +1502,15 @@ static int add_objects_verbatim_1(struct rev_cache_slice_map *mapping, int *inde
  			return object_nr;
  		}

-		strbuf_add(acc_buffer, map + pos, i - pos);
+		name_index = decode_size(map + pos + RC_ENTRY_NAME_OFFSET(entry), entry->name_size);
+		if (name_index && name_index < mapping->name_size)
+			name = mapping->names + name_index;
+		else
+			name = 0;
+
+		size = decode_size(map + pos + RC_ENTRY_SIZE_OFFSET(entry), entry->size_size);
+
+		add_object_entry(0, entry, 0, 0, name, size);
  		object_nr++;
  	}

@@ -1396,6 +1595,7 @@ void init_rev_cache_info(struct rev_cache_info *rci)
  	rci->overwrite_all = 0;

  	rci->add_to_pending = 1;
+	rci->add_names = 1;

  	rci->ignore_size = 0;
  }
@@ -1420,9 +1620,9 @@ int make_cache_slice(struct rev_cache_info *rci,
  	struct rc_slice_header head;
  	struct commit *commit;
  	unsigned char sha1[20];
-	struct strbuf merge_paths, split_paths;
+	struct strbuf merge_paths, split_paths, namelist;
  	int object_nr, total_sz, fd;
-	char file[PATH_MAX], *newfile;
+	char file[PATH_MAX], null, *newfile;
  	struct rev_cache_info *trci;
  	git_SHA_CTX ctx;

@@ -1437,7 +1637,13 @@ int make_cache_slice(struct rev_cache_info *rci,
  	strbuf_init(&endlist, 0);
  	strbuf_init(&merge_paths, 0);
  	strbuf_init(&split_paths, 0);
+	strbuf_init(&namelist, 0);
  	acc_buffer = &buffer;
+	acc_name_buffer = &namelist;
+
+	null = 0;
+	strbuf_add(&namelist, &null, 1);
+	init_name_list_hash();

  	if (!revs) {
  		revs = &therevs;
@@ -1468,6 +1674,7 @@ int make_cache_slice(struct rev_cache_info *rci,
  	trci = &revs->rev_cache_info;
  	init_rev_cache_info(trci);
  	trci->add_to_pending = 0;
+	trci->add_names = 0;

  	setup_revisions(0, 0, revs, 0);
  	if (prepare_revision_walk(revs))
@@ -1505,7 +1712,7 @@ int make_cache_slice(struct rev_cache_info *rci,

  		commit->indegree = 0;

-		add_object_entry(0, &object, &merge_paths, &split_paths);
+		add_object_entry(0, &object, &merge_paths, &split_paths, 0, 0);
  		object_nr++;

  		if (rci->objects && !object.is_end) {
@@ -1531,10 +1738,16 @@ int make_cache_slice(struct rev_cache_info *rci,
  		total_sz += buffer.len;
  	}

+	/* write path name lookup list */
+	head.name_size = htonl(namelist.len);
+	write_in_full(fd, namelist.buf, namelist.len);
+
  	/* go ahead a free some stuff... */
  	strbuf_release(&buffer);
  	strbuf_release(&merge_paths);
  	strbuf_release(&split_paths);
+	strbuf_release(&namelist);
+	cleanup_name_list_hash();
  	if (path_sz)
  		free(paths);
  	while (path_track_alloc)
@@ -1992,6 +2205,7 @@ int fuse_cache_slices(struct rev_cache_info *rci, struct rev_info *revs)
  	for (i = idx_head.cache_nr - 1; i >= 0; i--) {
  		struct rev_cache_slice_map *map = rci->maps + i;
  		struct stat fi;
+		struct rc_slice_header head;
  		int fd;

  		if (!map->size)
@@ -2004,13 +2218,20 @@ int fuse_cache_slices(struct rev_cache_info *rci, struct rev_info *revs)
  			continue;
  		if (fi.st_size < sizeof(struct rc_slice_header))
  			continue;
+		if (get_cache_slice_header(fd, idx_caches + i * 20, fi.st_size, &head))
+			continue;

-		map->map = xmmap(0, fi.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+		map->map = xmmap(0, head.size, PROT_READ, MAP_PRIVATE, fd, 0);
  		if (map->map == MAP_FAILED)
  			continue;

+		lseek(fd, head.size, SEEK_SET);
+		map->names = xcalloc(head.name_size, 1);
+		read_in_full(fd, map->names, head.name_size);
+
  		close(fd);
-		map->size = fi.st_size;
+		map->size = head.size;
+		map->name_size = head.name_size;
  	}

  	rci->make_index = 0;
@@ -2027,6 +2248,7 @@ int fuse_cache_slices(struct rev_cache_info *rci, struct rev_info *revs)
  		if (!map->size)
  			continue;

+		free(map->names);
  		munmap(map->map, map->size);
  	}
  	free(rci->maps);
@@ -2048,7 +2270,6 @@ static int verify_cache_slice(const char *slice_path, unsigned char *sha1)
  {
  	struct rc_slice_header head;
  	int fd, len, retval = -1;
-	unsigned char *map = MAP_FAILED;
  	struct stat fi;

  	len = strlen(slice_path);
@@ -2063,17 +2284,12 @@ static int verify_cache_slice(const char *slice_path, unsigned char *sha1)
  	if (fstat(fd, &fi) || fi.st_size < sizeof(head))
  		goto end;

-	map = xmmap(0, sizeof(head), PROT_READ, MAP_PRIVATE, fd, 0);
-	if (map == MAP_FAILED)
-		goto end;
-	if (get_cache_slice_header(sha1, map, fi.st_size, &head))
+	if (get_cache_slice_header(fd, sha1, fi.st_size, &head))
  		goto end;

  	retval = 0;

  end:
-	if (map != MAP_FAILED)
-		munmap(map, sizeof(head));
  	if (fd > 0)
  		close(fd);

diff --git a/rev-cache.h b/rev-cache.h
index a1af337..0fa9b44 100644
--- a/rev-cache.h
+++ b/rev-cache.h
@@ -10,8 +10,14 @@
  #define RC_OBTAIN_OBJECT_ENTRY(p)			from_disked_rc_object_entry((struct rc_object_entry_ondisk *)(p), 0)
  #define RC_OBTAIN_INDEX_ENTRY(p)			from_disked_rc_index_entry((struct rc_index_entry_ondisk *)(p), 0)

-#define RC_ACTUAL_OBJECT_ENTRY_SIZE(e)		(sizeof(struct rc_object_entry_ondisk) + RC_PATH_SIZE((e)->merge_nr + (e)->split_nr) + (e)->size_size)
-#define RC_ENTRY_SIZE_OFFSET(e)				(RC_ACTUAL_OBJECT_ENTRY_SIZE(e) - (e)->size_size)
+#define RC_ACTUAL_OBJECT_ENTRY_SIZE(e)	(\
+	sizeof(struct rc_object_entry_ondisk) + \
+	RC_PATH_SIZE((e)->merge_nr + (e)->split_nr) + \
+	(e)->size_size + \
+	(e)->name_size\
+)
+#define RC_ENTRY_SIZE_OFFSET(e)			(RC_ACTUAL_OBJECT_ENTRY_SIZE(e) - (e)->name_size - (e)->size_size)
+#define RC_ENTRY_NAME_OFFSET(e)			(RC_ACTUAL_OBJECT_ENTRY_SIZE(e) - (e)->name_size)

  /* single index maps objects to cache files */
  struct rc_index_header {
@@ -50,6 +56,8 @@ struct rc_slice_header {
  	uint32_t size;

  	unsigned char sha1[20];
+
+	uint32_t name_size;
  };

  struct rc_object_entry_ondisk {
@@ -76,7 +84,8 @@ struct rc_object_entry {
  	unsigned char merge_nr; /* : 7 */
  	unsigned char split_nr; /* : 7 */
  	unsigned size_size:3;
-	unsigned padding:5;
+	unsigned name_size:3;
+	unsigned padding:2;

  	uint32_t date;
  	uint16_t path;
@@ -84,6 +93,7 @@ struct rc_object_entry {
  	/* merge paths */
  	/* split paths */
  	/* size */
+	/* name id */
  };

  struct rc_index_entry *from_disked_rc_index_entry(struct rc_index_entry_ondisk *src, struct rc_index_entry *dst);
diff --git a/revision.h b/revision.h
index cc5c259..c62e85b 100644
--- a/revision.h
+++ b/revision.h
@@ -26,6 +26,9 @@ struct rev_cache_slice_map {
  	unsigned char *map;
  	int size;
  	int last_index;
+
+	char *names;
+	int name_size;
  };

  struct rev_cache_info {
@@ -39,7 +42,8 @@ struct rev_cache_info {
  	unsigned overwrite_all : 1;

  	/* traversal flags */
-	unsigned add_to_pending : 1;
+	unsigned add_to_pending : 1,
+		add_names : 1;

  	/* fuse options */
  	unsigned int ignore_size;
diff --git a/t/t6017-rev-cache-list.sh b/t/t6017-rev-cache-list.sh
index 982fb15..f0f3bcf 100755
--- a/t/t6017-rev-cache-list.sh
+++ b/t/t6017-rev-cache-list.sh
@@ -4,8 +4,10 @@ test_description='git rev-cache tests'
  . ./test-lib.sh

  test_cmp_sorted() {
-	grep -io "[a-f0-9]*" $1 | sort >.tmpfile1 &&
-	grep -io "[a-f0-9]*" $2 | sort >.tmpfile2 &&
+# note that we're tip-toeing around the corner case of two objects/names
+# for the same SHA-1 => discrepencies between cached and non-cached walks
+	sort $1 >.tmpfile1 &&
+	sort $2 >.tmpfile2 &&
  	test_cmp .tmpfile1 .tmpfile2
  }

@@ -15,6 +17,8 @@ test_cmp_sorted() {
  # reuse
  test_expect_success 'init repo' '
  	echo bla >file &&
+	mkdir amaindir &&
+	echo watskeburt >amaindir/file &&
  	git add . &&
  	git commit -m "bla" &&

-- 
tg: (2b7d538..) t/revcache/names (depends on: t/revcache/docs)

^ permalink raw reply related

* Re: [PATCH 5/6 (v4)] full integration of rev-cache into git, completed test suite
From: Nick Edelen @ 2009-10-02 22:12 UTC (permalink / raw)
  To: Nick Edelen, Junio C Hamano, Nicolas Pitre, Johannes Schindelin,
	Sam Vilain
In-Reply-To: <op.uyuwkuoxtdk399@sirnot.private>

This patch provides a working integration of rev-cache into the revision
walker, along with some touch-ups:
  - integration into revision walker and list-objects
  - refactor object generation for more coherent code structure
  - more fluid handling of damaged cache slices (remembering bad slices)
  - numerous tests for both features from the previous patch, and the
integration's integrity

'Integration' is rather broad -- a more detailed description follows for each
aspect:
  - rev-cache
the traversal mechanism is updated to handle many of the non-prune options
rev-list does (date limiting, slop-handling, etc.), and is adjusted to allow
for non-fatal cache-traversal failures.

  - revision walker
both limited and unlimited traversal attempt to use the cache when possible,
smoothly falling back if it's not.

  - list-objects
object listing does not recurse into cached trees, and has been adjusted to
guarantee commit-tag-tree-blob ordering.

Signed-off-by: Nick Edelen <sirnot@gmail.com>

---
tweak test for compatability.

  builtin-rev-cache.c       |   40 ++++++++
  list-objects.c            |   46 ++++++++-
  rev-cache.c               |  231 +++++++++++++++++++++++++++++++++++++++------
  revision.c                |   88 ++++++++++++++---
  t/t6017-rev-cache-list.sh |  151 ++++++++++++++++++++++++++++-
  5 files changed, 501 insertions(+), 55 deletions(-)

diff --git a/builtin-rev-cache.c b/builtin-rev-cache.c
index b894c54..8f41123 100644
--- a/builtin-rev-cache.c
+++ b/builtin-rev-cache.c
@@ -4,6 +4,7 @@
  #include "diff.h"
  #include "revision.h"
  #include "rev-cache.h"
+#include "list-objects.h"

  unsigned long default_ignore_size = 50 * 1024 * 1024; /* 50mb */

@@ -78,6 +79,43 @@ static int handle_add(int argc, const char *argv[]) /* args beyond this command
  	return 0;
  }

+static void show_commit(struct commit *commit, void *data)
+{
+	printf("%s\n", sha1_to_hex(commit->object.sha1));
+}
+
+static void show_object(struct object *obj, const struct name_path *path, const char *last)
+{
+	printf("%s\n", sha1_to_hex(obj->sha1));
+}
+
+static int test_rev_list(int argc, const char *argv[])
+{
+	struct rev_info revs;
+	unsigned int flags = 0;
+	int i;
+
+	init_revisions(&revs, 0);
+
+	for (i = 0; i < argc; i++) {
+		if (!strcmp(argv[i], "--not"))
+			flags ^= UNINTERESTING;
+		else if (!strcmp(argv[i], "--objects"))
+			revs.tree_objects = revs.blob_objects = 1;
+		else
+			handle_revision_arg(argv[i], &revs, flags, 1);
+	}
+
+	setup_revisions(0, 0, &revs, 0);
+	revs.topo_order = 1;
+	revs.lifo = 1;
+	prepare_revision_walk(&revs);
+
+	traverse_commit_list(&revs, show_commit, show_object, 0);
+
+	return 0;
+}
+
  static int handle_walk(int argc, const char *argv[])
  {
  	struct commit *commit;
@@ -271,6 +309,8 @@ int cmd_rev_cache(int argc, const char *argv[], const char *prefix)
  		r = handle_walk(argc, argv);
  	else if (!strcmp(arg, "index"))
  		r = handle_index(argc, argv);
+	else if (!strcmp(arg, "test"))
+		r = test_rev_list(argc, argv);
  	else if (!strcmp(arg, "alt"))
  		r = handle_alt(argc, argv);
  	else
diff --git a/list-objects.c b/list-objects.c
index 8953548..b8c3370 100644
--- a/list-objects.c
+++ b/list-objects.c
@@ -74,22 +74,34 @@ static void process_tree(struct rev_info *revs,
  		die("bad tree object");
  	if (obj->flags & (UNINTERESTING | SEEN))
  		return;
+	if (obj->flags & FACE_VALUE) {
+		obj->flags |= SEEN;
+		show(obj, path, name);
+		/* not parsing the tree saves a lot of time! */
+		return;
+	}
+
  	if (parse_tree(tree) < 0)
  		die("bad tree object %s", sha1_to_hex(obj->sha1));
  	obj->flags |= SEEN;
  	show(obj, path, name);
+
  	me.up = path;
  	me.elem = name;
  	me.elem_len = strlen(name);
-
  	init_tree_desc(&desc, tree->buffer, tree->size);

  	while (tree_entry(&desc, &entry)) {
-		if (S_ISDIR(entry.mode))
+		if (S_ISDIR(entry.mode)) {
+			struct tree *subtree = lookup_tree(entry.sha1);
+			if (!subtree)
+				continue;
+
+			subtree->object.flags &= ~FACE_VALUE;
  			process_tree(revs,
-				     lookup_tree(entry.sha1),
+				     subtree,
  				     show, &me, entry.path);
-		else if (S_ISGITLINK(entry.mode))
+		} else if (S_ISGITLINK(entry.mode))
  			process_gitlink(revs, entry.sha1,
  					show, &me, entry.path);
  		else
@@ -136,6 +148,7 @@ void mark_edges_uninteresting(struct commit_list *list,

  static void add_pending_tree(struct rev_info *revs, struct tree *tree)
  {
+	tree->object.flags &= ~FACE_VALUE;
  	add_pending_object(revs, &tree->object, "");
  }

@@ -146,17 +159,27 @@ void traverse_commit_list(struct rev_info *revs,
  {
  	int i;
  	struct commit *commit;
+	enum object_type what = OBJ_TAG;
+	char face_value = 0;

  	while ((commit = get_revision(revs)) != NULL) {
-		add_pending_tree(revs, commit->tree);
+		if (!(commit->object.flags & FACE_VALUE))
+			add_pending_tree(revs, commit->tree);
+		else
+			face_value = 1;
  		show_commit(commit, data);
  	}
+
+loop_objects:
  	for (i = 0; i < revs->pending.nr; i++) {
  		struct object_array_entry *pending = revs->pending.objects + i;
  		struct object *obj = pending->item;
  		const char *name = pending->name;
  		if (obj->flags & (UNINTERESTING | SEEN))
  			continue;
+		if (obj->type != what && face_value)
+			continue;
+
  		if (obj->type == OBJ_TAG) {
  			obj->flags |= SEEN;
  			show_object(obj, NULL, name);
@@ -175,6 +198,19 @@ void traverse_commit_list(struct rev_info *revs,
  		die("unknown pending object %s (%s)",
  		    sha1_to_hex(obj->sha1), name);
  	}
+	if (face_value) {
+		switch (what) {
+		case OBJ_TAG:
+			what = OBJ_TREE;
+			goto loop_objects;
+		case OBJ_TREE:
+			what = OBJ_BLOB;
+			goto loop_objects;
+		default:
+			break;
+		}
+	}
+
  	if (revs->pending.nr) {
  		free(revs->pending.objects);
  		revs->pending.nr = 0;
diff --git a/rev-cache.c b/rev-cache.c
index e401978..4ef5287 100644
--- a/rev-cache.c
+++ b/rev-cache.c
@@ -11,6 +11,12 @@
  #include "run-command.h"
  #include "string-list.h"

+
+struct bad_slice {
+	unsigned char sha1[20];
+	struct bad_slice *next;
+};
+
  struct cache_slice_pointer {
  	char signature[8]; /* REVCOPTR */
  	char version;
@@ -23,8 +29,9 @@ static uint32_t fanout[0xff + 2];
  static unsigned char *idx_map;
  static int idx_size;
  static struct rc_index_header idx_head;
+static char no_idx, add_to_pending;
+static struct bad_slice *bad_slices;
  static unsigned char *idx_caches;
-static char no_idx;

  static struct strbuf *acc_buffer;

@@ -121,6 +128,30 @@ struct rc_object_entry_ondisk *to_disked_rc_object_entry(struct rc_object_entry
  	return dst;
  }

+static void mark_bad_slice(unsigned char *sha1)
+{
+	struct bad_slice *bad;
+
+	bad = xcalloc(sizeof(struct bad_slice), 1);
+	hashcpy(bad->sha1, sha1);
+
+	bad->next = bad_slices;
+	bad_slices = bad;
+}
+
+static int is_bad_slice(unsigned char *sha1)
+{
+	struct bad_slice *bad = bad_slices;
+
+	while (bad) {
+		if (!hashcmp(bad->sha1, sha1))
+			return 1;
+		bad = bad->next;
+	}
+
+	return 0;
+}
+
  static int get_index_head(unsigned char *map, int len, struct rc_index_header *head, uint32_t *fanout, unsigned char **caches)
  {
  	struct rc_index_header whead;
@@ -246,6 +277,7 @@ static struct rc_index_entry *search_index(unsigned char *sha1)
  unsigned char *get_cache_slice(struct commit *commit)
  {
  	struct rc_index_entry *ie;
+	unsigned char *sha1;

  	if (!idx_map) {
  		if (no_idx)
@@ -257,8 +289,13 @@ unsigned char *get_cache_slice(struct commit *commit)
  		return 0;

  	ie = search_index(commit->object.sha1);
-	if (ie && ie->cache_index < idx_head.cache_nr)
-		return idx_caches + ie->cache_index * 20;
+	if (ie && ie->cache_index < idx_head.cache_nr) {
+		sha1 = idx_caches + ie->cache_index * 20;
+
+		if (is_bad_slice(sha1))
+			return 0;
+		return sha1;
+	}

  	return 0;
  }
@@ -268,6 +305,20 @@ unsigned char *get_cache_slice(struct commit *commit)

  static unsigned long decode_size(unsigned char *str, int len);

+/* on failure */
+static void restore_commit(struct commit *commit)
+{
+	commit->object.flags &= ~(ADDED | SEEN | FACE_VALUE);
+
+	if (!commit->object.parsed) {
+		while (pop_commit(&commit->parents))
+			;
+
+		parse_commit(commit);
+	}
+
+}
+
  static void handle_noncommit(struct rev_info *revs, struct commit *commit, unsigned char *ptr, struct rc_object_entry *entry)
  {
  	struct blob *blob;
@@ -307,23 +358,27 @@ static void handle_noncommit(struct rev_info *revs, struct commit *commit, unsig
  	}

  	obj->flags |= FACE_VALUE;
-	add_pending_object(revs, obj, "");
+	if (add_to_pending)
+		add_pending_object(revs, obj, "");
  }

-static int setup_traversal(struct rc_slice_header *head, unsigned char *map, struct commit *commit, struct commit_list **work)
+static int setup_traversal(struct rc_slice_header *head, unsigned char *map, struct commit *commit, struct commit_list **work,
+	struct commit_list **unwork, int *ipath_nr, int *upath_nr, char *ioutside)
  {
  	struct rc_index_entry *iep;
  	struct rc_object_entry *oep;
  	struct commit_list *prev, *wp, **wpp;
  	int retval;

-	iep = search_index(commit->object.sha1), 0;
+	iep = search_index(commit->object.sha1);
  	oep = RC_OBTAIN_OBJECT_ENTRY(map + iep->pos);
+	if (commit->object.flags & UNINTERESTING) {
+		++*upath_nr;
+		oep->uninteresting = 1;
+	} else
+		++*ipath_nr;

-	/* the .uniniteresting bit isn't strictly necessary, as we check the object during traversal as well,
-	 * but we might as well initialize it while we're at it */
  	oep->include = 1;
-	oep->uninteresting = !!(commit->object.flags & UNINTERESTING);
  	to_disked_rc_object_entry(oep, (struct rc_object_entry_ondisk *)(map + iep->pos));
  	retval = iep->pos;

@@ -338,6 +393,10 @@ static int setup_traversal(struct rc_slice_header *head, unsigned char *map, str
  		/* is this in our cache slice? */
  		iep = search_index(obj->sha1);
  		if (!iep || hashcmp(idx_caches + iep->cache_index * 20, head->sha1)) {
+			/* there are interesing objects outside the slice */
+			if (!(obj->flags & UNINTERESTING))
+				*ioutside = 1;
+
  			prev = wp;
  			wp = wp->next;
  			wpp = &wp;
@@ -354,11 +413,20 @@ static int setup_traversal(struct rc_slice_header *head, unsigned char *map, str
  		oep->uninteresting = !!(obj->flags & UNINTERESTING);
  		to_disked_rc_object_entry(oep, (struct rc_object_entry_ondisk *)(map + iep->pos));

+		/* count even if not in slice so we can stop enumerating if possible */
+		if (obj->flags & UNINTERESTING)
+			++*upath_nr;
+		else
+			++*ipath_nr;
+
  		/* remove from work list */
  		co = pop_commit(wpp);
  		wp = *wpp;
  		if (prev)
  			prev->next = wp;
+
+		/* ...and store in temp list so we can restore work on failure */
+		commit_list_insert(co, unwork);
  	}

  	return retval;
@@ -375,13 +443,18 @@ static int traverse_cache_slice_1(struct rc_slice_header *head, unsigned char *m
  	unsigned long *date_so_far, int *slop_so_far,
  	struct commit_list ***queue, struct commit_list **work)
  {
-	struct commit_list *insert_cache = 0;
+	struct commit_list *insert_cache = 0, *myq = 0, **myqp = &myq, *mywork = 0, **myworkp = &mywork, *unwork = 0;
  	struct commit **last_objects, *co;
-	int i, total_path_nr = head->path_nr, retval = -1;
-	char consume_children = 0;
+	unsigned long date = date_so_far ? *date_so_far : ~0ul;
+	int i, ipath_nr = 0, upath_nr = 0, orig_obj_nr = 0,
+		total_path_nr = head->path_nr, retval = -1, slop = slop_so_far ? *slop_so_far : SLOP;
+	char consume_children = 0, ioutside = 0;
  	unsigned char *paths;

-	i = setup_traversal(head, map, commit, work);
+	/* take note in case we need to regress */
+	orig_obj_nr = revs->pending.nr;
+
+	i = setup_traversal(head, map, commit, work, &unwork, &ipath_nr, &upath_nr, &ioutside);
  	if (i < 0)
  		return -1;

@@ -429,6 +502,7 @@ static int traverse_cache_slice_1(struct rc_slice_header *head, unsigned char *m

  		if ((paths[path] & IPATH) && (paths[path] & UPATH)) {
  			paths[path] = UPATH;
+			ipath_nr--;

  			/* mark edge */
  			if (last_objects[path]) {
@@ -439,6 +513,7 @@ static int traverse_cache_slice_1(struct rc_slice_header *head, unsigned char *m
  				last_objects[path]->object.flags &= ~FACE_VALUE;
  				last_objects[path] = 0;
  			}
+			obj->flags |= BOUNDARY;
  		}

  		/* now we gotta re-assess the whole interesting thing... */
@@ -462,8 +537,10 @@ static int traverse_cache_slice_1(struct rc_slice_header *head, unsigned char *m
  						last_objects[p]->object.flags &= ~FACE_VALUE;
  						last_objects[p] = 0;
  					}
-				} else if (last_objects[p] && !last_objects[p]->object.parsed)
+					obj->flags |= BOUNDARY;
+				} else if (last_objects[p] && !last_objects[p]->object.parsed) {
  					commit_list_insert(co, &last_objects[p]->parents);
+				}

  				/* can't close a merge path until all are parents have been encountered */
  				if (GET_COUNT(paths[p])) {
@@ -473,14 +550,33 @@ static int traverse_cache_slice_1(struct rc_slice_header *head, unsigned char *m
  						continue;
  				}

+				if (paths[p] & IPATH)
+					ipath_nr--;
+				else
+					upath_nr--;
+
  				paths[p] = 0;
  				last_objects[p] = 0;
  			}
  		}

  		/* make topo relations */
-		if (last_objects[path] && !last_objects[path]->object.parsed)
+		if (last_objects[path] && !last_objects[path]->object.parsed) {
  			commit_list_insert(co, &last_objects[path]->parents);
+		}
+
+		/* we've been here already */
+		if (obj->flags & ADDED) {
+			if (entry->uninteresting && !(obj->flags & UNINTERESTING)) {
+				obj->flags |= UNINTERESTING;
+				mark_parents_uninteresting(co);
+				upath_nr--;
+			} else if (!entry->uninteresting)
+				ipath_nr--;
+
+			paths[path] = 0;
+			continue;
+		}

  		/* initialize commit */
  		if (!entry->is_end) {
@@ -493,24 +589,51 @@ static int traverse_cache_slice_1(struct rc_slice_header *head, unsigned char *m

  		if (entry->uninteresting)
  			obj->flags |= UNINTERESTING;
+		else if (co->date < date)
+			date = co->date;

  		/* we need to know what the edges are */
  		last_objects[path] = co;

  		/* add to list */
-		if (!(obj->flags & UNINTERESTING) || revs->show_all) {
-			if (entry->is_end)
-				insert_by_date_cached(co, work, insert_cache, &insert_cache);
-			else
-				*queue = &commit_list_insert(co, *queue)->next;
+		if (slop && !(revs->min_age != -1 && co->date > revs->min_age)) {
+
+			if (!(obj->flags & UNINTERESTING) || revs->show_all) {
+				if (entry->is_end)
+					myworkp = &commit_list_insert(co, myworkp)->next;
+				else
+					myqp = &commit_list_insert(co, myqp)->next;
+
+				/* add children to list as well */
+				if (obj->flags & UNINTERESTING)
+					consume_children = 0;
+				else
+					consume_children = 1;
+			}

-			/* add children to list as well */
-			if (obj->flags & UNINTERESTING)
-				consume_children = 0;
-			else
-				consume_children = 1;
  		}

+		/* should we continue? */
+		if (!slop) {
+			if (!upath_nr) {
+				break;
+			} else if (ioutside || revs->show_all) {
+				/* pass it back to rev-list
+				 * we purposely ignore everything outside this cache, so we don't needlessly traverse the whole
+				 * thing on uninteresting, but that does mean that we may need to bounce back
+				 * and forth a few times with rev-list */
+				myworkp = &commit_list_insert(co, myworkp)->next;
+
+				paths[path] = 0;
+				upath_nr--;
+			} else {
+				break;
+			}
+		} else if (!ipath_nr && co->date <= date)
+			slop--;
+		else
+			slop = SLOP;
+
  		/* open parents */
  		if (entry->merge_nr) {
  			int j, off = index + sizeof(struct rc_object_entry_ondisk);
@@ -525,6 +648,11 @@ static int traverse_cache_slice_1(struct rc_slice_header *head, unsigned char *m
  				if (paths[p] & flag)
  					continue;

+				if (flag == IPATH)
+					ipath_nr++;
+				else
+					upath_nr++;
+
  				paths[p] |= flag;
  			}

@@ -534,12 +662,54 @@ static int traverse_cache_slice_1(struct rc_slice_header *head, unsigned char *m

  	}

+	if (date_so_far)
+		*date_so_far = date;
+	if (slop_so_far)
+		*slop_so_far = slop;
  	retval = 0;

+	/* success: attach to given lists */
+	if (myqp != &myq) {
+		**queue = myq;
+		*queue = myqp;
+	}
+
+	while ((co = pop_commit(&mywork)) != 0)
+		insert_by_date_cached(co, work, insert_cache, &insert_cache);
+
+	/* free backup */
+	while (pop_commit(&unwork))
+		;
+
  end:
  	free(paths);
  	free(last_objects);

+	/* failure: restore work to previous condition
+	 * (cache corruption should *not* be fatal) */
+	if (retval) {
+		while ((co = pop_commit(&unwork)) != 0) {
+			restore_commit(co);
+			co->object.flags |= SEEN;
+			insert_by_date(co, work);
+		}
+
+		/* free lists */
+		while ((co = pop_commit(&myq)) != 0)
+			restore_commit(co);
+
+		while ((co = pop_commit(&mywork)) != 0)
+			restore_commit(co);
+
+		/* truncate object array */
+		for (i = orig_obj_nr; i < revs->pending.nr; i++) {
+			struct object *obj = revs->pending.objects[i].item;
+
+			obj->flags &= ~FACE_VALUE;
+		}
+		revs->pending.nr = orig_obj_nr;
+	}
+
  	return retval;
  }

@@ -630,6 +800,7 @@ int traverse_cache_slice(struct rev_info *revs,

  	/* load options */
  	rci = &revs->rev_cache_info;
+	add_to_pending = rci->add_to_pending;

  	memset(&head, 0, sizeof(struct rc_slice_header));

@@ -653,6 +824,10 @@ end:
  	if (fd != -1)
  		close(fd);

+	/* remember this! */
+	if (retval)
+		mark_bad_slice(cache_sha1);
+
  	return retval;
  }

@@ -811,7 +986,7 @@ static void handle_paths(struct commit *commit, struct rc_object_entry *object,
  	int child_nr, parent_nr, open_parent_nr, this_path;
  	struct commit_list *list;
  	struct commit *first_parent;
-	struct pa\th_track **ppt, *pt;
+	struct path_track **ppt, *pt;

  	/* we can only re-use a closed path once all it's children have been encountered,
  	 * as we need to keep track of commit boundaries */
@@ -1128,7 +1303,7 @@ static int add_objects_verbatim_1(struct rev_cache_slice_map *mapping, int *inde
  	while (i < mapping->size) {
  		int pos = i;

-		entry = RC_OBTAIN_OBJECT_ENTRY(map + i;
+		entry = RC_OBTAIN_OBJECT_ENTRY(map + i);
  		i += RC_ACTUAL_OBJECT_ENTRY_SIZE(entry);

  		if (entry->type == OBJ_COMMIT) {
diff --git a/revision.c b/revision.c
index c7fd35f..ed21885 100644
--- a/revision.c
+++ b/revision.c
@@ -12,6 +12,7 @@
  #include "patch-ids.h"
  #include "decorate.h"
  #include "log-tree.h"
+#include "rev-cache.h"

  volatile show_early_output_fn_t show_early_output;

@@ -638,6 +639,8 @@ static int limit_list(struct rev_info *revs)
  	struct commit_list *list = revs->commits;
  	struct commit_list *newlist = NULL;
  	struct commit_list **p = &newlist;
+	unsigned char *cache_sha1;
+	char used_cache;

  	while (list) {
  		struct commit_list *entry = list;
@@ -650,24 +653,39 @@ static int limit_list(struct rev_info *revs)

  		if (revs->max_age != -1 && (commit->date < revs->max_age))
  			obj->flags |= UNINTERESTING;
-		if (add_parents_to_list(revs, commit, &list, NULL) < 0)
-			return -1;
-		if (obj->flags & UNINTERESTING) {
-			mark_parents_uninteresting(commit);
-			if (revs->show_all)
-				p = &commit_list_insert(commit, p)->next;
-			slop = still_interesting(list, date, slop);
-			if (slop)
+
+		/* rev-cache to the rescue!!! */
+		used_cache = 0;
+		if (!revs->dont_cache_me && !(obj->flags & ADDED)) {
+			cache_sha1 = get_cache_slice(commit);
+			if (cache_sha1) {
+				if (traverse_cache_slice(revs, cache_sha1, commit, &date, &slop, &p, &list) < 0)
+					used_cache = 0;
+				else
+					used_cache = 1;
+			}
+		}
+
+		if (!used_cache) {
+			if (add_parents_to_list(revs, commit, &list, NULL) < 0)
+				return -1;
+			if (obj->flags & UNINTERESTING) {
+				mark_parents_uninteresting(commit); /* ME: why? */
+				if (revs->show_all)
+					p = &commit_list_insert(commit, p)->next;
+				slop = still_interesting(list, date, slop);
+				if (slop > 0)
+					continue;
+				/* If showing all, add the whole pending list to the end */
+				if (revs->show_all)
+					*p = list;
+				break;
+			}
+			if (revs->min_age != -1 && (commit->date > revs->min_age))
  				continue;
-			/* If showing all, add the whole pending list to the end */
-			if (revs->show_all)
-				*p = list;
-			break;
+			date = commit->date;
+			p = &commit_list_insert(commit, p)->next;
  		}
-		if (revs->min_age != -1 && (commit->date > revs->min_age))
-			continue;
-		date = commit->date;
-		p = &commit_list_insert(commit, p)->next;

  		show = show_early_output;
  		if (!show)
@@ -813,6 +831,8 @@ void init_revisions(struct rev_info *revs, const char *prefix)
  		revs->diffopt.prefix = prefix;
  		revs->diffopt.prefix_length = strlen(prefix);
  	}
+
+	init_rev_cache_info(&revs->rev_cache_info);
  }

  static void add_pending_commit_list(struct rev_info *revs,
@@ -1372,6 +1392,11 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, const ch
  	if (revs->reflog_info && revs->graph)
  		die("cannot combine --walk-reflogs with --graph");

+	/* limits on caching
+	 * todo: implement this functionality */
+	if (revs->prune || revs->diff)
+		revs->dont_cache_me = 1;
+
  	return left;
  }

@@ -1654,6 +1679,8 @@ static int commit_match(struct commit *commit, struct rev_info *opt)
  {
  	if (!opt->grep_filter.pattern_list)
  		return 1;
+	if (!commit->object.parsed)
+		parse_commit(commit);
  	return grep_buffer(&opt->grep_filter,
  			   NULL, /* we say nothing, not even filename */
  			   commit->buffer, strlen(commit->buffer));
@@ -1717,6 +1744,7 @@ static struct commit *get_revision_1(struct rev_info *revs)
  	do {
  		struct commit_list *entry = revs->commits;
  		struct commit *commit = entry->item;
+		struct object *obj = &commit->object;

  		revs->commits = entry->next;
  		free(entry);
@@ -1733,11 +1761,39 @@ static struct commit *get_revision_1(struct rev_info *revs)
  			if (revs->max_age != -1 &&
  			    (commit->date < revs->max_age))
  				continue;
+
+			if (!revs->dont_cache_me) {
+				struct commit_list *queue = 0, **queuep = &queue;
+				unsigned char *cache_sha1;
+
+				if (obj->flags & ADDED)
+					goto skip_parenting;
+
+				cache_sha1 = get_cache_slice(commit);
+				if (cache_sha1) {
+					if (!traverse_cache_slice(revs, cache_sha1, commit, 0, 0, &queuep, &revs->commits)) {
+						struct commit_list *work = revs->commits;
+
+						/* attach queue to end of ->commits */
+						while (work && work->next)
+							work = work->next;
+
+						if (work)
+							work->next = queue;
+						else
+							revs->commits = queue;
+
+						goto skip_parenting;
+					}
+				}
+			}
+
  			if (add_parents_to_list(revs, commit, &revs->commits, NULL) < 0)
  				die("Failed to traverse parents of commit %s",
  				    sha1_to_hex(commit->object.sha1));
  		}

+skip_parenting:
  		switch (simplify_commit(revs, commit)) {
  		case commit_ignore:
  			continue;
diff --git a/t/t6017-rev-cache-list.sh b/t/t6017-rev-cache-list.sh
index dc0fc07..982fb15 100755
--- a/t/t6017-rev-cache-list.sh
+++ b/t/t6017-rev-cache-list.sh
@@ -38,6 +38,7 @@ test_expect_success 'init repo' '
  	git add . &&
  	git commit -m "omg" &&

+	sleep 2 &&
  	git branch b4 &&
  	git checkout b4 &&
  	echo shazam >file8 &&
@@ -46,7 +47,7 @@ test_expect_success 'init repo' '
  	git merge -m "merge b2" b2 &&

  	echo bam >smoke/pipe &&
-	git add .
+	git add . &&
  	git commit -m "bam" &&

  	git checkout master &&
@@ -71,18 +72,26 @@ test_expect_success 'init repo' '
  	git add . &&
  	git commit -m "lol" &&

+	sleep 2 &&
  	git checkout master &&
  	git merge -m "triple merge" b1 b11 &&
  	git rm -r d1 &&
+	sleep 2 &&
  	git commit -a -m "oh noes"
  '

-git-rev-list HEAD --not HEAD~3 >proper_commit_list_limited
-git-rev-list HEAD >proper_commit_list
-git-rev-list HEAD --objects >proper_object_list
+max_date=`git-rev-list --timestamp HEAD~1 --max-count=1 | grep -e "^[0-9]*" -o`
+min_date=`git-rev-list --timestamp b4 --max-count=1 | grep -e "^[0-9]*" -o`
+
+git-rev-list --topo-order HEAD --not HEAD~3 >proper_commit_list_limited
+git-rev-list --topo-order HEAD --not HEAD~2 >proper_commit_list_limited2
+git-rev-list --topo-order HEAD >proper_commit_list
+git-rev-list --objects HEAD >proper_object_list
+git-rev-list HEAD --max-age=$min_date --min-age=$max_date >proper_list_date_limited
+
+cache_sha1=`git-rev-cache add HEAD 2>output.err`

  test_expect_success 'make cache slice' '
-	git-rev-cache add HEAD 2>output.err &&
  	grep "final return value: 0" output.err
  '

@@ -102,11 +111,141 @@ test_expect_success 'test rev-caches walker directly (unlimited)' '
  	test_cmp_sorted list proper_commit_list
  '

+test_expect_success 'test rev-list traversal (limited)' '
+	git-rev-list HEAD --not HEAD~3 >list &&
+	test_cmp list proper_commit_list_limited
+'
+
+test_expect_success 'test rev-list traversal (unlimited)' '
+	git-rev-list HEAD >list &&
+	test_cmp list proper_commit_list
+'
+
  #do the same for objects
  test_expect_success 'test rev-caches walker with objects' '
  	git-rev-cache walk --objects HEAD >list &&
  	test_cmp_sorted list proper_object_list
  '

-test_done
+test_expect_success 'test rev-list with objects (topo order)' '
+	git-rev-list --topo-order --objects HEAD >list &&
+	test_cmp_sorted list proper_object_list
+'
+
+test_expect_success 'test rev-list with objects (no order)' '
+	git-rev-list --objects HEAD >list &&
+	test_cmp_sorted list proper_object_list
+'
+
+#verify age limiting
+test_expect_success 'test rev-list date limiting (topo order)' '
+	git-rev-list --topo-order --max-age=$min_date --min-age=$max_date HEAD >list &&
+	test_cmp_sorted list proper_list_date_limited
+'
+
+test_expect_success 'test rev-list date limiting (no order)' '
+	git-rev-list --max-age=$min_date --min-age=$max_date HEAD >list &&
+	test_cmp_sorted list proper_list_date_limited
+'
+
+#check partial cache slice
+test_expect_success 'saving old cache and generating partial slice' '
+	cp ".git/rev-cache/$cache_sha1" .git/rev-cache/.old &&
+	rm ".git/rev-cache/$cache_sha1" .git/rev-cache/index &&
+
+	git-rev-cache add HEAD~2 2>output.err &&
+	grep "final return value: 0" output.err
+'
+
+test_expect_success 'rev-list with wholly interesting partial slice' '
+	git-rev-list --topo-order HEAD >list &&
+	test_cmp list proper_commit_list
+'
+
+test_expect_success 'rev-list with partly uninteresting partial slice' '
+	git-rev-list --topo-order HEAD --not HEAD~3 >list &&
+	test_cmp list proper_commit_list_limited
+'
+
+test_expect_success 'rev-list with wholly uninteresting partial slice' '
+	git-rev-list --topo-order HEAD --not HEAD~2 >list &&
+	test_cmp list proper_commit_list_limited2
+'
+
+#try out index generation and fuse (note that --all == HEAD in this case)
+#probably should make a test for that too...
+test_expect_success 'test (non-)fusion of one slice' '
+	git-rev-cache fuse >output.err &&
+	grep "nothing to fuse" output.err
+'

+test_expect_success 'make fresh slice' '
+	git-rev-cache add --all --fresh 2>output.err &&
+	grep "final return value: 0" output.err
+'
+
+test_expect_success 'check dual slices' '
+	git-rev-list --topo-order HEAD~2 HEAD >list &&
+	test_cmp list proper_commit_list
+'
+
+test_expect_success 'regenerate index' '
+	rm .git/rev-cache/index &&
+	git-rev-cache index 2>output.err &&
+	grep "final return value: 0" output.err
+'
+
+test_expect_success 'fuse slices' '
+	test -e .git/rev-cache/.old &&
+	git-rev-cache fuse 2>output.err &&
+	grep "final return value: 0" output.err &&
+	test_cmp .git/rev-cache/$cache_sha1 .git/rev-cache/.old
+'
+
+#make sure we can smoothly handle corrupted caches
+test_expect_success 'corrupt slice' '
+	echo bla >.git/rev-cache/$cache_sha1
+'
+
+test_expect_success 'test rev-list traversal (limited) (corrupt slice)' '
+	git-rev-list --topo-order HEAD --not HEAD~3 >list &&
+	test_cmp list proper_commit_list_limited
+'
+
+test_expect_success 'test rev-list traversal (unlimited) (corrupt slice)' '
+	git-rev-list HEAD >list &&
+	test_cmp_sorted list proper_commit_list
+'
+
+test_expect_success 'corrupt index' '
+	echo blu >.git/rev-cache/index
+'
+
+test_expect_success 'test rev-list traversal (limited) (corrupt index)' '
+	git-rev-list --topo-order HEAD --not HEAD~3 >list &&
+	test_cmp list proper_commit_list_limited
+'
+
+test_expect_success 'test rev-list traversal (unlimited) (corrupt index)' '
+	git-rev-list HEAD >list &&
+	test_cmp_sorted list proper_commit_list
+'
+
+#test --ignore-size in fuse
+rm .git/rev-cache/*
+cache_sha1=`git-rev-cache add HEAD~2 2>output.err`
+
+test_expect_success 'make fragmented slices' '
+	git-rev-cache add HEAD~1 --not HEAD~2 2>>output.err &&
+	git-rev-cache add HEAD --fresh 2>>output.err &&
+	test `grep "final return value: 0" output.err | wc -l` -eq 3
+'
+
+cache_size=$(wc -c < .git/rev-cache/$cache_sha1)
+test_expect_success 'test --ignore-size function in fuse' '
+	git-rev-cache fuse --ignore-size=$cache_size 2>output.err &&
+	grep "final return value: 0" output.err &&
+	test -e .git/rev-cache/$cache_sha1
+'
+
+test_done
-- 
tg: (13365da..) t/revcache/integration (depends on: t/revcache/misc)

^ permalink raw reply related

* Re: [PATCH 3/6 (v4)] support for non-commit object caching in rev-cache
From: Nick Edelen @ 2009-10-02 22:12 UTC (permalink / raw)
  To: Nick Edelen, Junio C Hamano, Nicolas Pitre, Johannes Schindelin,
	Sam Vilain
In-Reply-To: <op.uyuwkrfntdk399@sirnot.private>

Summarized, this third patch contains:
  - support for non-commit object caching
  - expansion of porcelain to accomodate non-commit objects
  - appropriate tests

Objects are stored relative to the commit in which they were introduced --
commits are 'diffed' against their parents.  This will eliminate the need for
tree recursion in cached commits (significantly reducing I/O), and potentially
be useful to external applications.

Signed-off-by: Nick Edelen <sirnot@gmail.com>

---
  rev-cache.c               |  202 ++++++++++++++++++++++++++++++++++++++++++++-
  t/t6017-rev-cache-list.sh |    6 ++
  2 files changed, 206 insertions(+), 2 deletions(-)

diff --git a/rev-cache.c b/rev-cache.c
index 8951cdf..ef6b58a 100644
--- a/rev-cache.c
+++ b/rev-cache.c
@@ -259,6 +259,32 @@ unsigned char *get_cache_slice(struct commit *commit)

  /* traversal */

+static void handle_noncommit(struct rev_info *revs, unsigned char *ptr, struct rc_object_entry *entry)
+{
+	struct object *obj = 0;
+
+	switch (entry->type) {
+	case OBJ_TREE:
+		if (revs->tree_objects)
+			obj = (struct object *)lookup_tree(entry->sha1);
+		break;
+	case OBJ_BLOB:
+		if (revs->blob_objects)
+			obj = (struct object *)lookup_blob(entry->sha1);
+		break;
+	case OBJ_TAG:
+		if (revs->tag_objects)
+			obj = (struct object *)lookup_tag(entry->sha1);
+		break;
+	}
+
+	if (!obj)
+		return;
+
+	obj->flags |= FACE_VALUE;
+	add_pending_object(revs, obj, "");
+}
+
  static int setup_traversal(struct rc_slice_header *head, unsigned char *map, struct commit *commit, struct commit_list **work)
  {
  	struct rc_index_entry *iep;
@@ -347,9 +373,12 @@ static int traverse_cache_slice_1(struct rc_slice_header *head, unsigned char *m
  		i += RC_ACTUAL_OBJECT_ENTRY_SIZE(entry);

  		/* add extra objects if necessary */
-		if (entry->type != OBJ_COMMIT)
+		if (entry->type != OBJ_COMMIT) {
+			if (consume_children)
+				handle_noncommit(revs, map + index, entry);
+
  			continue;
-		else
+		} else
  			consume_children = 0;

  		if (path >= total_path_nr)
@@ -777,6 +806,171 @@ static void add_object_entry(const unsigned char *sha1, int type, struct rc_obje

  }

+/* returns non-zero to continue parsing, 0 to skip */
+typedef int (*dump_tree_fn)(const unsigned char *, const char *, unsigned int); /* sha1, path, mode */
+
+/* we need to walk the trees by hash, so unfortunately we can't use traverse_trees in tree-walk.c */
+static int dump_tree(struct tree *tree, dump_tree_fn fn)
+{
+	struct tree_desc desc;
+	struct name_entry entry;
+	struct tree *subtree;
+	int r;
+
+	if (parse_tree(tree))
+		return -1;
+
+	init_tree_desc(&desc, tree->buffer, tree->size);
+	while (tree_entry(&desc, &entry)) {
+		switch (fn(entry.sha1, entry.path, entry.mode)) {
+		case 0:
+			goto continue_loop;
+		default:
+			break;
+		}
+
+		if (S_ISDIR(entry.mode)) {
+			subtree = lookup_tree(entry.sha1);
+			if (!subtree)
+				return -2;
+
+			if ((r = dump_tree(subtree, fn)) < 0)
+				return r;
+		}
+
+continue_loop:
+		continue;
+	}
+
+	return 0;
+}
+
+static int dump_tree_callback(const unsigned char *sha1, const char *path, unsigned int mode)
+{
+	unsigned char data[21];
+
+	hashcpy(data, sha1);
+	data[20] = !!S_ISDIR(mode);
+
+	strbuf_add(acc_buffer, data, 21);
+
+	return 1;
+}
+
+static void tree_addremove(struct diff_options *options,
+	int whatnow, unsigned mode,
+	const unsigned char *sha1,
+	const char *concatpath)
+{
+	unsigned char data[21];
+
+	if (whatnow != '+')
+		return;
+
+	hashcpy(data, sha1);
+	data[20] = !!S_ISDIR(mode);
+
+	strbuf_add(acc_buffer, data, 21);
+}
+
+static void tree_change(struct diff_options *options,
+	unsigned old_mode, unsigned new_mode,
+	const unsigned char *old_sha1,
+	const unsigned char *new_sha1,
+	const char *concatpath)
+{
+	unsigned char data[21];
+
+	if (!hashcmp(old_sha1, new_sha1))
+		return;
+
+	hashcpy(data, new_sha1);
+	data[20] = !!S_ISDIR(new_mode);
+
+	strbuf_add(acc_buffer, data, 21);
+}
+
+static int sort_type_hash(const void *a, const void *b)
+{
+	const unsigned char *sa = (const unsigned char *)a,
+		*sb = (const unsigned char *)b;
+
+	if (sa[20] == sb[20])
+		return hashcmp(sa, sb);
+
+	return sa[20] > sb[20] ? -1 : 1;
+}
+
+static int add_unique_objects(struct commit *commit)
+{
+	struct commit_list *list;
+	struct strbuf os, ost, *orig_buf;
+	struct diff_options opts;
+	int i, j, next;
+	char is_first = 1;
+
+	strbuf_init(&os, 0);
+	strbuf_init(&ost, 0);
+	orig_buf = acc_buffer;
+
+	diff_setup(&opts);
+	DIFF_OPT_SET(&opts, RECURSIVE);
+	DIFF_OPT_SET(&opts, TREE_IN_RECURSIVE);
+	opts.change = tree_change;
+	opts.add_remove = tree_addremove;
+
+	/* this is only called for non-ends (ie. all parents interesting) */
+	for (list = commit->parents; list; list = list->next) {
+		if (is_first)
+			acc_buffer = &os;
+		else
+			acc_buffer = &ost;
+
+		strbuf_setlen(acc_buffer, 0);
+		diff_tree_sha1(list->item->tree->object.sha1, commit->tree->object.sha1, "", &opts);
+		qsort(acc_buffer->buf, acc_buffer->len / 21, 21, (int (*)(const void *, const void *))hashcmp);
+
+		/* take intersection */
+		if (!is_first) {
+			for (next = i = j = 0; i < os.len; i += 21) {
+				while (j < ost.len && hashcmp((unsigned char *)(ost.buf + j), (unsigned char *)(os.buf + i)) < 0)
+					j += 21;
+
+				if (j >= ost.len || hashcmp((unsigned char *)(ost.buf + j), (unsigned char *)(os.buf + i)))
+					continue;
+
+				if (next != i)
+					memcpy(os.buf + next, os.buf + i, 21);
+				next += 21;
+			}
+
+			if (next != i)
+				strbuf_setlen(&os, next);
+		} else
+			is_first = 0;
+	}
+
+	if (is_first) {
+		acc_buffer = &os;
+		dump_tree(commit->tree, dump_tree_callback);
+	}
+
+	if (os.len)
+		qsort(os.buf, os.len / 21, 21, sort_type_hash);
+
+	acc_buffer = orig_buf;
+	for (i = 0; i < os.len; i += 21)
+		add_object_entry((unsigned char *)(os.buf + i), os.buf[i + 20] ? OBJ_TREE : OBJ_BLOB, 0, 0, 0);
+
+	/* last but not least, the main tree */
+	add_object_entry(commit->tree->object.sha1, OBJ_TREE, 0, 0, 0);
+
+	strbuf_release(&ost);
+	strbuf_release(&os);
+
+	return i / 21 + 1;
+}
+
  static void init_revcache_directory(void)
  {
  	struct stat fi;
@@ -902,6 +1096,10 @@ int make_cache_slice(struct rev_cache_info *rci,
  		add_object_entry(0, 0, &object, &merge_paths, &split_paths);
  		object_nr++;

+		/* add all unique children for this commit */
+		if (rci->objects && !object.is_end)
+			object_nr += add_unique_objects(commit);
+
  		/* print every ~1MB or so */
  		if (buffer.len > 1000000) {
  			write_in_full(fd, buffer.buf, buffer.len);
diff --git a/t/t6017-rev-cache-list.sh b/t/t6017-rev-cache-list.sh
index f59f568..dc0fc07 100755
--- a/t/t6017-rev-cache-list.sh
+++ b/t/t6017-rev-cache-list.sh
@@ -102,5 +102,11 @@ test_expect_success 'test rev-caches walker directly (unlimited)' '
  	test_cmp_sorted list proper_commit_list
  '

+#do the same for objects
+test_expect_success 'test rev-caches walker with objects' '
+	git-rev-cache walk --objects HEAD >list &&
+	test_cmp_sorted list proper_object_list
+'
+
  test_done

-- 
tg: (ec20331..) t/revcache/objects (depends on: t/revcache/basic)

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox