git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Junio C Hamano <gitster@pobox.com>
To: git@vger.kernel.org
Subject: [PATCH 8/9] read-cache.c: read prefix-compressed names in index on-disk version v4
Date: Tue,  3 Apr 2012 15:53:15 -0700	[thread overview]
Message-ID: <1333493596-14202-9-git-send-email-gitster@pobox.com> (raw)
In-Reply-To: <1333493596-14202-1-git-send-email-gitster@pobox.com>

Because the entries are sorted by path, adjacent entries in the index tend
to share leading path components, and it makes sense to only store
the differences in later entries.  In the v4 on-disk format of the index,
each on-disk cache entry stores the number of bytes to be stripped from
the end of the previous name, and the bytes to append to the result, to
come up with its name.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 read-cache.c |   58 +++++++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 51 insertions(+), 7 deletions(-)

diff --git a/read-cache.c b/read-cache.c
index c159351..1c173f7 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -12,6 +12,8 @@
 #include "commit.h"
 #include "blob.h"
 #include "resolve-undo.h"
+#include "strbuf.h"
+#include "varint.h"
 
 static struct cache_entry *refresh_cache_entry(struct cache_entry *ce, int really);
 
@@ -1236,6 +1238,7 @@ struct ondisk_cache_entry_extended {
 	char name[FLEX_ARRAY]; /* more */
 };
 
+/* These are only used for v3 or lower */
 #define align_flex_name(STRUCT,len) ((offsetof(struct STRUCT,name) + (len) + 8) & ~7)
 #define ondisk_cache_entry_size(len) align_flex_name(ondisk_cache_entry,len)
 #define ondisk_cache_entry_extended_size(len) align_flex_name(ondisk_cache_entry_extended,len)
@@ -1252,7 +1255,7 @@ static int verify_hdr(struct cache_header *hdr, unsigned long size)
 	if (hdr->hdr_signature != htonl(CACHE_SIGNATURE))
 		return error("bad signature");
 	hdr_version = ntohl(hdr->hdr_version);
-	if (hdr_version < 2 || 3 < hdr_version)
+	if (hdr_version < 2 || 4 < hdr_version)
 		return error("bad index version %d", hdr_version);
 	git_SHA1_Init(&c);
 	git_SHA1_Update(&c, hdr, size - 20);
@@ -1331,8 +1334,30 @@ static struct cache_entry *cache_entry_from_ondisk(struct ondisk_cache_entry *on
 	return ce;
 }
 
+/*
+ * Adjacent cache entries tend to share the leading paths, so it makes
+ * sense to only store the differences in later entries.  In the v4
+ * on-disk format of the index, each on-disk cache entry stores the
+ * number of bytes to be stripped from the end of the previous name,
+ * and the bytes to append to the result, to come up with its name.
+ */
+static unsigned long expand_name_field(struct strbuf *name, const char *cp_)
+{
+	const unsigned char *ep, *cp = (const unsigned char *)cp_;
+	size_t len = decode_varint(&cp);
+
+	if (name->len < len)
+		die("malformed name field in the index");
+	strbuf_remove(name, name->len - len, len);
+	for (ep = cp; *ep; ep++)
+		; /* find the end */
+	strbuf_add(name, cp, ep - cp);
+	return (const char *)ep + 1 - cp_;
+}
+
 static struct cache_entry *create_from_disk(struct ondisk_cache_entry *ondisk,
-					    unsigned long *ent_size)
+					    unsigned long *ent_size,
+					    struct strbuf *previous_name)
 {
 	struct cache_entry *ce;
 	size_t len;
@@ -1357,10 +1382,22 @@ static struct cache_entry *create_from_disk(struct ondisk_cache_entry *ondisk,
 	else
 		name = ondisk->name;
 
-	if (len == CE_NAMEMASK)
-		len = strlen(name);
-	ce = cache_entry_from_ondisk(ondisk, flags, name, len);
-	*ent_size = ondisk_ce_size(ce);
+	if (!previous_name) {
+		/* v3 and earlier */
+		if (len == CE_NAMEMASK)
+			len = strlen(name);
+		ce = cache_entry_from_ondisk(ondisk, flags, name, len);
+
+		*ent_size = ondisk_ce_size(ce);
+	} else {
+		unsigned long consumed;
+		consumed = expand_name_field(previous_name, name);
+		ce = cache_entry_from_ondisk(ondisk, flags,
+					     previous_name->buf,
+					     previous_name->len);
+
+		*ent_size = (name - ((char *)ondisk)) + consumed;
+	}
 	return ce;
 }
 
@@ -1373,6 +1410,7 @@ int read_index_from(struct index_state *istate, const char *path)
 	struct cache_header *hdr;
 	void *mmap;
 	size_t mmap_size;
+	struct strbuf previous_name_buf = STRBUF_INIT, *previous_name;
 
 	errno = EBUSY;
 	if (istate->initialized)
@@ -1410,6 +1448,11 @@ int read_index_from(struct index_state *istate, const char *path)
 	istate->cache = xcalloc(istate->cache_alloc, sizeof(struct cache_entry *));
 	istate->initialized = 1;
 
+	if (hdr->hdr_version == htonl(4))
+		previous_name = &previous_name_buf;
+	else
+		previous_name = NULL;
+
 	src_offset = sizeof(*hdr);
 	for (i = 0; i < istate->cache_nr; i++) {
 		struct ondisk_cache_entry *disk_ce;
@@ -1417,11 +1460,12 @@ int read_index_from(struct index_state *istate, const char *path)
 		unsigned long consumed;
 
 		disk_ce = (struct ondisk_cache_entry *)((char *)mmap + src_offset);
-		ce = create_from_disk(disk_ce, &consumed);
+		ce = create_from_disk(disk_ce, &consumed, previous_name);
 		set_index_entry(istate, i, ce);
 
 		src_offset += consumed;
 	}
+	strbuf_release(&previous_name_buf);
 	istate->timestamp.sec = st.st_mtime;
 	istate->timestamp.nsec = ST_MTIME_NSEC(st);
 
-- 
1.7.10.rc4.54.g1d5dd3

  parent reply	other threads:[~2012-04-03 22:53 UTC|newest]

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-04-03 22:53 [PATCH 0/9] Prefix-compress on-disk index entries Junio C Hamano
2012-04-03 22:53 ` [PATCH 1/9] varint: make it available outside the context of pack Junio C Hamano
2012-04-03 22:53 ` [PATCH 2/9] cache.h: hide on-disk index details Junio C Hamano
2012-04-03 22:53 ` [PATCH 3/9] read-cache.c: allow unaligned mapping of the index file Junio C Hamano
2012-04-03 22:53 ` [PATCH 4/9] read-cache.c: make create_from_disk() report number of bytes it consumed Junio C Hamano
2012-04-03 22:53 ` [PATCH 5/9] read-cache.c: report the header version we do not understand Junio C Hamano
2012-04-03 22:53 ` [PATCH 6/9] read-cache.c: move code to copy ondisk to incore cache to a helper function Junio C Hamano
2012-04-03 22:53 ` [PATCH 7/9] read-cache.c: move code to copy incore to ondisk " Junio C Hamano
2012-04-03 22:53 ` Junio C Hamano [this message]
2012-04-03 22:53 ` [PATCH 9/9] read-cache.c: write index v4 format Junio C Hamano
2012-04-04  1:44 ` [PATCH 0/9] Prefix-compress on-disk index entries David Barr
2012-04-04 15:33   ` Junio C Hamano
2012-04-04 16:57     ` Junio C Hamano
2012-04-04 16:58       ` [PATCH 2/2] update-index: upgrade/downgrade on-disk index version Junio C Hamano
2012-04-04 12:34 ` [PATCH 0/9] Prefix-compress on-disk index entries Nguyen Thai Ngoc Duy
2012-04-04 18:44   ` Junio C Hamano
2012-04-06  8:41     ` David Barr
2012-05-02  1:58       ` Nguyen Thai Ngoc Duy
2012-05-02  4:26         ` David Barr
2012-04-27 22:58 ` [PATCH 1/2] unpack-trees: preserve the index file version of original Junio C Hamano
2012-04-27 23:02   ` [PATCH 2/2] index-v4: document the entry format Junio C Hamano
2012-04-30 17:20     ` Thomas Rast
2012-05-01  4:00       ` Junio C Hamano
2012-05-01 21:43         ` Thomas Rast
2012-05-02 15:12         ` Shawn Pearce
2012-05-02 17:04           ` Junio C Hamano
2012-05-02 17:13             ` Shawn Pearce

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1333493596-14202-9-git-send-email-gitster@pobox.com \
    --to=gitster@pobox.com \
    --cc=git@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).