All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
To: git@vger.kernel.org
Cc: "Joshua Redstone" <joshua.redstone@fb.com>,
	"Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
Subject: [PATCH 3/3] Support compressing index when GIT_ZCACHE=1
Date: Sun,  5 Feb 2012 15:30:05 +0700	[thread overview]
Message-ID: <1328430605-4566-4-git-send-email-pclouds@gmail.com> (raw)
In-Reply-To: <1328430605-4566-1-git-send-email-pclouds@gmail.com>


Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 cache.h      |    1 +
 read-cache.c |  118 ++++++++++++++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 112 insertions(+), 7 deletions(-)

diff --git a/cache.h b/cache.h
index 10afd71..112bc52 100644
--- a/cache.h
+++ b/cache.h
@@ -99,6 +99,7 @@ unsigned long git_deflate_bound(git_zstream *, unsigned long);
  */
 
 #define CACHE_SIGNATURE 0x44495243	/* "DIRC" */
+#define ZCACHE_SIGNATURE 0x4452435A	/* "DRCZ" */
 struct cache_header {
 	unsigned int hdr_signature;
 	unsigned int hdr_version;
diff --git a/read-cache.c b/read-cache.c
index 7b9a989..45c1712 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1182,12 +1182,17 @@ static struct cache_entry *refresh_cache_entry(struct cache_entry *ce, int reall
 	return refresh_cache_ent(&the_index, ce, really, NULL, NULL);
 }
 
-static int verify_hdr(struct cache_header *hdr, unsigned long size)
+static int verify_hdr(struct cache_header *hdr, unsigned long size,
+		      int *deflated)
 {
 	git_SHA_CTX c;
 	unsigned char sha1[20];
 
-	if (hdr->hdr_signature != htonl(CACHE_SIGNATURE))
+	if (hdr->hdr_signature == htonl(CACHE_SIGNATURE))
+		*deflated = 0;
+	else if (hdr->hdr_signature == htonl(ZCACHE_SIGNATURE))
+		*deflated = 1;
+	else
 		return error("bad signature");
 	if (hdr->hdr_version != htonl(2) && hdr->hdr_version != htonl(3))
 		return error("bad index version");
@@ -1273,6 +1278,43 @@ static struct cache_entry *create_from_disk(struct ondisk_cache_entry *ondisk)
 	return ce;
 }
 
+static int inflate_cache_entries(struct index_state *istate,
+				 const unsigned char *mmap, size_t mmap_size,
+				 unsigned long src_offset)
+{
+	unsigned char buf[sizeof(struct ondisk_cache_entry) + PATH_MAX];
+	struct ondisk_cache_entry *disk_ce;
+	struct cache_entry *ce;
+	struct git_zstream stream;
+	int i, status;
+
+	memset(&stream, 0, sizeof(stream));
+	stream.next_in = (unsigned char*)mmap + src_offset;
+	stream.avail_in = mmap_size - src_offset;
+	stream.next_out = buf;
+	stream.avail_out = sizeof(buf);
+	git_inflate_init(&stream);
+
+	for (i = 0; i < istate->cache_nr; i++) {
+		int remaining;
+		do {
+			status = git_inflate(&stream, Z_FINISH);
+		} while (status == Z_OK);
+
+		disk_ce = (struct ondisk_cache_entry *)buf;
+		ce = create_from_disk(disk_ce);
+		set_index_entry(istate, i, ce);
+
+		remaining = stream.next_out - (buf + ondisk_ce_size(ce));
+		memmove(buf, buf + ondisk_ce_size(ce), remaining);
+		stream.next_out = buf + remaining;
+		stream.avail_out = sizeof(buf) - remaining;
+	}
+	assert(status == Z_STREAM_END);
+	git_inflate_end(&stream);
+	return stream.next_in - mmap;
+}
+
 static int read_cache_entries(struct index_state *istate,
 			      const char *mmap, unsigned long src_offset)
 {
@@ -1300,6 +1342,7 @@ int read_index_from(struct index_state *istate, const char *path)
 	struct cache_header *hdr;
 	void *mmap;
 	size_t mmap_size;
+	int deflated;
 
 	errno = EBUSY;
 	if (istate->initialized)
@@ -1329,7 +1372,7 @@ int read_index_from(struct index_state *istate, const char *path)
 		die_errno("unable to map index file");
 
 	hdr = mmap;
-	if (verify_hdr(hdr, mmap_size) < 0)
+	if (verify_hdr(hdr, mmap_size, &deflated) < 0)
 		goto unmap;
 
 	istate->cache_nr = ntohl(hdr->hdr_entries);
@@ -1337,7 +1380,11 @@ int read_index_from(struct index_state *istate, const char *path)
 	istate->cache = xcalloc(istate->cache_alloc, sizeof(struct cache_entry *));
 	istate->initialized = 1;
 
-	src_offset = read_cache_entries(istate, mmap, sizeof(*hdr));
+	if (deflated)
+		src_offset = inflate_cache_entries(istate, mmap, mmap_size,
+						   sizeof(*hdr));
+	else
+		src_offset = read_cache_entries(istate, mmap, sizeof(*hdr));
 	istate->timestamp.sec = st.st_mtime;
 	istate->timestamp.nsec = ST_MTIME_NSEC(st);
 
@@ -1594,6 +1641,10 @@ int write_index(struct index_state *istate, int newfd)
 	struct stat st;
 	void *ce_ondisk = NULL;
 	int ce_ondisk_size = 0;
+	struct git_zstream stream;
+	int deflate, status;
+	unsigned char *dbuf_out;
+	unsigned char *dbuf_in;
 
 	for (i = removed = extended = 0; i < entries; i++) {
 		if (cache[i]->ce_flags & CE_REMOVE)
@@ -1607,7 +1658,8 @@ int write_index(struct index_state *istate, int newfd)
 		}
 	}
 
-	hdr.hdr_signature = htonl(CACHE_SIGNATURE);
+	deflate = getenv("GIT_ZCACHE") != NULL;
+	hdr.hdr_signature = htonl(deflate ? ZCACHE_SIGNATURE : CACHE_SIGNATURE);
 	/* for extended format, increase version so older git won't try to read it */
 	hdr.hdr_version = htonl(extended ? 3 : 2);
 	hdr.hdr_entries = htonl(entries - removed);
@@ -1616,6 +1668,17 @@ int write_index(struct index_state *istate, int newfd)
 	if (ce_write(&c, newfd, &hdr, sizeof(hdr)) < 0)
 		return -1;
 
+	if (deflate) {
+		dbuf_out = xmalloc(WRITE_BUFFER_SIZE);
+		dbuf_in = xmalloc(WRITE_BUFFER_SIZE);
+		memset(&stream, 0, sizeof(stream));
+		stream.next_out = dbuf_out;
+		stream.avail_out = WRITE_BUFFER_SIZE;
+		stream.next_in = dbuf_in;
+		stream.avail_in = 0;
+		git_deflate_init(&stream, zlib_compression_level);
+	}
+
 	for (i = 0; i < entries; i++) {
 		struct cache_entry *ce = cache[i];
 		int size;
@@ -1625,11 +1688,52 @@ int write_index(struct index_state *istate, int newfd)
 		if (!ce_uptodate(ce) && is_racy_timestamp(istate, ce))
 			ce_smudge_racily_clean_entry(ce);
 		size = ce_prepare_ondisk_entry(ce, &ce_ondisk, &ce_ondisk_size);
-		if (ce_write(&c, newfd, ce_ondisk, size) < 0)
-			return -1;
+		if (!deflate) {
+			if (ce_write(&c, newfd, ce_ondisk, size) < 0)
+				return -1;
+			continue;
+		}
+
+		if (stream.avail_in)
+			memmove(dbuf_in, stream.next_in, stream.avail_in);
+		memcpy(dbuf_in + stream.avail_in, ce_ondisk, size);
+		stream.next_in = dbuf_in;
+		stream.avail_in += size;
+		do {
+			status = git_deflate(&stream, 0);
+			if (stream.next_out > dbuf_out) {
+				size = stream.next_out - dbuf_out;
+				if (ce_write(&c, newfd, dbuf_out, size) < 0)
+					return -1;
+				stream.next_out = dbuf_out;
+				stream.avail_out = WRITE_BUFFER_SIZE;
+			}
+		} while (status == Z_OK);
 	}
 	free(ce_ondisk);
 
+	if (deflate) {
+		do {
+			status = git_deflate(&stream, Z_FINISH);
+			if (stream.next_out > dbuf_out) {
+				int size = stream.next_out - dbuf_out;
+				if (ce_write(&c, newfd, dbuf_out, size) < 0)
+					return -1;
+				stream.next_out = dbuf_out;
+				stream.avail_out = WRITE_BUFFER_SIZE;
+			}
+		} while (status == Z_OK);
+
+		git_deflate_end(&stream);
+		if (stream.next_out > dbuf_out) {
+			int size = stream.next_out - dbuf_out;
+			if (ce_write(&c, newfd, dbuf_out, size) < 0)
+				return -1;
+		}
+		free(dbuf_in);
+		free(dbuf_out);
+	}
+
 	/* Write extension data here */
 	if (istate->cache_tree) {
 		struct strbuf sb = STRBUF_INIT;
-- 
1.7.8.36.g69ee2

  parent reply	other threads:[~2012-02-05  8:31 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-02-05  8:30 [PATCH 0/3] On compressing large index Nguyễn Thái Ngọc Duy
2012-02-05  8:30 ` [PATCH 1/3] read-cache: factor out cache entries reading code Nguyễn Thái Ngọc Duy
2012-02-05  8:30 ` [PATCH 2/3] read-cache: reduce malloc/free during writing index Nguyễn Thái Ngọc Duy
2012-02-05  8:30 ` Nguyễn Thái Ngọc Duy [this message]
2012-02-05 21:22 ` [PATCH 0/3] On compressing large index Thomas Rast
2012-02-06  1:35   ` Nguyen Thai Ngoc Duy
2012-02-06 15:54     ` Joshua Redstone

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1328430605-4566-4-git-send-email-pclouds@gmail.com \
    --to=pclouds@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=joshua.redstone@fb.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.