From: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
To: git@vger.kernel.org
Cc: "Joshua Redstone" <joshua.redstone@fb.com>,
"Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
Subject: [PATCH 3/3] Support compressing index when GIT_ZCACHE=1
Date: Sun, 5 Feb 2012 15:30:05 +0700 [thread overview]
Message-ID: <1328430605-4566-4-git-send-email-pclouds@gmail.com> (raw)
In-Reply-To: <1328430605-4566-1-git-send-email-pclouds@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
cache.h | 1 +
read-cache.c | 118 ++++++++++++++++++++++++++++++++++++++++++++++++++++++---
2 files changed, 112 insertions(+), 7 deletions(-)
diff --git a/cache.h b/cache.h
index 10afd71..112bc52 100644
--- a/cache.h
+++ b/cache.h
@@ -99,6 +99,7 @@ unsigned long git_deflate_bound(git_zstream *, unsigned long);
*/
#define CACHE_SIGNATURE 0x44495243 /* "DIRC" */
+#define ZCACHE_SIGNATURE 0x4452435A /* "DRCZ" */
struct cache_header {
unsigned int hdr_signature;
unsigned int hdr_version;
diff --git a/read-cache.c b/read-cache.c
index 7b9a989..45c1712 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1182,12 +1182,17 @@ static struct cache_entry *refresh_cache_entry(struct cache_entry *ce, int reall
return refresh_cache_ent(&the_index, ce, really, NULL, NULL);
}
-static int verify_hdr(struct cache_header *hdr, unsigned long size)
+static int verify_hdr(struct cache_header *hdr, unsigned long size,
+ int *deflated)
{
git_SHA_CTX c;
unsigned char sha1[20];
- if (hdr->hdr_signature != htonl(CACHE_SIGNATURE))
+ if (hdr->hdr_signature == htonl(CACHE_SIGNATURE))
+ *deflated = 0;
+ else if (hdr->hdr_signature == htonl(ZCACHE_SIGNATURE))
+ *deflated = 1;
+ else
return error("bad signature");
if (hdr->hdr_version != htonl(2) && hdr->hdr_version != htonl(3))
return error("bad index version");
@@ -1273,6 +1278,43 @@ static struct cache_entry *create_from_disk(struct ondisk_cache_entry *ondisk)
return ce;
}
+static int inflate_cache_entries(struct index_state *istate,
+ const unsigned char *mmap, size_t mmap_size,
+ unsigned long src_offset)
+{
+ unsigned char buf[sizeof(struct ondisk_cache_entry) + PATH_MAX];
+ struct ondisk_cache_entry *disk_ce;
+ struct cache_entry *ce;
+ struct git_zstream stream;
+ int i, status;
+
+ memset(&stream, 0, sizeof(stream));
+ stream.next_in = (unsigned char*)mmap + src_offset;
+ stream.avail_in = mmap_size - src_offset;
+ stream.next_out = buf;
+ stream.avail_out = sizeof(buf);
+ git_inflate_init(&stream);
+
+ for (i = 0; i < istate->cache_nr; i++) {
+ int remaining;
+ do {
+ status = git_inflate(&stream, Z_FINISH);
+ } while (status == Z_OK);
+
+ disk_ce = (struct ondisk_cache_entry *)buf;
+ ce = create_from_disk(disk_ce);
+ set_index_entry(istate, i, ce);
+
+ remaining = stream.next_out - (buf + ondisk_ce_size(ce));
+ memmove(buf, buf + ondisk_ce_size(ce), remaining);
+ stream.next_out = buf + remaining;
+ stream.avail_out = sizeof(buf) - remaining;
+ }
+ assert(status == Z_STREAM_END);
+ git_inflate_end(&stream);
+ return stream.next_in - mmap;
+}
+
static int read_cache_entries(struct index_state *istate,
const char *mmap, unsigned long src_offset)
{
@@ -1300,6 +1342,7 @@ int read_index_from(struct index_state *istate, const char *path)
struct cache_header *hdr;
void *mmap;
size_t mmap_size;
+ int deflated;
errno = EBUSY;
if (istate->initialized)
@@ -1329,7 +1372,7 @@ int read_index_from(struct index_state *istate, const char *path)
die_errno("unable to map index file");
hdr = mmap;
- if (verify_hdr(hdr, mmap_size) < 0)
+ if (verify_hdr(hdr, mmap_size, &deflated) < 0)
goto unmap;
istate->cache_nr = ntohl(hdr->hdr_entries);
@@ -1337,7 +1380,11 @@ int read_index_from(struct index_state *istate, const char *path)
istate->cache = xcalloc(istate->cache_alloc, sizeof(struct cache_entry *));
istate->initialized = 1;
- src_offset = read_cache_entries(istate, mmap, sizeof(*hdr));
+ if (deflated)
+ src_offset = inflate_cache_entries(istate, mmap, mmap_size,
+ sizeof(*hdr));
+ else
+ src_offset = read_cache_entries(istate, mmap, sizeof(*hdr));
istate->timestamp.sec = st.st_mtime;
istate->timestamp.nsec = ST_MTIME_NSEC(st);
@@ -1594,6 +1641,10 @@ int write_index(struct index_state *istate, int newfd)
struct stat st;
void *ce_ondisk = NULL;
int ce_ondisk_size = 0;
+ struct git_zstream stream;
+ int deflate, status;
+ unsigned char *dbuf_out;
+ unsigned char *dbuf_in;
for (i = removed = extended = 0; i < entries; i++) {
if (cache[i]->ce_flags & CE_REMOVE)
@@ -1607,7 +1658,8 @@ int write_index(struct index_state *istate, int newfd)
}
}
- hdr.hdr_signature = htonl(CACHE_SIGNATURE);
+ deflate = getenv("GIT_ZCACHE") != NULL;
+ hdr.hdr_signature = htonl(deflate ? ZCACHE_SIGNATURE : CACHE_SIGNATURE);
/* for extended format, increase version so older git won't try to read it */
hdr.hdr_version = htonl(extended ? 3 : 2);
hdr.hdr_entries = htonl(entries - removed);
@@ -1616,6 +1668,17 @@ int write_index(struct index_state *istate, int newfd)
if (ce_write(&c, newfd, &hdr, sizeof(hdr)) < 0)
return -1;
+ if (deflate) {
+ dbuf_out = xmalloc(WRITE_BUFFER_SIZE);
+ dbuf_in = xmalloc(WRITE_BUFFER_SIZE);
+ memset(&stream, 0, sizeof(stream));
+ stream.next_out = dbuf_out;
+ stream.avail_out = WRITE_BUFFER_SIZE;
+ stream.next_in = dbuf_in;
+ stream.avail_in = 0;
+ git_deflate_init(&stream, zlib_compression_level);
+ }
+
for (i = 0; i < entries; i++) {
struct cache_entry *ce = cache[i];
int size;
@@ -1625,11 +1688,52 @@ int write_index(struct index_state *istate, int newfd)
if (!ce_uptodate(ce) && is_racy_timestamp(istate, ce))
ce_smudge_racily_clean_entry(ce);
size = ce_prepare_ondisk_entry(ce, &ce_ondisk, &ce_ondisk_size);
- if (ce_write(&c, newfd, ce_ondisk, size) < 0)
- return -1;
+ if (!deflate) {
+ if (ce_write(&c, newfd, ce_ondisk, size) < 0)
+ return -1;
+ continue;
+ }
+
+ if (stream.avail_in)
+ memmove(dbuf_in, stream.next_in, stream.avail_in);
+ memcpy(dbuf_in + stream.avail_in, ce_ondisk, size);
+ stream.next_in = dbuf_in;
+ stream.avail_in += size;
+ do {
+ status = git_deflate(&stream, 0);
+ if (stream.next_out > dbuf_out) {
+ size = stream.next_out - dbuf_out;
+ if (ce_write(&c, newfd, dbuf_out, size) < 0)
+ return -1;
+ stream.next_out = dbuf_out;
+ stream.avail_out = WRITE_BUFFER_SIZE;
+ }
+ } while (status == Z_OK);
}
free(ce_ondisk);
+ if (deflate) {
+ do {
+ status = git_deflate(&stream, Z_FINISH);
+ if (stream.next_out > dbuf_out) {
+ int size = stream.next_out - dbuf_out;
+ if (ce_write(&c, newfd, dbuf_out, size) < 0)
+ return -1;
+ stream.next_out = dbuf_out;
+ stream.avail_out = WRITE_BUFFER_SIZE;
+ }
+ } while (status == Z_OK);
+
+ git_deflate_end(&stream);
+ if (stream.next_out > dbuf_out) {
+ int size = stream.next_out - dbuf_out;
+ if (ce_write(&c, newfd, dbuf_out, size) < 0)
+ return -1;
+ }
+ free(dbuf_in);
+ free(dbuf_out);
+ }
+
/* Write extension data here */
if (istate->cache_tree) {
struct strbuf sb = STRBUF_INIT;
--
1.7.8.36.g69ee2
next prev parent reply other threads:[~2012-02-05 8:31 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-02-05 8:30 [PATCH 0/3] On compressing large index Nguyễn Thái Ngọc Duy
2012-02-05 8:30 ` [PATCH 1/3] read-cache: factor out cache entries reading code Nguyễn Thái Ngọc Duy
2012-02-05 8:30 ` [PATCH 2/3] read-cache: reduce malloc/free during writing index Nguyễn Thái Ngọc Duy
2012-02-05 8:30 ` Nguyễn Thái Ngọc Duy [this message]
2012-02-05 21:22 ` [PATCH 0/3] On compressing large index Thomas Rast
2012-02-06 1:35 ` Nguyen Thai Ngoc Duy
2012-02-06 15:54 ` Joshua Redstone
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1328430605-4566-4-git-send-email-pclouds@gmail.com \
--to=pclouds@gmail.com \
--cc=git@vger.kernel.org \
--cc=joshua.redstone@fb.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.