From: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
To: git@vger.kernel.org
Cc: "Joshua Redstone" <joshua.redstone@fb.com>,
"Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
Subject: [PATCH 3/3] Support compressing index when GIT_ZCACHE=1
Date: Sun, 5 Feb 2012 15:30:05 +0700 [thread overview]
Message-ID: <1328430605-4566-4-git-send-email-pclouds@gmail.com> (raw)
In-Reply-To: <1328430605-4566-1-git-send-email-pclouds@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
cache.h | 1 +
read-cache.c | 118 ++++++++++++++++++++++++++++++++++++++++++++++++++++++---
2 files changed, 112 insertions(+), 7 deletions(-)
diff --git a/cache.h b/cache.h
index 10afd71..112bc52 100644
--- a/cache.h
+++ b/cache.h
@@ -99,6 +99,7 @@ unsigned long git_deflate_bound(git_zstream *, unsigned long);
*/
#define CACHE_SIGNATURE 0x44495243 /* "DIRC" */
+#define ZCACHE_SIGNATURE 0x4452435A /* "DRCZ" */
struct cache_header {
unsigned int hdr_signature;
unsigned int hdr_version;
diff --git a/read-cache.c b/read-cache.c
index 7b9a989..45c1712 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1182,12 +1182,17 @@ static struct cache_entry *refresh_cache_entry(struct cache_entry *ce, int reall
return refresh_cache_ent(&the_index, ce, really, NULL, NULL);
}
-static int verify_hdr(struct cache_header *hdr, unsigned long size)
+static int verify_hdr(struct cache_header *hdr, unsigned long size,
+ int *deflated)
{
git_SHA_CTX c;
unsigned char sha1[20];
- if (hdr->hdr_signature != htonl(CACHE_SIGNATURE))
+ if (hdr->hdr_signature == htonl(CACHE_SIGNATURE))
+ *deflated = 0;
+ else if (hdr->hdr_signature == htonl(ZCACHE_SIGNATURE))
+ *deflated = 1;
+ else
return error("bad signature");
if (hdr->hdr_version != htonl(2) && hdr->hdr_version != htonl(3))
return error("bad index version");
@@ -1273,6 +1278,43 @@ static struct cache_entry *create_from_disk(struct ondisk_cache_entry *ondisk)
return ce;
}
+static int inflate_cache_entries(struct index_state *istate, /* inflate istate->cache_nr entries from the mapped index into istate */
+ const unsigned char *mmap, size_t mmap_size, /* mapped index data and its length in bytes */
+ unsigned long src_offset) /* offset within mmap at which inflating starts */
+{
+ unsigned char buf[sizeof(struct ondisk_cache_entry) + PATH_MAX]; /* staging area that receives inflated on-disk entries */
+ struct ondisk_cache_entry *disk_ce;
+ struct cache_entry *ce;
+ struct git_zstream stream;
+ int i, status;
+
+ memset(&stream, 0, sizeof(stream));
+ stream.next_in = (unsigned char*)mmap + src_offset; /* the cast drops the const qualifier of mmap */
+ stream.avail_in = mmap_size - src_offset; /* everything from src_offset to the end of the mapping is offered as input */
+ stream.next_out = buf;
+ stream.avail_out = sizeof(buf);
+ git_inflate_init(&stream);
+
+ for (i = 0; i < istate->cache_nr; i++) { /* one iteration per index entry */
+ int remaining; /* inflated bytes left in buf beyond the current entry */
+ do {
+ status = git_inflate(&stream, Z_FINISH);
+ } while (status == Z_OK); /* any status other than Z_OK ends the loop; it is not examined here */
+
+ disk_ce = (struct ondisk_cache_entry *)buf; /* the current entry is always read from the start of buf */
+ ce = create_from_disk(disk_ce);
+ set_index_entry(istate, i, ce);
+
+ remaining = stream.next_out - (buf + ondisk_ce_size(ce)); /* NOTE(review): no check that this is non-negative - confirm a whole entry was inflated */
+ memmove(buf, buf + ondisk_ce_size(ce), remaining); /* slide the leftover bytes to the front of buf */
+ stream.next_out = buf + remaining; /* further output is appended right after the leftover bytes */
+ stream.avail_out = sizeof(buf) - remaining;
+ }
+ assert(status == Z_STREAM_END); /* NOTE(review): this assert is the only validation of the inflate status in this function */
+ git_inflate_end(&stream);
+ return stream.next_in - mmap; /* offset within mmap just past the input consumed by the stream */
+}
+
static int read_cache_entries(struct index_state *istate,
const char *mmap, unsigned long src_offset)
{
@@ -1300,6 +1342,7 @@ int read_index_from(struct index_state *istate, const char *path)
struct cache_header *hdr;
void *mmap;
size_t mmap_size;
+ int deflated;
errno = EBUSY;
if (istate->initialized)
@@ -1329,7 +1372,7 @@ int read_index_from(struct index_state *istate, const char *path)
die_errno("unable to map index file");
hdr = mmap;
- if (verify_hdr(hdr, mmap_size) < 0)
+ if (verify_hdr(hdr, mmap_size, &deflated) < 0)
goto unmap;
istate->cache_nr = ntohl(hdr->hdr_entries);
@@ -1337,7 +1380,11 @@ int read_index_from(struct index_state *istate, const char *path)
istate->cache = xcalloc(istate->cache_alloc, sizeof(struct cache_entry *));
istate->initialized = 1;
- src_offset = read_cache_entries(istate, mmap, sizeof(*hdr));
+ if (deflated)
+ src_offset = inflate_cache_entries(istate, mmap, mmap_size,
+ sizeof(*hdr));
+ else
+ src_offset = read_cache_entries(istate, mmap, sizeof(*hdr));
istate->timestamp.sec = st.st_mtime;
istate->timestamp.nsec = ST_MTIME_NSEC(st);
@@ -1594,6 +1641,10 @@ int write_index(struct index_state *istate, int newfd)
struct stat st;
void *ce_ondisk = NULL;
int ce_ondisk_size = 0;
+ struct git_zstream stream;
+ int deflate, status;
+ unsigned char *dbuf_out;
+ unsigned char *dbuf_in;
for (i = removed = extended = 0; i < entries; i++) {
if (cache[i]->ce_flags & CE_REMOVE)
@@ -1607,7 +1658,8 @@ int write_index(struct index_state *istate, int newfd)
}
}
- hdr.hdr_signature = htonl(CACHE_SIGNATURE);
+ deflate = getenv("GIT_ZCACHE") != NULL;
+ hdr.hdr_signature = htonl(deflate ? ZCACHE_SIGNATURE : CACHE_SIGNATURE);
/* for extended format, increase version so older git won't try to read it */
hdr.hdr_version = htonl(extended ? 3 : 2);
hdr.hdr_entries = htonl(entries - removed);
@@ -1616,6 +1668,17 @@ int write_index(struct index_state *istate, int newfd)
if (ce_write(&c, newfd, &hdr, sizeof(hdr)) < 0)
return -1;
+ if (deflate) {
+ dbuf_out = xmalloc(WRITE_BUFFER_SIZE);
+ dbuf_in = xmalloc(WRITE_BUFFER_SIZE);
+ memset(&stream, 0, sizeof(stream));
+ stream.next_out = dbuf_out;
+ stream.avail_out = WRITE_BUFFER_SIZE;
+ stream.next_in = dbuf_in;
+ stream.avail_in = 0;
+ git_deflate_init(&stream, zlib_compression_level);
+ }
+
for (i = 0; i < entries; i++) {
struct cache_entry *ce = cache[i];
int size;
@@ -1625,11 +1688,52 @@ int write_index(struct index_state *istate, int newfd)
if (!ce_uptodate(ce) && is_racy_timestamp(istate, ce))
ce_smudge_racily_clean_entry(ce);
size = ce_prepare_ondisk_entry(ce, &ce_ondisk, &ce_ondisk_size);
- if (ce_write(&c, newfd, ce_ondisk, size) < 0)
- return -1;
+ if (!deflate) {
+ if (ce_write(&c, newfd, ce_ondisk, size) < 0)
+ return -1;
+ continue;
+ }
+
+ if (stream.avail_in)
+ memmove(dbuf_in, stream.next_in, stream.avail_in);
+ memcpy(dbuf_in + stream.avail_in, ce_ondisk, size);
+ stream.next_in = dbuf_in;
+ stream.avail_in += size;
+ do {
+ status = git_deflate(&stream, 0);
+ if (stream.next_out > dbuf_out) {
+ size = stream.next_out - dbuf_out;
+ if (ce_write(&c, newfd, dbuf_out, size) < 0)
+ return -1;
+ stream.next_out = dbuf_out;
+ stream.avail_out = WRITE_BUFFER_SIZE;
+ }
+ } while (status == Z_OK);
}
free(ce_ondisk);
+ if (deflate) {
+ do {
+ status = git_deflate(&stream, Z_FINISH);
+ if (stream.next_out > dbuf_out) {
+ int size = stream.next_out - dbuf_out;
+ if (ce_write(&c, newfd, dbuf_out, size) < 0)
+ return -1;
+ stream.next_out = dbuf_out;
+ stream.avail_out = WRITE_BUFFER_SIZE;
+ }
+ } while (status == Z_OK);
+
+ git_deflate_end(&stream);
+ if (stream.next_out > dbuf_out) {
+ int size = stream.next_out - dbuf_out;
+ if (ce_write(&c, newfd, dbuf_out, size) < 0)
+ return -1;
+ }
+ free(dbuf_in);
+ free(dbuf_out);
+ }
+
/* Write extension data here */
if (istate->cache_tree) {
struct strbuf sb = STRBUF_INIT;
--
1.7.8.36.g69ee2
next prev parent reply other threads:[~2012-02-05 8:31 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-02-05 8:30 [PATCH 0/3] On compressing large index Nguyễn Thái Ngọc Duy
2012-02-05 8:30 ` [PATCH 1/3] read-cache: factor out cache entries reading code Nguyễn Thái Ngọc Duy
2012-02-05 8:30 ` [PATCH 2/3] read-cache: reduce malloc/free during writing index Nguyễn Thái Ngọc Duy
2012-02-05 8:30 ` Nguyễn Thái Ngọc Duy [this message]
2012-02-05 21:22 ` [PATCH 0/3] On compressing large index Thomas Rast
2012-02-06 1:35 ` Nguyen Thai Ngoc Duy
2012-02-06 15:54 ` Joshua Redstone
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1328430605-4566-4-git-send-email-pclouds@gmail.com \
--to=pclouds@gmail.com \
--cc=git@vger.kernel.org \
--cc=joshua.redstone@fb.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).