* [PATCH 1/3] read-cache: factor out cache entries reading code
2012-02-05 8:30 [PATCH 0/3] On compressing large index Nguyễn Thái Ngọc Duy
@ 2012-02-05 8:30 ` Nguyễn Thái Ngọc Duy
2012-02-05 8:30 ` [PATCH 2/3] read-cache: reduce malloc/free during writing index Nguyễn Thái Ngọc Duy
` (2 subsequent siblings)
3 siblings, 0 replies; 7+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2012-02-05 8:30 UTC (permalink / raw)
To: git; +Cc: Joshua Redstone, Nguyễn Thái Ngọc Duy
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
read-cache.c | 32 ++++++++++++++++++++------------
1 files changed, 20 insertions(+), 12 deletions(-)
diff --git a/read-cache.c b/read-cache.c
index a51bba1..2dbf923 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1273,10 +1273,28 @@ static struct cache_entry *create_from_disk(struct ondisk_cache_entry *ondisk)
return ce;
}
+static int read_cache_entries(struct index_state *istate,
+ const char *mmap, unsigned long src_offset)
+{
+ const char *buf = mmap + src_offset; /* read cursor: starts src_offset bytes into mmap */
+ int i;
+
+ for (i = 0; i < istate->cache_nr; i++) { /* one on-disk record per entry counted in cache_nr */
+ struct ondisk_cache_entry *disk_ce;
+ struct cache_entry *ce;
+
+ disk_ce = (struct ondisk_cache_entry *)buf; /* view the bytes at the cursor as an on-disk entry */
+ ce = create_from_disk(disk_ce); /* build the in-memory entry from the on-disk record */
+ set_index_entry(istate, i, ce); /* presumably records ce as entry i of istate -- confirm in set_index_entry */
+ buf += ondisk_ce_size(ce); /* step past this record, using the size ondisk_ce_size reports for ce */
+ }
+ return buf - mmap; /* offset just past the last record, relative to mmap; NOTE(review): narrowed to int, while the caller stores it in an unsigned long */
+}
+
/* remember to discard_cache() before reading a different cache! */
int read_index_from(struct index_state *istate, const char *path)
{
- int fd, i;
+ int fd;
struct stat st;
unsigned long src_offset;
struct cache_header *hdr;
@@ -1319,17 +1337,7 @@ int read_index_from(struct index_state *istate, const char *path)
istate->cache = xcalloc(istate->cache_alloc, sizeof(struct cache_entry *));
istate->initialized = 1;
- src_offset = sizeof(*hdr);
- for (i = 0; i < istate->cache_nr; i++) {
- struct ondisk_cache_entry *disk_ce;
- struct cache_entry *ce;
-
- disk_ce = (struct ondisk_cache_entry *)((char *)mmap + src_offset);
- ce = create_from_disk(disk_ce);
- set_index_entry(istate, i, ce);
-
- src_offset += ondisk_ce_size(ce);
- }
+ src_offset = read_cache_entries(istate, mmap, sizeof(*hdr));
istate->timestamp.sec = st.st_mtime;
istate->timestamp.nsec = ST_MTIME_NSEC(st);
--
1.7.8.36.g69ee2
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 2/3] read-cache: reduce malloc/free during writing index
2012-02-05 8:30 [PATCH 0/3] On compressing large index Nguyễn Thái Ngọc Duy
2012-02-05 8:30 ` [PATCH 1/3] read-cache: factor out cache entries reading code Nguyễn Thái Ngọc Duy
@ 2012-02-05 8:30 ` Nguyễn Thái Ngọc Duy
2012-02-05 8:30 ` [PATCH 3/3] Support compressing index when GIT_ZCACHE=1 Nguyễn Thái Ngọc Duy
2012-02-05 21:22 ` [PATCH 0/3] On compressing large index Thomas Rast
3 siblings, 0 replies; 7+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2012-02-05 8:30 UTC (permalink / raw)
To: git; +Cc: Joshua Redstone, Nguyễn Thái Ngọc Duy
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
read-cache.c | 26 ++++++++++++++++++--------
1 files changed, 18 insertions(+), 8 deletions(-)
diff --git a/read-cache.c b/read-cache.c
index 2dbf923..7b9a989 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1521,12 +1521,19 @@ static void ce_smudge_racily_clean_entry(struct cache_entry *ce)
}
}
-static int ce_write_entry(git_SHA_CTX *c, int fd, struct cache_entry *ce)
+static int ce_prepare_ondisk_entry(struct cache_entry *ce,
+ void **ondisk_p, int *ondisk_size)
{
int size = ondisk_ce_size(ce);
- struct ondisk_cache_entry *ondisk = xcalloc(1, size);
+ struct ondisk_cache_entry *ondisk;
char *name;
- int result;
+
+ if (size <= *ondisk_size)
+ ondisk = *ondisk_p;
+ else {
+ ondisk = *ondisk_p = xrealloc(*ondisk_p, size);
+ *ondisk_size = size;
+ }
ondisk->ctime.sec = htonl(ce->ce_ctime.sec);
ondisk->mtime.sec = htonl(ce->ce_mtime.sec);
@@ -1549,10 +1556,7 @@ static int ce_write_entry(git_SHA_CTX *c, int fd, struct cache_entry *ce)
else
name = ondisk->name;
memcpy(name, ce->name, ce_namelen(ce));
-
- result = ce_write(c, fd, ondisk, size);
- free(ondisk);
- return result;
+ return size;
}
static int has_racy_timestamp(struct index_state *istate)
@@ -1588,6 +1592,8 @@ int write_index(struct index_state *istate, int newfd)
struct cache_entry **cache = istate->cache;
int entries = istate->cache_nr;
struct stat st;
+ void *ce_ondisk = NULL;
+ int ce_ondisk_size = 0;
for (i = removed = extended = 0; i < entries; i++) {
if (cache[i]->ce_flags & CE_REMOVE)
@@ -1612,13 +1618,17 @@ int write_index(struct index_state *istate, int newfd)
for (i = 0; i < entries; i++) {
struct cache_entry *ce = cache[i];
+ int size;
+
if (ce->ce_flags & CE_REMOVE)
continue;
if (!ce_uptodate(ce) && is_racy_timestamp(istate, ce))
ce_smudge_racily_clean_entry(ce);
- if (ce_write_entry(&c, newfd, ce) < 0)
+ size = ce_prepare_ondisk_entry(ce, &ce_ondisk, &ce_ondisk_size);
+ if (ce_write(&c, newfd, ce_ondisk, size) < 0)
return -1;
}
+ free(ce_ondisk);
/* Write extension data here */
if (istate->cache_tree) {
--
1.7.8.36.g69ee2
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 3/3] Support compressing index when GIT_ZCACHE=1
2012-02-05 8:30 [PATCH 0/3] On compressing large index Nguyễn Thái Ngọc Duy
2012-02-05 8:30 ` [PATCH 1/3] read-cache: factor out cache entries reading code Nguyễn Thái Ngọc Duy
2012-02-05 8:30 ` [PATCH 2/3] read-cache: reduce malloc/free during writing index Nguyễn Thái Ngọc Duy
@ 2012-02-05 8:30 ` Nguyễn Thái Ngọc Duy
2012-02-05 21:22 ` [PATCH 0/3] On compressing large index Thomas Rast
3 siblings, 0 replies; 7+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2012-02-05 8:30 UTC (permalink / raw)
To: git; +Cc: Joshua Redstone, Nguyễn Thái Ngọc Duy
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
cache.h | 1 +
read-cache.c | 118 ++++++++++++++++++++++++++++++++++++++++++++++++++++++---
2 files changed, 112 insertions(+), 7 deletions(-)
diff --git a/cache.h b/cache.h
index 10afd71..112bc52 100644
--- a/cache.h
+++ b/cache.h
@@ -99,6 +99,7 @@ unsigned long git_deflate_bound(git_zstream *, unsigned long);
*/
#define CACHE_SIGNATURE 0x44495243 /* "DIRC" */
+#define ZCACHE_SIGNATURE 0x4452435A /* "DRCZ" */
struct cache_header {
unsigned int hdr_signature;
unsigned int hdr_version;
diff --git a/read-cache.c b/read-cache.c
index 7b9a989..45c1712 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1182,12 +1182,17 @@ static struct cache_entry *refresh_cache_entry(struct cache_entry *ce, int reall
return refresh_cache_ent(&the_index, ce, really, NULL, NULL);
}
-static int verify_hdr(struct cache_header *hdr, unsigned long size)
+static int verify_hdr(struct cache_header *hdr, unsigned long size,
+ int *deflated)
{
git_SHA_CTX c;
unsigned char sha1[20];
- if (hdr->hdr_signature != htonl(CACHE_SIGNATURE))
+ if (hdr->hdr_signature == htonl(CACHE_SIGNATURE))
+ *deflated = 0;
+ else if (hdr->hdr_signature == htonl(ZCACHE_SIGNATURE))
+ *deflated = 1;
+ else
return error("bad signature");
if (hdr->hdr_version != htonl(2) && hdr->hdr_version != htonl(3))
return error("bad index version");
@@ -1273,6 +1278,43 @@ static struct cache_entry *create_from_disk(struct ondisk_cache_entry *ondisk)
return ce;
}
+static int inflate_cache_entries(struct index_state *istate,
+ const unsigned char *mmap, size_t mmap_size,
+ unsigned long src_offset)
+{ /* Inflate the stream starting at mmap + src_offset, pass each decoded entry to set_index_entry, and return the input offset reached. */
+ unsigned char buf[sizeof(struct ondisk_cache_entry) + PATH_MAX]; /* scratch window for inflated bytes; NOTE(review): an entry larger than this window would not fit -- confirm entry sizes are bounded */
+ struct ondisk_cache_entry *disk_ce;
+ struct cache_entry *ce;
+ struct git_zstream stream;
+ int i, status;
+
+ memset(&stream, 0, sizeof(stream));
+ stream.next_in = (unsigned char*)mmap + src_offset; /* compressed input begins src_offset bytes into mmap */
+ stream.avail_in = mmap_size - src_offset; /* every byte after src_offset is offered as input */
+ stream.next_out = buf;
+ stream.avail_out = sizeof(buf);
+ git_inflate_init(&stream);
+
+ for (i = 0; i < istate->cache_nr; i++) { /* one decoded entry per iteration */
+ int remaining;
+ do {
+ status = git_inflate(&stream, Z_FINISH);
+ } while (status == Z_OK); /* keep inflating for as long as git_inflate returns Z_OK */
+
+ disk_ce = (struct ondisk_cache_entry *)buf; /* the next entry is parsed from the front of buf */
+ ce = create_from_disk(disk_ce);
+ set_index_entry(istate, i, ce);
+
+ remaining = stream.next_out - (buf + ondisk_ce_size(ce)); /* inflated bytes in buf beyond the entry just consumed */
+ memmove(buf, buf + ondisk_ce_size(ce), remaining); /* slide those leftover bytes to the front of buf */
+ stream.next_out = buf + remaining; /* the next inflate call appends right after the leftovers */
+ stream.avail_out = sizeof(buf) - remaining;
+ }
+ assert(status == Z_STREAM_END); /* NOTE(review): the final status is only checked when assertions are compiled in */
+ git_inflate_end(&stream);
+ return stream.next_in - mmap; /* input position reached by the inflater, relative to mmap */
+}
+
static int read_cache_entries(struct index_state *istate,
const char *mmap, unsigned long src_offset)
{
@@ -1300,6 +1342,7 @@ int read_index_from(struct index_state *istate, const char *path)
struct cache_header *hdr;
void *mmap;
size_t mmap_size;
+ int deflated;
errno = EBUSY;
if (istate->initialized)
@@ -1329,7 +1372,7 @@ int read_index_from(struct index_state *istate, const char *path)
die_errno("unable to map index file");
hdr = mmap;
- if (verify_hdr(hdr, mmap_size) < 0)
+ if (verify_hdr(hdr, mmap_size, &deflated) < 0)
goto unmap;
istate->cache_nr = ntohl(hdr->hdr_entries);
@@ -1337,7 +1380,11 @@ int read_index_from(struct index_state *istate, const char *path)
istate->cache = xcalloc(istate->cache_alloc, sizeof(struct cache_entry *));
istate->initialized = 1;
- src_offset = read_cache_entries(istate, mmap, sizeof(*hdr));
+ if (deflated)
+ src_offset = inflate_cache_entries(istate, mmap, mmap_size,
+ sizeof(*hdr));
+ else
+ src_offset = read_cache_entries(istate, mmap, sizeof(*hdr));
istate->timestamp.sec = st.st_mtime;
istate->timestamp.nsec = ST_MTIME_NSEC(st);
@@ -1594,6 +1641,10 @@ int write_index(struct index_state *istate, int newfd)
struct stat st;
void *ce_ondisk = NULL;
int ce_ondisk_size = 0;
+ struct git_zstream stream;
+ int deflate, status;
+ unsigned char *dbuf_out;
+ unsigned char *dbuf_in;
for (i = removed = extended = 0; i < entries; i++) {
if (cache[i]->ce_flags & CE_REMOVE)
@@ -1607,7 +1658,8 @@ int write_index(struct index_state *istate, int newfd)
}
}
- hdr.hdr_signature = htonl(CACHE_SIGNATURE);
+ deflate = getenv("GIT_ZCACHE") != NULL;
+ hdr.hdr_signature = htonl(deflate ? ZCACHE_SIGNATURE : CACHE_SIGNATURE);
/* for extended format, increase version so older git won't try to read it */
hdr.hdr_version = htonl(extended ? 3 : 2);
hdr.hdr_entries = htonl(entries - removed);
@@ -1616,6 +1668,17 @@ int write_index(struct index_state *istate, int newfd)
if (ce_write(&c, newfd, &hdr, sizeof(hdr)) < 0)
return -1;
+ if (deflate) {
+ dbuf_out = xmalloc(WRITE_BUFFER_SIZE);
+ dbuf_in = xmalloc(WRITE_BUFFER_SIZE);
+ memset(&stream, 0, sizeof(stream));
+ stream.next_out = dbuf_out;
+ stream.avail_out = WRITE_BUFFER_SIZE;
+ stream.next_in = dbuf_in;
+ stream.avail_in = 0;
+ git_deflate_init(&stream, zlib_compression_level);
+ }
+
for (i = 0; i < entries; i++) {
struct cache_entry *ce = cache[i];
int size;
@@ -1625,11 +1688,52 @@ int write_index(struct index_state *istate, int newfd)
if (!ce_uptodate(ce) && is_racy_timestamp(istate, ce))
ce_smudge_racily_clean_entry(ce);
size = ce_prepare_ondisk_entry(ce, &ce_ondisk, &ce_ondisk_size);
- if (ce_write(&c, newfd, ce_ondisk, size) < 0)
- return -1;
+ if (!deflate) {
+ if (ce_write(&c, newfd, ce_ondisk, size) < 0)
+ return -1;
+ continue;
+ }
+
+ if (stream.avail_in)
+ memmove(dbuf_in, stream.next_in, stream.avail_in);
+ memcpy(dbuf_in + stream.avail_in, ce_ondisk, size);
+ stream.next_in = dbuf_in;
+ stream.avail_in += size;
+ do {
+ status = git_deflate(&stream, 0);
+ if (stream.next_out > dbuf_out) {
+ size = stream.next_out - dbuf_out;
+ if (ce_write(&c, newfd, dbuf_out, size) < 0)
+ return -1;
+ stream.next_out = dbuf_out;
+ stream.avail_out = WRITE_BUFFER_SIZE;
+ }
+ } while (status == Z_OK);
}
free(ce_ondisk);
+ if (deflate) {
+ do {
+ status = git_deflate(&stream, Z_FINISH);
+ if (stream.next_out > dbuf_out) {
+ int size = stream.next_out - dbuf_out;
+ if (ce_write(&c, newfd, dbuf_out, size) < 0)
+ return -1;
+ stream.next_out = dbuf_out;
+ stream.avail_out = WRITE_BUFFER_SIZE;
+ }
+ } while (status == Z_OK);
+
+ git_deflate_end(&stream);
+ if (stream.next_out > dbuf_out) {
+ int size = stream.next_out - dbuf_out;
+ if (ce_write(&c, newfd, dbuf_out, size) < 0)
+ return -1;
+ }
+ free(dbuf_in);
+ free(dbuf_out);
+ }
+
/* Write extension data here */
if (istate->cache_tree) {
struct strbuf sb = STRBUF_INIT;
--
1.7.8.36.g69ee2
^ permalink raw reply related [flat|nested] 7+ messages in thread