From: Kevin Willford <kewillf@microsoft.com>
To: git@vger.kernel.org
Cc: gitster@pobox.com, peff@peff.net, peartben@gmail.com,
Kevin Willford <kewillf@microsoft.com>
Subject: [PATCH 3/3] read-cache: avoid allocating every ondisk entry when writing
Date: Mon, 21 Aug 2017 15:24:32 -0600 [thread overview]
Message-ID: <20170821212432.47364-4-kewillf@microsoft.com> (raw)
In-Reply-To: <20170821212432.47364-1-kewillf@microsoft.com>
When writing the index, an ondisk struct is allocated and freed for
each entry in ce_write_entry. We can do better by reusing a single
ondisk struct on the stack for every entry.
This is accomplished by declaring a stack ondisk_cache_entry_extended
outside the loop over the entries in do_write_index. Only the
fixed fields of this struct are used when writing, and the flags2
field is written only when the entry is extended.
The name field is not used and instead the cache_entry name field
is used directly when writing out the name. Because ce_write is
using a buffer and memcpy to fill the buffer before flushing to disk,
we don't have to worry about doing multiple ce_write calls.
Running the p0007-write-cache.sh tests showed savings of anywhere
between 3% and 7% when the index had over a million entries, with no
performance degradation on small repos.
Signed-off-by: Kevin Willford <kewillf@microsoft.com>
---
read-cache.c | 50 +++++++++++++++++++++++++-------------------------
1 file changed, 25 insertions(+), 25 deletions(-)
diff --git a/read-cache.c b/read-cache.c
index 47220cc30d..694bed8d82 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1499,6 +1499,7 @@ struct ondisk_cache_entry_extended {
};
/* These are only used for v3 or lower */
+#define align_padding_size(size, len) ((size + (len) + 8) & ~7) - (size + len)
#define align_flex_name(STRUCT,len) ((offsetof(struct STRUCT,name) + (len) + 8) & ~7)
#define ondisk_cache_entry_size(len) align_flex_name(ondisk_cache_entry,len)
#define ondisk_cache_entry_extended_size(len) align_flex_name(ondisk_cache_entry_extended,len)
@@ -2032,7 +2033,7 @@ static void ce_smudge_racily_clean_entry(struct cache_entry *ce)
}
/* Copy miscellaneous fields but not the name */
-static char *copy_cache_entry_to_ondisk(struct ondisk_cache_entry *ondisk,
+static void copy_cache_entry_to_ondisk(struct ondisk_cache_entry *ondisk,
struct cache_entry *ce)
{
short flags;
@@ -2056,32 +2057,35 @@ static char *copy_cache_entry_to_ondisk(struct ondisk_cache_entry *ondisk,
struct ondisk_cache_entry_extended *ondisk2;
ondisk2 = (struct ondisk_cache_entry_extended *)ondisk;
ondisk2->flags2 = htons((ce->ce_flags & CE_EXTENDED_FLAGS) >> 16);
- return ondisk2->name;
- }
- else {
- return ondisk->name;
}
}
static int ce_write_entry(git_SHA_CTX *c, int fd, struct cache_entry *ce,
- struct strbuf *previous_name)
+ struct strbuf *previous_name, struct ondisk_cache_entry *ondisk)
{
int size;
- struct ondisk_cache_entry *ondisk;
int saved_namelen = saved_namelen; /* compiler workaround */
- char *name;
int result;
+ static unsigned char padding[8] = { 0x00 };
if (ce->ce_flags & CE_STRIP_NAME) {
saved_namelen = ce_namelen(ce);
ce->ce_namelen = 0;
}
+ if (ce->ce_flags & CE_EXTENDED)
+ size = offsetof(struct ondisk_cache_entry_extended, name);
+ else
+ size = offsetof(struct ondisk_cache_entry, name);
+
if (!previous_name) {
- size = ondisk_ce_size(ce);
- ondisk = xcalloc(1, size);
- name = copy_cache_entry_to_ondisk(ondisk, ce);
- memcpy(name, ce->name, ce_namelen(ce));
+ int len = ce_namelen(ce);
+ copy_cache_entry_to_ondisk(ondisk, ce);
+ result = ce_write(c, fd, ondisk, size);
+ if (!result)
+ result = ce_write(c, fd, ce->name, len);
+ if (!result)
+ result = ce_write(c, fd, padding, align_padding_size(size, len));
} else {
int common, to_remove, prefix_size;
unsigned char to_remove_vi[16];
@@ -2094,16 +2098,12 @@ static int ce_write_entry(git_SHA_CTX *c, int fd, struct cache_entry *ce,
to_remove = previous_name->len - common;
prefix_size = encode_varint(to_remove, to_remove_vi);
- if (ce->ce_flags & CE_EXTENDED)
- size = offsetof(struct ondisk_cache_entry_extended, name);
- else
- size = offsetof(struct ondisk_cache_entry, name);
- size += prefix_size + (ce_namelen(ce) - common + 1);
-
- ondisk = xcalloc(1, size);
- name = copy_cache_entry_to_ondisk(ondisk, ce);
- memcpy(name, to_remove_vi, prefix_size);
- memcpy(name + prefix_size, ce->name + common, ce_namelen(ce) - common);
+ copy_cache_entry_to_ondisk(ondisk, ce);
+ result = ce_write(c, fd, ondisk, size);
+ if (!result)
+ result = ce_write(c, fd, to_remove_vi, prefix_size);
+ if (!result)
+ result = ce_write(c, fd, ce->name + common, ce_namelen(ce) - common + 1);
strbuf_splice(previous_name, common, to_remove,
ce->name + common, ce_namelen(ce) - common);
@@ -2113,8 +2113,6 @@ static int ce_write_entry(git_SHA_CTX *c, int fd, struct cache_entry *ce,
ce->ce_flags &= ~CE_STRIP_NAME;
}
- result = ce_write(c, fd, ondisk, size);
- free(ondisk);
return result;
}
@@ -2196,6 +2194,7 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile,
struct cache_entry **cache = istate->cache;
int entries = istate->cache_nr;
struct stat st;
+ struct ondisk_cache_entry_extended ondisk;
struct strbuf previous_name_buf = STRBUF_INIT, *previous_name;
int drop_cache_tree = 0;
@@ -2232,6 +2231,7 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile,
return -1;
previous_name = (hdr_version == 4) ? &previous_name_buf : NULL;
+
for (i = 0; i < entries; i++) {
struct cache_entry *ce = cache[i];
if (ce->ce_flags & CE_REMOVE)
@@ -2251,7 +2251,7 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile,
drop_cache_tree = 1;
}
- if (ce_write_entry(&c, newfd, ce, previous_name) < 0)
+ if (ce_write_entry(&c, newfd, ce, previous_name, (struct ondisk_cache_entry *)&ondisk) < 0)
err = -1;
if (err)
--
2.14.1.205.g2812f3410d
next prev parent reply other threads:[~2017-08-21 21:17 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-08-21 21:24 [PATCH 0/3] read-cache: use stack ondisk struct when writing index Kevin Willford
2017-08-21 21:24 ` [PATCH 1/3] perf: add test for writing the index Kevin Willford
2017-08-21 21:24 ` [PATCH 2/3] read-cache: fix memory leak in do_write_index Kevin Willford
2017-08-21 23:02 ` Junio C Hamano
2017-08-21 21:24 ` Kevin Willford [this message]
2017-08-21 23:42 ` [PATCH 3/3] read-cache: avoid allocating every ondisk entry when writing Junio C Hamano
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20170821212432.47364-4-kewillf@microsoft.com \
--to=kewillf@microsoft.com \
--cc=git@vger.kernel.org \
--cc=gitster@pobox.com \
--cc=peartben@gmail.com \
--cc=peff@peff.net \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).