From: Chris Mason <mason@suse.com>
To: Linus Torvalds <torvalds@osdl.org>, git@vger.kernel.org
Subject: [PATCH] write-tree performance problems
Date: Tue, 19 Apr 2005 12:50:09 -0400 [thread overview]
Message-ID: <200504191250.10286.mason@suse.com> (raw)
[-- Attachment #1: Type: text/plain, Size: 1654 bytes --]
Hello everyone,
I did a quick experiment with applying/commit 100 patches from the suse kernel
into a kernel git tree, which quilt can do in 2 seconds. git needs 1m5s.
The primary performance problem during each commit is write-tree recalculating
the hash of each directory, even though the contents of most directories are
not changing. I've attached a very quick and dirty mod of write-tree.c, it
takes an optional tree id (sha1) and list of directories. The hash of any
directories not in the list are read in from existing files instead of being
recalculated.
You have to pass each sub dir with a modified file. So, if you change
fs/super.c and fs/ext3/super.c, you would call "write-tree sha1 fs fs/ext3"
With this patch, the time to apply 100 commits goes down to 22 seconds. It
could be faster (and easier to use) if the index stored the hash of trees
instead of just blobs, but that would be a larger change.
I was able to get the commit time down to 13 seconds by changing read-tree.c,
update-cache.c and read-cache.c to store/read the index in tmpfs instead of
on the local filesystem. I haven't attached the patch for that, but it seems
easiest to move .git/index into .git/index_dir/index, and let the user decide
where to put index_dir.
Quick speed summary, apply/commit 100 patches
quilt push -a : 2s
git (unmodified): 1m5s
git (tree hash reduction) 22s
git (tree hash, tmpfs index) 13s
This patch is against pasky's tree from this morning, but also applies to
linus' tree. It's nasty stuff, but will hopefully get some discussion
started on speeding things up.
-chris
[-- Attachment #2: fast-dirs.diff --]
[-- Type: text/x-diff, Size: 4526 bytes --]
--- a/write-tree.c
+++ b/write-tree.c
@@ -4,6 +4,8 @@
* Copyright (C) Linus Torvalds, 2005
*/
#include "cache.h"
+static char **dirs;
+static int num_dirs = 0;
static int check_valid_sha1(unsigned char *sha1)
{
@@ -27,15 +29,47 @@ static int prepend_integer(char *buffer,
return i;
}
+static int find_sha(char *buffer, int size, const char *base, int baselen, char *returnsha1)
+{
+ while(size) {
+ int len = strlen(buffer)+1;
+ unsigned char *sha1 = buffer + len;
+ char *path = strchr(buffer, ' ')+1;
+ unsigned int mode;
+
+ if (size < len + 20 || sscanf(buffer, "%o", &mode) != 1)
+ die("corrupt 'tree' file");
+ buffer = sha1 + 20;
+ size -= len + 20;
+ if (strncmp(path, base, baselen) == 0 &&
+ strlen(path) == baselen) {
+ memcpy(returnsha1, sha1, 20);
+ return 0;
+ }
+ }
+ return -1;
+}
+
#define ORIG_OFFSET (40) /* Enough space to add the header of "tree <size>\0" */
-static int write_tree(struct cache_entry **cachep, int maxentries, const char *base, int baselen, unsigned char *returnsha1)
+static int write_tree(struct cache_entry **cachep, int maxentries, const char *base, int baselen, unsigned char *returnsha1, char *treesha)
{
unsigned char subdir_sha1[20];
unsigned long size, offset;
char *buffer;
int i, nr;
-
+ char *tree = NULL;
+ unsigned long tree_size;
+ char type[20];
+ if (treesha) {
+ tree = read_sha1_file(treesha, type, &tree_size);
+ if (!tree) {
+ die("unable to read sha1 file");
+ } else {
+ }
+ if (strcmp(type, "tree"))
+ die("expected a tree node");
+ }
/* Guess at some random initial size */
size = 8192;
buffer = malloc(size);
@@ -55,15 +89,60 @@ static int write_tree(struct cache_entry
sha1 = ce->sha1;
mode = ntohl(ce->ce_mode);
-
/* Do we have _further_ subdirectories? */
filename = pathname + baselen;
dirname = strchr(filename, '/');
if (dirname) {
int subdir_written;
-
- subdir_written = write_tree(cachep + nr, maxentries - nr, pathname, dirname-pathname+1, subdir_sha1);
- nr += subdir_written;
+ int dirlen = dirname - pathname;
+ int dirmatch = 1;
+ if (tree && num_dirs > 0) {
+ dirmatch = 0;
+ for(i = 0 ; i < num_dirs; i++) {
+ int len = strlen(dirs[i]);
+ if (len <= baselen)
+ continue;
+ if (memcmp(dirs[i], pathname, dirlen)==0 &&
+ pathname[dirlen] == '/') {
+ dirmatch = 1;
+ break;
+ }
+ }
+ if (!dirmatch && find_sha(tree, tree_size,
+ filename,
+ dirname-filename,
+ subdir_sha1)) {
+ dirmatch = 1;
+ }
+ }
+ if (!dirmatch) {
+ /* eat all the entries in this dir */
+ while(++nr < maxentries) {
+ char *p;
+ ce = cachep[nr];
+ p = strchr(ce->name + baselen, '/');
+ if (!p)
+ break;
+ if (p - ce->name != dirname-pathname)
+ break;
+ if (memcmp(pathname, ce->name, p-ce->name))
+ break;
+ }
+ } else {
+ unsigned char thisdir_sha1[20];
+ char *p = thisdir_sha1;
+ if (num_dirs && tree) {
+ if (find_sha(tree, tree_size, filename,
+ dirname-filename, p)) {
+ num_dirs = 0;
+ p = NULL;
+ }
+ } else {
+ p = NULL;
+ }
+ subdir_written = write_tree(cachep + nr, maxentries - nr, pathname, dirname-pathname+1, subdir_sha1, p);
+ nr += subdir_written;
+ }
/* Now we need to write out the directory entry into this tree.. */
mode = S_IFDIR;
@@ -92,9 +172,10 @@ static int write_tree(struct cache_entry
i = prepend_integer(buffer, offset - ORIG_OFFSET, ORIG_OFFSET);
i -= 5;
memcpy(buffer+i, "tree ", 5);
-
write_sha1_file(buffer + i, offset - i, returnsha1);
free(buffer);
+ if (tree)
+ free(tree);
return nr;
}
@@ -103,7 +184,19 @@ int main(int argc, char **argv)
int i, unmerged;
int entries = read_cache();
unsigned char sha1[20];
+ unsigned char treesha1[20];
+ char *p = NULL;
+ if (argc > 1) {
+ if (argc < 3)
+ die("usage: write-tree [sha1 dir1 dir2 ...]");
+ num_dirs = argc - 2;
+ dirs = argv + 2;
+ if (get_sha1_hex(argv[1], treesha1) < 0)
+ die("bad sha1 given");
+ p = treesha1;
+
+ }
if (entries <= 0)
die("write-tree: no cache contents to write");
@@ -123,7 +216,7 @@ int main(int argc, char **argv)
die("write-tree: not able to write tree");
/* Ok, write it out */
- if (write_tree(active_cache, entries, "", 0, sha1) != entries)
+ if (write_tree(active_cache, entries, "", 0, sha1, p) != entries)
die("write-tree: internal error");
printf("%s\n", sha1_to_hex(sha1));
return 0;
next reply other threads:[~2005-04-19 16:46 UTC|newest]
Thread overview: 54+ messages / expand[flat|nested] mbox.gz Atom feed top
2005-04-19 16:50 Chris Mason [this message]
2005-04-19 17:36 ` [PATCH] write-tree performance problems Linus Torvalds
2005-04-19 18:11 ` Chris Mason
2005-04-19 19:03 ` Linus Torvalds
2005-04-19 21:08 ` Chris Mason
2005-04-19 21:23 ` Linus Torvalds
2005-04-20 0:49 ` Chris Mason
2005-04-20 1:09 ` Linus Torvalds
2005-04-20 6:43 ` Linus Torvalds
2005-04-20 7:38 ` H. Peter Anvin
2005-04-20 9:08 ` WARNING! Object DB conversion (was Re: [PATCH] write-tree performance problems) Linus Torvalds
2005-04-20 10:04 ` Ingo Molnar
2005-04-20 12:11 ` Jon Seymour
2005-04-20 13:24 ` Martin Uecker
2005-04-20 13:35 ` Morten Welinder
2005-04-20 13:41 ` Jon Seymour
2005-04-20 14:30 ` C. Scott Ananian
2005-04-20 15:19 ` Martin Uecker
2005-04-20 15:28 ` C. Scott Ananian
2005-04-20 15:57 ` Martin Uecker
2005-04-20 16:33 ` Martin Uecker
2005-04-20 13:30 ` Blob chunking code. [First look.] C. Scott Ananian
2005-04-20 17:31 ` Blob chunking code. [Second look] C. Scott Ananian
2005-04-20 14:13 ` WARNING! Object DB conversion (was Re: [PATCH] write-tree performance problems) David Woodhouse
2005-04-20 14:59 ` Linus Torvalds
2005-04-20 22:29 ` David Woodhouse
[not found] ` <2cfc4032050420050655265d3a@mail.gmail.com>
2005-04-20 14:29 ` Linus Torvalds
2005-04-20 14:35 ` C. Scott Ananian
2005-04-20 15:22 ` [PATCH] write-tree performance problems Chris Mason
2005-04-20 15:30 ` C. Scott Ananian
2005-04-20 15:46 ` Linus Torvalds
2005-04-20 15:52 ` C. Scott Ananian
2005-04-20 16:21 ` Linus Torvalds
2005-04-20 15:40 ` Linus Torvalds
2005-04-20 16:10 ` David Willmore
2005-04-20 16:33 ` Linus Torvalds
2005-04-20 16:41 ` Linus Torvalds
2005-04-20 16:37 ` Chris Mason
2005-04-20 17:06 ` Linus Torvalds
2005-04-20 17:23 ` Chris Mason
2005-04-20 17:52 ` Linus Torvalds
2005-04-20 19:04 ` Chris Mason
2005-04-20 19:19 ` Linus Torvalds
2005-04-20 19:47 ` Linus Torvalds
2005-04-20 18:07 ` David S. Miller
2005-04-19 22:09 ` David Lang
2005-04-19 22:21 ` Linus Torvalds
2005-04-19 23:00 ` David Lang
2005-04-19 23:09 ` Linus Torvalds
2005-04-19 23:42 ` David Lang
2005-04-19 23:59 ` Linus Torvalds
2005-04-19 21:52 ` Christopher Li
2005-04-19 18:51 ` Olivier Galibert
2005-04-19 22:47 ` C. Scott Ananian
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=200504191250.10286.mason@suse.com \
--to=mason@suse.com \
--cc=git@vger.kernel.org \
--cc=torvalds@osdl.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).