From: Kjetil Barvik <barvik@broadpark.no>
To: git@vger.kernel.org
Cc: Linus Torvalds <torvalds@linux-foundation.org>,
Kjetil Barvik <barvik@broadpark.no>
Subject: [PATCH/RFC v3 2/2] create_directories() inside entry.c: only check each directory once!
Date: Wed, 07 Jan 2009 14:24:49 +0100 [thread overview]
Message-ID: <1231334689-17135-3-git-send-email-barvik@broadpark.no> (raw)
In-Reply-To: <1231334689-17135-1-git-send-email-barvik@broadpark.no>
When we do a 'git checkout', we eventually end up in the
'checkout_entry()' function inside entry.c, and from there we call the
'create_directories()' function to make sure that all the directories
exist for the possibly new file or entry.
The 'create_directories()' function happily checked, on every call,
that each path component exists. This resulted in tons and tons of calls to
lstat() or stat() when we check out files nested deep inside a
directory.
We try to avoid this by remembering the last checked, and possibly
newly created, directory.
Signed-off-by: Kjetil Barvik <barvik@broadpark.no>
---
:100644 100644 768ba38... ec1297f... M cache.h
:100644 100644 aa2ee46... 36d6f98... M entry.c
:100644 100644 28e2759... 0a03e65... M unpack-trees.c
cache.h | 1 +
entry.c | 82 +++++++++++++++++++++++++++++++++++++++++++------------
unpack-trees.c | 1 +
3 files changed, 66 insertions(+), 18 deletions(-)
diff --git a/cache.h b/cache.h
index 768ba3825f3015828381490b0c387177a4f71578..ec1297ff5621cc9eb7fce51cc025f18a030ac9ea 100644
--- a/cache.h
+++ b/cache.h
@@ -718,6 +718,7 @@ struct checkout {
refresh_cache:1;
};
+extern void clear_created_dirs_cache(void);
extern int checkout_entry(struct cache_entry *ce, const struct checkout *state, char *topath);
#define LSTAT_DIR (1u << 0)
diff --git a/entry.c b/entry.c
index aa2ee46a84033585d8e07a585610c5a697af82c2..36d6f98c1f59a86a5e9c117e1181c1169208168f 100644
--- a/entry.c
+++ b/entry.c
@@ -1,33 +1,67 @@
#include "cache.h"
#include "blob.h"
-static void create_directories(const char *path, const struct checkout *state)
+static char dirs_path[PATH_MAX];
+static int dirs_len = 0;
+
+static inline int
+greatest_common_created_dirs_prefix(int len, const char *name)
{
- int len = strlen(path);
- char *buf = xmalloc(len + 1);
- const char *slash = path;
+ int max_len, match_len = 0, i = 0;
- while ((slash = strchr(slash+1, '/')) != NULL) {
- struct stat st;
- int stat_status;
+ max_len = len < dirs_len ? len : dirs_len;
+ while (i < max_len && name[i] == dirs_path[i]) {
+ if (name[i] == '/') match_len = i;
+ i++;
+ }
+ if (i == dirs_len && len > dirs_len && name[dirs_len] == '/')
+ match_len = dirs_len;
+ return match_len;
+}
- len = slash - path;
- memcpy(buf, path, len);
- buf[len] = 0;
+void clear_created_dirs_cache(void)
+{
+ dirs_path[0] = '\0';
+ dirs_len = 0;
+}
- if (len <= state->base_dir_len)
+static void
+create_directories(int len, const char *path, const struct checkout *state)
+{
+ int i, max_len, last_slash, stat_status;
+ struct stat st;
+
+ /* Check the cache for previously checked or created
+ * directories (and components) within this function. There
+ * is no need to check or re-create directory components more
+ * than once!
+ */
+ max_len = len < PATH_MAX ? len : PATH_MAX;
+ i = last_slash = greatest_common_created_dirs_prefix(max_len, path);
+
+ while (i < max_len) {
+ do {
+ dirs_path[i] = path[i];
+ i++;
+ } while (i < max_len && path[i] != '/');
+ if (i >= max_len)
+ break;
+ last_slash = i;
+ dirs_path[last_slash] = '\0';
+
+ if (last_slash <= state->base_dir_len)
/*
* checkout-index --prefix=<dir>; <dir> is
* allowed to be a symlink to an existing
* directory.
*/
- stat_status = stat(buf, &st);
+ stat_status = stat(dirs_path, &st);
else
/*
* if there currently is a symlink, we would
* want to replace it with a real directory.
*/
- stat_status = lstat(buf, &st);
+ stat_status = lstat(dirs_path, &st);
if (!stat_status && S_ISDIR(st.st_mode))
continue; /* ok, it is already a directory. */
@@ -38,14 +72,20 @@ static void create_directories(const char *path, const struct checkout *state)
* error codepath; we do not care, as we unlink and
* mkdir again in such a case.
*/
- if (mkdir(buf, 0777)) {
+ if (mkdir(dirs_path, 0777)) {
if (errno == EEXIST && state->force &&
- !unlink(buf) && !mkdir(buf, 0777))
+ !unlink(dirs_path) && !mkdir(dirs_path, 0777))
continue;
- die("cannot create directory at %s", buf);
+ die("cannot create directory at %s", dirs_path);
}
}
- free(buf);
+ /* Update the cache of already created/checked directories */
+ if (last_slash > 0 && last_slash < PATH_MAX) {
+ dirs_path[last_slash] = '\0';
+ dirs_len = last_slash;
+ } else {
+ clear_created_dirs_cache();
+ }
}
static void remove_subtree(const char *path)
@@ -55,6 +95,11 @@ static void remove_subtree(const char *path)
char pathbuf[PATH_MAX];
char *name;
+ /* To be utterly safe we invalidate the cache of the
+ * previously created directories.
+ */
+ clear_created_dirs_cache();
+
if (!dir)
die("cannot opendir %s (%s)", path, strerror(errno));
strcpy(pathbuf, path);
@@ -201,6 +246,7 @@ int checkout_entry(struct cache_entry *ce, const struct checkout *state, char *t
memcpy(path, state->base_dir, len);
strcpy(path + len, ce->name);
+ len += ce_namelen(ce);
if (!lstat(path, &st)) {
unsigned changed = ce_match_stat(ce, &st, CE_MATCH_IGNORE_VALID);
@@ -229,6 +275,6 @@ int checkout_entry(struct cache_entry *ce, const struct checkout *state, char *t
return error("unable to unlink old '%s' (%s)", path, strerror(errno));
} else if (state->not_new)
return 0;
- create_directories(path, state);
+ create_directories(len, path, state);
return write_entry(ce, path, state, 0);
}
diff --git a/unpack-trees.c b/unpack-trees.c
index 28e275981a21b033459ef9c7e420cce4bf7e5513..0a03e65f9c9d869ab2d8b3c337f032ff2b8e7b2f 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -119,6 +119,7 @@ static int check_updates(struct unpack_trees_options *o)
}
}
+ clear_created_dirs_cache();
for (i = 0; i < index->cache_nr; i++) {
struct cache_entry *ce = index->cache[i];
--
1.6.1.rc1.49.g7f705
prev parent reply other threads:[~2009-01-07 13:26 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-01-07 13:24 [PATCH/RFC v3 0/2] git checkout: optimise away lots of lstat() calls Kjetil Barvik
2009-01-07 13:24 ` [PATCH/RFC v3 1/2] Optimised, faster, more effective symlink/directory detection Kjetil Barvik
2009-01-09 10:20 ` Pete Harlan
2009-01-07 13:24 ` Kjetil Barvik [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1231334689-17135-3-git-send-email-barvik@broadpark.no \
--to=barvik@broadpark.no \
--cc=git@vger.kernel.org \
--cc=torvalds@linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).