* [PATCH v10 1/5] lstat_cache(): more cache effective symlink/directory detection
2009-01-18 15:14 [PATCH v10 0/5] git checkout: optimise away lots of lstat() calls Kjetil Barvik
@ 2009-01-18 15:14 ` Kjetil Barvik
2009-01-18 15:14 ` [PATCH v10 2/5] lstat_cache(): introduce has_symlink_or_noent_leading_path() function Kjetil Barvik
` (3 subsequent siblings)
4 siblings, 0 replies; 6+ messages in thread
From: Kjetil Barvik @ 2009-01-18 15:14 UTC (permalink / raw)
To: git; +Cc: Kjetil Barvik
Make the cache functionality more effective. Previously when A/B/C/D
was in the cache and A/B/C/E/file.c was called for, there was no match
at all from the cache. Now we use the fact that the paths "A", "A/B"
and "A/B/C" are already tested, and we only need to do an lstat() call
on "A/B/C/E".
We only cache/store the last path regardless of its type. Since the
cache functionality is always used with alphabetically sorted names
(at least it seems so to me), there is no need to store both the last
symlink-leading path and the last real-directory path. Note that if
the cache is not called with (mostly) alphabetically sorted names,
neither the old, nor this new one, would be very effective.
Previously, when symlink A/B/C/S was cached/stored in the symlink-
leading path, and A/B/C/file.c was called for, it was not easy to use
the fact that we already knew that the paths "A", "A/B" and "A/B/C"
are real directories.
Avoid copying the first path components of the name 2 zillion times
when we test new path components. Since we always cache/store the
last path, we can copy each component as we test those directly into
the cache. Previously we ended up doing a memcpy() for the full
path/name right before each lstat() call, and when updating the cache
for each time we have tested a new path component.
We also use less memory, that is, PATH_MAX bytes less memory on the
stack and PATH_MAX bytes less memory on the heap.
Thanks to Junio C Hamano, Linus Torvalds and Rene Scharfe for valuable
comments on this patch!
Signed-off-by: Kjetil Barvik <barvik@broadpark.no>
---
symlinks.c | 165 +++++++++++++++++++++++++++++++++++++++++++++---------------
1 files changed, 125 insertions(+), 40 deletions(-)
diff --git a/symlinks.c b/symlinks.c
index 5a5e781..49fb4d8 100644
--- a/symlinks.c
+++ b/symlinks.c
@@ -1,64 +1,149 @@
#include "cache.h"
-struct pathname {
- int len;
+static struct cache_def {
char path[PATH_MAX];
-};
+ int len;
+ int flags;
+} cache;
-/* Return matching pathname prefix length, or zero if not matching */
-static inline int match_pathname(int len, const char *name, struct pathname *match)
+/*
+ * Returns the length (on a path component basis) of the longest
+ * common prefix match of 'name' and the cached path string.
+ */
+static inline int longest_match_lstat_cache(int len, const char *name)
{
- int match_len = match->len;
- return (len > match_len &&
- name[match_len] == '/' &&
- !memcmp(name, match->path, match_len)) ? match_len : 0;
+ int max_len, match_len = 0, i = 0;
+
+ max_len = len < cache.len ? len : cache.len;
+ while (i < max_len && name[i] == cache.path[i]) {
+ if (name[i] == '/')
+ match_len = i;
+ i++;
+ }
+ /* Is the cached path string a substring of 'name'? */
+ if (i == cache.len && cache.len < len && name[cache.len] == '/')
+ match_len = cache.len;
+ /* Is 'name' a substring of the cached path string? */
+ else if ((i == len && len < cache.len && cache.path[len] == '/') ||
+ (i == len && len == cache.len))
+ match_len = len;
+ return match_len;
}
-static inline void set_pathname(int len, const char *name, struct pathname *match)
+static inline void reset_lstat_cache(void)
{
- if (len < PATH_MAX) {
- match->len = len;
- memcpy(match->path, name, len);
- match->path[len] = 0;
- }
+ cache.path[0] = '\0';
+ cache.len = 0;
+ cache.flags = 0;
}
-int has_symlink_leading_path(int len, const char *name)
+#define FL_DIR (1 << 0)
+#define FL_SYMLINK (1 << 1)
+#define FL_LSTATERR (1 << 2)
+#define FL_ERR (1 << 3)
+
+/*
+ * Check if name 'name' of length 'len' has a symlink leading
+ * component, or if the directory exists and is real.
+ *
+ * To speed up the check, some information is allowed to be cached.
+ * This can be indicated by the 'track_flags' argument.
+ */
+static int lstat_cache(int len, const char *name,
+ int track_flags)
{
- static struct pathname link, nonlink;
- char path[PATH_MAX];
+ int match_len, last_slash, last_slash_dir;
+ int match_flags, ret_flags, save_flags, max_len;
struct stat st;
- char *sp;
- int known_dir;
/*
- * See if the last known symlink cache matches.
+ * Check to see if we have a match from the cache for the
+ * symlink path type.
*/
- if (match_pathname(len, name, &link))
- return 1;
-
+ match_len = last_slash = longest_match_lstat_cache(len, name);
+ match_flags = cache.flags & track_flags & FL_SYMLINK;
+ if (match_flags && match_len == cache.len)
+ return match_flags;
/*
- * Get rid of the last known directory part
+ * If we now have match_len > 0, we would know that the
+ * matched part will always be a directory.
+ *
+ * Also, if we are tracking directories and 'name' is a
+ * substring of the cache on a path component basis, we can
+ * return immediately.
*/
- known_dir = match_pathname(len, name, &nonlink);
+ match_flags = track_flags & FL_DIR;
+ if (match_flags && len == match_len)
+ return match_flags;
- while ((sp = strchr(name + known_dir + 1, '/')) != NULL) {
- int thislen = sp - name ;
- memcpy(path, name, thislen);
- path[thislen] = 0;
+ /*
+ * Okay, no match from the cache so far, so now we have to
+ * check the rest of the path components.
+ */
+ ret_flags = FL_DIR;
+ last_slash_dir = last_slash;
+ max_len = len < PATH_MAX ? len : PATH_MAX;
+ while (match_len < max_len) {
+ do {
+ cache.path[match_len] = name[match_len];
+ match_len++;
+ } while (match_len < max_len && name[match_len] != '/');
+ if (match_len >= max_len)
+ break;
+ last_slash = match_len;
+ cache.path[last_slash] = '\0';
- if (lstat(path, &st))
- return 0;
- if (S_ISDIR(st.st_mode)) {
- set_pathname(thislen, path, &nonlink);
- known_dir = thislen;
+ if (lstat(cache.path, &st)) {
+ ret_flags = FL_LSTATERR;
+ } else if (S_ISDIR(st.st_mode)) {
+ last_slash_dir = last_slash;
continue;
- }
- if (S_ISLNK(st.st_mode)) {
- set_pathname(thislen, path, &link);
- return 1;
+ } else if (S_ISLNK(st.st_mode)) {
+ ret_flags = FL_SYMLINK;
+ } else {
+ ret_flags = FL_ERR;
}
break;
}
- return 0;
+
+ /*
+ * At the end update the cache. Note that max 2 different
+ * path types, FL_SYMLINK and FL_DIR, can be cached for the
+ * moment!
+ */
+ save_flags = ret_flags & track_flags & FL_SYMLINK;
+ if (save_flags && last_slash > 0 && last_slash < PATH_MAX) {
+ cache.path[last_slash] = '\0';
+ cache.len = last_slash;
+ cache.flags = save_flags;
+ } else if (track_flags & FL_DIR &&
+ last_slash_dir > 0 && last_slash_dir < PATH_MAX) {
+ /*
+ * We have a separate test for the directory case,
+ * since it could be that we have found a symlink and
+ * the track_flags says that we cannot cache this
+ * fact, so the cache would then have been left empty
+ * in this case.
+ *
+ * But if we are allowed to track real directories, we
+ * can still cache the path components before the last
+ * one (the found symlink component).
+ */
+ cache.path[last_slash_dir] = '\0';
+ cache.len = last_slash_dir;
+ cache.flags = FL_DIR;
+ } else {
+ reset_lstat_cache();
+ }
+ return ret_flags;
+}
+
+/*
+ * Return non-zero if path 'name' has a leading symlink component
+ */
+int has_symlink_leading_path(int len, const char *name)
+{
+ return lstat_cache(len, name,
+ FL_SYMLINK|FL_DIR) &
+ FL_SYMLINK;
}
--
1.6.1.83.gd727f
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH v10 2/5] lstat_cache(): introduce has_symlink_or_noent_leading_path() function
2009-01-18 15:14 [PATCH v10 0/5] git checkout: optimise away lots of lstat() calls Kjetil Barvik
2009-01-18 15:14 ` [PATCH v10 1/5] lstat_cache(): more cache effective symlink/directory detection Kjetil Barvik
@ 2009-01-18 15:14 ` Kjetil Barvik
2009-01-18 15:14 ` [PATCH v10 3/5] lstat_cache(): introduce has_dirs_only_path() function Kjetil Barvik
` (2 subsequent siblings)
4 siblings, 0 replies; 6+ messages in thread
From: Kjetil Barvik @ 2009-01-18 15:14 UTC (permalink / raw)
To: git; +Cc: Kjetil Barvik
In some cases, especially inside the unpack-trees.c file, and inside
the verify_absent() function, we can avoid some unnecessary calls to
lstat(), if the lstat_cache() function can also be told to keep track
of non-existing directories.
So we update the lstat_cache() function to handle this new fact,
introduce a new wrapper function, and the result is that we save lots
of lstat() calls for a removed directory which previously contained
lots of files, when we call this new wrapper of lstat_cache() instead
of the old one.
We do similar changes inside the unlink_entry() function, since if we
can already say that the leading directory component of a pathname
does not exist, it is not necessary to try to remove a pathname below
it!
Thanks to Junio C Hamano, Linus Torvalds and Rene Scharfe for valuable
comments on this patch!
Signed-off-by: Kjetil Barvik <barvik@broadpark.no>
---
cache.h | 1 +
symlinks.c | 94 +++++++++++++++++++++++++++++++++++--------------------
unpack-trees.c | 4 +-
3 files changed, 63 insertions(+), 36 deletions(-)
diff --git a/cache.h b/cache.h
index 8e1af26..518e4c7 100644
--- a/cache.h
+++ b/cache.h
@@ -717,6 +717,7 @@ struct checkout {
extern int checkout_entry(struct cache_entry *ce, const struct checkout *state, char *topath);
extern int has_symlink_leading_path(int len, const char *name);
+extern int has_symlink_or_noent_leading_path(int len, const char *name);
extern struct alternate_object_database {
struct alternate_object_database *next;
diff --git a/symlinks.c b/symlinks.c
index 49fb4d8..c69556a 100644
--- a/symlinks.c
+++ b/symlinks.c
@@ -4,6 +4,7 @@ static struct cache_def {
char path[PATH_MAX];
int len;
int flags;
+ int track_flags;
} cache;
/*
@@ -30,21 +31,23 @@ static inline int longest_match_lstat_cache(int len, const char *name)
return match_len;
}
-static inline void reset_lstat_cache(void)
+static inline void reset_lstat_cache(int track_flags)
{
cache.path[0] = '\0';
cache.len = 0;
cache.flags = 0;
+ cache.track_flags = track_flags;
}
#define FL_DIR (1 << 0)
-#define FL_SYMLINK (1 << 1)
-#define FL_LSTATERR (1 << 2)
-#define FL_ERR (1 << 3)
+#define FL_NOENT (1 << 1)
+#define FL_SYMLINK (1 << 2)
+#define FL_LSTATERR (1 << 3)
+#define FL_ERR (1 << 4)
/*
* Check if name 'name' of length 'len' has a symlink leading
- * component, or if the directory exists and is real.
+ * component, or if the directory exists and is real, or not.
*
* To speed up the check, some information is allowed to be cached.
* This can be indicated by the 'track_flags' argument.
@@ -56,25 +59,35 @@ static int lstat_cache(int len, const char *name,
int match_flags, ret_flags, save_flags, max_len;
struct stat st;
- /*
- * Check to see if we have a match from the cache for the
- * symlink path type.
- */
- match_len = last_slash = longest_match_lstat_cache(len, name);
- match_flags = cache.flags & track_flags & FL_SYMLINK;
- if (match_flags && match_len == cache.len)
- return match_flags;
- /*
- * If we now have match_len > 0, we would know that the
- * matched part will always be a directory.
- *
- * Also, if we are tracking directories and 'name' is a
- * substring of the cache on a path component basis, we can
- * return immediately.
- */
- match_flags = track_flags & FL_DIR;
- if (match_flags && len == match_len)
- return match_flags;
+ if (cache.track_flags != track_flags) {
+ /*
+ * As a safeguard we clear the cache if the value of
+ * track_flags does not match with the last supplied
+ * value.
+ */
+ reset_lstat_cache(track_flags);
+ match_len = last_slash = 0;
+ } else {
+ /*
+ * Check to see if we have a match from the cache for
+ * the 2 "excluding" path types.
+ */
+ match_len = last_slash = longest_match_lstat_cache(len, name);
+ match_flags = cache.flags & track_flags & (FL_NOENT|FL_SYMLINK);
+ if (match_flags && match_len == cache.len)
+ return match_flags;
+ /*
+ * If we now have match_len > 0, we would know that
+ * the matched part will always be a directory.
+ *
+ * Also, if we are tracking directories and 'name' is
+ * a substring of the cache on a path component basis,
+ * we can return immediately.
+ */
+ match_flags = track_flags & FL_DIR;
+ if (match_flags && len == match_len)
+ return match_flags;
+ }
/*
* Okay, no match from the cache so far, so now we have to
@@ -95,6 +108,8 @@ static int lstat_cache(int len, const char *name,
if (lstat(cache.path, &st)) {
ret_flags = FL_LSTATERR;
+ if (errno == ENOENT)
+ ret_flags |= FL_NOENT;
} else if (S_ISDIR(st.st_mode)) {
last_slash_dir = last_slash;
continue;
@@ -107,11 +122,11 @@ static int lstat_cache(int len, const char *name,
}
/*
- * At the end update the cache. Note that max 2 different
- * path types, FL_SYMLINK and FL_DIR, can be cached for the
- * moment!
+ * At the end update the cache. Note that max 3 different
+ * path types, FL_NOENT, FL_SYMLINK and FL_DIR, can be cached
+ * for the moment!
*/
- save_flags = ret_flags & track_flags & FL_SYMLINK;
+ save_flags = ret_flags & track_flags & (FL_NOENT|FL_SYMLINK);
if (save_flags && last_slash > 0 && last_slash < PATH_MAX) {
cache.path[last_slash] = '\0';
cache.len = last_slash;
@@ -120,20 +135,20 @@ static int lstat_cache(int len, const char *name,
last_slash_dir > 0 && last_slash_dir < PATH_MAX) {
/*
* We have a separate test for the directory case,
- * since it could be that we have found a symlink and
- * the track_flags says that we cannot cache this
- * fact, so the cache would then have been left empty
- * in this case.
+ * since it could be that we have found a symlink or a
+ * non-existing directory and the track_flags says
+ * that we cannot cache this fact, so the cache would
+ * then have been left empty in this case.
*
* But if we are allowed to track real directories, we
* can still cache the path components before the last
- * one (the found symlink component).
+ * one (the found symlink or non-existing component).
*/
cache.path[last_slash_dir] = '\0';
cache.len = last_slash_dir;
cache.flags = FL_DIR;
} else {
- reset_lstat_cache();
+ reset_lstat_cache(track_flags);
}
return ret_flags;
}
@@ -147,3 +162,14 @@ int has_symlink_leading_path(int len, const char *name)
FL_SYMLINK|FL_DIR) &
FL_SYMLINK;
}
+
+/*
+ * Return non-zero if path 'name' has a leading symlink component or
+ * if some leading path component does not exists.
+ */
+int has_symlink_or_noent_leading_path(int len, const char *name)
+{
+ return lstat_cache(len, name,
+ FL_SYMLINK|FL_NOENT|FL_DIR) &
+ (FL_SYMLINK|FL_NOENT);
+}
diff --git a/unpack-trees.c b/unpack-trees.c
index 15c9ef5..16bc2ca 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -61,7 +61,7 @@ static void unlink_entry(struct cache_entry *ce)
char *cp, *prev;
char *name = ce->name;
- if (has_symlink_leading_path(ce_namelen(ce), ce->name))
+ if (has_symlink_or_noent_leading_path(ce_namelen(ce), ce->name))
return;
if (unlink(name))
return;
@@ -580,7 +580,7 @@ static int verify_absent(struct cache_entry *ce, const char *action,
if (o->index_only || o->reset || !o->update)
return 0;
- if (has_symlink_leading_path(ce_namelen(ce), ce->name))
+ if (has_symlink_or_noent_leading_path(ce_namelen(ce), ce->name))
return 0;
if (!lstat(ce->name, &st)) {
--
1.6.1.83.gd727f
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH v10 3/5] lstat_cache(): introduce has_dirs_only_path() function
2009-01-18 15:14 [PATCH v10 0/5] git checkout: optimise away lots of lstat() calls Kjetil Barvik
2009-01-18 15:14 ` [PATCH v10 1/5] lstat_cache(): more cache effective symlink/directory detection Kjetil Barvik
2009-01-18 15:14 ` [PATCH v10 2/5] lstat_cache(): introduce has_symlink_or_noent_leading_path() function Kjetil Barvik
@ 2009-01-18 15:14 ` Kjetil Barvik
2009-01-18 15:14 ` [PATCH v10 4/5] lstat_cache(): introduce invalidate_lstat_cache() function Kjetil Barvik
2009-01-18 15:14 ` [PATCH v10 5/5] lstat_cache(): introduce clear_lstat_cache() function Kjetil Barvik
4 siblings, 0 replies; 6+ messages in thread
From: Kjetil Barvik @ 2009-01-18 15:14 UTC (permalink / raw)
To: git; +Cc: Kjetil Barvik
The create_directories() function in entry.c currently calls stat()
or lstat() for each path component of the pathname 'path' each and every
time. For the 'git checkout' command, this function is called on each
file for which we must do an update (ce->ce_flags & CE_UPDATE), so we get
lots and lots of calls.
To fix this, we add a new wrapper around the lstat_cache() function, and
call the wrapper instead of calling the stat() or lstat() functions
directly. Since the paths given to the create_directories() function
are sorted alphabetically, the new wrapper will be very cache
effective in this situation.
To support it we must update the lstat_cache() function to be able to
say that "please test the complete length of 'name'", and also to give
it the length of a prefix, where the cache should use the stat()
function instead of the lstat() function to test each path component.
Thanks to Junio C Hamano, Linus Torvalds and Rene Scharfe for valuable
comments on this patch!
Signed-off-by: Kjetil Barvik <barvik@broadpark.no>
---
cache.h | 1 +
entry.c | 34 +++++++++++--------------------
symlinks.c | 64 ++++++++++++++++++++++++++++++++++++++++++++---------------
3 files changed, 60 insertions(+), 39 deletions(-)
diff --git a/cache.h b/cache.h
index 518e4c7..110b9f9 100644
--- a/cache.h
+++ b/cache.h
@@ -718,6 +718,7 @@ struct checkout {
extern int checkout_entry(struct cache_entry *ce, const struct checkout *state, char *topath);
extern int has_symlink_leading_path(int len, const char *name);
extern int has_symlink_or_noent_leading_path(int len, const char *name);
+extern int has_dirs_only_path(int len, const char *name, int prefix_len);
extern struct alternate_object_database {
struct alternate_object_database *next;
diff --git a/entry.c b/entry.c
index aa2ee46..01a683e 100644
--- a/entry.c
+++ b/entry.c
@@ -8,35 +8,25 @@ static void create_directories(const char *path, const struct checkout *state)
const char *slash = path;
while ((slash = strchr(slash+1, '/')) != NULL) {
- struct stat st;
- int stat_status;
-
len = slash - path;
memcpy(buf, path, len);
buf[len] = 0;
- if (len <= state->base_dir_len)
- /*
- * checkout-index --prefix=<dir>; <dir> is
- * allowed to be a symlink to an existing
- * directory.
- */
- stat_status = stat(buf, &st);
- else
- /*
- * if there currently is a symlink, we would
- * want to replace it with a real directory.
- */
- stat_status = lstat(buf, &st);
-
- if (!stat_status && S_ISDIR(st.st_mode))
+ /*
+ * For 'checkout-index --prefix=<dir>', <dir> is
+ * allowed to be a symlink to an existing directory,
+ * and we set 'state->base_dir_len' below, such that
+ * we test the path components of the prefix with the
+ * stat() function instead of the lstat() function.
+ */
+ if (has_dirs_only_path(len, buf, state->base_dir_len))
continue; /* ok, it is already a directory. */
/*
- * We know stat_status == 0 means something exists
- * there and this mkdir would fail, but that is an
- * error codepath; we do not care, as we unlink and
- * mkdir again in such a case.
+ * If this mkdir() would fail, it could be that there
+ * is already a symlink or something else exists
+ * there, therefore we then try to unlink it and try
+ * one more time to create the directory.
*/
if (mkdir(buf, 0777)) {
if (errno == EEXIST && state->force &&
diff --git a/symlinks.c b/symlinks.c
index c69556a..918e24a 100644
--- a/symlinks.c
+++ b/symlinks.c
@@ -1,10 +1,11 @@
#include "cache.h"
static struct cache_def {
- char path[PATH_MAX];
+ char path[PATH_MAX + 1];
int len;
int flags;
int track_flags;
+ int prefix_len_stat_func;
} cache;
/*
@@ -31,12 +32,13 @@ static inline int longest_match_lstat_cache(int len, const char *name)
return match_len;
}
-static inline void reset_lstat_cache(int track_flags)
+static inline void reset_lstat_cache(int track_flags, int prefix_len_stat_func)
{
cache.path[0] = '\0';
cache.len = 0;
cache.flags = 0;
cache.track_flags = track_flags;
+ cache.prefix_len_stat_func = prefix_len_stat_func;
}
#define FL_DIR (1 << 0)
@@ -44,28 +46,35 @@ static inline void reset_lstat_cache(int track_flags)
#define FL_SYMLINK (1 << 2)
#define FL_LSTATERR (1 << 3)
#define FL_ERR (1 << 4)
+#define FL_FULLPATH (1 << 5)
/*
* Check if name 'name' of length 'len' has a symlink leading
* component, or if the directory exists and is real, or not.
*
* To speed up the check, some information is allowed to be cached.
- * This can be indicated by the 'track_flags' argument.
+ * This can be indicated by the 'track_flags' argument, which also can
+ * be used to indicate that we should check the full path.
+ *
+ * The 'prefix_len_stat_func' parameter can be used to set the length
+ * of the prefix, where the cache should use the stat() function
+ * instead of the lstat() function to test each path component.
*/
static int lstat_cache(int len, const char *name,
- int track_flags)
+ int track_flags, int prefix_len_stat_func)
{
int match_len, last_slash, last_slash_dir;
- int match_flags, ret_flags, save_flags, max_len;
+ int match_flags, ret_flags, save_flags, max_len, ret;
struct stat st;
- if (cache.track_flags != track_flags) {
+ if (cache.track_flags != track_flags ||
+ cache.prefix_len_stat_func != prefix_len_stat_func) {
/*
- * As a safeguard we clear the cache if the value of
- * track_flags does not match with the last supplied
- * value.
+ * As a safeguard we clear the cache if the values of
+ * track_flags and/or prefix_len_stat_func does not
+ * match with the last supplied values.
*/
- reset_lstat_cache(track_flags);
+ reset_lstat_cache(track_flags, prefix_len_stat_func);
match_len = last_slash = 0;
} else {
/*
@@ -101,12 +110,17 @@ static int lstat_cache(int len, const char *name,
cache.path[match_len] = name[match_len];
match_len++;
} while (match_len < max_len && name[match_len] != '/');
- if (match_len >= max_len)
+ if (match_len >= max_len && !(track_flags & FL_FULLPATH))
break;
last_slash = match_len;
cache.path[last_slash] = '\0';
- if (lstat(cache.path, &st)) {
+ if (last_slash <= prefix_len_stat_func)
+ ret = stat(cache.path, &st);
+ else
+ ret = lstat(cache.path, &st);
+
+ if (ret) {
ret_flags = FL_LSTATERR;
if (errno == ENOENT)
ret_flags |= FL_NOENT;
@@ -127,12 +141,12 @@ static int lstat_cache(int len, const char *name,
* for the moment!
*/
save_flags = ret_flags & track_flags & (FL_NOENT|FL_SYMLINK);
- if (save_flags && last_slash > 0 && last_slash < PATH_MAX) {
+ if (save_flags && last_slash > 0 && last_slash <= PATH_MAX) {
cache.path[last_slash] = '\0';
cache.len = last_slash;
cache.flags = save_flags;
} else if (track_flags & FL_DIR &&
- last_slash_dir > 0 && last_slash_dir < PATH_MAX) {
+ last_slash_dir > 0 && last_slash_dir <= PATH_MAX) {
/*
* We have a separate test for the directory case,
* since it could be that we have found a symlink or a
@@ -148,18 +162,20 @@ static int lstat_cache(int len, const char *name,
cache.len = last_slash_dir;
cache.flags = FL_DIR;
} else {
- reset_lstat_cache(track_flags);
+ reset_lstat_cache(track_flags, prefix_len_stat_func);
}
return ret_flags;
}
+#define USE_ONLY_LSTAT 0
+
/*
* Return non-zero if path 'name' has a leading symlink component
*/
int has_symlink_leading_path(int len, const char *name)
{
return lstat_cache(len, name,
- FL_SYMLINK|FL_DIR) &
+ FL_SYMLINK|FL_DIR, USE_ONLY_LSTAT) &
FL_SYMLINK;
}
@@ -170,6 +186,20 @@ int has_symlink_leading_path(int len, const char *name)
int has_symlink_or_noent_leading_path(int len, const char *name)
{
return lstat_cache(len, name,
- FL_SYMLINK|FL_NOENT|FL_DIR) &
+ FL_SYMLINK|FL_NOENT|FL_DIR, USE_ONLY_LSTAT) &
(FL_SYMLINK|FL_NOENT);
}
+
+/*
+ * Return non-zero if all path components of 'name' exists as a
+ * directory. If prefix_len > 0, we will test with the stat()
+ * function instead of the lstat() function for a prefix length of
+ * 'prefix_len', thus we then allow for symlinks in the prefix part as
+ * long as those points to real existing directories.
+ */
+int has_dirs_only_path(int len, const char *name, int prefix_len)
+{
+ return lstat_cache(len, name,
+ FL_DIR|FL_FULLPATH, prefix_len) &
+ FL_DIR;
+}
--
1.6.1.83.gd727f
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH v10 4/5] lstat_cache(): introduce invalidate_lstat_cache() function
2009-01-18 15:14 [PATCH v10 0/5] git checkout: optimise away lots of lstat() calls Kjetil Barvik
` (2 preceding siblings ...)
2009-01-18 15:14 ` [PATCH v10 3/5] lstat_cache(): introduce has_dirs_only_path() function Kjetil Barvik
@ 2009-01-18 15:14 ` Kjetil Barvik
2009-01-18 15:14 ` [PATCH v10 5/5] lstat_cache(): introduce clear_lstat_cache() function Kjetil Barvik
4 siblings, 0 replies; 6+ messages in thread
From: Kjetil Barvik @ 2009-01-18 15:14 UTC (permalink / raw)
To: git; +Cc: Kjetil Barvik
In some cases it may be necessary to tell the cache: "Hey, I
deleted/changed the type of this pathname, and if you currently have it
inside your cache, you should delete it".
This patch introduces a function which supports this.
Signed-off-by: Kjetil Barvik <barvik@broadpark.no>
---
cache.h | 1 +
symlinks.c | 44 ++++++++++++++++++++++++++++++++++++--------
2 files changed, 37 insertions(+), 8 deletions(-)
diff --git a/cache.h b/cache.h
index 110b9f9..efcceec 100644
--- a/cache.h
+++ b/cache.h
@@ -719,6 +719,7 @@ extern int checkout_entry(struct cache_entry *ce, const struct checkout *state,
extern int has_symlink_leading_path(int len, const char *name);
extern int has_symlink_or_noent_leading_path(int len, const char *name);
extern int has_dirs_only_path(int len, const char *name, int prefix_len);
+extern void invalidate_lstat_cache(int len, const char *name);
extern struct alternate_object_database {
struct alternate_object_database *next;
diff --git a/symlinks.c b/symlinks.c
index 918e24a..dbdfec4 100644
--- a/symlinks.c
+++ b/symlinks.c
@@ -12,23 +12,30 @@ static struct cache_def {
* Returns the length (on a path component basis) of the longest
* common prefix match of 'name' and the cached path string.
*/
-static inline int longest_match_lstat_cache(int len, const char *name)
+static inline int longest_match_lstat_cache(int len, const char *name,
+ int *previous_slash)
{
- int max_len, match_len = 0, i = 0;
+ int max_len, match_len = 0, match_len_prev = 0, i = 0;
max_len = len < cache.len ? len : cache.len;
while (i < max_len && name[i] == cache.path[i]) {
- if (name[i] == '/')
+ if (name[i] == '/') {
+ match_len_prev = match_len;
match_len = i;
+ }
i++;
}
/* Is the cached path string a substring of 'name'? */
- if (i == cache.len && cache.len < len && name[cache.len] == '/')
+ if (i == cache.len && cache.len < len && name[cache.len] == '/') {
+ match_len_prev = match_len;
match_len = cache.len;
/* Is 'name' a substring of the cached path string? */
- else if ((i == len && len < cache.len && cache.path[len] == '/') ||
- (i == len && len == cache.len))
+ } else if ((i == len && len < cache.len && cache.path[len] == '/') ||
+ (i == len && len == cache.len)) {
+ match_len_prev = match_len;
match_len = len;
+ }
+ *previous_slash = match_len_prev;
return match_len;
}
@@ -63,7 +70,7 @@ static inline void reset_lstat_cache(int track_flags, int prefix_len_stat_func)
static int lstat_cache(int len, const char *name,
int track_flags, int prefix_len_stat_func)
{
- int match_len, last_slash, last_slash_dir;
+ int match_len, last_slash, last_slash_dir, previous_slash;
int match_flags, ret_flags, save_flags, max_len, ret;
struct stat st;
@@ -81,7 +88,8 @@ static int lstat_cache(int len, const char *name,
* Check to see if we have a match from the cache for
* the 2 "excluding" path types.
*/
- match_len = last_slash = longest_match_lstat_cache(len, name);
+ match_len = last_slash =
+ longest_match_lstat_cache(len, name, &previous_slash);
match_flags = cache.flags & track_flags & (FL_NOENT|FL_SYMLINK);
if (match_flags && match_len == cache.len)
return match_flags;
@@ -167,6 +175,26 @@ static int lstat_cache(int len, const char *name,
return ret_flags;
}
+/*
+ * Invalidate the given 'name' from the cache, if 'name' matches
+ * completely with the cache.
+ */
+void invalidate_lstat_cache(int len, const char *name)
+{
+ int match_len, previous_slash;
+
+ match_len = longest_match_lstat_cache(len, name, &previous_slash);
+ if (len == match_len) {
+ if (cache.track_flags & FL_DIR && previous_slash > 0) {
+ cache.path[previous_slash] = '\0';
+ cache.len = previous_slash;
+ cache.flags = FL_DIR;
+ } else
+ reset_lstat_cache(cache.track_flags,
+ cache.prefix_len_stat_func);
+ }
+}
+
#define USE_ONLY_LSTAT 0
/*
--
1.6.1.83.gd727f
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH v10 5/5] lstat_cache(): introduce clear_lstat_cache() function
2009-01-18 15:14 [PATCH v10 0/5] git checkout: optimise away lots of lstat() calls Kjetil Barvik
` (3 preceding siblings ...)
2009-01-18 15:14 ` [PATCH v10 4/5] lstat_cache(): introduce invalidate_lstat_cache() function Kjetil Barvik
@ 2009-01-18 15:14 ` Kjetil Barvik
4 siblings, 0 replies; 6+ messages in thread
From: Kjetil Barvik @ 2009-01-18 15:14 UTC (permalink / raw)
To: git; +Cc: Kjetil Barvik
If you want to completely clear the contents of the lstat_cache(), then
call this new function.
Signed-off-by: Kjetil Barvik <barvik@broadpark.no>
---
cache.h | 1 +
symlinks.c | 8 ++++++++
2 files changed, 9 insertions(+), 0 deletions(-)
diff --git a/cache.h b/cache.h
index efcceec..8e22c27 100644
--- a/cache.h
+++ b/cache.h
@@ -720,6 +720,7 @@ extern int has_symlink_leading_path(int len, const char *name);
extern int has_symlink_or_noent_leading_path(int len, const char *name);
extern int has_dirs_only_path(int len, const char *name, int prefix_len);
extern void invalidate_lstat_cache(int len, const char *name);
+extern void clear_lstat_cache(void);
extern struct alternate_object_database {
struct alternate_object_database *next;
diff --git a/symlinks.c b/symlinks.c
index dbdfec4..83cecd7 100644
--- a/symlinks.c
+++ b/symlinks.c
@@ -195,6 +195,14 @@ void invalidate_lstat_cache(int len, const char *name)
}
}
+/*
+ * Completely clear the contents of the cache
+ */
+void clear_lstat_cache(void)
+{
+ reset_lstat_cache(0, 0);
+}
+
#define USE_ONLY_LSTAT 0
/*
--
1.6.1.83.gd727f
^ permalink raw reply related [flat|nested] 6+ messages in thread