* [PATCH v4 1/9] lstat_cache(): small cleanup and optimisation
2009-02-09 20:54 [PATCH v4 0/9] git checkout: more cleanups, optimisation, less lstat() calls Kjetil Barvik
@ 2009-02-09 20:54 ` Kjetil Barvik
2009-02-09 20:54 ` [PATCH v4 2/9] lstat_cache(): generalise longest_match_lstat_cache() Kjetil Barvik
` (7 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Kjetil Barvik @ 2009-02-09 20:54 UTC (permalink / raw)
To: git; +Cc: Junio C Hamano, Kjetil Barvik
Simplify the if-else test in longest_match_lstat_cache() such that we
only have one simple if test. Instead of testing for 'i == cache.len'
or 'i == len', we transform this to a common test for 'i == max_len'.
And to further optimise we use 'i >= max_len' instead of 'i ==
max_len', the reason is that it is now the exact opposite of one part
inside the while-loop termination expression 'i < max_len && name[i]
== cache.path[i]', and then the compiler can probably reuse a test
instruction from it.
We also throw away the arguments to reset_lstat_cache(), such that all
the safeguard logic inside lstat_cache() is handled at one place.
Signed-off-by: Kjetil Barvik <barvik@broadpark.no>
---
symlinks.c | 44 ++++++++++++++++++++++++--------------------
1 files changed, 24 insertions(+), 20 deletions(-)
diff --git a/symlinks.c b/symlinks.c
index f262b7c..ae57e56 100644
--- a/symlinks.c
+++ b/symlinks.c
@@ -25,27 +25,30 @@ static inline int longest_match_lstat_cache(int len, const char *name,
}
i++;
}
- /* Is the cached path string a substring of 'name'? */
- if (i == cache.len && cache.len < len && name[cache.len] == '/') {
- match_len_prev = match_len;
- match_len = cache.len;
- /* Is 'name' a substring of the cached path string? */
- } else if ((i == len && len < cache.len && cache.path[len] == '/') ||
- (i == len && len == cache.len)) {
+ /*
+ * Is the cached path string a substring of 'name', is 'name'
+ * a substring of the cached path string, or is 'name' and the
+ * cached path string the exact same string?
+ */
+ if (i >= max_len && ((len > cache.len && name[cache.len] == '/') ||
+ (len < cache.len && cache.path[len] == '/') ||
+ (len == cache.len))) {
match_len_prev = match_len;
- match_len = len;
+ match_len = i;
}
*previous_slash = match_len_prev;
return match_len;
}
-static inline void reset_lstat_cache(int track_flags, int prefix_len_stat_func)
+static inline void reset_lstat_cache(void)
{
cache.path[0] = '\0';
cache.len = 0;
cache.flags = 0;
- cache.track_flags = track_flags;
- cache.prefix_len_stat_func = prefix_len_stat_func;
+ /*
+ * The track_flags and prefix_len_stat_func members is only
+ * set by the safeguard rule inside lstat_cache()
+ */
}
#define FL_DIR (1 << 0)
@@ -77,11 +80,13 @@ static int lstat_cache(int len, const char *name,
if (cache.track_flags != track_flags ||
cache.prefix_len_stat_func != prefix_len_stat_func) {
/*
- * As a safeguard we clear the cache if the values of
- * track_flags and/or prefix_len_stat_func does not
- * match with the last supplied values.
+ * As a safeguard rule we clear the cache if the
+ * values of track_flags and/or prefix_len_stat_func
+ * does not match with the last supplied values.
*/
- reset_lstat_cache(track_flags, prefix_len_stat_func);
+ reset_lstat_cache();
+ cache.track_flags = track_flags;
+ cache.prefix_len_stat_func = prefix_len_stat_func;
match_len = last_slash = 0;
} else {
/*
@@ -153,7 +158,7 @@ static int lstat_cache(int len, const char *name,
cache.path[last_slash] = '\0';
cache.len = last_slash;
cache.flags = save_flags;
- } else if (track_flags & FL_DIR &&
+ } else if ((track_flags & FL_DIR) &&
last_slash_dir > 0 && last_slash_dir <= PATH_MAX) {
/*
* We have a separate test for the directory case,
@@ -170,7 +175,7 @@ static int lstat_cache(int len, const char *name,
cache.len = last_slash_dir;
cache.flags = FL_DIR;
} else {
- reset_lstat_cache(track_flags, prefix_len_stat_func);
+ reset_lstat_cache();
}
return ret_flags;
}
@@ -190,8 +195,7 @@ void invalidate_lstat_cache(int len, const char *name)
cache.len = previous_slash;
cache.flags = FL_DIR;
} else
- reset_lstat_cache(cache.track_flags,
- cache.prefix_len_stat_func);
+ reset_lstat_cache();
}
}
@@ -200,7 +204,7 @@ void invalidate_lstat_cache(int len, const char *name)
*/
void clear_lstat_cache(void)
{
- reset_lstat_cache(0, 0);
+ reset_lstat_cache();
}
#define USE_ONLY_LSTAT 0
--
1.6.1.349.g99fa5
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH v4 2/9] lstat_cache(): generalise longest_match_lstat_cache()
2009-02-09 20:54 [PATCH v4 0/9] git checkout: more cleanups, optimisation, less lstat() calls Kjetil Barvik
2009-02-09 20:54 ` [PATCH v4 1/9] lstat_cache(): small cleanup and optimisation Kjetil Barvik
@ 2009-02-09 20:54 ` Kjetil Barvik
2009-02-09 20:54 ` [PATCH v4 3/9] lstat_cache(): swap func(length, string) into func(string, length) Kjetil Barvik
` (6 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Kjetil Barvik @ 2009-02-09 20:54 UTC (permalink / raw)
To: git; +Cc: Junio C Hamano, Kjetil Barvik
Rename the function to longst_path_match() and generalise it such that
it can also be used by other functions.
Signed-off-by: Kjetil Barvik <barvik@broadpark.no>
---
symlinks.c | 46 ++++++++++++++++++++++++----------------------
1 files changed, 24 insertions(+), 22 deletions(-)
diff --git a/symlinks.c b/symlinks.c
index ae57e56..4596aee 100644
--- a/symlinks.c
+++ b/symlinks.c
@@ -1,38 +1,30 @@
#include "cache.h"
-static struct cache_def {
- char path[PATH_MAX + 1];
- int len;
- int flags;
- int track_flags;
- int prefix_len_stat_func;
-} cache;
-
/*
* Returns the length (on a path component basis) of the longest
- * common prefix match of 'name' and the cached path string.
+ * common prefix match of 'name_a' and 'name_b'.
*/
-static inline int longest_match_lstat_cache(int len, const char *name,
- int *previous_slash)
+static int longest_path_match(const char *name_a, int len_a,
+ const char *name_b, int len_b,
+ int *previous_slash)
{
int max_len, match_len = 0, match_len_prev = 0, i = 0;
- max_len = len < cache.len ? len : cache.len;
- while (i < max_len && name[i] == cache.path[i]) {
- if (name[i] == '/') {
+ max_len = len_a < len_b ? len_a : len_b;
+ while (i < max_len && name_a[i] == name_b[i]) {
+ if (name_a[i] == '/') {
match_len_prev = match_len;
match_len = i;
}
i++;
}
/*
- * Is the cached path string a substring of 'name', is 'name'
- * a substring of the cached path string, or is 'name' and the
- * cached path string the exact same string?
+ * Is 'name_b' a substring of 'name_a', the other way around,
+ * or is 'name_a' and 'name_b' the exact same string?
*/
- if (i >= max_len && ((len > cache.len && name[cache.len] == '/') ||
- (len < cache.len && cache.path[len] == '/') ||
- (len == cache.len))) {
+ if (i >= max_len && ((len_a > len_b && name_a[len_b] == '/') ||
+ (len_a < len_b && name_b[len_a] == '/') ||
+ (len_a == len_b))) {
match_len_prev = match_len;
match_len = i;
}
@@ -40,6 +32,14 @@ static inline int longest_match_lstat_cache(int len, const char *name,
return match_len;
}
+static struct cache_def {
+ char path[PATH_MAX + 1];
+ int len;
+ int flags;
+ int track_flags;
+ int prefix_len_stat_func;
+} cache;
+
static inline void reset_lstat_cache(void)
{
cache.path[0] = '\0';
@@ -94,7 +94,8 @@ static int lstat_cache(int len, const char *name,
* the 2 "excluding" path types.
*/
match_len = last_slash =
- longest_match_lstat_cache(len, name, &previous_slash);
+ longest_path_match(name, len, cache.path, cache.len,
+ &previous_slash);
match_flags = cache.flags & track_flags & (FL_NOENT|FL_SYMLINK);
if (match_flags && match_len == cache.len)
return match_flags;
@@ -188,7 +189,8 @@ void invalidate_lstat_cache(int len, const char *name)
{
int match_len, previous_slash;
- match_len = longest_match_lstat_cache(len, name, &previous_slash);
+ match_len = longest_path_match(name, len, cache.path, cache.len,
+ &previous_slash);
if (len == match_len) {
if ((cache.track_flags & FL_DIR) && previous_slash > 0) {
cache.path[previous_slash] = '\0';
--
1.6.1.349.g99fa5
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH v4 3/9] lstat_cache(): swap func(length, string) into func(string, length)
2009-02-09 20:54 [PATCH v4 0/9] git checkout: more cleanups, optimisation, less lstat() calls Kjetil Barvik
2009-02-09 20:54 ` [PATCH v4 1/9] lstat_cache(): small cleanup and optimisation Kjetil Barvik
2009-02-09 20:54 ` [PATCH v4 2/9] lstat_cache(): generalise longest_match_lstat_cache() Kjetil Barvik
@ 2009-02-09 20:54 ` Kjetil Barvik
2009-02-09 20:54 ` [PATCH v4 4/9] unlink_entry(): introduce schedule_dir_for_removal() Kjetil Barvik
` (5 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Kjetil Barvik @ 2009-02-09 20:54 UTC (permalink / raw)
To: git; +Cc: Junio C Hamano, Kjetil Barvik
Swap function argument pair (length, string) into (string, length) to
conform with the commonly used order inside the GIT source code.
Also, add a note about this fact into the coding guidelines.
Signed-off-by: Kjetil Barvik <barvik@broadpark.no>
---
Documentation/CodingGuidelines | 3 +++
builtin-add.c | 2 +-
builtin-apply.c | 2 +-
builtin-update-index.c | 2 +-
cache.h | 8 ++++----
diff-lib.c | 2 +-
dir.c | 2 +-
entry.c | 2 +-
symlinks.c | 16 ++++++++--------
unpack-trees.c | 4 ++--
10 files changed, 23 insertions(+), 20 deletions(-)
diff --git a/Documentation/CodingGuidelines b/Documentation/CodingGuidelines
index 0d7fa9c..b8bf618 100644
--- a/Documentation/CodingGuidelines
+++ b/Documentation/CodingGuidelines
@@ -129,3 +129,6 @@ For C programs:
used in the git core command set (unless your command is clearly
separate from it, such as an importer to convert random-scm-X
repositories to git).
+
+ - When we pass <string, length> pair to functions, we should try to
+ pass them in that order.
diff --git a/builtin-add.c b/builtin-add.c
index ac98c83..a23ad96 100644
--- a/builtin-add.c
+++ b/builtin-add.c
@@ -148,7 +148,7 @@ static const char **validate_pathspec(int argc, const char **argv, const char *p
if (pathspec) {
const char **p;
for (p = pathspec; *p; p++) {
- if (has_symlink_leading_path(strlen(*p), *p)) {
+ if (has_symlink_leading_path(*p, strlen(*p))) {
int len = prefix ? strlen(prefix) : 0;
die("'%s' is beyond a symbolic link", *p + len);
}
diff --git a/builtin-apply.c b/builtin-apply.c
index f312798..106be94 100644
--- a/builtin-apply.c
+++ b/builtin-apply.c
@@ -2360,7 +2360,7 @@ static int check_to_create_blob(const char *new_name, int ok_if_exists)
* In such a case, path "new_name" does not exist as
* far as git is concerned.
*/
- if (has_symlink_leading_path(strlen(new_name), new_name))
+ if (has_symlink_leading_path(new_name, strlen(new_name)))
return 0;
return error("%s: already exists in working directory", new_name);
diff --git a/builtin-update-index.c b/builtin-update-index.c
index 5604977..6c55527 100644
--- a/builtin-update-index.c
+++ b/builtin-update-index.c
@@ -195,7 +195,7 @@ static int process_path(const char *path)
struct stat st;
len = strlen(path);
- if (has_symlink_leading_path(len, path))
+ if (has_symlink_leading_path(path, len))
return error("'%s' is beyond a symbolic link", path);
/*
diff --git a/cache.h b/cache.h
index 2d889de..80eeeb7 100644
--- a/cache.h
+++ b/cache.h
@@ -724,10 +724,10 @@ struct checkout {
};
extern int checkout_entry(struct cache_entry *ce, const struct checkout *state, char *topath);
-extern int has_symlink_leading_path(int len, const char *name);
-extern int has_symlink_or_noent_leading_path(int len, const char *name);
-extern int has_dirs_only_path(int len, const char *name, int prefix_len);
-extern void invalidate_lstat_cache(int len, const char *name);
+extern int has_symlink_leading_path(const char *name, int len);
+extern int has_symlink_or_noent_leading_path(const char *name, int len);
+extern int has_dirs_only_path(const char *name, int len, int prefix_len);
+extern void invalidate_lstat_cache(const char *name, int len);
extern void clear_lstat_cache(void);
extern struct alternate_object_database {
diff --git a/diff-lib.c b/diff-lib.c
index a41e1ec..a3ba20e 100644
--- a/diff-lib.c
+++ b/diff-lib.c
@@ -31,7 +31,7 @@ static int check_removed(const struct cache_entry *ce, struct stat *st)
return -1;
return 1;
}
- if (has_symlink_leading_path(ce_namelen(ce), ce->name))
+ if (has_symlink_leading_path(ce->name, ce_namelen(ce)))
return 1;
if (S_ISDIR(st->st_mode)) {
unsigned char sub[20];
diff --git a/dir.c b/dir.c
index cfd1ea5..8fb5226 100644
--- a/dir.c
+++ b/dir.c
@@ -720,7 +720,7 @@ int read_directory(struct dir_struct *dir, const char *path, const char *base, i
{
struct path_simplify *simplify;
- if (has_symlink_leading_path(strlen(path), path))
+ if (has_symlink_leading_path(path, strlen(path)))
return dir->nr;
simplify = create_simplify(pathspec);
diff --git a/entry.c b/entry.c
index 05aa58d..bb6bdb9 100644
--- a/entry.c
+++ b/entry.c
@@ -20,7 +20,7 @@ static void create_directories(const char *path, const struct checkout *state)
* we test the path components of the prefix with the
* stat() function instead of the lstat() function.
*/
- if (has_dirs_only_path(len, buf, state->base_dir_len))
+ if (has_dirs_only_path(buf, len, state->base_dir_len))
continue; /* ok, it is already a directory. */
/*
diff --git a/symlinks.c b/symlinks.c
index 4596aee..5167286 100644
--- a/symlinks.c
+++ b/symlinks.c
@@ -70,7 +70,7 @@ static inline void reset_lstat_cache(void)
* of the prefix, where the cache should use the stat() function
* instead of the lstat() function to test each path component.
*/
-static int lstat_cache(int len, const char *name,
+static int lstat_cache(const char *name, int len,
int track_flags, int prefix_len_stat_func)
{
int match_len, last_slash, last_slash_dir, previous_slash;
@@ -185,7 +185,7 @@ static int lstat_cache(int len, const char *name,
* Invalidate the given 'name' from the cache, if 'name' matches
* completely with the cache.
*/
-void invalidate_lstat_cache(int len, const char *name)
+void invalidate_lstat_cache(const char *name, int len)
{
int match_len, previous_slash;
@@ -214,9 +214,9 @@ void clear_lstat_cache(void)
/*
* Return non-zero if path 'name' has a leading symlink component
*/
-int has_symlink_leading_path(int len, const char *name)
+int has_symlink_leading_path(const char *name, int len)
{
- return lstat_cache(len, name,
+ return lstat_cache(name, len,
FL_SYMLINK|FL_DIR, USE_ONLY_LSTAT) &
FL_SYMLINK;
}
@@ -225,9 +225,9 @@ int has_symlink_leading_path(int len, const char *name)
* Return non-zero if path 'name' has a leading symlink component or
* if some leading path component does not exists.
*/
-int has_symlink_or_noent_leading_path(int len, const char *name)
+int has_symlink_or_noent_leading_path(const char *name, int len)
{
- return lstat_cache(len, name,
+ return lstat_cache(name, len,
FL_SYMLINK|FL_NOENT|FL_DIR, USE_ONLY_LSTAT) &
(FL_SYMLINK|FL_NOENT);
}
@@ -239,9 +239,9 @@ int has_symlink_or_noent_leading_path(int len, const char *name)
* 'prefix_len', thus we then allow for symlinks in the prefix part as
* long as those points to real existing directories.
*/
-int has_dirs_only_path(int len, const char *name, int prefix_len)
+int has_dirs_only_path(const char *name, int len, int prefix_len)
{
- return lstat_cache(len, name,
+ return lstat_cache(name, len,
FL_DIR|FL_FULLPATH, prefix_len) &
FL_DIR;
}
diff --git a/unpack-trees.c b/unpack-trees.c
index e547282..2293158 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -61,7 +61,7 @@ static void unlink_entry(struct cache_entry *ce)
char *cp, *prev;
char *name = ce->name;
- if (has_symlink_or_noent_leading_path(ce_namelen(ce), ce->name))
+ if (has_symlink_or_noent_leading_path(ce->name, ce_namelen(ce)))
return;
if (unlink(name))
return;
@@ -583,7 +583,7 @@ static int verify_absent(struct cache_entry *ce, const char *action,
if (o->index_only || o->reset || !o->update)
return 0;
- if (has_symlink_or_noent_leading_path(ce_namelen(ce), ce->name))
+ if (has_symlink_or_noent_leading_path(ce->name, ce_namelen(ce)))
return 0;
if (!lstat(ce->name, &st)) {
--
1.6.1.349.g99fa5
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH v4 4/9] unlink_entry(): introduce schedule_dir_for_removal()
2009-02-09 20:54 [PATCH v4 0/9] git checkout: more cleanups, optimisation, less lstat() calls Kjetil Barvik
` (2 preceding siblings ...)
2009-02-09 20:54 ` [PATCH v4 3/9] lstat_cache(): swap func(length, string) into func(string, length) Kjetil Barvik
@ 2009-02-09 20:54 ` Kjetil Barvik
2009-02-09 20:54 ` [PATCH v4 5/9] create_directories(): remove some memcpy() and strchr() calls Kjetil Barvik
` (4 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Kjetil Barvik @ 2009-02-09 20:54 UTC (permalink / raw)
To: git; +Cc: Junio C Hamano, Kjetil Barvik
Currently inside unlink_entry() if we get a successful removal of one
file with unlink(), we try to remove the leading directories each and
every time. So if one directory containing 200 files is moved to an
other location we get 199 failed calls to rmdir() and 1 successful
call.
To fix this and avoid some unnecessary calls to rmdir(), we schedule
each directory for removal and wait much longer before we do the real
call to rmdir().
Since the unlink_entry() function is called with alphabetically sorted
names, this new function end up being very effective to avoid
unnecessary calls to rmdir(). In some cases over 95% of all calls to
rmdir() is removed with this patch.
Signed-off-by: Kjetil Barvik <barvik@broadpark.no>
---
cache.h | 2 +
symlinks.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
unpack-trees.c | 30 +++++----------------------
3 files changed, 67 insertions(+), 24 deletions(-)
diff --git a/cache.h b/cache.h
index 80eeeb7..1bf2d4b 100644
--- a/cache.h
+++ b/cache.h
@@ -729,6 +729,8 @@ extern int has_symlink_or_noent_leading_path(const char *name, int len);
extern int has_dirs_only_path(const char *name, int len, int prefix_len);
extern void invalidate_lstat_cache(const char *name, int len);
extern void clear_lstat_cache(void);
+extern void schedule_dir_for_removal(const char *name, int len);
+extern void remove_scheduled_dirs(void);
extern struct alternate_object_database {
struct alternate_object_database *next;
diff --git a/symlinks.c b/symlinks.c
index 5167286..1d6b35b 100644
--- a/symlinks.c
+++ b/symlinks.c
@@ -245,3 +245,62 @@ int has_dirs_only_path(const char *name, int len, int prefix_len)
FL_DIR|FL_FULLPATH, prefix_len) &
FL_DIR;
}
+
+static struct removal_def {
+ char path[PATH_MAX];
+ int len;
+} removal;
+
+static void do_remove_scheduled_dirs(int new_len)
+{
+ while (removal.len > new_len) {
+ removal.path[removal.len] = '\0';
+ if (rmdir(removal.path))
+ break;
+ do {
+ removal.len--;
+ } while (removal.len > new_len &&
+ removal.path[removal.len] != '/');
+ }
+ removal.len = new_len;
+ return;
+}
+
+void schedule_dir_for_removal(const char *name, int len)
+{
+ int match_len, last_slash, i, previous_slash;
+
+ match_len = last_slash = i =
+ longest_path_match(name, len, removal.path, removal.len,
+ &previous_slash);
+ /* Find last slash inside 'name' */
+ while (i < len) {
+ if (name[i] == '/')
+ last_slash = i;
+ i++;
+ }
+
+ /*
+ * If we are about to go down the directory tree, we check if
+ * we must first go upwards the tree, such that we then can
+ * remove possible empty directories as we go upwards.
+ */
+ if (match_len < last_slash && match_len < removal.len)
+ do_remove_scheduled_dirs(match_len);
+ /*
+ * If we go deeper down the directory tree, we only need to
+ * save the new path components as we go down.
+ */
+ if (match_len < last_slash) {
+ memcpy(&removal.path[match_len], &name[match_len],
+ last_slash - match_len);
+ removal.len = last_slash;
+ }
+ return;
+}
+
+void remove_scheduled_dirs(void)
+{
+ do_remove_scheduled_dirs(0);
+ return;
+}
diff --git a/unpack-trees.c b/unpack-trees.c
index 2293158..e3c3fa1 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -52,36 +52,17 @@ static void add_entry(struct unpack_trees_options *o, struct cache_entry *ce,
add_index_entry(&o->result, new, ADD_CACHE_OK_TO_ADD|ADD_CACHE_OK_TO_REPLACE|ADD_CACHE_SKIP_DFCHECK);
}
-/* Unlink the last component and attempt to remove leading
- * directories, in case this unlink is the removal of the
- * last entry in the directory -- empty directories are removed.
+/*
+ * Unlink the last component and schedule the leading directories for
+ * removal, such that empty directories get removed.
*/
static void unlink_entry(struct cache_entry *ce)
{
- char *cp, *prev;
- char *name = ce->name;
-
if (has_symlink_or_noent_leading_path(ce->name, ce_namelen(ce)))
return;
- if (unlink(name))
+ if (unlink(ce->name))
return;
- prev = NULL;
- while (1) {
- int status;
- cp = strrchr(name, '/');
- if (prev)
- *prev = '/';
- if (!cp)
- break;
-
- *cp = 0;
- status = rmdir(name);
- if (status) {
- *cp = '/';
- break;
- }
- prev = cp;
- }
+ schedule_dir_for_removal(ce->name, ce_namelen(ce));
}
static struct checkout state;
@@ -117,6 +98,7 @@ static int check_updates(struct unpack_trees_options *o)
continue;
}
}
+ remove_scheduled_dirs();
for (i = 0; i < index->cache_nr; i++) {
struct cache_entry *ce = index->cache[i];
--
1.6.1.349.g99fa5
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH v4 5/9] create_directories(): remove some memcpy() and strchr() calls
2009-02-09 20:54 [PATCH v4 0/9] git checkout: more cleanups, optimisation, less lstat() calls Kjetil Barvik
` (3 preceding siblings ...)
2009-02-09 20:54 ` [PATCH v4 4/9] unlink_entry(): introduce schedule_dir_for_removal() Kjetil Barvik
@ 2009-02-09 20:54 ` Kjetil Barvik
2009-02-09 20:54 ` [PATCH v4 6/9] write_entry(): cleanup of some duplicated code Kjetil Barvik
` (3 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Kjetil Barvik @ 2009-02-09 20:54 UTC (permalink / raw)
To: git; +Cc: Junio C Hamano, Kjetil Barvik
Remove the call to memcpy() and strchr() for each path component
tested, and instead add each path component as we go forward inside
the while-loop.
Impact: small optimisation
Signed-off-by: Kjetil Barvik <barvik@broadpark.no>
---
entry.c | 23 ++++++++++++++---------
1 files changed, 14 insertions(+), 9 deletions(-)
diff --git a/entry.c b/entry.c
index bb6bdb9..cc8f0c6 100644
--- a/entry.c
+++ b/entry.c
@@ -2,15 +2,19 @@
#include "blob.h"
#include "dir.h"
-static void create_directories(const char *path, const struct checkout *state)
+static void create_directories(const char *path, int path_len,
+ const struct checkout *state)
{
- int len = strlen(path);
- char *buf = xmalloc(len + 1);
- const char *slash = path;
-
- while ((slash = strchr(slash+1, '/')) != NULL) {
- len = slash - path;
- memcpy(buf, path, len);
+ char *buf = xmalloc(path_len + 1);
+ int len = 0;
+
+ while (len < path_len) {
+ do {
+ buf[len] = path[len];
+ len++;
+ } while (len < path_len && path[len] != '/');
+ if (len >= path_len)
+ break;
buf[len] = 0;
/*
@@ -190,6 +194,7 @@ int checkout_entry(struct cache_entry *ce, const struct checkout *state, char *t
memcpy(path, state->base_dir, len);
strcpy(path + len, ce->name);
+ len += ce_namelen(ce);
if (!lstat(path, &st)) {
unsigned changed = ce_match_stat(ce, &st, CE_MATCH_IGNORE_VALID);
@@ -218,6 +223,6 @@ int checkout_entry(struct cache_entry *ce, const struct checkout *state, char *t
return error("unable to unlink old '%s' (%s)", path, strerror(errno));
} else if (state->not_new)
return 0;
- create_directories(path, state);
+ create_directories(path, len, state);
return write_entry(ce, path, state, 0);
}
--
1.6.1.349.g99fa5
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH v4 6/9] write_entry(): cleanup of some duplicated code
2009-02-09 20:54 [PATCH v4 0/9] git checkout: more cleanups, optimisation, less lstat() calls Kjetil Barvik
` (4 preceding siblings ...)
2009-02-09 20:54 ` [PATCH v4 5/9] create_directories(): remove some memcpy() and strchr() calls Kjetil Barvik
@ 2009-02-09 20:54 ` Kjetil Barvik
2009-02-09 20:54 ` [PATCH v4 7/9] write_entry(): use fstat() instead of lstat() when file is open Kjetil Barvik
` (2 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Kjetil Barvik @ 2009-02-09 20:54 UTC (permalink / raw)
To: git; +Cc: Junio C Hamano, Kjetil Barvik
The switch-cases for S_IFREG and S_IFLNK was so similar that it will
be better to do some cleanup and use the common parts of it.
And the entry.c file should now be clean for 'gcc -Wextra' warnings.
Signed-off-by: Kjetil Barvik <barvik@broadpark.no>
---
entry.c | 75 +++++++++++++++++++++++++-------------------------------------
1 files changed, 30 insertions(+), 45 deletions(-)
diff --git a/entry.c b/entry.c
index cc8f0c6..1f53588 100644
--- a/entry.c
+++ b/entry.c
@@ -78,7 +78,7 @@ static int create_file(const char *path, unsigned int mode)
return open(path, O_WRONLY | O_CREAT | O_EXCL, mode);
}
-static void *read_blob_entry(struct cache_entry *ce, const char *path, unsigned long *size)
+static void *read_blob_entry(struct cache_entry *ce, unsigned long *size)
{
enum object_type type;
void *new = read_sha1_file(ce->sha1, &type, size);
@@ -93,36 +93,51 @@ static void *read_blob_entry(struct cache_entry *ce, const char *path, unsigned
static int write_entry(struct cache_entry *ce, char *path, const struct checkout *state, int to_tempfile)
{
- int fd;
- long wrote;
-
- switch (ce->ce_mode & S_IFMT) {
- char *new;
- struct strbuf buf;
- unsigned long size;
-
+ unsigned int ce_mode_s_ifmt = ce->ce_mode & S_IFMT;
+ int fd, ret;
+ char *new;
+ struct strbuf buf = STRBUF_INIT;
+ unsigned long size;
+ size_t wrote, newsize = 0;
+
+ switch (ce_mode_s_ifmt) {
case S_IFREG:
- new = read_blob_entry(ce, path, &size);
+ case S_IFLNK:
+ new = read_blob_entry(ce, &size);
if (!new)
return error("git checkout-index: unable to read sha1 file of %s (%s)",
path, sha1_to_hex(ce->sha1));
+ if (ce_mode_s_ifmt == S_IFLNK && has_symlinks && !to_tempfile) {
+ ret = symlink(new, path);
+ free(new);
+ if (ret)
+ return error("git checkout-index: unable to create symlink %s (%s)",
+ path, strerror(errno));
+ break;
+ }
+
/*
* Convert from git internal format to working tree format
*/
- strbuf_init(&buf, 0);
- if (convert_to_working_tree(ce->name, new, size, &buf)) {
- size_t newsize = 0;
+ if (ce_mode_s_ifmt == S_IFREG &&
+ convert_to_working_tree(ce->name, new, size, &buf)) {
free(new);
new = strbuf_detach(&buf, &newsize);
size = newsize;
}
if (to_tempfile) {
- strcpy(path, ".merge_file_XXXXXX");
+ if (ce_mode_s_ifmt == S_IFREG)
+ strcpy(path, ".merge_file_XXXXXX");
+ else
+ strcpy(path, ".merge_link_XXXXXX");
fd = mkstemp(path);
- } else
+ } else if (ce_mode_s_ifmt == S_IFREG) {
fd = create_file(path, ce->ce_mode);
+ } else {
+ fd = create_file(path, 0666);
+ }
if (fd < 0) {
free(new);
return error("git checkout-index: unable to create file %s (%s)",
@@ -135,36 +150,6 @@ static int write_entry(struct cache_entry *ce, char *path, const struct checkout
if (wrote != size)
return error("git checkout-index: unable to write file %s", path);
break;
- case S_IFLNK:
- new = read_blob_entry(ce, path, &size);
- if (!new)
- return error("git checkout-index: unable to read sha1 file of %s (%s)",
- path, sha1_to_hex(ce->sha1));
- if (to_tempfile || !has_symlinks) {
- if (to_tempfile) {
- strcpy(path, ".merge_link_XXXXXX");
- fd = mkstemp(path);
- } else
- fd = create_file(path, 0666);
- if (fd < 0) {
- free(new);
- return error("git checkout-index: unable to create "
- "file %s (%s)", path, strerror(errno));
- }
- wrote = write_in_full(fd, new, size);
- close(fd);
- free(new);
- if (wrote != size)
- return error("git checkout-index: unable to write file %s",
- path);
- } else {
- wrote = symlink(new, path);
- free(new);
- if (wrote)
- return error("git checkout-index: unable to create "
- "symlink %s (%s)", path, strerror(errno));
- }
- break;
case S_IFGITLINK:
if (to_tempfile)
return error("git checkout-index: cannot create temporary subproject %s", path);
--
1.6.1.349.g99fa5
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH v4 7/9] write_entry(): use fstat() instead of lstat() when file is open
2009-02-09 20:54 [PATCH v4 0/9] git checkout: more cleanups, optimisation, less lstat() calls Kjetil Barvik
` (5 preceding siblings ...)
2009-02-09 20:54 ` [PATCH v4 6/9] write_entry(): cleanup of some duplicated code Kjetil Barvik
@ 2009-02-09 20:54 ` Kjetil Barvik
2009-02-09 20:54 ` [PATCH v4 8/9] show_patch_diff(): remove a call to fstat() Kjetil Barvik
2009-02-09 20:54 ` [PATCH v4 9/9] lstat_cache(): print a warning if doing ping-pong between cache types Kjetil Barvik
8 siblings, 0 replies; 10+ messages in thread
From: Kjetil Barvik @ 2009-02-09 20:54 UTC (permalink / raw)
To: git; +Cc: Junio C Hamano, Kjetil Barvik
Currently inside write_entry() we do an lstat(path, &st) call on a
file which have just been opened inside the exact same function. It
should be better to call fstat(fd, &st) on the file while it is open,
and it should be at least as fast as the lstat() method.
Signed-off-by: Kjetil Barvik <barvik@broadpark.no>
---
entry.c | 12 +++++++++---
1 files changed, 9 insertions(+), 3 deletions(-)
diff --git a/entry.c b/entry.c
index 1f53588..5daacc2 100644
--- a/entry.c
+++ b/entry.c
@@ -94,11 +94,12 @@ static void *read_blob_entry(struct cache_entry *ce, unsigned long *size)
static int write_entry(struct cache_entry *ce, char *path, const struct checkout *state, int to_tempfile)
{
unsigned int ce_mode_s_ifmt = ce->ce_mode & S_IFMT;
- int fd, ret;
+ int fd, ret, fstat_done = 0;
char *new;
struct strbuf buf = STRBUF_INIT;
unsigned long size;
size_t wrote, newsize = 0;
+ struct stat st;
switch (ce_mode_s_ifmt) {
case S_IFREG:
@@ -145,6 +146,11 @@ static int write_entry(struct cache_entry *ce, char *path, const struct checkout
}
wrote = write_in_full(fd, new, size);
+ /* use fstat() only when path == ce->name */
+ if (state->refresh_cache && !to_tempfile && !state->base_dir_len) {
+ fstat(fd, &st);
+ fstat_done = 1;
+ }
close(fd);
free(new);
if (wrote != size)
@@ -161,8 +167,8 @@ static int write_entry(struct cache_entry *ce, char *path, const struct checkout
}
if (state->refresh_cache) {
- struct stat st;
- lstat(ce->name, &st);
+ if (!fstat_done)
+ lstat(ce->name, &st);
fill_stat_cache_info(ce, &st);
}
return 0;
--
1.6.1.349.g99fa5
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH v4 8/9] show_patch_diff(): remove a call to fstat()
2009-02-09 20:54 [PATCH v4 0/9] git checkout: more cleanups, optimisation, less lstat() calls Kjetil Barvik
` (6 preceding siblings ...)
2009-02-09 20:54 ` [PATCH v4 7/9] write_entry(): use fstat() instead of lstat() when file is open Kjetil Barvik
@ 2009-02-09 20:54 ` Kjetil Barvik
2009-02-09 20:54 ` [PATCH v4 9/9] lstat_cache(): print a warning if doing ping-pong between cache types Kjetil Barvik
8 siblings, 0 replies; 10+ messages in thread
From: Kjetil Barvik @ 2009-02-09 20:54 UTC (permalink / raw)
To: git; +Cc: Junio C Hamano, Kjetil Barvik
Currently inside show_patch_diff() we have an fstat() call after an
ok lstat() call. Since before the call to fstat() we have already
tested for the link case with S_ISLNK(), the fstat() can be removed.
Signed-off-by: Kjetil Barvik <barvik@broadpark.no>
---
combine-diff.c | 4 +---
1 files changed, 1 insertions(+), 3 deletions(-)
diff --git a/combine-diff.c b/combine-diff.c
index bccc018..4300319 100644
--- a/combine-diff.c
+++ b/combine-diff.c
@@ -713,9 +713,7 @@ static void show_patch_diff(struct combine_diff_path *elem, int num_parent,
result_size = buf.len;
result = strbuf_detach(&buf, NULL);
elem->mode = canon_mode(st.st_mode);
- }
- else if (0 <= (fd = open(elem->path, O_RDONLY)) &&
- !fstat(fd, &st)) {
+ } else if (0 <= (fd = open(elem->path, O_RDONLY))) {
size_t len = xsize_t(st.st_size);
ssize_t done;
int is_file, i;
--
1.6.1.349.g99fa5
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH v4 9/9] lstat_cache(): print a warning if doing ping-pong between cache types
2009-02-09 20:54 [PATCH v4 0/9] git checkout: more cleanups, optimisation, less lstat() calls Kjetil Barvik
` (7 preceding siblings ...)
2009-02-09 20:54 ` [PATCH v4 8/9] show_patch_diff(): remove a call to fstat() Kjetil Barvik
@ 2009-02-09 20:54 ` Kjetil Barvik
8 siblings, 0 replies; 10+ messages in thread
From: Kjetil Barvik @ 2009-02-09 20:54 UTC (permalink / raw)
To: git; +Cc: Junio C Hamano, Kjetil Barvik
This is a debug patch which is only to be used while the lstat_cache()
is in the test stage, and should be removed/reverted before the final
relase.
I think it should be useful to catch these warnings, as I it could be
an indication of that the cache would not be very effective if it is
doing ping-pong by switching between different cache types too many
times.
Also, if someone is experimenting with the lstat_cache(), this patch
will maybe be useful while debugging.
If someone is able to trigger the warning, then send a mail to the GIT
mailing list, containing the first 15 lines of the warning, and a
short description of the GIT commands to trigger the warnings.
I hope someone is willing to use this patch for a while, to be able to
catch possible ping-pong's.
Signed-off-by: Kjetil Barvik <barvik@broadpark.no>
---
symlinks.c | 23 +++++++++++++++++++++++
1 files changed, 23 insertions(+), 0 deletions(-)
diff --git a/symlinks.c b/symlinks.c
index 1d6b35b..cb255a3 100644
--- a/symlinks.c
+++ b/symlinks.c
@@ -51,6 +51,11 @@ static inline void reset_lstat_cache(void)
*/
}
+#define SWITCHES_BEFORE_WARNING 10
+static unsigned int cache_switches, number_of_warnings;
+static unsigned int current_cache_func, last_cache_func;
+static unsigned int total_calls;
+
#define FL_DIR (1 << 0)
#define FL_NOENT (1 << 1)
#define FL_SYMLINK (1 << 2)
@@ -77,6 +82,7 @@ static int lstat_cache(const char *name, int len,
int match_flags, ret_flags, save_flags, max_len, ret;
struct stat st;
+ total_calls++;
if (cache.track_flags != track_flags ||
cache.prefix_len_stat_func != prefix_len_stat_func) {
/*
@@ -88,6 +94,17 @@ static int lstat_cache(const char *name, int len,
cache.track_flags = track_flags;
cache.prefix_len_stat_func = prefix_len_stat_func;
match_len = last_slash = 0;
+ cache_switches++;
+ if (cache_switches > SWITCHES_BEFORE_WARNING) {
+ if (number_of_warnings < 10 || number_of_warnings % 1000 == 0)
+ printf("warning from %s:%d cache_switches:%u > %u "\
+ "(current:%u last:%u total:%u)\n",
+ __FILE__, __LINE__,
+ cache_switches, SWITCHES_BEFORE_WARNING,
+ current_cache_func, last_cache_func,
+ total_calls);
+ number_of_warnings++;
+ }
} else {
/*
* Check to see if we have a match from the cache for
@@ -216,6 +233,8 @@ void clear_lstat_cache(void)
*/
int has_symlink_leading_path(const char *name, int len)
{
+ last_cache_func = current_cache_func;
+ current_cache_func = 1;
return lstat_cache(name, len,
FL_SYMLINK|FL_DIR, USE_ONLY_LSTAT) &
FL_SYMLINK;
@@ -227,6 +246,8 @@ int has_symlink_leading_path(const char *name, int len)
*/
int has_symlink_or_noent_leading_path(const char *name, int len)
{
+ last_cache_func = current_cache_func;
+ current_cache_func = 2;
return lstat_cache(name, len,
FL_SYMLINK|FL_NOENT|FL_DIR, USE_ONLY_LSTAT) &
(FL_SYMLINK|FL_NOENT);
@@ -241,6 +262,8 @@ int has_symlink_or_noent_leading_path(const char *name, int len)
*/
int has_dirs_only_path(const char *name, int len, int prefix_len)
{
+ last_cache_func = current_cache_func;
+ current_cache_func = 3;
return lstat_cache(name, len,
FL_DIR|FL_FULLPATH, prefix_len) &
FL_DIR;
--
1.6.1.349.g99fa5
^ permalink raw reply related [flat|nested] 10+ messages in thread