* [PATCH/RFC 1/4] Optimised, faster, more effective symlink/directory detection
2009-01-05 13:09 [PATCH/RFC 0/4] git checkout: optimise away lots of lstat() calls Kjetil Barvik
@ 2009-01-05 13:09 ` Kjetil Barvik
2009-01-06 8:19 ` Junio C Hamano
2009-01-05 13:09 ` [PATCH/RFC 2/4] Use 'lstat_cache()' instead of 'has_symlink_leading_path()' Kjetil Barvik
` (2 subsequent siblings)
3 siblings, 1 reply; 12+ messages in thread
From: Kjetil Barvik @ 2009-01-05 13:09 UTC (permalink / raw)
To: git; +Cc: Linus Torvalds, Junio C Hamano, Kjetil Barvik
This patch is work based on the following 2 commits:
Linus Torvalds: c40641b77b0274186fd1b327d5dc3246f814aaaf
Junio C Hamano: f859c846e90b385c7ef873df22403529208ade50
Changes includes the following:
- The cache functionality is more effective. Previously when A/B/C/D
was in the cache and A/B/C/E/file.c was called for, there was no
match at all from the cache. Now we use the fact that the paths
"A", "A/B" and "A/B/C" is already tested, and we only need to do an
lstat() call on "A/B/C/E".
- We only cache/store the last path regardless of it's type. Since the
cache functionality is always used with alphabetically sorted names
(at least it seams so for me), there is no need to store both the
last symlink-leading path and the last real-directory path. Note
that if the cache is not called with (mostly) alphabetically sorted
names, neither the old, nor this new one, would be very effective.
- We also can cache the fact that a directory does not exist.
Previously we could end up doing lots of lstat() calls for a removed
directory which previously contained lots of files. Since we
already have simplified the cache functionality and only store the
last path (see above), this new functionality was easy to add.
- Avoid copying the first path components of the name 2 zillions times
when we tests new path components. Since we always cache/store the
last path, we can copy each component as we test those directly into
the cache. Previously we ended up doing a memcpy() for the full
path/name right before each lstat() call, and when updating the
cache for each time we have tested an new path component.
- We also use less memory, that is PATH_MAX bytes less memory on the
stack and PATH_MAX bytes less memory on the heap.
- Introduce a 3rd argument, 'unsigned int track_flags', to the
cache-test function, check_lstat_cache(). This new argument can be
used to tell the cache functionality which types of directories
should be cached.
- Also introduce a 'void clear_lstat_cache(void)' function, which
should be used to clean the cache before usage. If for instance,
you have changed the types of directories which should be cached,
the cache could contain a path which was not wanted.
Signed-off-by: Kjetil Barvik <barvik@broadpark.no>
---
:100644 100644 aabf013... 7449b10... M Makefile
:100644 100644 231c06d... 7c246a4... M cache.h
:000000 100644 0000000... 34f4e9a... A lstat_cache.c
Makefile | 1 +
cache.h | 15 ++++++++
lstat_cache.c | 105 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 121 insertions(+), 0 deletions(-)
create mode 100644 lstat_cache.c
diff --git a/Makefile b/Makefile
index aabf0130b99bee5204c8e668ba8f40caea77dae2..7449b105b03e862d53244d50ed035b4ddabef028 100644
--- a/Makefile
+++ b/Makefile
@@ -446,6 +446,7 @@ LIB_OBJS += list-objects.o
LIB_OBJS += ll-merge.o
LIB_OBJS += lockfile.o
LIB_OBJS += log-tree.o
+LIB_OBJS += lstat_cache.o
LIB_OBJS += mailmap.o
LIB_OBJS += match-trees.o
LIB_OBJS += merge-file.o
diff --git a/cache.h b/cache.h
index 231c06d7726b575f6e522d5b0c0fe43557e8c651..7c246a42df3d60ac2c0f7431ff29ee8fb70235ce 100644
--- a/cache.h
+++ b/cache.h
@@ -721,6 +721,21 @@ struct checkout {
extern int checkout_entry(struct cache_entry *ce, const struct checkout *state, char *topath);
extern int has_symlink_leading_path(int len, const char *name);
+#define LSTAT_DIR (1u << 0)
+#define LSTAT_NOTDIR (1u << 1)
+#define LSTAT_SYMLINK (1u << 2)
+#define LSTAT_LSTATERR (1u << 3)
+#define LSTAT_ERR (1u << 4)
+extern unsigned int check_lstat_cache(int len, const char *name,
+ unsigned int track_flags);
+static inline unsigned int lstat_cache(int len, const char *name,
+ unsigned int track_flags,
+ unsigned int mask_flags)
+{
+ return check_lstat_cache(len, name, track_flags) & mask_flags;
+}
+extern void clear_lstat_cache(void);
+
extern struct alternate_object_database {
struct alternate_object_database *next;
char *name;
diff --git a/lstat_cache.c b/lstat_cache.c
new file mode 100644
index 0000000000000000000000000000000000000000..34f4e9a003905323f45a4f739d2a354e02dbbe93
--- /dev/null
+++ b/lstat_cache.c
@@ -0,0 +1,105 @@
+#include "cache.h"
+
+static char cache_path[PATH_MAX];
+static int cache_len = 0;
+static unsigned int cache_flags = 0;
+
+static inline int
+greatest_common_path_cache_prefix(int len, const char *name)
+{
+ int max_len, match_len = 0, i = 0;
+
+ max_len = len < cache_len ? len : cache_len;
+ while (i < max_len && name[i] == cache_path[i]) {
+ if (name[i] == '/') match_len = i;
+ i++;
+ }
+ if (i == cache_len && len > cache_len && name[cache_len] == '/')
+ match_len = cache_len;
+ return match_len;
+}
+
+static inline void
+update_path_cache(unsigned int ret_flags, unsigned int track_flags,
+ int last_slash)
+{
+ /* Max 3 different path types can be cached for the moment! */
+ unsigned int save_flags =
+ ret_flags & track_flags & (LSTAT_DIR|
+ LSTAT_NOTDIR|
+ LSTAT_SYMLINK);
+ if (save_flags && last_slash > 0 && last_slash < PATH_MAX) {
+ cache_flags = save_flags;
+ cache_len = last_slash;
+ } else {
+ cache_flags = 0;
+ cache_len = 0;
+ }
+}
+
+/*
+ * Check if name 'name' of length 'len' has a symlink leading
+ * component, or if the directory exists and is real, or not.
+ *
+ * To speed up the check, some information is allowed to be cached.
+ * This is indicated by the 'track_flags' argument.
+ */
+unsigned int
+check_lstat_cache(int len, const char *name, unsigned int track_flags)
+{
+ int match_len, last_slash, max_len;
+ unsigned int match_flags, ret_flags;
+ struct stat st;
+
+ /* Check if match from the cache for 2 "excluding" path types.
+ */
+ match_len = last_slash =
+ greatest_common_path_cache_prefix(len, name);
+ match_flags =
+ cache_flags & track_flags & (LSTAT_NOTDIR|
+ LSTAT_SYMLINK);
+ if (match_flags && match_len == cache_len)
+ return match_flags;
+
+ /* Okay, no match from the cache so far, so now we have to
+ * check the rest of the path components and update the cache.
+ */
+ ret_flags = LSTAT_DIR;
+ max_len = len < PATH_MAX ? len : PATH_MAX;
+ while (match_len < max_len) {
+ do {
+ cache_path[match_len] = name[match_len];
+ match_len++;
+ } while (match_len < max_len && name[match_len] != '/');
+ if (match_len >= max_len)
+ break;
+ last_slash = match_len;
+ cache_path[last_slash] = '\0';
+
+ if (lstat(cache_path, &st)) {
+ ret_flags = LSTAT_LSTATERR;
+ if (errno == ENOENT || errno == ENOTDIR)
+ ret_flags |= LSTAT_NOTDIR;
+ } else if (S_ISDIR(st.st_mode)) {
+ continue;
+ } else if (S_ISLNK(st.st_mode)) {
+ ret_flags = LSTAT_SYMLINK;
+ } else {
+ ret_flags = LSTAT_ERR;
+ }
+ break;
+ }
+ update_path_cache(ret_flags, track_flags, last_slash);
+ return ret_flags;
+}
+
+/*
+ * Before usage of the check_lstat_cache() function one should call
+ * clear_lstat_cache() (at an appropriate place) to make sure that the
+ * cache is clean before first call to check_lstat_cache().
+ */
+void clear_lstat_cache(void)
+{
+ cache_flags = 0;
+ cache_len = 0;
+}
--
1.6.1.rc1.49.g7f705
^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH/RFC 1/4] Optimised, faster, more effective symlink/directory detection
2009-01-05 13:09 ` [PATCH/RFC 1/4] Optimised, faster, more effective symlink/directory detection Kjetil Barvik
@ 2009-01-06 8:19 ` Junio C Hamano
2009-01-06 8:56 ` Junio C Hamano
2009-01-06 9:36 ` Kjetil Barvik
0 siblings, 2 replies; 12+ messages in thread
From: Junio C Hamano @ 2009-01-06 8:19 UTC (permalink / raw)
To: Kjetil Barvik; +Cc: git, Linus Torvalds
Kjetil Barvik <barvik@broadpark.no> writes:
> +static inline void
> +update_path_cache(unsigned int ret_flags, unsigned int track_flags,
> + int last_slash)
> +{
> + /* Max 3 different path types can be cached for the moment! */
> + unsigned int save_flags =
> + ret_flags & track_flags & (LSTAT_DIR|
> + LSTAT_NOTDIR|
> + LSTAT_SYMLINK);
> + if (save_flags && last_slash > 0 && last_slash < PATH_MAX) {
> + cache_flags = save_flags;
> + cache_len = last_slash;
> + } else {
> + cache_flags = 0;
> + cache_len = 0;
> + }
> +}
I personally found this inline function with a single call site
distracting in following the logic. It does not make the indentation
level shallower, either. Also, the else part should probably call
clear_lstat_cache() to protect it from possible future enhancements to add
more state variables.
> +
> +/*
> + * Check if name 'name' of length 'len' has a symlink leading
> + * component, or if the directory exists and is real, or not.
> + *
> + * To speed up the check, some information is allowed to be cached.
> + * This is indicated by the 'track_flags' argument.
> + */
> +unsigned int
> +check_lstat_cache(int len, const char *name, unsigned int track_flags)
> +{
> + int match_len, last_slash, max_len;
> + unsigned int match_flags, ret_flags;
> + struct stat st;
> +
> + /* Check if match from the cache for 2 "excluding" path types.
> + */
> + match_len = last_slash =
> + greatest_common_path_cache_prefix(len, name);
> + match_flags =
> + cache_flags & track_flags & (LSTAT_NOTDIR|
> + LSTAT_SYMLINK);
> + if (match_flags && match_len == cache_len)
> + return match_flags;
Let me see if I understand the logic behind this caching. When you have
checked A/B/C earlier and you already know B is a symlink, you remember
that A/B was a symlink.. You can fill a request to check A/B/$whatever
(assuming A/B does not change --- otherwise the caller should clear the
cache) from the cached data, because no matter what $whatever is, it will
result in the same "has-leading-symlink".
Similarly, if you know A/B is not a directory from an earlier test, you
know that a request to check A/B/$whatever will result in the same ENOTDIR
no matter what $whatever is, so you can return early.
The above "return match_flags" will not trigger if the cached path does
not have any leading symlink. So we know the matched part are all good
directories when we start lstat() loop.
Am I following you so far?
> + /* Okay, no match from the cache so far, so now we have to
> + * check the rest of the path components and update the cache.
> + */
> + ret_flags = LSTAT_DIR;
> + max_len = len < PATH_MAX ? len : PATH_MAX;
> + while (match_len < max_len) {
> + do {
> + cache_path[match_len] = name[match_len];
> + match_len++;
> + } while (match_len < max_len && name[match_len] != '/');
You take one component from the input, and append it to the part that is
already known to be true directory (i.e. cached part and the part earlier
iteration of the loop checked so far), to be tested by lstat()...
> + if (match_len >= max_len)
> + break;
... but you are not interested in the full input. We are only checking
the leading path (e.g. check for "A/B/C" may lstat() "A", "A/B" but not
"A/B/C").
> + last_slash = match_len;
> + cache_path[last_slash] = '\0';
> +
> + if (lstat(cache_path, &st)) {
> + ret_flags = LSTAT_LSTATERR;
> + if (errno == ENOENT || errno == ENOTDIR)
> + ret_flags |= LSTAT_NOTDIR;
If you tested "A/B" here and got ENOENT back, you know "A/B" does not
exist; you cache this knowledge as "A/B is not a directory" (I also think
you could use it as a cached knowledge that "A exists and is a directory".
I am not sure if you are taking advantage of that).
What I do not understand about this code is the ENOTDIR case. If you
tested "A/B" and got ENOTDIR back, what you know is that "A" is not a
directory (if the path tested this round were deeper like "X/Y/A/B", you
know "X/Y/A" is not a directory, and you know "X" and "X/Y" are true
directories; otherwise the loop would have exited before this round when
you tested "X" or "X/Y" in the earlier rounds).
So as far as I can think of, ENOENT case and ENOTDIR case would give you
different information (ENOENT would say "A is a dir, A/B is not"; ENOTDIR
would say "A is not a dir"). I am confused how you can cache the same
path and same flag between these two cases here.
> + } else if (S_ISDIR(st.st_mode)) {
> + continue;
> + } else if (S_ISLNK(st.st_mode)) {
> + ret_flags = LSTAT_SYMLINK;
> + } else {
> + ret_flags = LSTAT_ERR;
> + }
> + break;
> + }
> + update_path_cache(ret_flags, track_flags, last_slash);
> + return ret_flags;
> +}
> +
> +/*
> + * Before usage of the check_lstat_cache() function one should call
> + * clear_lstat_cache() (at an appropriate place) to make sure that the
> + * cache is clean before first call to check_lstat_cache().
> + */
> +void clear_lstat_cache(void)
> +{
> + cache_flags = 0;
> + cache_len = 0;
> +}
> --
> 1.6.1.rc1.49.g7f705
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH/RFC 1/4] Optimised, faster, more effective symlink/directory detection
2009-01-06 8:19 ` Junio C Hamano
@ 2009-01-06 8:56 ` Junio C Hamano
2009-01-06 9:56 ` Kjetil Barvik
2009-01-06 13:45 ` Kjetil Barvik
2009-01-06 9:36 ` Kjetil Barvik
1 sibling, 2 replies; 12+ messages in thread
From: Junio C Hamano @ 2009-01-06 8:56 UTC (permalink / raw)
To: Kjetil Barvik; +Cc: git, Linus Torvalds
Junio C Hamano <gitster@pobox.com> writes:
> Let me see if I understand the logic behind this caching....
> ...
>> + last_slash = match_len;
>> + cache_path[last_slash] = '\0';
>> +
>> + if (lstat(cache_path, &st)) {
>> + ret_flags = LSTAT_LSTATERR;
>> + if (errno == ENOENT || errno == ENOTDIR)
>> + ret_flags |= LSTAT_NOTDIR;
>
> If you tested "A/B" here and got ENOENT back, you know "A/B" does not
> exist; you cache this knowledge as "A/B is not a directory" (I also think
> you could use it as a cached knowledge that "A exists and is a directory".
> I am not sure if you are taking advantage of that).
>
> What I do not understand about this code is the ENOTDIR case. If you
> tested "A/B" and got ENOTDIR back, what you know is that "A" is not a
> directory (if the path tested this round were deeper like "X/Y/A/B", you
> know "X/Y/A" is not a directory, and you know "X" and "X/Y" are true
> directories; otherwise the loop would have exited before this round when
> you tested "X" or "X/Y" in the earlier rounds).
>
> So as far as I can think of, ENOENT case and ENOTDIR case would give you
> different information (ENOENT would say "A is a dir, A/B is not"; ENOTDIR
> would say "A is not a dir"). I am confused how you can cache the same
> path and same flag between these two cases here.
I take 1/4 of the above back.
If everything is working correctly, I do not think you will ever have a
situation where you check "A/B" here and get ENOTDIR back. lstat("A/B")
would yield ENOTDIR if "A" is not a directory, but:
(1) If you did test "A" in the earlier round in the loop, you would have
already found it is not a directory and would have exited the loop
with LSTAT_ERR. You cannot test "A/B" in such a case;
(2) If you did not test "A" in the loop, that has to be because you had a
cached information for it, and the caching logic would have returned
early if "A" was a non-directory without entering the loop; in other
words, you would test "A/B" here without testing "A" in the loop only
when the cache says "A" was a directory. You cannot get ENOTDIR in
such a case.
In any case, my main puzzlement still stands. I think ENOTDIR case should
behave differently from ENOENT case.
And I think it is an indication of a grave error, either this code is
racing with an external process that is actively mucking with the work
tree to make your cached information unusable, or somebody forgot to call
clear_lstat_cache().
Hmm?
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH/RFC 1/4] Optimised, faster, more effective symlink/directory detection
2009-01-06 8:56 ` Junio C Hamano
@ 2009-01-06 9:56 ` Kjetil Barvik
2009-01-06 13:45 ` Kjetil Barvik
1 sibling, 0 replies; 12+ messages in thread
From: Kjetil Barvik @ 2009-01-06 9:56 UTC (permalink / raw)
To: Junio C Hamano; +Cc: git, Linus Torvalds
Junio C Hamano <gitster@pobox.com> writes:
<snipp>
> If everything is working correctly, I do not think you will ever have a
> situation where you check "A/B" here and get ENOTDIR back. lstat("A/B")
> would yield ENOTDIR if "A" is not a directory, but:
>
> (1) If you did test "A" in the earlier round in the loop, you would have
> already found it is not a directory and would have exited the loop
> with LSTAT_ERR. You cannot test "A/B" in such a case;
No, if the test lstat("A") already returned -1 it would then have set
errno to ENOTDIR in this case, I think. So in this case the cache
would have only stored "A".
> (2) If you did not test "A" in the loop, that has to be because you had a
> cached information for it, and the caching logic would have returned
> early if "A" was a non-directory without entering the loop; in other
> words, you would test "A/B" here without testing "A" in the loop only
> when the cache says "A" was a directory. You cannot get ENOTDIR in
> such a case.
>
> In any case, my main puzzlement still stands. I think ENOTDIR case should
> behave differently from ENOENT case.
>
> And I think it is an indication of a grave error, either this code is
> racing with an external process that is actively mucking with the work
> tree to make your cached information unusable, or somebody forgot to call
> clear_lstat_cache().
>
> Hmm?
I think that you do not see that the when the lstat() function returns
errno =
ENOENT A component of the path path does not exist, or the path is
an empty string.
ENOTDIR A component of the path is not a directory.
the cache logic will/must assume that the component we got an error on
will/must always be the _last_ component in the path.
> I take 1/4 of the above back.
Will this means that I do not have to update the patch to version 2?
Note, one thing what I personally do not like with this patch so far,
is that it makes a new file, and then it kinda hides the comments from
Linus which talks aboth extending the logic. It would maybe be better
to have reused the same file (symlinks.c), but then the function name
('lstat_cache()') and the filename would not match.
-- kjetil
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH/RFC 1/4] Optimised, faster, more effective symlink/directory detection
2009-01-06 8:56 ` Junio C Hamano
2009-01-06 9:56 ` Kjetil Barvik
@ 2009-01-06 13:45 ` Kjetil Barvik
1 sibling, 0 replies; 12+ messages in thread
From: Kjetil Barvik @ 2009-01-06 13:45 UTC (permalink / raw)
To: Junio C Hamano; +Cc: git, Linus Torvalds
Junio C Hamano <gitster@pobox.com> writes:
<snipp>
> If everything is working correctly, I do not think you will ever have a
> situation where you check "A/B" here and get ENOTDIR back. lstat("A/B")
> would yield ENOTDIR if "A" is not a directory, but:
>
> (1) If you did test "A" in the earlier round in the loop, you would have
> already found it is not a directory and would have exited the loop
> with LSTAT_ERR. You cannot test "A/B" in such a case;
>
> (2) If you did not test "A" in the loop, that has to be because you had a
> cached information for it, and the caching logic would have returned
> early if "A" was a non-directory without entering the loop; in other
> words, you would test "A/B" here without testing "A" in the loop only
> when the cache says "A" was a directory. You cannot get ENOTDIR in
> such a case.
>
> In any case, my main puzzlement still stands. I think ENOTDIR case should
> behave differently from ENOENT case.
>
> And I think it is an indication of a grave error, either this code is
> racing with an external process that is actively mucking with the work
> tree to make your cached information unusable, or somebody forgot to call
> clear_lstat_cache().
>
> Hmm?
I have looked at this once more, and I have to admit that what you
have said is correct. Sorry for being confusing! I will update the
patch by removing the 'errno == ENOTDIR' part from the if-test.
-- kjetil
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH/RFC 1/4] Optimised, faster, more effective symlink/directory detection
2009-01-06 8:19 ` Junio C Hamano
2009-01-06 8:56 ` Junio C Hamano
@ 2009-01-06 9:36 ` Kjetil Barvik
1 sibling, 0 replies; 12+ messages in thread
From: Kjetil Barvik @ 2009-01-06 9:36 UTC (permalink / raw)
To: Junio C Hamano; +Cc: git, Linus Torvalds
Junio C Hamano <gitster@pobox.com> writes:
> Kjetil Barvik <barvik@broadpark.no> writes:
>
>> +static inline void
>> +update_path_cache(unsigned int ret_flags, unsigned int track_flags,
>> + int last_slash)
>> +{
>> + /* Max 3 different path types can be cached for the moment! */
>> + unsigned int save_flags =
>> + ret_flags & track_flags & (LSTAT_DIR|
>> + LSTAT_NOTDIR|
>> + LSTAT_SYMLINK);
>> + if (save_flags && last_slash > 0 && last_slash < PATH_MAX) {
>> + cache_flags = save_flags;
>> + cache_len = last_slash;
>> + } else {
>> + cache_flags = 0;
>> + cache_len = 0;
>> + }
>> +}
>
> I personally found this inline function with a single call site
> distracting in following the logic. It does not make the indentation
> level shallower, either. Also, the else part should probably call
> clear_lstat_cache() to protect it from possible future enhancements to add
> more state variables.
Ok, I will remove the function and update the else-part.
>> +
>> +/*
>> + * Check if name 'name' of length 'len' has a symlink leading
>> + * component, or if the directory exists and is real, or not.
>> + *
>> + * To speed up the check, some information is allowed to be cached.
>> + * This is indicated by the 'track_flags' argument.
>> + */
>> +unsigned int
>> +check_lstat_cache(int len, const char *name, unsigned int track_flags)
>> +{
>> + int match_len, last_slash, max_len;
>> + unsigned int match_flags, ret_flags;
>> + struct stat st;
>> +
>> + /* Check if match from the cache for 2 "excluding" path types.
>> + */
>> + match_len = last_slash =
>> + greatest_common_path_cache_prefix(len, name);
>> + match_flags =
>> + cache_flags & track_flags & (LSTAT_NOTDIR|
>> + LSTAT_SYMLINK);
>> + if (match_flags && match_len == cache_len)
>> + return match_flags;
>
> Let me see if I understand the logic behind this caching. When you have
> checked A/B/C earlier and you already know B is a symlink, you remember
> that A/B was a symlink.. You can fill a request to check A/B/$whatever
> (assuming A/B does not change --- otherwise the caller should clear the
> cache) from the cached data, because no matter what $whatever is, it will
> result in the same "has-leading-symlink".
>
> Similarly, if you know A/B is not a directory from an earlier test, you
> know that a request to check A/B/$whatever will result in the same ENOTDIR
> no matter what $whatever is, so you can return early.
>
> The above "return match_flags" will not trigger if the cached path does
> not have any leading symlink. So we know the matched part are all good
> directories when we start lstat() loop.
>
> Am I following you so far?
Yes you do!
>> + /* Okay, no match from the cache so far, so now we have to
>> + * check the rest of the path components and update the cache.
>> + */
>> + ret_flags = LSTAT_DIR;
>> + max_len = len < PATH_MAX ? len : PATH_MAX;
>> + while (match_len < max_len) {
>> + do {
>> + cache_path[match_len] = name[match_len];
>> + match_len++;
>> + } while (match_len < max_len && name[match_len] != '/');
>
> You take one component from the input, and append it to the part that is
> already known to be true directory (i.e. cached part and the part earlier
> iteration of the loop checked so far), to be tested by lstat()...
>
>> + if (match_len >= max_len)
>> + break;
>
> ... but you are not interested in the full input.
If the lengt of name is larger than PATH_MAX all lstat() calls would
fail with an ENAMETOOLONG error, at least on my Linux box, so I
thought that it was not nessarry to test further. But maybe we should
emdiatly return an error if name is too long?
> We are only checking the leading path (e.g. check for "A/B/C" may
> lstat() "A", "A/B" but not "A/B/C").
That is correct, and it is the same logic as in the
has_symlink_leading_path() function.
>> + last_slash = match_len;
>> + cache_path[last_slash] = '\0';
>> +
>> + if (lstat(cache_path, &st)) {
>> + ret_flags = LSTAT_LSTATERR;
>> + if (errno == ENOENT || errno == ENOTDIR)
>> + ret_flags |= LSTAT_NOTDIR;
>
> If you tested "A/B" here and got ENOENT back, you know "A/B" does not
> exist; you cache this knowledge as "A/B is not a directory"
Correct.
> (I also think you could use it as a cached knowledge that "A exists
> and is a directory". I am not sure if you are taking advantage of
> that).
It does take adavantage of this fact. It will also do simmilar things
with a symlink cached path.
> What I do not understand about this code is the ENOTDIR case. If you
> tested "A/B" and got ENOTDIR back, what you know is that "A" is not a
> directory (if the path tested this round were deeper like "X/Y/A/B", you
> know "X/Y/A" is not a directory, and you know "X" and "X/Y" are true
> directories; otherwise the loop would have exited before this round when
> you tested "X" or "X/Y" in the earlier rounds).
Since the cache is supoosed to start from a known existing directory,
and is testing each path component when the lstat("A/B") calls returns
ENOTDIR, we should know the fact that the directory "A" exists, and
that "A/B/" does not exists.
> So as far as I can think of, ENOENT case and ENOTDIR case would give you
> different information (ENOENT would say "A is a dir, A/B is not"; ENOTDIR
> would say "A is not a dir"). I am confused how you can cache the same
> path and same flag between these two cases here.
I admit that I used copy-and-paste from other similar test's from the
sourcecode:
kjetil@localhost ~/git/git $ grep -Hn ENOENT.*ENOTDIR *.c
builtin-apply.c:2364: else if ((errno != ENOENT) && (errno != ENOTDIR))
builtin-update-index.c:74: * - missing file (ENOENT or ENOTDIR). That's ok if we're
builtin-update-index.c:81: if (err == ENOENT || err == ENOTDIR)
diff-lib.c:30: if (errno != ENOENT && errno != ENOTDIR)
lstat_cache.c:81: if (errno == ENOENT || errno == ENOTDIR)
setup.c:191: if (errno != ENOENT && errno != ENOTDIR)
From the 'man lstat' page I read the following for this 2 error codes:
ENOENT A component of the path _path_ does not exist, or the path is an empty string.
ENOTDIR A component of the path is not a directory.
I would have guessed that for what we is looking for, it would be
correct to threat both these error codes as the same. Is this wrong?
-- kjetil
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH/RFC 2/4] Use 'lstat_cache()' instead of 'has_symlink_leading_path()'
2009-01-05 13:09 [PATCH/RFC 0/4] git checkout: optimise away lots of lstat() calls Kjetil Barvik
2009-01-05 13:09 ` [PATCH/RFC 1/4] Optimised, faster, more effective symlink/directory detection Kjetil Barvik
@ 2009-01-05 13:09 ` Kjetil Barvik
2009-01-06 8:19 ` Junio C Hamano
2009-01-05 13:10 ` [PATCH/RFC 3/4] create_directories() inside entry.c: only check each directory once! Kjetil Barvik
2009-01-05 13:10 ` [PATCH/RFC 4/4] remove the old 'has_symlink_leading_path()' function Kjetil Barvik
3 siblings, 1 reply; 12+ messages in thread
From: Kjetil Barvik @ 2009-01-05 13:09 UTC (permalink / raw)
To: git; +Cc: Linus Torvalds, Junio C Hamano, Kjetil Barvik
Start using the optimised, faster and more effective symlink/directory
cache. The previously used call:
has_symlink_leading_path(len, name);
should be identically with the following call to lstat_cache():
lstat_cache(len, name,
LSTAT_SYMLINK|LSTAT_DIR,
LSTAT_SYMLINK);
The primary reason for the new name of the function (instead of using
the old name and add 2 extra arguments), is that it is now more
general, for instance, it now also can cache the fact that a directory
does not exists.
I noticed that inside the unlink_entry() function in unpack-trees.c,
one could often end up calling rmdir() lots and lots of times on
none-empty directories. Maybe one should schedule each directory for
removal by an appropriate function, and then at the end call a new
function to clean all the directories at once?
Signed-off-by: Kjetil Barvik <barvik@broadpark.no>
---
:100644 100644 719de8b... 152c52c... M builtin-add.c
:100644 100644 a8f75ed... 0eb2b21... M builtin-apply.c
:100644 100644 65d5775... fa7d994... M builtin-update-index.c
:100644 100644 ae96c64... 127bdf2... M diff-lib.c
:100644 100644 0131983... 9f2a1b1... M dir.c
:100644 100644 54f301d... 93923db... M unpack-trees.c
builtin-add.c | 5 ++++-
builtin-apply.c | 5 ++++-
builtin-update-index.c | 5 ++++-
diff-lib.c | 5 ++++-
dir.c | 4 +++-
unpack-trees.c | 9 +++++++--
6 files changed, 26 insertions(+), 7 deletions(-)
diff --git a/builtin-add.c b/builtin-add.c
index 719de8b0f2d2d831f326d948aa18700e5c474950..152c52c0f22b3e71931b2a5629e1472602817785 100644
--- a/builtin-add.c
+++ b/builtin-add.c
@@ -121,7 +121,9 @@ static const char **validate_pathspec(int argc, const char **argv, const char *p
if (pathspec) {
const char **p;
for (p = pathspec; *p; p++) {
- if (has_symlink_leading_path(strlen(*p), *p)) {
+ if (lstat_cache(strlen(*p), *p,
+ LSTAT_SYMLINK|LSTAT_DIR,
+ LSTAT_SYMLINK)) {
int len = prefix ? strlen(prefix) : 0;
die("'%s' is beyond a symbolic link", *p + len);
}
@@ -225,6 +227,7 @@ int cmd_add(int argc, const char **argv, const char *prefix)
argc = parse_options(argc, argv, builtin_add_options,
builtin_add_usage, 0);
+ clear_lstat_cache();
if (patch_interactive)
add_interactive = 1;
if (add_interactive)
diff --git a/builtin-apply.c b/builtin-apply.c
index a8f75ed3ed411d8cf7a3ec9dfefef7407c50f447..0eb2b21ee245919189c780a64cc494ca35f7934a 100644
--- a/builtin-apply.c
+++ b/builtin-apply.c
@@ -2354,7 +2354,9 @@ static int check_to_create_blob(const char *new_name, int ok_if_exists)
* In such a case, path "new_name" does not exist as
* far as git is concerned.
*/
- if (has_symlink_leading_path(strlen(new_name), new_name))
+ if (lstat_cache(strlen(new_name), new_name,
+ LSTAT_SYMLINK|LSTAT_DIR,
+ LSTAT_SYMLINK))
return 0;
return error("%s: already exists in working directory", new_name);
@@ -3154,6 +3156,7 @@ int cmd_apply(int argc, const char **argv, const char *unused_prefix)
if (apply_default_whitespace)
parse_whitespace_option(apply_default_whitespace);
+ clear_lstat_cache();
for (i = 1; i < argc; i++) {
const char *arg = argv[i];
char *end;
diff --git a/builtin-update-index.c b/builtin-update-index.c
index 65d5775107f9013526cc5b288a80a00b449e8814..fa7d994b3cfe7343c1a181f4c6f6a4c6ee6cea75 100644
--- a/builtin-update-index.c
+++ b/builtin-update-index.c
@@ -195,7 +195,9 @@ static int process_path(const char *path)
struct stat st;
len = strlen(path);
- if (has_symlink_leading_path(len, path))
+ if (lstat_cache(len, path,
+ LSTAT_SYMLINK|LSTAT_DIR,
+ LSTAT_SYMLINK))
return error("'%s' is beyond a symbolic link", path);
/*
@@ -581,6 +583,7 @@ int cmd_update_index(int argc, const char **argv, const char *prefix)
if (entries < 0)
die("cache corrupted");
+ clear_lstat_cache();
for (i = 1 ; i < argc; i++) {
const char *path = argv[i];
const char *p;
diff --git a/diff-lib.c b/diff-lib.c
index ae96c64ca209f4df9008198e8a04b160bed618c7..127bdf2dfbeb3538fa01e500ed8bcd3f4c8d422b 100644
--- a/diff-lib.c
+++ b/diff-lib.c
@@ -31,7 +31,9 @@ static int check_removed(const struct cache_entry *ce, struct stat *st)
return -1;
return 1;
}
- if (has_symlink_leading_path(ce_namelen(ce), ce->name))
+ if (lstat_cache(ce_namelen(ce), ce->name,
+ LSTAT_SYMLINK|LSTAT_NOTDIR|LSTAT_DIR,
+ LSTAT_SYMLINK))
return 1;
if (S_ISDIR(st->st_mode)) {
unsigned char sub[20];
@@ -69,6 +71,7 @@ int run_diff_files(struct rev_info *revs, unsigned int option)
diff_unmerged_stage = 2;
entries = active_nr;
symcache[0] = '\0';
+ clear_lstat_cache();
for (i = 0; i < entries; i++) {
struct stat st;
unsigned int oldmode, newmode;
diff --git a/dir.c b/dir.c
index 0131983dfbc143ce5dae77e067663bb2e7d5f126..9f2a1b1f245c3f1d4f12d54d45bb193c53fa15b5 100644
--- a/dir.c
+++ b/dir.c
@@ -719,7 +719,9 @@ int read_directory(struct dir_struct *dir, const char *path, const char *base, i
{
struct path_simplify *simplify;
- if (has_symlink_leading_path(strlen(path), path))
+ if (lstat_cache(strlen(path), path,
+ LSTAT_SYMLINK|LSTAT_DIR,
+ LSTAT_SYMLINK))
return dir->nr;
simplify = create_simplify(pathspec);
diff --git a/unpack-trees.c b/unpack-trees.c
index 54f301da67be879c80426bc21776427fdd38c02e..93923dbbc6ab80deadfd737aa9975f6e5a4d1e89 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -61,7 +61,9 @@ static void unlink_entry(struct cache_entry *ce)
char *cp, *prev;
char *name = ce->name;
- if (has_symlink_leading_path(ce_namelen(ce), ce->name))
+ if (lstat_cache(ce_namelen(ce), ce->name,
+ LSTAT_SYMLINK|LSTAT_NOTDIR|LSTAT_DIR,
+ LSTAT_SYMLINK|LSTAT_NOTDIR))
return;
if (unlink(name))
return;
@@ -105,6 +107,7 @@ static int check_updates(struct unpack_trees_options *o)
cnt = 0;
}
+ clear_lstat_cache();
for (i = 0; i < index->cache_nr; i++) {
struct cache_entry *ce = index->cache[i];
@@ -584,7 +587,9 @@ static int verify_absent(struct cache_entry *ce, const char *action,
if (o->index_only || o->reset || !o->update)
return 0;
- if (has_symlink_leading_path(ce_namelen(ce), ce->name))
+ if (lstat_cache(ce_namelen(ce), ce->name,
+ LSTAT_SYMLINK|LSTAT_NOTDIR|LSTAT_DIR,
+ LSTAT_SYMLINK|LSTAT_NOTDIR))
return 0;
if (!lstat(ce->name, &st)) {
--
1.6.1.rc1.49.g7f705
^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH/RFC 2/4] Use 'lstat_cache()' instead of 'has_symlink_leading_path()'
2009-01-05 13:09 ` [PATCH/RFC 2/4] Use 'lstat_cache()' instead of 'has_symlink_leading_path()' Kjetil Barvik
@ 2009-01-06 8:19 ` Junio C Hamano
2009-01-06 12:50 ` Kjetil Barvik
0 siblings, 1 reply; 12+ messages in thread
From: Junio C Hamano @ 2009-01-06 8:19 UTC (permalink / raw)
To: Kjetil Barvik; +Cc: git, Linus Torvalds
Kjetil Barvik <barvik@broadpark.no> writes:
> Start using the optimised, faster and more effective symlink/directory
> cache. The previously used call:
>
> has_symlink_leading_path(len, name);
>
> should be identically with the following call to lstat_cache():
>
> lstat_cache(len, name,
> LSTAT_SYMLINK|LSTAT_DIR,
> LSTAT_SYMLINK);
> ...
Care to enlighten why some of callers use the above, but not others?
Namely, check_removed() in diff-lib.c and callers in unpack-trees.c care
about NOTDIR unlike others, even though the original code checked for
exactly the same condition.
Does this mean that some callers of has_symlink_leading_path() checked
only for leading symlinks when they should also have checked for a leading
non-directory, and this patch is also a bugfix?
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH/RFC 2/4] Use 'lstat_cache()' instead of 'has_symlink_leading_path()'
2009-01-06 8:19 ` Junio C Hamano
@ 2009-01-06 12:50 ` Kjetil Barvik
0 siblings, 0 replies; 12+ messages in thread
From: Kjetil Barvik @ 2009-01-06 12:50 UTC (permalink / raw)
To: Junio C Hamano; +Cc: git, Linus Torvalds
Junio C Hamano <gitster@pobox.com> writes:
> Kjetil Barvik <barvik@broadpark.no> writes:
>
>> Start using the optimised, faster and more effective symlink/directory
>> cache. The previously used call:
>>
>> has_symlink_leading_path(len, name);
>>
>> should be identically with the following call to lstat_cache():
>>
>> lstat_cache(len, name,
>> LSTAT_SYMLINK|LSTAT_DIR,
>> LSTAT_SYMLINK);
>> ...
>
> Care to enlighten why some of callers use the above, but not others?
> Namely, check_removed() in diff-lib.c
I though that first it would be a good thing to introduce as little
changes as possible, and then later on do some cleanups. Regarding
the 'check_removed()' function, I though that it could later have been
written something like this after cleanups:
[....]
static int check_removed(const struct cache_entry *ce, struct stat *st)
{
unsigned int ret_flags =
check_lstat_cache(ce_namelen(ce), ce->name,
LSTAT_SYMLINK|LSTAT_NOTDIR|LSTAT_DIR);
if (ret_flags & (LSTAT_SYMLINK|LSTAT_NOTDIR))
return 1;
if (ret_flags & LSTAT_LSTATERR)
return -1;
if (ret_flags & LSTAT_DIR) {
unsigned char sub[20];
[....]
This would have saved one more lstat() call in some cases. But after
a test, I now see that it does not work. The reason is that it does
not set the 'struct stat *st' parameter and/or that for the moment you
can not tell the 'lstat_cache()' function to also always test the last
path component. It could be extended to do this, if someone ask for
it and if it would be useful to extend the lstat_cache() for this
fact.
I will remove the '|LSTAT_NOTDIR' part from the call to lstat_cache()
in 'check_removed()' in the next version of the patch.
> and callers in unpack-trees.c care about NOTDIR unlike others, even
> though the original code checked for exactly the same condition.
Regarding the 'verify_absent()' function in unpack-trees.c, the
'|LSTAT_NOTDIR' part of the call to lstat_cache() helps to avoid 16047
lstat() calls for the given test case mentioned in the cover-letter.
And from the source code:
[...]
if (lstat_cache(ce_namelen(ce), ce->name,
LSTAT_SYMLINK|LSTAT_NOTDIR|LSTAT_DIR,
LSTAT_SYMLINK|LSTAT_NOTDIR))
return 0;
if (!lstat(ce->name, &st)) {
[...]
it should be easy to see that if we from the lstat_cache() could
already spot that some path component of ce->name does not exists,
then we can avoid the following lstat() call, as it then should known
to be failing.
regarding the 'unlink_entry()' function in unpack-trees.c, the
'|LSTAT_NOTDIR' part of the call to lstat_cache() does not for the
moment helps to avoid any lstat() calls, as far as I can see. But,
again, from the source code:
[..]
char *name = ce->name;
if (lstat_cache(ce_namelen(ce), ce->name,
LSTAT_SYMLINK|LSTAT_NOTDIR|LSTAT_DIR,
LSTAT_SYMLINK|LSTAT_NOTDIR))
return;
if (unlink(name))
return;
[...]
it should be correct, since if we already know that some path
component of ce-name does not exist, the call to unlink(name) would
always fail (with ENOENT).
> Does this mean that some callers of has_symlink_leading_path() checked
> only for leading symlinks when they should also have checked for a leading
> non-directory, and this patch is also a bugfix?
Yes, as indicated above, has_symlink_leading_path() should have
checked for leading non-directories when called from for the
'verify_absent()' function to be able to optimise away some more
lstat() calls.
I admit that I do not know the source code good enough to decide if
this is an indication of a bug somewhere, or just an optimisation.
-- kjetil
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH/RFC 3/4] create_directories() inside entry.c: only check each directory once!
2009-01-05 13:09 [PATCH/RFC 0/4] git checkout: optimise away lots of lstat() calls Kjetil Barvik
2009-01-05 13:09 ` [PATCH/RFC 1/4] Optimised, faster, more effective symlink/directory detection Kjetil Barvik
2009-01-05 13:09 ` [PATCH/RFC 2/4] Use 'lstat_cache()' instead of 'has_symlink_leading_path()' Kjetil Barvik
@ 2009-01-05 13:10 ` Kjetil Barvik
2009-01-05 13:10 ` [PATCH/RFC 4/4] remove the old 'has_symlink_leading_path()' function Kjetil Barvik
3 siblings, 0 replies; 12+ messages in thread
From: Kjetil Barvik @ 2009-01-05 13:10 UTC (permalink / raw)
To: git; +Cc: Linus Torvalds, Junio C Hamano, Kjetil Barvik
When we do an 'git checkout' after some time we end up in the
'checkout_entry()' function inside entry.c, and from here we call the
'create_directories()' function to make sure the all the directories
exists for the possible new file or entry.
The 'create_directories()' function happily started to check that all
path component exists. This resulted in tons and tons of calls to
lstat() or stat() when we checkout files nested deep inside a
directory.
We try to avoid this by remembering the last checked and possible
newly created directory.
Signed-off-by: Kjetil Barvik <barvik@broadpark.no>
---
:100644 100644 7c246a4... 8d0228c... M cache.h
:100644 100644 aa2ee46... 666a8ce... M entry.c
:100644 100644 93923db... 7a2219d... M unpack-trees.c
cache.h | 1 +
entry.c | 86 ++++++++++++++++++++++++++++++++++++++++++++------------
unpack-trees.c | 1 +
3 files changed, 70 insertions(+), 18 deletions(-)
diff --git a/cache.h b/cache.h
index 7c246a42df3d60ac2c0f7431ff29ee8fb70235ce..8d0228c857ab9d8e31585ad5aa6838403adef3a2 100644
--- a/cache.h
+++ b/cache.h
@@ -718,6 +718,7 @@ struct checkout {
refresh_cache:1;
};
+extern void clear_created_dirs_cache(void);
extern int checkout_entry(struct cache_entry *ce, const struct checkout *state, char *topath);
extern int has_symlink_leading_path(int len, const char *name);
diff --git a/entry.c b/entry.c
index aa2ee46a84033585d8e07a585610c5a697af82c2..666a8ce3a132e85a45b0828521f3c2119c77833e 100644
--- a/entry.c
+++ b/entry.c
@@ -1,33 +1,76 @@
#include "cache.h"
#include "blob.h"
-static void create_directories(const char *path, const struct checkout *state)
+static char dirs_path[PATH_MAX];
+static int dirs_len = 0;
+
+static inline int
+greatest_common_created_dirs_prefix(int len, const char *name)
{
- int len = strlen(path);
- char *buf = xmalloc(len + 1);
- const char *slash = path;
+ int max_len, match_len = 0, i = 0;
- while ((slash = strchr(slash+1, '/')) != NULL) {
- struct stat st;
- int stat_status;
+ max_len = len < dirs_len ? len : dirs_len;
+ while (i < max_len && name[i] == dirs_path[i]) {
+ if (name[i] == '/') match_len = i;
+ i++;
+ }
+ if (i == dirs_len && len > dirs_len && name[dirs_len] == '/')
+ match_len = dirs_len;
+ return match_len;
+}
+
+static inline void
+update_created_dirs_cache(int last_slash)
+{
+ if (last_slash > 0 && last_slash < PATH_MAX) {
+ dirs_len = last_slash;
+ } else {
+ dirs_len = 0;
+ }
+}
- len = slash - path;
- memcpy(buf, path, len);
- buf[len] = 0;
+void clear_created_dirs_cache(void)
+{
+ dirs_len = 0;
+}
+
+static void
+create_directories(int len, const char *path, const struct checkout *state)
+{
+ int i, max_len, last_slash, stat_status;
+ struct stat st;
+
+ /* Check the cache for previously checked or created
+ * directories (and components) within this function. There
+ * is no need to check or re-create directory components more
+ * than once!
+ */
+ max_len = len < PATH_MAX ? len : PATH_MAX;
+ i = last_slash = greatest_common_created_dirs_prefix(max_len, path);
- if (len <= state->base_dir_len)
+ while (i < max_len) {
+ do {
+ dirs_path[i] = path[i];
+ i++;
+ } while (i < max_len && path[i] != '/');
+ if (i >= max_len)
+ break;
+ last_slash = i;
+ dirs_path[last_slash] = '\0';
+
+ if (last_slash <= state->base_dir_len)
/*
* checkout-index --prefix=<dir>; <dir> is
* allowed to be a symlink to an existing
* directory.
*/
- stat_status = stat(buf, &st);
+ stat_status = stat(dirs_path, &st);
else
/*
* if there currently is a symlink, we would
* want to replace it with a real directory.
*/
- stat_status = lstat(buf, &st);
+ stat_status = lstat(dirs_path, &st);
if (!stat_status && S_ISDIR(st.st_mode))
continue; /* ok, it is already a directory. */
@@ -38,14 +81,14 @@ static void create_directories(const char *path, const struct checkout *state)
* error codepath; we do not care, as we unlink and
* mkdir again in such a case.
*/
- if (mkdir(buf, 0777)) {
+ if (mkdir(dirs_path, 0777)) {
if (errno == EEXIST && state->force &&
- !unlink(buf) && !mkdir(buf, 0777))
+ !unlink(dirs_path) && !mkdir(dirs_path, 0777))
continue;
- die("cannot create directory at %s", buf);
+ die("cannot create directory at %s", dirs_path);
}
}
- free(buf);
+ update_created_dirs_cache(last_slash);
}
static void remove_subtree(const char *path)
@@ -55,6 +98,11 @@ static void remove_subtree(const char *path)
char pathbuf[PATH_MAX];
char *name;
+ /* To be utterly safe we invalidate the cache of the
+ * previously created directories.
+ */
+ clear_created_dirs_cache();
+
if (!dir)
die("cannot opendir %s (%s)", path, strerror(errno));
strcpy(pathbuf, path);
@@ -195,12 +243,14 @@ int checkout_entry(struct cache_entry *ce, const struct checkout *state, char *t
static char path[PATH_MAX + 1];
struct stat st;
int len = state->base_dir_len;
+ int path_len;
if (topath)
return write_entry(ce, topath, state, 1);
memcpy(path, state->base_dir, len);
strcpy(path + len, ce->name);
+ path_len = len + ce_namelen(ce);
if (!lstat(path, &st)) {
unsigned changed = ce_match_stat(ce, &st, CE_MATCH_IGNORE_VALID);
@@ -229,6 +279,6 @@ int checkout_entry(struct cache_entry *ce, const struct checkout *state, char *t
return error("unable to unlink old '%s' (%s)", path, strerror(errno));
} else if (state->not_new)
return 0;
- create_directories(path, state);
+ create_directories(path_len, path, state);
return write_entry(ce, path, state, 0);
}
diff --git a/unpack-trees.c b/unpack-trees.c
index 93923dbbc6ab80deadfd737aa9975f6e5a4d1e89..7a2219d14d19b80e67c01d051e57c341f60f455c 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -121,6 +121,7 @@ static int check_updates(struct unpack_trees_options *o)
}
}
+ clear_created_dirs_cache();
for (i = 0; i < index->cache_nr; i++) {
struct cache_entry *ce = index->cache[i];
--
1.6.1.rc1.49.g7f705
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH/RFC 4/4] remove the old 'has_symlink_leading_path()' function
2009-01-05 13:09 [PATCH/RFC 0/4] git checkout: optimise away lots of lstat() calls Kjetil Barvik
` (2 preceding siblings ...)
2009-01-05 13:10 ` [PATCH/RFC 3/4] create_directories() inside entry.c: only check each directory once! Kjetil Barvik
@ 2009-01-05 13:10 ` Kjetil Barvik
3 siblings, 0 replies; 12+ messages in thread
From: Kjetil Barvik @ 2009-01-05 13:10 UTC (permalink / raw)
To: git; +Cc: Linus Torvalds, Junio C Hamano, Kjetil Barvik
It has been replaced by the more cache effective 'lstat_cache()'
function. see lstat_cache.c
Signed-off-by: Kjetil Barvik <barvik@broadpark.no>
---
:100644 100644 7449b10... edd4798... M Makefile
:100644 100644 8d0228c... dc5c383... M cache.h
:100644 000000 5a5e781... 0000000... D symlinks.c
Makefile | 1 -
cache.h | 1 -
symlinks.c | 64 ------------------------------------------------------------
3 files changed, 0 insertions(+), 66 deletions(-)
delete mode 100644 symlinks.c
diff --git a/Makefile b/Makefile
index 7449b105b03e862d53244d50ed035b4ddabef028..edd4798429ad3828f5b9dcff20f73c6a7269520e 100644
--- a/Makefile
+++ b/Makefile
@@ -483,7 +483,6 @@ LIB_OBJS += sha1_name.o
LIB_OBJS += shallow.o
LIB_OBJS += sideband.o
LIB_OBJS += strbuf.o
-LIB_OBJS += symlinks.o
LIB_OBJS += tag.o
LIB_OBJS += trace.o
LIB_OBJS += transport.o
diff --git a/cache.h b/cache.h
index 8d0228c857ab9d8e31585ad5aa6838403adef3a2..dc5c3833d789a6012f9ce05522c8840f01ddc027 100644
--- a/cache.h
+++ b/cache.h
@@ -720,7 +720,6 @@ struct checkout {
extern void clear_created_dirs_cache(void);
extern int checkout_entry(struct cache_entry *ce, const struct checkout *state, char *topath);
-extern int has_symlink_leading_path(int len, const char *name);
#define LSTAT_DIR (1u << 0)
#define LSTAT_NOTDIR (1u << 1)
diff --git a/symlinks.c b/symlinks.c
deleted file mode 100644
index 5a5e781a15d7d9cb60797958433eca896b31ec85..0000000000000000000000000000000000000000
--- a/symlinks.c
+++ /dev/null
@@ -1,64 +0,0 @@
-#include "cache.h"
-
-struct pathname {
- int len;
- char path[PATH_MAX];
-};
-
-/* Return matching pathname prefix length, or zero if not matching */
-static inline int match_pathname(int len, const char *name, struct pathname *match)
-{
- int match_len = match->len;
- return (len > match_len &&
- name[match_len] == '/' &&
- !memcmp(name, match->path, match_len)) ? match_len : 0;
-}
-
-static inline void set_pathname(int len, const char *name, struct pathname *match)
-{
- if (len < PATH_MAX) {
- match->len = len;
- memcpy(match->path, name, len);
- match->path[len] = 0;
- }
-}
-
-int has_symlink_leading_path(int len, const char *name)
-{
- static struct pathname link, nonlink;
- char path[PATH_MAX];
- struct stat st;
- char *sp;
- int known_dir;
-
- /*
- * See if the last known symlink cache matches.
- */
- if (match_pathname(len, name, &link))
- return 1;
-
- /*
- * Get rid of the last known directory part
- */
- known_dir = match_pathname(len, name, &nonlink);
-
- while ((sp = strchr(name + known_dir + 1, '/')) != NULL) {
- int thislen = sp - name ;
- memcpy(path, name, thislen);
- path[thislen] = 0;
-
- if (lstat(path, &st))
- return 0;
- if (S_ISDIR(st.st_mode)) {
- set_pathname(thislen, path, &nonlink);
- known_dir = thislen;
- continue;
- }
- if (S_ISLNK(st.st_mode)) {
- set_pathname(thislen, path, &link);
- return 1;
- }
- break;
- }
- return 0;
-}
--
1.6.1.rc1.49.g7f705
^ permalink raw reply related [flat|nested] 12+ messages in thread