From: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
To: git@vger.kernel.org
Cc: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
Subject: [PATCH 06/19] untracked cache: record/validate dir mtime and reuse cached output
Date: Mon, 27 Oct 2014 19:10:33 +0700 [thread overview]
Message-ID: <1414411846-4450-7-git-send-email-pclouds@gmail.com> (raw)
In-Reply-To: <1414411846-4450-1-git-send-email-pclouds@gmail.com>
The main readdir loop in read_directory_recursive() is replaced with a
new one that checks if the cached results of a directory are still valid.
If a file is added or removed from the index, the containing directory
is invalidated (but not its subdirs). If a directory's mtime is changed,
the same happens. If a .gitignore is updated, the containing directory
and all subdirs are invalidated recursively. If dir_struct#flags or
other conditions change, the cache is ignored.
If a directory is invalidated, we opendir/readdir/closedir and run the
exclude machinery on that directory listing as usual. If untracked
cache is also enabled, we'll update the cache along the way. If a
directory is validated, we simply pull the untracked listing out from
the cache. The cache also records the list of direct subdirs that we
have to recurse in. Fully excluded directories are seen as "untracked
files".
In the best case when no dirs are invalidated, read_directory()
becomes a series of
stat(dir), open(.gitignore), fstat(), read(), close() and optionally
hash_sha1_file()
For comparison, standard read_directory() is a sequence of
opendir(), readdir(), open(.gitignore), fstat(), read(), close(), the
expensive last_exclude_matching() and closedir().
We already try not to open(.gitignore) if we know it does not exist,
so open/fstat/read/close sequence does not apply to every
directory. The sequence could be reduced further, as noted in
prep_exclude() in another patch. So in theory, the entire best-case
read_directory sequence could be reduced to a series of stat() and
nothing else.
This is not a silver bullet approach. When you compile a C file, for
example, the old .o file is removed and a new one with the same name
created, effectively invalidating the containing directory's cache
(but not its subdirectories). If your build process touches every
directory, this cache adds extra overhead for nothing, so it's a good
idea to separate generated files from tracked files. Editors may use
the same strategy for saving files. And of course you're out of luck
running your repo on an unsupported filesystem and/or operating system.
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
dir.c | 123 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
dir.h | 2 ++
2 files changed, 123 insertions(+), 2 deletions(-)
diff --git a/dir.c b/dir.c
index 2793e57..55780a7 100644
--- a/dir.c
+++ b/dir.c
@@ -37,7 +37,12 @@ enum path_treatment {
struct cached_dir {
DIR *fdir;
struct untracked_cache_dir *untracked;
+ int nr_files;
+ int nr_dirs;
+
struct dirent *de;
+ const char *file;
+ struct untracked_cache_dir *ucd;
};
static enum path_treatment read_directory_recursive(struct dir_struct *dir,
@@ -606,6 +611,14 @@ static void invalidate_gitignore(struct untracked_cache *uc,
do_invalidate_gitignore(dir);
}
+static void invalidate_directory(struct untracked_cache *uc,
+ struct untracked_cache_dir *dir)
+{
+ uc->dir_invalidated++;
+ dir->valid = 0;
+ dir->untracked_nr = 0;
+}
+
/*
* Given a file with name "fname", read it (either from disk, or from
* the index if "check_index" is non-zero), parse it and store the
@@ -1418,6 +1431,41 @@ static enum path_treatment treat_one_path(struct dir_struct *dir,
}
}
+static enum path_treatment treat_path_fast(struct dir_struct *dir,
+ struct untracked_cache_dir *untracked,
+ struct cached_dir *cdir,
+ struct strbuf *path,
+ int baselen,
+ const struct path_simplify *simplify)
+{
+ if (!cdir->ucd) {
+ strbuf_setlen(path, baselen);
+ strbuf_addstr(path, cdir->file);
+ return path_untracked;
+ }
+ strbuf_setlen(path, baselen);
+ strbuf_addstr(path, cdir->ucd->name);
+ /* treat_one_path() does this before it calls treat_directory() */
+ if (path->buf[path->len - 1] != '/')
+ strbuf_addch(path, '/');
+ if (cdir->ucd->check_only)
+ /*
+ * check_only is set as a result of treat_directory() getting
+ * to its bottom. Verify again the same set of directories
+ * with check_only set.
+ */
+ return read_directory_recursive(dir, path->buf, path->len,
+
+ cdir->ucd, 1, simplify);
+ /*
+ * We get path_recurse in the first run when
+ * directory_exists_in_index() returns index_nonexistent. We
+ * are sure that new changes in the index do not impact the
+ * outcome. Return now.
+ */
+ return path_recurse;
+}
+
static enum path_treatment treat_path(struct dir_struct *dir,
struct untracked_cache_dir *untracked,
struct cached_dir *cdir,
@@ -1428,6 +1476,9 @@ static enum path_treatment treat_path(struct dir_struct *dir,
int dtype;
struct dirent *de = cdir->de;
+ if (!de)
+ return treat_path_fast(dir, untracked, cdir, path,
+ baselen, simplify);
if (is_dot_or_dotdot(de->d_name) || !strcmp(de->d_name, ".git"))
return path_none;
strbuf_setlen(path, baselen);
@@ -1448,6 +1499,52 @@ static void add_untracked(struct untracked_cache_dir *dir, const char *name)
dir->untracked[dir->untracked_nr++] = xstrdup(name);
}
+static int valid_cached_dir(struct dir_struct *dir,
+ struct untracked_cache_dir *untracked,
+ struct strbuf *path,
+ int check_only)
+{
+ struct stat st;
+
+ if (!untracked)
+ return 0;
+
+ if (stat(path->len ? path->buf : ".", &st)) {
+ invalidate_directory(dir->untracked, untracked);
+ memset(&untracked->stat_data, 0, sizeof(untracked->stat_data));
+ return 0;
+ }
+ if (!untracked->valid ||
+ match_stat_data(&untracked->stat_data, &st)) {
+ if (untracked->valid)
+ invalidate_directory(dir->untracked, untracked);
+ fill_stat_data(&untracked->stat_data, &st);
+ return 0;
+ }
+
+ if (untracked->check_only != !!check_only) {
+ invalidate_directory(dir->untracked, untracked);
+ return 0;
+ }
+
+ /*
+ * prep_exclude will be called eventually on this directory,
+ * but it's called much later in last_exclude_matching(). We
+ * need it now to determine the validity of the cache for this
+ * path. The next calls will be nearly no-op, the way
+ * prep_exclude() is designed.
+ */
+ if (path->len && path->buf[path->len - 1] != '/') {
+ strbuf_addch(path, '/');
+ prep_exclude(dir, path->buf, path->len);
+ strbuf_setlen(path, path->len - 1);
+ } else
+ prep_exclude(dir, path->buf, path->len);
+
+ /* hopefully prep_exclude() hasn't invalidated this entry... */
+ return untracked->valid;
+}
+
static int open_cached_dir(struct cached_dir *cdir,
struct dir_struct *dir,
struct untracked_cache_dir *untracked,
@@ -1456,7 +1553,11 @@ static int open_cached_dir(struct cached_dir *cdir,
{
memset(cdir, 0, sizeof(*cdir));
cdir->untracked = untracked;
+ if (valid_cached_dir(dir, untracked, path, check_only))
+ return 0;
cdir->fdir = opendir(path->len ? path->buf : ".");
+ if (dir->untracked)
+ dir->untracked->dir_opened++;
if (!cdir->fdir)
return -1;
return 0;
@@ -1470,6 +1571,18 @@ int read_cached_dir(struct cached_dir *cdir)
return -1;
return 0;
}
+ while (cdir->nr_dirs < cdir->untracked->dirs_nr) {
+ struct untracked_cache_dir *d = cdir->untracked->dirs[cdir->nr_dirs];
+ cdir->ucd = d;
+ cdir->nr_dirs++;
+ return 0;
+ }
+ cdir->ucd = NULL;
+ if (cdir->nr_files < cdir->untracked->untracked_nr) {
+ struct untracked_cache_dir *d = cdir->untracked;
+ cdir->file = d->untracked[cdir->nr_files++];
+ return 0;
+ }
return -1;
}
@@ -1477,6 +1590,12 @@ static void close_cached_dir(struct cached_dir *cdir)
{
if (cdir->fdir)
closedir(cdir->fdir);
+ /*
+ * We have gone through this directory and found no untracked
+ * entries. Mark it valid.
+ */
+ if (cdir->untracked && !cdir->untracked->valid)
+ cdir->untracked->valid = 1;
}
/*
@@ -1530,7 +1649,7 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir,
if (check_only) {
/* abort early if maximum state has been reached */
if (dir_state == path_untracked) {
- if (untracked)
+ if (cdir.fdir)
add_untracked(untracked, path.buf + baselen);
break;
}
@@ -1554,7 +1673,7 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir,
if (dir->flags & DIR_SHOW_IGNORED)
break;
dir_add_name(dir, path.buf, path.len);
- if (untracked)
+ if (cdir.fdir)
add_untracked(untracked, path.buf + baselen);
break;
diff --git a/dir.h b/dir.h
index 35701b2..1fefd4e 100644
--- a/dir.h
+++ b/dir.h
@@ -134,6 +134,8 @@ struct untracked_cache {
/* Statistics */
int dir_created;
int gitignore_invalidated;
+ int dir_invalidated;
+ int dir_opened;
};
struct dir_struct {
--
2.1.0.rc0.78.gc0d8480
next prev parent reply other threads:[~2014-10-27 12:11 UTC|newest]
Thread overview: 65+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-10-27 12:10 [PATCH 00/19] Untracked cache to speed up "git status" Nguyễn Thái Ngọc Duy
2014-10-27 12:10 ` [PATCH 01/19] dir.c: optionally compute sha-1 of a .gitignore file Nguyễn Thái Ngọc Duy
2014-10-27 22:46 ` Junio C Hamano
2014-10-28 0:12 ` Duy Nguyen
2014-10-28 17:37 ` Torsten Bögershausen
2014-11-02 1:25 ` Duy Nguyen
2014-10-27 12:10 ` [PATCH 02/19] untracked cache: record .gitignore information and dir hierarchy Nguyễn Thái Ngọc Duy
2014-10-28 17:37 ` Torsten Bögershausen
2014-10-27 12:10 ` [PATCH 03/19] untracked cache: initial untracked cache validation Nguyễn Thái Ngọc Duy
2014-10-27 12:10 ` [PATCH 04/19] untracked cache: invalidate dirs recursively if .gitignore changes Nguyễn Thái Ngọc Duy
2014-10-27 12:10 ` [PATCH 05/19] untracked cache: make a wrapper around {open,read,close}dir() Nguyễn Thái Ngọc Duy
2014-10-27 12:10 ` Nguyễn Thái Ngọc Duy [this message]
2014-10-30 16:19 ` [PATCH 06/19] untracked cache: record/validate dir mtime and reuse cached output Eric Sunshine
2014-10-27 12:10 ` [PATCH 07/19] untracked cache: mark what dirs should be recursed/saved Nguyễn Thái Ngọc Duy
2014-10-27 12:10 ` [PATCH 08/19] untracked cache: don't open non-existent .gitignore Nguyễn Thái Ngọc Duy
2014-10-27 12:10 ` [PATCH 09/19] untracked cache: save to an index extension Nguyễn Thái Ngọc Duy
2014-10-27 12:10 ` [PATCH 10/19] untracked cache: load from UNTR " Nguyễn Thái Ngọc Duy
2014-10-27 12:10 ` [PATCH 11/19] untracked cache: invalidate at index addition or removal Nguyễn Thái Ngọc Duy
2014-10-27 12:10 ` [PATCH 12/19] read-cache.c: split racy stat test to a separate function Nguyễn Thái Ngọc Duy
2014-10-27 12:10 ` [PATCH 13/19] untracked cache: avoid racy timestamps Nguyễn Thái Ngọc Duy
2014-10-27 12:10 ` [PATCH 14/19] untracked cache: print stats with $GIT_TRACE_UNTRACKED_STATS Nguyễn Thái Ngọc Duy
2014-10-27 12:10 ` [PATCH 15/19] untracked cache: mark index dirty if untracked cache is updated Nguyễn Thái Ngọc Duy
2014-10-27 12:10 ` [PATCH 16/19] status: enable untracked cache Nguyễn Thái Ngọc Duy
2014-10-27 12:10 ` [PATCH 17/19] update-index: manually enable or disable " Nguyễn Thái Ngọc Duy
2014-10-27 12:10 ` [PATCH 18/19] update-index: test the system before enabling " Nguyễn Thái Ngọc Duy
2014-10-28 17:37 ` Torsten Bögershausen
2014-11-03 12:16 ` Duy Nguyen
2014-11-03 18:09 ` Junio C Hamano
2014-10-28 23:25 ` Eric Sunshine
2014-10-27 12:10 ` [PATCH 19/19] t7063: tests for " Nguyễn Thái Ngọc Duy
2014-11-08 9:39 ` [PATCH v2 00/22] untracked cache updates Nguyễn Thái Ngọc Duy
2014-11-08 9:39 ` [PATCH v2 01/22] dir.c: optionally compute sha-1 of a .gitignore file Nguyễn Thái Ngọc Duy
2014-11-17 19:31 ` David Turner
2014-11-08 9:39 ` [PATCH v2 02/22] untracked cache: record .gitignore information and dir hierarchy Nguyễn Thái Ngọc Duy
2014-11-08 17:08 ` brian m. carlson
2014-11-17 20:35 ` David Turner
2014-11-08 9:39 ` [PATCH v2 03/22] untracked cache: initial untracked cache validation Nguyễn Thái Ngọc Duy
2014-11-08 9:39 ` [PATCH v2 04/22] untracked cache: invalidate dirs recursively if .gitignore changes Nguyễn Thái Ngọc Duy
2014-11-08 9:39 ` [PATCH v2 05/22] untracked cache: make a wrapper around {open,read,close}dir() Nguyễn Thái Ngọc Duy
2014-11-08 9:39 ` [PATCH v2 06/22] untracked cache: record/validate dir mtime and reuse cached output Nguyễn Thái Ngọc Duy
2014-11-08 9:39 ` [PATCH v2 07/22] untracked cache: mark what dirs should be recursed/saved Nguyễn Thái Ngọc Duy
2014-11-08 9:39 ` [PATCH v2 08/22] untracked cache: don't open non-existent .gitignore Nguyễn Thái Ngọc Duy
2014-11-08 9:39 ` [PATCH v2 09/22] untracked cache: save to an index extension Nguyễn Thái Ngọc Duy
2014-11-08 9:39 ` [PATCH v2 10/22] untracked cache: load from UNTR " Nguyễn Thái Ngọc Duy
2014-11-08 9:39 ` [PATCH v2 11/22] untracked cache: invalidate at index addition or removal Nguyễn Thái Ngọc Duy
2014-11-08 9:39 ` [PATCH v2 12/22] read-cache.c: split racy stat test to a separate function Nguyễn Thái Ngọc Duy
2014-11-08 9:39 ` [PATCH v2 13/22] untracked cache: avoid racy timestamps Nguyễn Thái Ngọc Duy
2014-11-08 9:39 ` [PATCH v2 14/22] untracked cache: print stats with $GIT_TRACE_UNTRACKED_STATS Nguyễn Thái Ngọc Duy
2014-11-08 9:39 ` [PATCH v2 15/22] untracked cache: mark index dirty if untracked cache is updated Nguyễn Thái Ngọc Duy
2014-11-08 9:39 ` [PATCH v2 16/22] untracked-cache: temporarily disable with $GIT_DISABLE_UNTRACKED_CACHE Nguyễn Thái Ngọc Duy
2014-11-08 9:39 ` [PATCH v2 17/22] status: enable untracked cache Nguyễn Thái Ngọc Duy
2014-11-08 9:39 ` [PATCH v2 18/22] update-index: manually enable or disable " Nguyễn Thái Ngọc Duy
2014-11-08 9:39 ` [PATCH v2 19/22] update-index: test the system before enabling " Nguyễn Thái Ngọc Duy
2014-11-08 9:39 ` [PATCH v2 20/22] t7063: tests for " Nguyễn Thái Ngọc Duy
2014-11-08 9:39 ` [PATCH v2 21/22] mingw32: add uname() Nguyễn Thái Ngọc Duy
2014-11-09 3:32 ` Eric Sunshine
2014-11-09 8:36 ` Duy Nguyen
2014-11-09 11:46 ` Torsten Bögershausen
2014-11-09 18:47 ` Junio C Hamano
2014-11-08 9:39 ` [PATCH v2 22/22] untracked cache: guard and disable on system changes Nguyễn Thái Ngọc Duy
2014-11-09 3:39 ` Eric Sunshine
2014-11-09 8:34 ` Duy Nguyen
2014-11-09 21:39 ` Torsten Bögershausen
2014-11-09 23:47 ` Duy Nguyen
2014-11-10 20:48 ` Torsten Bögershausen
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1414411846-4450-7-git-send-email-pclouds@gmail.com \
--to=pclouds@gmail.com \
--cc=git@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).