From: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
To: git@vger.kernel.org
Cc: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
Subject: [PATCH v3 13/13] read_directory: calculate name hashes incrementally
Date: Tue, 12 Mar 2013 20:05:00 +0700 [thread overview]
Message-ID: <1363093500-16796-14-git-send-email-pclouds@gmail.com> (raw)
In-Reply-To: <1363093500-16796-1-git-send-email-pclouds@gmail.com>
Instead of index_name_exists() calculating a hash for full pathname
for every entry, we calculate partial hash per directory, use it as a
seed. The number of characters that icase_hash has to chew will
reduce.
treat_leading_path: 0.000 0.000
read_directory: 1.296 1.235
+treat_one_path: 0.599 0.531
++is_excluded: 0.102 0.102
+++prep_exclude: 0.040 0.040
+++matching: 0.035 0.035
++dir_exist: 0.035 0.035
++index_name_exists: 0.292 0.225
lazy_init_name_hash: 0.155 0.155
+simplify_away: 0.082 0.083
+dir_add_name: 0.000 0.000
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
dir.c | 44 +++++++++++++++++++++++++++++++++-----------
1 file changed, 33 insertions(+), 11 deletions(-)
diff --git a/dir.c b/dir.c
index 5fda5af..8638dcd 100644
--- a/dir.c
+++ b/dir.c
@@ -46,6 +46,7 @@ struct path_simplify {
static void read_directory_recursive(struct dir_struct *dir,
const char *path, int len,
+ unsigned int hash,
int check_only,
const struct path_simplify *simplify,
int *contents);
@@ -1044,12 +1045,17 @@ static struct dir_entry *dir_entry_new(const char *pathname, int len)
return ent;
}
-static struct dir_entry *dir_add_name(struct dir_struct *dir, const char *pathname, int len)
+static struct dir_entry *dir_add_name(struct dir_struct *dir,
+ const char *pathname, int len,
+ unsigned int hash, int baselen)
{
if (!(dir->flags & DIR_SHOW_IGNORED)) {
struct cache_entry *ce;
START_CLOCK();
- ce = cache_name_exists(pathname, len, ignore_case);
+ ce = index_name_exists_base(&the_index,
+ hash, baselen,
+ pathname, len,
+ ignore_case);
STOP_CLOCK(tv_index_name_exists);
if (ce)
return NULL;
@@ -1225,7 +1231,9 @@ static enum directory_treatment treat_directory(struct dir_struct *dir,
if ((dir->flags & DIR_SHOW_IGNORED) && !exclude) {
dir->flags &= ~DIR_SHOW_IGNORED;
dir->flags |= DIR_HIDE_EMPTY_DIRECTORIES;
- read_directory_recursive(dir, dirname, len, 1, simplify, &contents);
+ read_directory_recursive(dir, dirname, len,
+ hash_name(dirname, len),
+ 1, simplify, &contents);
dir->flags &= ~DIR_HIDE_EMPTY_DIRECTORIES;
dir->flags |= DIR_SHOW_IGNORED;
@@ -1234,7 +1242,9 @@ static enum directory_treatment treat_directory(struct dir_struct *dir,
if (!(dir->flags & DIR_SHOW_IGNORED) &&
!(dir->flags & DIR_HIDE_EMPTY_DIRECTORIES))
return show_directory;
- read_directory_recursive(dir, dirname, len, 1, simplify, &contents);
+ read_directory_recursive(dir, dirname, len,
+ hash_name(dirname, len),
+ 1, simplify, &contents);
if (!contents)
return ignore_directory;
return show_directory;
@@ -1401,6 +1411,8 @@ enum path_treatment {
static enum path_treatment treat_one_path(struct dir_struct *dir,
struct strbuf *path,
+ unsigned int hash,
+ int baselen,
const struct path_simplify *simplify,
int dtype, struct dirent *de,
int exclude_shortcut_ok)
@@ -1416,7 +1428,8 @@ static enum path_treatment treat_one_path(struct dir_struct *dir,
dtype != DT_DIR) {
struct cache_entry *ce;
START_CLOCK();
- ce = cache_name_exists(path->buf, path->len, ignore_case);
+ ce = index_name_exists_base(&the_index, hash, baselen,
+ path->buf, path->len, ignore_case);
STOP_CLOCK(tv_index_name_exists);
if (ce)
return path_ignored;
@@ -1467,6 +1480,7 @@ static enum path_treatment treat_one_path(struct dir_struct *dir,
static enum path_treatment treat_path(struct dir_struct *dir,
struct dirent *de,
struct strbuf *path,
+ unsigned int hash,
int baselen,
const struct path_simplify *simplify,
int exclude_shortcut_ok)
@@ -1485,7 +1499,8 @@ static enum path_treatment treat_path(struct dir_struct *dir,
dtype = DTYPE(de);
START_CLOCK();
- ret = treat_one_path(dir, path, simplify, dtype, de, exclude_shortcut_ok);
+ ret = treat_one_path(dir, path, hash, baselen,
+ simplify, dtype, de, exclude_shortcut_ok);
STOP_CLOCK(tv_treat_one_path);
return ret;
}
@@ -1501,6 +1516,7 @@ static enum path_treatment treat_path(struct dir_struct *dir,
*/
static void read_directory_recursive(struct dir_struct *dir,
const char *base, int baselen,
+ unsigned int hash,
int check_only,
const struct path_simplify *simplify,
int *contents)
@@ -1517,12 +1533,16 @@ static void read_directory_recursive(struct dir_struct *dir,
dir->exclude_prepared = 0;
while ((de = readdir(fdir)) != NULL) {
- switch (treat_path(dir, de, &path, baselen,
+ switch (treat_path(dir, de, &path, hash, baselen,
simplify,
!check_only && !contents)) {
case path_recurse:
read_directory_recursive(dir, path.buf,
- path.len, 0,
+ path.len,
+ hash_name_from(hash,
+ path.buf + baselen,
+ path.len - baselen),
+ 0,
simplify,
contents);
continue;
@@ -1543,7 +1563,7 @@ static void read_directory_recursive(struct dir_struct *dir,
if (check_only)
break;
START_CLOCK();
- dir_add_name(dir, path.buf, path.len);
+ dir_add_name(dir, path.buf, path.len, hash, baselen);
STOP_CLOCK(tv_dir_add_name);
}
closedir(fdir);
@@ -1619,7 +1639,7 @@ static int treat_leading_path(struct dir_struct *dir,
if (simplify_away(sb.buf, sb.len, simplify))
break;
dir->exclude_prepared = 0;
- if (treat_one_path(dir, &sb, simplify,
+ if (treat_one_path(dir, &sb, 0, 0, simplify,
DT_DIR, NULL, 0) == path_ignored)
break; /* do not recurse into it */
if (len <= baselen) {
@@ -1648,7 +1668,9 @@ int read_directory(struct dir_struct *dir, const char *path, int len, const char
STOP_CLOCK(tv_lazy_init_name_hash);
#endif
START_CLOCK();
- read_directory_recursive(dir, path, len, 0, simplify, NULL);
+ read_directory_recursive(dir, path, len,
+ hash_name(path, len),
+ 0, simplify, NULL);
STOP_CLOCK(tv_read_directory);
}
#ifdef MEASURE_EXCLUDE
--
1.8.1.2.536.gf441e6d
next prev parent reply other threads:[~2013-03-12 13:07 UTC|newest]
Thread overview: 48+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-03-09 4:09 [PATCH 0/3] Trivial (and small) exclude optimizations Nguyễn Thái Ngọc Duy
2013-03-09 4:09 ` [PATCH 1/3] match_pathname: avoid calling strncmp if baselen is 0 Nguyễn Thái Ngọc Duy
2013-03-09 9:06 ` Antoine Pelisse
2013-03-09 4:09 ` [PATCH 2/3] dir.c: inline convenient *_icase helpers Nguyễn Thái Ngọc Duy
2013-03-09 4:09 ` [PATCH 3/3] match_basename: use strncmp instead of strcmp Nguyễn Thái Ngọc Duy
2013-03-09 7:50 ` Junio C Hamano
2013-03-09 8:47 ` Fredrik Gustafsson
2013-03-09 9:58 ` Duy Nguyen
2013-03-10 6:14 ` [PATCH v2 0/6] Exclude optimizations Nguyễn Thái Ngọc Duy
2013-03-10 6:14 ` [PATCH v2 1/6] match_pathname: avoid calling strncmp if baselen is 0 Nguyễn Thái Ngọc Duy
2013-03-10 6:14 ` [PATCH v2 2/6] dir.c: inline convenient *_icase helpers Nguyễn Thái Ngọc Duy
2013-03-10 6:14 ` [PATCH v2 3/6] match_basename: use strncmp instead of strcmp Nguyễn Thái Ngọc Duy
2013-03-10 7:34 ` Junio C Hamano
2013-03-10 10:38 ` Duy Nguyen
2013-03-10 11:43 ` Antoine Pelisse
2013-03-10 11:54 ` Antoine Pelisse
2013-03-10 12:06 ` Duy Nguyen
2013-03-10 12:11 ` Antoine Pelisse
2013-03-10 12:14 ` Duy Nguyen
2013-03-12 20:59 ` Junio C Hamano
2013-03-13 1:11 ` Duy Nguyen
2013-03-10 6:14 ` [PATCH v2 4/6] match_{base,path}name: replace strncmp_icase with strnequal_icase Nguyễn Thái Ngọc Duy
2013-03-10 6:14 ` [PATCH v2 5/6] dir.c: pass pathname length to last_exclude_matching Nguyễn Thái Ngọc Duy
2013-03-10 6:14 ` [PATCH v2 6/6] exclude: filter patterns by directory level Nguyễn Thái Ngọc Duy
2013-03-10 8:20 ` Junio C Hamano
2013-03-10 10:18 ` Duy Nguyen
2013-03-10 10:58 ` Junio C Hamano
2013-03-10 11:14 ` Duy Nguyen
2013-03-11 15:11 ` [PATCH v2 0/6] Exclude optimizations Duy Nguyen
2013-03-12 13:04 ` [PATCH v3 00/13] " Nguyễn Thái Ngọc Duy
2013-03-12 13:04 ` [PATCH v3 01/13] dir.c: add MEASURE_EXCLUDE code for tracking exclude performance Nguyễn Thái Ngọc Duy
2013-03-12 13:04 ` [PATCH v3 02/13] match_pathname: avoid calling strncmp if baselen is 0 Nguyễn Thái Ngọc Duy
2013-03-12 13:04 ` [PATCH v3 03/13] dir.c: inline convenient *_icase helpers Nguyễn Thái Ngọc Duy
2013-03-12 13:04 ` [PATCH v3 04/13] match_basename: use strncmp instead of strcmp Nguyễn Thái Ngọc Duy
2013-03-12 17:40 ` Antoine Pelisse
2013-03-13 1:05 ` Duy Nguyen
2013-03-12 13:04 ` [PATCH v3 05/13] match_{base,path}name: replace strncmp_icase with memequal_icase Nguyễn Thái Ngọc Duy
2013-03-13 1:14 ` Duy Nguyen
2013-03-12 13:04 ` [PATCH v3 06/13] dir: pass pathname length to last_exclude_matching Nguyễn Thái Ngọc Duy
2013-03-12 13:04 ` [PATCH v3 07/13] exclude: avoid calling prep_exclude on entries of the same directory Nguyễn Thái Ngọc Duy
2013-03-12 13:04 ` [PATCH v3 08/13] exclude: record baselen in the pattern Nguyễn Thái Ngọc Duy
2013-03-12 13:04 ` [PATCH v3 09/13] exclude: filter out patterns not applicable to the current directory Nguyễn Thái Ngọc Duy
2013-03-12 23:13 ` Eric Sunshine
2013-03-12 13:04 ` [PATCH v3 10/13] read_directory: avoid invoking exclude machinery on tracked files Nguyễn Thái Ngọc Duy
2013-03-12 13:04 ` [PATCH v3 11/13] Preallocate hash tables when the number of inserts are known in advance Nguyễn Thái Ngọc Duy
2013-03-12 13:04 ` [PATCH v3 12/13] name-hash: allow to lookup a name with precalculated base hash Nguyễn Thái Ngọc Duy
2013-03-12 13:05 ` Nguyễn Thái Ngọc Duy [this message]
2013-03-14 13:05 ` [PATCH v3 00/13] Exclude optimizations Duy Nguyen
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1363093500-16796-14-git-send-email-pclouds@gmail.com \
--to=pclouds@gmail.com \
--cc=git@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.