git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
To: git@vger.kernel.org
Cc: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
Subject: [PATCH v3 13/13] read_directory: calculate name hashes incrementally
Date: Tue, 12 Mar 2013 20:05:00 +0700	[thread overview]
Message-ID: <1363093500-16796-14-git-send-email-pclouds@gmail.com> (raw)
In-Reply-To: <1363093500-16796-1-git-send-email-pclouds@gmail.com>

Instead of index_name_exists() calculating a hash for full pathname
for every entry, we calculate partial hash per directory, use it as a
seed. The number of characters that icase_hash has to chew will
reduce.

treat_leading_path:   0.000  0.000
read_directory:       1.296  1.235
+treat_one_path:      0.599  0.531
++is_excluded:        0.102  0.102
+++prep_exclude:      0.040  0.040
+++matching:          0.035  0.035
++dir_exist:          0.035  0.035
++index_name_exists:  0.292  0.225
lazy_init_name_hash:  0.155  0.155
+simplify_away:       0.082  0.083
+dir_add_name:        0.000  0.000

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 dir.c | 44 +++++++++++++++++++++++++++++++++-----------
 1 file changed, 33 insertions(+), 11 deletions(-)

diff --git a/dir.c b/dir.c
index 5fda5af..8638dcd 100644
--- a/dir.c
+++ b/dir.c
@@ -46,6 +46,7 @@ struct path_simplify {
 
 static void read_directory_recursive(struct dir_struct *dir,
 				     const char *path, int len,
+				     unsigned int hash,
 				     int check_only,
 				     const struct path_simplify *simplify,
 				     int *contents);
@@ -1044,12 +1045,17 @@ static struct dir_entry *dir_entry_new(const char *pathname, int len)
 	return ent;
 }
 
-static struct dir_entry *dir_add_name(struct dir_struct *dir, const char *pathname, int len)
+static struct dir_entry *dir_add_name(struct dir_struct *dir,
+				      const char *pathname, int len,
+				      unsigned int hash, int baselen)
 {
 	if (!(dir->flags & DIR_SHOW_IGNORED)) {
 		struct cache_entry *ce;
 		START_CLOCK();
-		ce = cache_name_exists(pathname, len, ignore_case);
+		ce = index_name_exists_base(&the_index,
+					    hash, baselen,
+					    pathname, len,
+					    ignore_case);
 		STOP_CLOCK(tv_index_name_exists);
 		if (ce)
 			return NULL;
@@ -1225,7 +1231,9 @@ static enum directory_treatment treat_directory(struct dir_struct *dir,
 	if ((dir->flags & DIR_SHOW_IGNORED) && !exclude) {
 		dir->flags &= ~DIR_SHOW_IGNORED;
 		dir->flags |= DIR_HIDE_EMPTY_DIRECTORIES;
-		read_directory_recursive(dir, dirname, len, 1, simplify, &contents);
+		read_directory_recursive(dir, dirname, len,
+					 hash_name(dirname, len),
+					 1, simplify, &contents);
 		dir->flags &= ~DIR_HIDE_EMPTY_DIRECTORIES;
 		dir->flags |= DIR_SHOW_IGNORED;
 
@@ -1234,7 +1242,9 @@ static enum directory_treatment treat_directory(struct dir_struct *dir,
 	if (!(dir->flags & DIR_SHOW_IGNORED) &&
 	    !(dir->flags & DIR_HIDE_EMPTY_DIRECTORIES))
 		return show_directory;
-	read_directory_recursive(dir, dirname, len, 1, simplify, &contents);
+	read_directory_recursive(dir, dirname, len,
+				 hash_name(dirname, len),
+				 1, simplify, &contents);
 	if (!contents)
 		return ignore_directory;
 	return show_directory;
@@ -1401,6 +1411,8 @@ enum path_treatment {
 
 static enum path_treatment treat_one_path(struct dir_struct *dir,
 					  struct strbuf *path,
+					  unsigned int hash,
+					  int baselen,
 					  const struct path_simplify *simplify,
 					  int dtype, struct dirent *de,
 					  int exclude_shortcut_ok)
@@ -1416,7 +1428,8 @@ static enum path_treatment treat_one_path(struct dir_struct *dir,
 	    dtype != DT_DIR) {
 		struct cache_entry *ce;
 		START_CLOCK();
-		ce = cache_name_exists(path->buf, path->len, ignore_case);
+		ce = index_name_exists_base(&the_index, hash, baselen,
+					    path->buf, path->len, ignore_case);
 		STOP_CLOCK(tv_index_name_exists);
 		if (ce)
 			return path_ignored;
@@ -1467,6 +1480,7 @@ static enum path_treatment treat_one_path(struct dir_struct *dir,
 static enum path_treatment treat_path(struct dir_struct *dir,
 				      struct dirent *de,
 				      struct strbuf *path,
+				      unsigned int hash,
 				      int baselen,
 				      const struct path_simplify *simplify,
 				      int exclude_shortcut_ok)
@@ -1485,7 +1499,8 @@ static enum path_treatment treat_path(struct dir_struct *dir,
 
 	dtype = DTYPE(de);
 	START_CLOCK();
-	ret = treat_one_path(dir, path, simplify, dtype, de, exclude_shortcut_ok);
+	ret = treat_one_path(dir, path, hash, baselen,
+			     simplify, dtype, de, exclude_shortcut_ok);
 	STOP_CLOCK(tv_treat_one_path);
 	return ret;
 }
@@ -1501,6 +1516,7 @@ static enum path_treatment treat_path(struct dir_struct *dir,
  */
 static void read_directory_recursive(struct dir_struct *dir,
 				     const char *base, int baselen,
+				     unsigned int hash,
 				     int check_only,
 				     const struct path_simplify *simplify,
 				     int *contents)
@@ -1517,12 +1533,16 @@ static void read_directory_recursive(struct dir_struct *dir,
 
 	dir->exclude_prepared = 0;
 	while ((de = readdir(fdir)) != NULL) {
-		switch (treat_path(dir, de, &path, baselen,
+		switch (treat_path(dir, de, &path, hash, baselen,
 				   simplify,
 				   !check_only && !contents)) {
 		case path_recurse:
 			read_directory_recursive(dir, path.buf,
-						 path.len, 0,
+						 path.len,
+						 hash_name_from(hash,
+								path.buf + baselen,
+								path.len - baselen),
+						 0,
 						 simplify,
 						 contents);
 			continue;
@@ -1543,7 +1563,7 @@ static void read_directory_recursive(struct dir_struct *dir,
 		if (check_only)
 			break;
 		START_CLOCK();
-		dir_add_name(dir, path.buf, path.len);
+		dir_add_name(dir, path.buf, path.len, hash, baselen);
 		STOP_CLOCK(tv_dir_add_name);
 	}
 	closedir(fdir);
@@ -1619,7 +1639,7 @@ static int treat_leading_path(struct dir_struct *dir,
 		if (simplify_away(sb.buf, sb.len, simplify))
 			break;
 		dir->exclude_prepared = 0;
-		if (treat_one_path(dir, &sb, simplify,
+		if (treat_one_path(dir, &sb, 0, 0, simplify,
 				   DT_DIR, NULL, 0) == path_ignored)
 			break; /* do not recurse into it */
 		if (len <= baselen) {
@@ -1648,7 +1668,9 @@ int read_directory(struct dir_struct *dir, const char *path, int len, const char
 		STOP_CLOCK(tv_lazy_init_name_hash);
 #endif
 		START_CLOCK();
-		read_directory_recursive(dir, path, len, 0, simplify, NULL);
+		read_directory_recursive(dir, path, len,
+					 hash_name(path, len),
+					 0, simplify, NULL);
 		STOP_CLOCK(tv_read_directory);
 	}
 #ifdef MEASURE_EXCLUDE
-- 
1.8.1.2.536.gf441e6d

  parent reply	other threads:[~2013-03-12 13:07 UTC|newest]

Thread overview: 48+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-03-09  4:09 [PATCH 0/3] Trivial (and small) exclude optimizations Nguyễn Thái Ngọc Duy
2013-03-09  4:09 ` [PATCH 1/3] match_pathname: avoid calling strncmp if baselen is 0 Nguyễn Thái Ngọc Duy
2013-03-09  9:06   ` Antoine Pelisse
2013-03-09  4:09 ` [PATCH 2/3] dir.c: inline convenient *_icase helpers Nguyễn Thái Ngọc Duy
2013-03-09  4:09 ` [PATCH 3/3] match_basename: use strncmp instead of strcmp Nguyễn Thái Ngọc Duy
2013-03-09  7:50   ` Junio C Hamano
2013-03-09  8:47     ` Fredrik Gustafsson
2013-03-09  9:58     ` Duy Nguyen
2013-03-10  6:14 ` [PATCH v2 0/6] Exclude optimizations Nguyễn Thái Ngọc Duy
2013-03-10  6:14   ` [PATCH v2 1/6] match_pathname: avoid calling strncmp if baselen is 0 Nguyễn Thái Ngọc Duy
2013-03-10  6:14   ` [PATCH v2 2/6] dir.c: inline convenient *_icase helpers Nguyễn Thái Ngọc Duy
2013-03-10  6:14   ` [PATCH v2 3/6] match_basename: use strncmp instead of strcmp Nguyễn Thái Ngọc Duy
2013-03-10  7:34     ` Junio C Hamano
2013-03-10 10:38       ` Duy Nguyen
2013-03-10 11:43         ` Antoine Pelisse
2013-03-10 11:54           ` Antoine Pelisse
2013-03-10 12:06             ` Duy Nguyen
2013-03-10 12:11               ` Antoine Pelisse
2013-03-10 12:14                 ` Duy Nguyen
2013-03-12 20:59         ` Junio C Hamano
2013-03-13  1:11           ` Duy Nguyen
2013-03-10  6:14   ` [PATCH v2 4/6] match_{base,path}name: replace strncmp_icase with strnequal_icase Nguyễn Thái Ngọc Duy
2013-03-10  6:14   ` [PATCH v2 5/6] dir.c: pass pathname length to last_exclude_matching Nguyễn Thái Ngọc Duy
2013-03-10  6:14   ` [PATCH v2 6/6] exclude: filter patterns by directory level Nguyễn Thái Ngọc Duy
2013-03-10  8:20     ` Junio C Hamano
2013-03-10 10:18       ` Duy Nguyen
2013-03-10 10:58       ` Junio C Hamano
2013-03-10 11:14         ` Duy Nguyen
2013-03-11 15:11   ` [PATCH v2 0/6] Exclude optimizations Duy Nguyen
2013-03-12 13:04   ` [PATCH v3 00/13] " Nguyễn Thái Ngọc Duy
2013-03-12 13:04     ` [PATCH v3 01/13] dir.c: add MEASURE_EXCLUDE code for tracking exclude performance Nguyễn Thái Ngọc Duy
2013-03-12 13:04     ` [PATCH v3 02/13] match_pathname: avoid calling strncmp if baselen is 0 Nguyễn Thái Ngọc Duy
2013-03-12 13:04     ` [PATCH v3 03/13] dir.c: inline convenient *_icase helpers Nguyễn Thái Ngọc Duy
2013-03-12 13:04     ` [PATCH v3 04/13] match_basename: use strncmp instead of strcmp Nguyễn Thái Ngọc Duy
2013-03-12 17:40       ` Antoine Pelisse
2013-03-13  1:05         ` Duy Nguyen
2013-03-12 13:04     ` [PATCH v3 05/13] match_{base,path}name: replace strncmp_icase with memequal_icase Nguyễn Thái Ngọc Duy
2013-03-13  1:14       ` Duy Nguyen
2013-03-12 13:04     ` [PATCH v3 06/13] dir: pass pathname length to last_exclude_matching Nguyễn Thái Ngọc Duy
2013-03-12 13:04     ` [PATCH v3 07/13] exclude: avoid calling prep_exclude on entries of the same directory Nguyễn Thái Ngọc Duy
2013-03-12 13:04     ` [PATCH v3 08/13] exclude: record baselen in the pattern Nguyễn Thái Ngọc Duy
2013-03-12 13:04     ` [PATCH v3 09/13] exclude: filter out patterns not applicable to the current directory Nguyễn Thái Ngọc Duy
2013-03-12 23:13       ` Eric Sunshine
2013-03-12 13:04     ` [PATCH v3 10/13] read_directory: avoid invoking exclude machinery on tracked files Nguyễn Thái Ngọc Duy
2013-03-12 13:04     ` [PATCH v3 11/13] Preallocate hash tables when the number of inserts are known in advance Nguyễn Thái Ngọc Duy
2013-03-12 13:04     ` [PATCH v3 12/13] name-hash: allow to lookup a name with precalculated base hash Nguyễn Thái Ngọc Duy
2013-03-12 13:05     ` Nguyễn Thái Ngọc Duy [this message]
2013-03-14 13:05     ` [PATCH v3 00/13] Exclude optimizations Duy Nguyen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1363093500-16796-14-git-send-email-pclouds@gmail.com \
    --to=pclouds@gmail.com \
    --cc=git@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).