git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
To: git@vger.kernel.org
Cc: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
Subject: [PATCH v2 6/6] exclude: filter patterns by directory level
Date: Sun, 10 Mar 2013 13:14:30 +0700	[thread overview]
Message-ID: <1362896070-17456-7-git-send-email-pclouds@gmail.com> (raw)
In-Reply-To: <1362896070-17456-1-git-send-email-pclouds@gmail.com>

A non-basename pattern that does not contain /**/ can't match anything
outside the attached directory. Record its directory level and avoid
matching unless the pathname is also at the same directory level.

This optimization shines when there are a lot of non-basename patterns
in the root .gitignore and a big/deep worktree. Due to the cascading
rule of .gitignore, patterns in the root .gitignore are checked for
_all_ entries in the worktree.

        before      after
user    0m0.424s    0m0.365s
user    0m0.427s    0m0.366s
user    0m0.432s    0m0.374s
user    0m0.435s    0m0.374s
user    0m0.435s    0m0.377s
user    0m0.437s    0m0.381s
user    0m0.439s    0m0.381s
user    0m0.440s    0m0.383s
user    0m0.450s    0m0.384s
user    0m0.454s    0m0.384s

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 attr.c |  3 ++-
 dir.c  | 68 ++++++++++++++++++++++++++++++++++++++++++++++++------------------
 dir.h  |  9 ++++++++-
 3 files changed, 60 insertions(+), 20 deletions(-)

diff --git a/attr.c b/attr.c
index 1818ba5..7764ddd 100644
--- a/attr.c
+++ b/attr.c
@@ -254,7 +254,8 @@ static struct match_attr *parse_attr_line(const char *line, const char *src,
 		parse_exclude_pattern(&res->u.pat.pattern,
 				      &res->u.pat.patternlen,
 				      &res->u.pat.flags,
-				      &res->u.pat.nowildcardlen);
+				      &res->u.pat.nowildcardlen,
+				      NULL);
 		if (res->u.pat.flags & EXC_FLAG_MUSTBEDIR)
 			res->u.pat.patternlen++;
 		if (res->u.pat.flags & EXC_FLAG_NEGATIVE) {
diff --git a/dir.c b/dir.c
index 880b5e6..de7a6ba 100644
--- a/dir.c
+++ b/dir.c
@@ -360,10 +360,12 @@ static int no_wildcard(const char *string)
 void parse_exclude_pattern(const char **pattern,
 			   int *patternlen,
 			   int *flags,
-			   int *nowildcardlen)
+			   int *nowildcardlen,
+			   int *dirs_p)
 {
 	const char *p = *pattern;
 	size_t i, len;
+	int dirs;
 
 	*flags = 0;
 	if (*p == '!') {
@@ -375,12 +377,15 @@ void parse_exclude_pattern(const char **pattern,
 		len--;
 		*flags |= EXC_FLAG_MUSTBEDIR;
 	}
-	for (i = 0; i < len; i++) {
+	for (i = 0, dirs = 0; i < len; i++) {
 		if (p[i] == '/')
-			break;
+			dirs++;
 	}
-	if (i == len)
+	if (!dirs)
 		*flags |= EXC_FLAG_NODIR;
+	else if (*p == '/')
+		dirs--;
+
 	*nowildcardlen = simple_length(p);
 	/*
 	 * we should have excluded the trailing slash from 'p' too,
@@ -393,6 +398,8 @@ void parse_exclude_pattern(const char **pattern,
 		*flags |= EXC_FLAG_ENDSWITH;
 	*pattern = p;
 	*patternlen = len;
+	if (dirs_p)
+		*dirs_p = dirs;
 }
 
 void add_exclude(const char *string, const char *base,
@@ -402,8 +409,9 @@ void add_exclude(const char *string, const char *base,
 	int patternlen;
 	int flags;
 	int nowildcardlen;
+	int dirs;
 
-	parse_exclude_pattern(&string, &patternlen, &flags, &nowildcardlen);
+	parse_exclude_pattern(&string, &patternlen, &flags, &nowildcardlen, &dirs);
 	if (flags & EXC_FLAG_MUSTBEDIR) {
 		char *s;
 		x = xmalloc(sizeof(*x) + patternlen + 1);
@@ -415,11 +423,26 @@ void add_exclude(const char *string, const char *base,
 		x = xmalloc(sizeof(*x));
 		x->pattern = string;
 	}
+	/*
+	 * TODO: nowildcardlen < patternlen is stricter than
+	 * necessary, mainly to exclude "**" that breaks the directory
+	 * boundary. Patterns like "/foo-*" should be fine.
+	 */
+	if ((flags & EXC_FLAG_NODIR) || nowildcardlen < patternlen)
+		dirs = -1;
+	else {
+		int i;
+		for (i = 0; i < baselen; i++) {
+			if (base[i] == '/')
+				dirs++;
+		}
+	}
 	x->patternlen = patternlen;
 	x->nowildcardlen = nowildcardlen;
 	x->base = base;
 	x->baselen = baselen;
 	x->flags = flags;
+	x->dirs = dirs;
 	x->srcpos = srcpos;
 	ALLOC_GROW(el->excludes, el->nr + 1, el->alloc);
 	el->excludes[el->nr++] = x;
@@ -701,7 +724,7 @@ int match_pathname(const char *pathname, int pathlen,
  * matched, or NULL for undecided.
  */
 static struct exclude *last_exclude_matching_from_list(const char *pathname,
-						       int pathlen,
+						       int pathlen, int dirs,
 						       const char *basename,
 						       int *dtype,
 						       struct exclude_list *el)
@@ -732,6 +755,9 @@ static struct exclude *last_exclude_matching_from_list(const char *pathname,
 			continue;
 		}
 
+		if (dirs >= 0 && x->dirs >= 0 && x->dirs != dirs)
+			continue;
+
 		assert(x->baselen == 0 || x->base[x->baselen - 1] == '/');
 		if (match_pathname(pathname, pathlen,
 				   x->base, x->baselen ? x->baselen - 1 : 0,
@@ -750,7 +776,8 @@ int is_excluded_from_list(const char *pathname,
 			  struct exclude_list *el)
 {
 	struct exclude *exclude;
-	exclude = last_exclude_matching_from_list(pathname, pathlen, basename, dtype, el);
+	exclude = last_exclude_matching_from_list(pathname, pathlen, -1,
+						  basename, dtype, el);
 	if (exclude)
 		return exclude->flags & EXC_FLAG_NEGATIVE ? 0 : 1;
 	return -1; /* undecided */
@@ -765,6 +792,7 @@ int is_excluded_from_list(const char *pathname,
 static struct exclude *last_exclude_matching(struct dir_struct *dir,
 					     const char *pathname,
 					     int pathlen,
+					     int dirs,
 					     int *dtype_p)
 {
 	int i, j;
@@ -779,8 +807,8 @@ static struct exclude *last_exclude_matching(struct dir_struct *dir,
 		group = &dir->exclude_list_group[i];
 		for (j = group->nr - 1; j >= 0; j--) {
 			exclude = last_exclude_matching_from_list(
-				pathname, pathlen, basename, dtype_p,
-				&group->el[j]);
+				pathname, pathlen, dir->dir_level,
+				basename, dtype_p, &group->el[j]);
 			if (exclude)
 				return exclude;
 		}
@@ -794,11 +822,11 @@ static struct exclude *last_exclude_matching(struct dir_struct *dir,
  * Returns 1 if true, otherwise 0.
  */
 static int is_excluded(struct dir_struct *dir,
-		       const char *pathname, int pathlen,
+		       const char *pathname, int pathlen, int dirs,
 		       int *dtype_p)
 {
 	struct exclude *exclude =
-		last_exclude_matching(dir, pathname, pathlen, dtype_p);
+		last_exclude_matching(dir, pathname, pathlen, dirs, dtype_p);
 	if (exclude)
 		return exclude->flags & EXC_FLAG_NEGATIVE ? 0 : 1;
 	return 0;
@@ -862,7 +890,7 @@ struct exclude *last_exclude_matching_path(struct path_exclude_check *check,
 			int dt = DT_DIR;
 			exclude = last_exclude_matching(check->dir,
 							path->buf, path->len,
-							&dt);
+							-1, &dt);
 			if (exclude) {
 				check->exclude = exclude;
 				return exclude;
@@ -874,7 +902,7 @@ struct exclude *last_exclude_matching_path(struct path_exclude_check *check,
 	/* An entry in the index; cannot be a directory with subentries */
 	strbuf_setlen(path, 0);
 
-	return last_exclude_matching(check->dir, name, namelen, dtype);
+	return last_exclude_matching(check->dir, name, namelen, -1, dtype);
 }
 
 /*
@@ -1248,11 +1276,11 @@ enum path_treatment {
 };
 
 static enum path_treatment treat_one_path(struct dir_struct *dir,
-					  struct strbuf *path,
+					  struct strbuf *path, int dirs,
 					  const struct path_simplify *simplify,
 					  int dtype, struct dirent *de)
 {
-	int exclude = is_excluded(dir, path->buf, path->len, &dtype);
+	int exclude = is_excluded(dir, path->buf, path->len, dirs, &dtype);
 	if (exclude && (dir->flags & DIR_COLLECT_IGNORED)
 	    && exclude_matches_pathspec(path->buf, path->len, simplify))
 		dir_add_ignored(dir, path->buf, path->len);
@@ -1310,7 +1338,7 @@ static enum path_treatment treat_path(struct dir_struct *dir,
 		return path_ignored;
 
 	dtype = DTYPE(de);
-	return treat_one_path(dir, path, simplify, dtype, de);
+	return treat_one_path(dir, path, -1, simplify, dtype, de);
 }
 
 /*
@@ -1338,6 +1366,7 @@ static int read_directory_recursive(struct dir_struct *dir,
 	if (!fdir)
 		goto out;
 
+	dir->dir_level++;
 	while ((de = readdir(fdir)) != NULL) {
 		switch (treat_path(dir, de, &path, baselen, simplify)) {
 		case path_recurse:
@@ -1357,6 +1386,7 @@ static int read_directory_recursive(struct dir_struct *dir,
 	}
 	closedir(fdir);
  out:
+	dir->dir_level--;
 	strbuf_release(&path);
 
 	return contents;
@@ -1427,7 +1457,7 @@ static int treat_leading_path(struct dir_struct *dir,
 			break;
 		if (simplify_away(sb.buf, sb.len, simplify))
 			break;
-		if (treat_one_path(dir, &sb, simplify,
+		if (treat_one_path(dir, &sb, -1, simplify,
 				   DT_DIR, NULL) == path_ignored)
 			break; /* do not recurse into it */
 		if (len <= baselen) {
@@ -1447,8 +1477,10 @@ int read_directory(struct dir_struct *dir, const char *path, int len, const char
 		return dir->nr;
 
 	simplify = create_simplify(pathspec);
-	if (!len || treat_leading_path(dir, path, len, simplify))
+	if (!len || treat_leading_path(dir, path, len, simplify)) {
+		dir->dir_level = -1;
 		read_directory_recursive(dir, path, len, 0, simplify);
+	}
 	free_simplify(simplify);
 	qsort(dir->entries, dir->nr, sizeof(struct dir_entry *), cmp_name);
 	qsort(dir->ignored, dir->ignored_nr, sizeof(struct dir_entry *), cmp_name);
diff --git a/dir.h b/dir.h
index 560ade4..c434f1c 100644
--- a/dir.h
+++ b/dir.h
@@ -45,6 +45,7 @@ struct exclude_list {
 		const char *base;
 		int baselen;
 		int flags;
+		int dirs;
 
 		/*
 		 * Counting starts from 1 for line numbers in ignore files,
@@ -87,6 +88,8 @@ struct dir_struct {
 	/* Exclude info */
 	const char *exclude_per_dir;
 
+	int dir_level;
+
 	/*
 	 * We maintain three groups of exclude pattern lists:
 	 *
@@ -171,7 +174,11 @@ extern struct exclude_list *add_exclude_list(struct dir_struct *dir,
 extern int add_excludes_from_file_to_list(const char *fname, const char *base, int baselen,
 					  struct exclude_list *el, int check_index);
 extern void add_excludes_from_file(struct dir_struct *, const char *fname);
-extern void parse_exclude_pattern(const char **string, int *patternlen, int *flags, int *nowildcardlen);
+extern void parse_exclude_pattern(const char **string,
+				  int *patternlen,
+				  int *flags,
+				  int *nowildcardlen,
+				  int *dirs);
 extern void add_exclude(const char *string, const char *base,
 			int baselen, struct exclude_list *el, int srcpos);
 extern void clear_exclude_list(struct exclude_list *el);
-- 
1.8.1.2.536.gf441e6d

  parent reply	other threads:[~2013-03-10  6:15 UTC|newest]

Thread overview: 48+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-03-09  4:09 [PATCH 0/3] Trivial (and small) exclude optimizations Nguyễn Thái Ngọc Duy
2013-03-09  4:09 ` [PATCH 1/3] match_pathname: avoid calling strncmp if baselen is 0 Nguyễn Thái Ngọc Duy
2013-03-09  9:06   ` Antoine Pelisse
2013-03-09  4:09 ` [PATCH 2/3] dir.c: inline convenient *_icase helpers Nguyễn Thái Ngọc Duy
2013-03-09  4:09 ` [PATCH 3/3] match_basename: use strncmp instead of strcmp Nguyễn Thái Ngọc Duy
2013-03-09  7:50   ` Junio C Hamano
2013-03-09  8:47     ` Fredrik Gustafsson
2013-03-09  9:58     ` Duy Nguyen
2013-03-10  6:14 ` [PATCH v2 0/6] Exclude optimizations Nguyễn Thái Ngọc Duy
2013-03-10  6:14   ` [PATCH v2 1/6] match_pathname: avoid calling strncmp if baselen is 0 Nguyễn Thái Ngọc Duy
2013-03-10  6:14   ` [PATCH v2 2/6] dir.c: inline convenient *_icase helpers Nguyễn Thái Ngọc Duy
2013-03-10  6:14   ` [PATCH v2 3/6] match_basename: use strncmp instead of strcmp Nguyễn Thái Ngọc Duy
2013-03-10  7:34     ` Junio C Hamano
2013-03-10 10:38       ` Duy Nguyen
2013-03-10 11:43         ` Antoine Pelisse
2013-03-10 11:54           ` Antoine Pelisse
2013-03-10 12:06             ` Duy Nguyen
2013-03-10 12:11               ` Antoine Pelisse
2013-03-10 12:14                 ` Duy Nguyen
2013-03-12 20:59         ` Junio C Hamano
2013-03-13  1:11           ` Duy Nguyen
2013-03-10  6:14   ` [PATCH v2 4/6] match_{base,path}name: replace strncmp_icase with strnequal_icase Nguyễn Thái Ngọc Duy
2013-03-10  6:14   ` [PATCH v2 5/6] dir.c: pass pathname length to last_exclude_matching Nguyễn Thái Ngọc Duy
2013-03-10  6:14   ` Nguyễn Thái Ngọc Duy [this message]
2013-03-10  8:20     ` [PATCH v2 6/6] exclude: filter patterns by directory level Junio C Hamano
2013-03-10 10:18       ` Duy Nguyen
2013-03-10 10:58       ` Junio C Hamano
2013-03-10 11:14         ` Duy Nguyen
2013-03-11 15:11   ` [PATCH v2 0/6] Exclude optimizations Duy Nguyen
2013-03-12 13:04   ` [PATCH v3 00/13] " Nguyễn Thái Ngọc Duy
2013-03-12 13:04     ` [PATCH v3 01/13] dir.c: add MEASURE_EXCLUDE code for tracking exclude performance Nguyễn Thái Ngọc Duy
2013-03-12 13:04     ` [PATCH v3 02/13] match_pathname: avoid calling strncmp if baselen is 0 Nguyễn Thái Ngọc Duy
2013-03-12 13:04     ` [PATCH v3 03/13] dir.c: inline convenient *_icase helpers Nguyễn Thái Ngọc Duy
2013-03-12 13:04     ` [PATCH v3 04/13] match_basename: use strncmp instead of strcmp Nguyễn Thái Ngọc Duy
2013-03-12 17:40       ` Antoine Pelisse
2013-03-13  1:05         ` Duy Nguyen
2013-03-12 13:04     ` [PATCH v3 05/13] match_{base,path}name: replace strncmp_icase with memequal_icase Nguyễn Thái Ngọc Duy
2013-03-13  1:14       ` Duy Nguyen
2013-03-12 13:04     ` [PATCH v3 06/13] dir: pass pathname length to last_exclude_matching Nguyễn Thái Ngọc Duy
2013-03-12 13:04     ` [PATCH v3 07/13] exclude: avoid calling prep_exclude on entries of the same directory Nguyễn Thái Ngọc Duy
2013-03-12 13:04     ` [PATCH v3 08/13] exclude: record baselen in the pattern Nguyễn Thái Ngọc Duy
2013-03-12 13:04     ` [PATCH v3 09/13] exclude: filter out patterns not applicable to the current directory Nguyễn Thái Ngọc Duy
2013-03-12 23:13       ` Eric Sunshine
2013-03-12 13:04     ` [PATCH v3 10/13] read_directory: avoid invoking exclude machinery on tracked files Nguyễn Thái Ngọc Duy
2013-03-12 13:04     ` [PATCH v3 11/13] Preallocate hash tables when the number of inserts are known in advance Nguyễn Thái Ngọc Duy
2013-03-12 13:04     ` [PATCH v3 12/13] name-hash: allow to lookup a name with precalculated base hash Nguyễn Thái Ngọc Duy
2013-03-12 13:05     ` [PATCH v3 13/13] read_directory: calculate name hashes incrementally Nguyễn Thái Ngọc Duy
2013-03-14 13:05     ` [PATCH v3 00/13] Exclude optimizations Duy Nguyen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1362896070-17456-7-git-send-email-pclouds@gmail.com \
    --to=pclouds@gmail.com \
    --cc=git@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).