All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
To: git@vger.kernel.org
Cc: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
Subject: [PATCH 3/4] pathspec: apply "*.c" optimization from exclude
Date: Sun, 18 Nov 2012 16:13:08 +0700	[thread overview]
Message-ID: <1353229989-13075-4-git-send-email-pclouds@gmail.com> (raw)
In-Reply-To: <1353229989-13075-1-git-send-email-pclouds@gmail.com>

-O2 build on linux-2.6, without the patch:

$ time git rev-list --quiet HEAD -- '*.c'

real    0m40.770s
user    0m40.290s
sys     0m0.256s

With the patch:

$ time ~/w/git/git rev-list --quiet HEAD -- '*.c'

real    0m34.288s
user    0m33.997s
sys     0m0.205s

The above command is not expected to be commonly used. It was chosen
because it exercises pathspec matching heavily. The point is that the
patch cuts down matching time for popular patterns like "*.c", which
could be used as pathspecs in other places.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 cache.h     |  3 +++
 dir.c       | 17 +++++++++++++++--
 dir.h       |  1 +
 tree-walk.c |  6 ++++--
 4 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/cache.h b/cache.h
index bf031f1..d18f584 100644
--- a/cache.h
+++ b/cache.h
@@ -473,6 +473,8 @@ extern int index_name_is_other(const struct index_state *, const char *, int);
 extern int ie_match_stat(const struct index_state *, struct cache_entry *, struct stat *, unsigned int);
 extern int ie_modified(const struct index_state *, struct cache_entry *, struct stat *, unsigned int);
 
+#define PSF_ONESTAR 1
+
 struct pathspec {
 	const char **raw; /* get_pathspec() result, not freed by free_pathspec() */
 	int nr;
@@ -483,6 +485,7 @@ struct pathspec {
 		const char *match;
 		int len;
 		int nowildcard_len;
+		int flags;
 	} *items;
 };
 
diff --git a/dir.c b/dir.c
index e4e6ca1..d00f240 100644
--- a/dir.c
+++ b/dir.c
@@ -46,6 +46,12 @@ inline int git_fnmatch(const char *pattern, const char *string,
 		pattern += prefix;
 		string += prefix;
 	}
+	if (flags & GF_ONESTAR) {
+		int pattern_len = strlen(++pattern);
+		int string_len = strlen(string);
+		return strcmp(pattern,
+			      string + string_len - pattern_len);
+	}
 	return fnmatch(pattern, string, fnm_flags);
 }
 
@@ -246,7 +252,9 @@ static int match_pathspec_item(const struct pathspec_item *item, int prefix,
 	}
 
 	if (item->nowildcard_len < item->len &&
-	    !git_fnmatch(match, name, 0, item->nowildcard_len - prefix))
+	    !git_fnmatch(match, name,
+			 item->flags & PSF_ONESTAR ? GF_ONESTAR : 0,
+			 item->nowildcard_len - prefix))
 		return MATCHED_FNMATCH;
 
 	return 0;
@@ -1446,8 +1454,13 @@ int init_pathspec(struct pathspec *pathspec, const char **paths)
 		item->match = path;
 		item->len = strlen(path);
 		item->nowildcard_len = simple_length(path);
-		if (item->nowildcard_len < item->len)
+		item->flags = 0;
+		if (item->nowildcard_len < item->len) {
 			pathspec->has_wildcard = 1;
+			if (path[item->nowildcard_len] == '*' &&
+			    no_wildcard(path + item->nowildcard_len + 1))
+				item->flags |= PSF_ONESTAR;
+		}
 	}
 
 	qsort(pathspec->items, pathspec->nr,
diff --git a/dir.h b/dir.h
index 4cd5074..590532b 100644
--- a/dir.h
+++ b/dir.h
@@ -143,6 +143,7 @@ extern int fnmatch_icase(const char *pattern, const char *string, int flags);
  * The prefix part of pattern must not contains wildcards.
  */
 #define GF_PATHNAME 1
+#define GF_ONESTAR  2
 
 extern int git_fnmatch(const char *pattern, const char *string,
 		       int flags, int prefix);
diff --git a/tree-walk.c b/tree-walk.c
index 2fcf3c0..42fe610 100644
--- a/tree-walk.c
+++ b/tree-walk.c
@@ -628,7 +628,8 @@ enum interesting tree_entry_interesting(const struct name_entry *entry,
 
 			if (item->nowildcard_len < item->len) {
 				if (!git_fnmatch(match + baselen, entry->path,
-						 0, item->nowildcard_len - baselen))
+						 item->flags & PSF_ONESTAR ? GF_ONESTAR : 0,
+						 item->nowildcard_len - baselen))
 					return entry_interesting;
 
 				/*
@@ -654,7 +655,8 @@ match_wildcards:
 		strbuf_add(base, entry->path, pathlen);
 
 		if (!git_fnmatch(match, base->buf + base_offset,
-				 0, item->nowildcard_len)) {
+				 item->flags & PSF_ONESTAR ? GF_ONESTAR : 0,
+				 item->nowildcard_len)) {
 			strbuf_setlen(base, base_offset + baselen);
 			return entry_interesting;
 		}
-- 
1.8.0.rc2.23.g1fb49df

  parent reply	other threads:[~2012-11-18  9:13 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-11-18  9:13 [PATCH 0/4] Some pathspec wildcard optimization Nguyễn Thái Ngọc Duy
2012-11-18  9:13 ` [PATCH 1/4] pathspec: save the non-wildcard length part Nguyễn Thái Ngọc Duy
2012-11-19 20:48   ` Junio C Hamano
2012-11-18  9:13 ` [PATCH 2/4] pathspec: do exact comparison on the leading non-wildcard part Nguyễn Thái Ngọc Duy
2012-11-19 20:54   ` Junio C Hamano
2012-11-20 13:09     ` Nguyen Thai Ngoc Duy
2012-11-18  9:13 ` Nguyễn Thái Ngọc Duy [this message]
2012-11-19 21:20   ` [PATCH 3/4] pathspec: apply "*.c" optimization from exclude Junio C Hamano
2012-11-20 13:23     ` Nguyen Thai Ngoc Duy
2012-11-20 19:47       ` Junio C Hamano
2012-11-18  9:13 ` [PATCH 4/4] tree_entry_interesting: do basedir compare on wildcard patterns when possible Nguyễn Thái Ngọc Duy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1353229989-13075-4-git-send-email-pclouds@gmail.com \
    --to=pclouds@gmail.com \
    --cc=git@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.