git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] Speedup scanning for excluded files.
@ 2007-10-29  7:45 Lars Knoll
  2007-10-29  8:02 ` Pierre Habouzit
  0 siblings, 1 reply; 8+ messages in thread
From: Lars Knoll @ 2007-10-29  7:45 UTC (permalink / raw)
  To: git

Try to avoid a lot of work while scanning for excluded files,
by caching some more information when setting up the exclusion
data structure.

Speeds up 'git runstatus' on a repository containing the Qt sources by 30% and 
reduces the amount of instructions executed (as measured by valgrind) by a 
factor of 2.
---

I did a short timing measurement on the git repository itself, the timings 
without and with the patch give:

[lars@dahab git (speedup)]$ time git runstatus > /dev/null

real    0m0.098s
user    0m0.068s
sys     0m0.020s
[lars@dahab git (speedup)]$ time ./git runstatus > /dev/null

real    0m0.033s
user    0m0.012s
sys     0m0.016s

Cheers,
Lars

 dir.c |   58 ++++++++++++++++++++++++++++++++++++++++++----------------
 dir.h |    7 +++++++
 2 files changed, 49 insertions(+), 16 deletions(-)

diff --git a/dir.c b/dir.c
index 4c17d36..7bb5add 100644
--- a/dir.c
+++ b/dir.c
@@ -118,14 +118,32 @@ int match_pathspec(const char **pathspec, const char 
*name, int namelen, int pre
 	return retval;
 }
 
+static int no_wildcard(const char *string)
+{
+    return !strchr(string, '*') && !strchr(string, '?') 
&& !strchr(string, '[') && !strchr(string, '{');
+}
+
 void add_exclude(const char *string, const char *base,
 		 int baselen, struct exclude_list *which)
 {
 	struct exclude *x = xmalloc(sizeof (*x));
 
+        x->to_exclude = 1;
+        if (*string == '!') {
+            x->to_exclude = 0;
+            string++;
+        }
 	x->pattern = string;
+        x->patternlen = strlen(string);
 	x->base = base;
 	x->baselen = baselen;
+        x->flags = 0;
+        if (!strchr(string, '/'))
+            x->flags |= EXC_FLAG_NODIR;
+        if (no_wildcard(string))
+            x->flags |= EXC_FLAG_NOWILDCARD;
+        if (*string == '*' && no_wildcard(string+1))
+            x->flags |= EXC_FLAG_ENDSWITH;
 	if (which->nr == which->alloc) {
 		which->alloc = alloc_nr(which->alloc);
 		which->excludes = xrealloc(which->excludes,
@@ -209,7 +227,7 @@ void pop_exclude_per_directory(struct dir_struct *dir, int 
stk)
  * Return 1 for exclude, 0 for include and -1 for undecided.
  */
 static int excluded_1(const char *pathname,
-		      int pathlen,
+		      int pathlen, const char *basename, 
 		      struct exclude_list *el)
 {
 	int i;
@@ -218,19 +236,20 @@ static int excluded_1(const char *pathname,
 		for (i = el->nr - 1; 0 <= i; i--) {
 			struct exclude *x = el->excludes[i];
 			const char *exclude = x->pattern;
-			int to_exclude = 1;
+			int to_exclude = x->to_exclude;
 
-			if (*exclude == '!') {
-				to_exclude = 0;
-				exclude++;
-			}
-
-			if (!strchr(exclude, '/')) {
+			if (x->flags & EXC_FLAG_NODIR) {
 				/* match basename */
-				const char *basename = strrchr(pathname, '/');
-				basename = (basename) ? basename+1 : pathname;
-				if (fnmatch(exclude, basename, 0) == 0)
-					return to_exclude;
+                            if (x->flags & EXC_FLAG_NOWILDCARD) {
+                                if (!strcmp(exclude, basename))
+                                    return to_exclude;
+                            } else if (x->flags & EXC_FLAG_ENDSWITH) {
+                                if (!strcmp(exclude + 1, pathname + 
pathlen -x->patternlen + 1))
+                                    return to_exclude;
+                            } else {
+                                if (fnmatch(exclude, basename, 0) == 0)
+                                    return to_exclude;
+                            }
 			}
 			else {
 				/* match with FNM_PATHNAME:
@@ -246,10 +265,15 @@ static int excluded_1(const char *pathname,
 				    strncmp(pathname, x->base, baselen))
 				    continue;
 
-				if (fnmatch(exclude, pathname+baselen,
-					    FNM_PATHNAME) == 0)
+                                if (x->flags & EXC_FLAG_NOWILDCARD) {
+                                    if (!strcmp(exclude, pathname + baselen))
+                                        return to_exclude;
+                                } else {
+                                    if (fnmatch(exclude, pathname+baselen,
+                                                FNM_PATHNAME) == 0)
 					return to_exclude;
-			}
+                                }
+                        }
 		}
 	}
 	return -1; /* undecided */
@@ -259,9 +283,11 @@ int excluded(struct dir_struct *dir, const char 
*pathname)
 {
 	int pathlen = strlen(pathname);
 	int st;
+        const char *basename = strrchr(pathname, '/');
+        basename = (basename) ? basename+1 : pathname;
 
 	for (st = EXC_CMDL; st <= EXC_FILE; st++) {
-		switch (excluded_1(pathname, pathlen, &dir->exclude_list[st])) {
+		switch (excluded_1(pathname, pathlen, basename, &dir->exclude_list[st])) {
 		case 0:
 			return 0;
 		case 1:
diff --git a/dir.h b/dir.h
index a248a23..4dc3c6e 100644
--- a/dir.h
+++ b/dir.h
@@ -17,13 +17,20 @@ struct dir_entry {
 	char name[FLEX_ARRAY]; /* more */
 };
 
+#define EXC_FLAG_NODIR 1
+#define EXC_FLAG_NOWILDCARD 2
+#define EXC_FLAG_ENDSWITH 4
+
 struct exclude_list {
 	int nr;
 	int alloc;
 	struct exclude {
 		const char *pattern;
+                int patternlen;            
 		const char *base;
 		int baselen;
+                int to_exclude;
+                int flags;
 	} **excludes;
 };
 
-- 
1.5.3.4.383.gd90a7

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH] Speedup scanning for excluded files.
  2007-10-29  7:45 [PATCH] Speedup scanning for excluded files Lars Knoll
@ 2007-10-29  8:02 ` Pierre Habouzit
  2007-10-29  8:59   ` Lars Knoll
  0 siblings, 1 reply; 8+ messages in thread
From: Pierre Habouzit @ 2007-10-29  8:02 UTC (permalink / raw)
  To: Lars Knoll; +Cc: git

[-- Attachment #1: Type: text/plain, Size: 830 bytes --]

On Mon, Oct 29, 2007 at 07:45:26AM +0000, Lars Knoll wrote:
> +static int no_wildcard(const char *string)
> +{
> +    return !strchr(string, '*') && !strchr(string, '?') 
> && !strchr(string, '[') && !strchr(string, '{');

       return string[strcspn(string, "*?[{")] == '\0';

is faster as it doesn't scan the string 4 time in the case where there
is no wildcard.

> +}
> +
>  void add_exclude(const char *string, const char *base,
>  		 int baselen, struct exclude_list *which)
>  {
>  	struct exclude *x = xmalloc(sizeof (*x));
>  
> +        x->to_exclude = 1;

  You used spaces in here, and in many places of your patch.


-- 
·O·  Pierre Habouzit
··O                                                madcoder@debian.org
OOO                                                http://www.madism.org

[-- Attachment #2: Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] Speedup scanning for excluded files.
  2007-10-29  8:02 ` Pierre Habouzit
@ 2007-10-29  8:59   ` Lars Knoll
  2007-10-29 22:17     ` Junio C Hamano
  2007-10-29 22:59     ` Morten Welinder
  0 siblings, 2 replies; 8+ messages in thread
From: Lars Knoll @ 2007-10-29  8:59 UTC (permalink / raw)
  To: git; +Cc: Pierre Habouzit, Junio C Hamano

>From 51b364c9c87bec89556a2089cc5363c588ea2ff5 Mon Sep 17 00:00:00 2001
From: Lars Knoll <lars@trolltech.com>
Date: Sun, 28 Oct 2007 21:27:13 +0100
Subject: [PATCH] Speedup scanning for excluded files.

Try to avoid a lot of work scanning for excluded files,
by caching some more information when setting up the exclusion
data structure.

Speeds up 'git runstatus' on a repository containing the Qt sources by 30% and
reduces the amount of instructions executed (as measured by valgrind) by a
factor of 2.
---

Fixed both the indentation and the no_wildcard method as Pierre pointed out. 

Lars

 dir.c |   60 +++++++++++++++++++++++++++++++++++++++++++-----------------
 dir.h |    7 +++++++
 2 files changed, 50 insertions(+), 17 deletions(-)

diff --git a/dir.c b/dir.c
index 4c17d36..f4e130c 100644
--- a/dir.c
+++ b/dir.c
@@ -118,14 +118,32 @@ int match_pathspec(const char **pathspec, const char *name, int namelen, int pre
 	return retval;
 }
 
+static int no_wildcard(const char *string)
+{
+	return string[strcspn(string, "*?[{")] == '\0';
+}
+
 void add_exclude(const char *string, const char *base,
 		 int baselen, struct exclude_list *which)
 {
 	struct exclude *x = xmalloc(sizeof (*x));
 
+	x->to_exclude = 1;
+	if (*string == '!') {
+		x->to_exclude = 0;
+		string++;
+	}
 	x->pattern = string;
+	x->patternlen = strlen(string);
 	x->base = base;
 	x->baselen = baselen;
+	x->flags = 0;
+	if (!strchr(string, '/'))
+		x->flags |= EXC_FLAG_NODIR;
+	if (no_wildcard(string))
+		x->flags |= EXC_FLAG_NOWILDCARD;
+	if (*string == '*' && no_wildcard(string+1))
+		x->flags |= EXC_FLAG_ENDSWITH;
 	if (which->nr == which->alloc) {
 		which->alloc = alloc_nr(which->alloc);
 		which->excludes = xrealloc(which->excludes,
@@ -209,7 +227,7 @@ void pop_exclude_per_directory(struct dir_struct *dir, int stk)
  * Return 1 for exclude, 0 for include and -1 for undecided.
  */
 static int excluded_1(const char *pathname,
-		      int pathlen,
+		      int pathlen, const char *basename, 
 		      struct exclude_list *el)
 {
 	int i;
@@ -218,19 +236,20 @@ static int excluded_1(const char *pathname,
 		for (i = el->nr - 1; 0 <= i; i--) {
 			struct exclude *x = el->excludes[i];
 			const char *exclude = x->pattern;
-			int to_exclude = 1;
+			int to_exclude = x->to_exclude;
 
-			if (*exclude == '!') {
-				to_exclude = 0;
-				exclude++;
-			}
-
-			if (!strchr(exclude, '/')) {
+			if (x->flags & EXC_FLAG_NODIR) {
 				/* match basename */
-				const char *basename = strrchr(pathname, '/');
-				basename = (basename) ? basename+1 : pathname;
-				if (fnmatch(exclude, basename, 0) == 0)
-					return to_exclude;
+				if (x->flags & EXC_FLAG_NOWILDCARD) {
+					if (!strcmp(exclude, basename))
+						return to_exclude;
+				} else if (x->flags & EXC_FLAG_ENDSWITH) {
+					if (!strcmp(exclude + 1, pathname + pathlen -x->patternlen + 1))
+						return to_exclude;
+				} else {
+					if (fnmatch(exclude, basename, 0) == 0)
+						return to_exclude;
+				}
 			}
 			else {
 				/* match with FNM_PATHNAME:
@@ -246,9 +265,14 @@ static int excluded_1(const char *pathname,
 				    strncmp(pathname, x->base, baselen))
 				    continue;
 
-				if (fnmatch(exclude, pathname+baselen,
-					    FNM_PATHNAME) == 0)
-					return to_exclude;
+				if (x->flags & EXC_FLAG_NOWILDCARD) {
+					if (!strcmp(exclude, pathname + baselen))
+						return to_exclude;
+				} else {
+					if (fnmatch(exclude, pathname+baselen,
+						    FNM_PATHNAME) == 0)
+					    return to_exclude;
+				}
 			}
 		}
 	}
@@ -259,9 +283,11 @@ int excluded(struct dir_struct *dir, const char *pathname)
 {
 	int pathlen = strlen(pathname);
 	int st;
-
+	const char *basename = strrchr(pathname, '/');
+	basename = (basename) ? basename+1 : pathname;
+	
 	for (st = EXC_CMDL; st <= EXC_FILE; st++) {
-		switch (excluded_1(pathname, pathlen, &dir->exclude_list[st])) {
+		switch (excluded_1(pathname, pathlen, basename, &dir->exclude_list[st])) {
 		case 0:
 			return 0;
 		case 1:
diff --git a/dir.h b/dir.h
index a248a23..3ce8dbe 100644
--- a/dir.h
+++ b/dir.h
@@ -17,13 +17,20 @@ struct dir_entry {
 	char name[FLEX_ARRAY]; /* more */
 };
 
+#define EXC_FLAG_NODIR 1
+#define EXC_FLAG_NOWILDCARD 2
+#define EXC_FLAG_ENDSWITH 4
+
 struct exclude_list {
 	int nr;
 	int alloc;
 	struct exclude {
 		const char *pattern;
+		int patternlen;		   
 		const char *base;
 		int baselen;
+		int to_exclude;
+		int flags;
 	} **excludes;
 };
 
-- 
1.5.3.4.383.gd90a7

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH] Speedup scanning for excluded files.
  2007-10-29  8:59   ` Lars Knoll
@ 2007-10-29 22:17     ` Junio C Hamano
  2007-10-29 22:59     ` Morten Welinder
  1 sibling, 0 replies; 8+ messages in thread
From: Junio C Hamano @ 2007-10-29 22:17 UTC (permalink / raw)
  To: Lars Knoll; +Cc: git, Pierre Habouzit

Thanks.  This looks obviously correct and tempts me to apply
directly to 'master'.

There are similar logic on the gitattributes side (attr.c) which
might benefit from such an optimization as well.  In the longer
run, we might even want to introduce 'ignored' attribute to the
gitattributes mechanism and make the unified result to supersede
gitignore.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] Speedup scanning for excluded files.
  2007-10-29  8:59   ` Lars Knoll
  2007-10-29 22:17     ` Junio C Hamano
@ 2007-10-29 22:59     ` Morten Welinder
  2007-10-30  0:00       ` Junio C Hamano
  1 sibling, 1 reply; 8+ messages in thread
From: Morten Welinder @ 2007-10-29 22:59 UTC (permalink / raw)
  To: Lars Knoll; +Cc: git, Pierre Habouzit, Junio C Hamano

> +                               } else if (x->flags & EXC_FLAG_ENDSWITH) {
> +                                       if (!strcmp(exclude + 1, pathname + pathlen -x->patternlen + 1))

Is there some guarantee that the result of that subtraction is still within
the string?

Morten

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] Speedup scanning for excluded files.
  2007-10-29 22:59     ` Morten Welinder
@ 2007-10-30  0:00       ` Junio C Hamano
  2007-10-30  7:28         ` Lars Knoll
  0 siblings, 1 reply; 8+ messages in thread
From: Junio C Hamano @ 2007-10-30  0:00 UTC (permalink / raw)
  To: Morten Welinder; +Cc: Lars Knoll, git, Pierre Habouzit, Junio C Hamano

"Morten Welinder" <mwelinder@gmail.com> writes:

>> +                               } else if (x->flags & EXC_FLAG_ENDSWITH) {
>> +                                       if (!strcmp(exclude + 1, pathname + pathlen -x->patternlen + 1))
>
> Is there some guarantee that the result of that subtraction is still within
> the string?

Good eyes.

If pattern is "*.exe", patternlen is 5, and strcmp wants to
compare 4 chars, so pathlen is better be at least that long, and
we do allow that pattern to match a hidden file ".exe".

Like this?

	if (x->patternlen - 1 <= pathlen &&
        	!strcmp(exclude + 1, pathname + pathlen - x->patternlen + 1))
		return to_exclude;

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] Speedup scanning for excluded files.
  2007-10-30  0:00       ` Junio C Hamano
@ 2007-10-30  7:28         ` Lars Knoll
  2007-10-30  7:46           ` [PATCH v3] " Lars Knoll
  0 siblings, 1 reply; 8+ messages in thread
From: Lars Knoll @ 2007-10-30  7:28 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Morten Welinder, git, Pierre Habouzit

On Tuesday 30 October 2007, Junio C Hamano wrote:
> "Morten Welinder" <mwelinder@gmail.com> writes:
> >> +                               } else if (x->flags & EXC_FLAG_ENDSWITH)
> >> { +                                       if (!strcmp(exclude + 1,
> >> pathname + pathlen -x->patternlen + 1))
> >
> > Is there some guarantee that the result of that subtraction is still
> > within the string?
>
> Good eyes.
>
> If pattern is "*.exe", patternlen is 5, and strcmp wants to
> compare 4 chars, so pathlen is better be at least that long, and
> we do allow that pattern to match a hidden file ".exe".
>
> Like this?
>
> 	if (x->patternlen - 1 <= pathlen &&
>         	!strcmp(exclude + 1, pathname + pathlen - x->patternlen + 1))
> 		return to_exclude;

Yes, that looks right. Thanks for catching that one.

Lars

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH v3] Speedup scanning for excluded files.
  2007-10-30  7:28         ` Lars Knoll
@ 2007-10-30  7:46           ` Lars Knoll
  0 siblings, 0 replies; 8+ messages in thread
From: Lars Knoll @ 2007-10-30  7:46 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Morten Welinder, git, Pierre Habouzit

Try to avoid a lot of work scanning for excluded files,
by caching some more information when setting up the exclusion
data structure.

Speeds up 'git runstatus' on a repository containing the Qt sources by 30% and
reduces the amount of instructions executed (as measured by valgrind) by a
factor of 2. A 'git runstatus' on the git repository goes from 100M instructions
down to about 22M.

Signed-off-by: Lars Knoll <lars@trolltech.com>
---

Included an out of bounds check for the issue Morten found.

 dir.c |   61 ++++++++++++++++++++++++++++++++++++++++++++-----------------
 dir.h |    7 +++++++
 2 files changed, 51 insertions(+), 17 deletions(-)

diff --git a/dir.c b/dir.c
index 4c17d36..b3d7462 100644
--- a/dir.c
+++ b/dir.c
@@ -118,14 +118,32 @@ int match_pathspec(const char **pathspec, const char *name, int namelen, int pre
 	return retval;
 }
 
+static int no_wildcard(const char *string)
+{
+	return string[strcspn(string, "*?[{")] == '\0';
+}
+
 void add_exclude(const char *string, const char *base,
 		 int baselen, struct exclude_list *which)
 {
 	struct exclude *x = xmalloc(sizeof (*x));
 
+	x->to_exclude = 1;
+	if (*string == '!') {
+		x->to_exclude = 0;
+		string++;
+	}
 	x->pattern = string;
+	x->patternlen = strlen(string);
 	x->base = base;
 	x->baselen = baselen;
+	x->flags = 0;
+	if (!strchr(string, '/'))
+		x->flags |= EXC_FLAG_NODIR;
+	if (no_wildcard(string))
+		x->flags |= EXC_FLAG_NOWILDCARD;
+	if (*string == '*' && no_wildcard(string+1))
+		x->flags |= EXC_FLAG_ENDSWITH;
 	if (which->nr == which->alloc) {
 		which->alloc = alloc_nr(which->alloc);
 		which->excludes = xrealloc(which->excludes,
@@ -209,7 +227,7 @@ void pop_exclude_per_directory(struct dir_struct *dir, int stk)
  * Return 1 for exclude, 0 for include and -1 for undecided.
  */
 static int excluded_1(const char *pathname,
-		      int pathlen,
+		      int pathlen, const char *basename, 
 		      struct exclude_list *el)
 {
 	int i;
@@ -218,19 +236,21 @@ static int excluded_1(const char *pathname,
 		for (i = el->nr - 1; 0 <= i; i--) {
 			struct exclude *x = el->excludes[i];
 			const char *exclude = x->pattern;
-			int to_exclude = 1;
+			int to_exclude = x->to_exclude;
 
-			if (*exclude == '!') {
-				to_exclude = 0;
-				exclude++;
-			}
-
-			if (!strchr(exclude, '/')) {
+			if (x->flags & EXC_FLAG_NODIR) {
 				/* match basename */
-				const char *basename = strrchr(pathname, '/');
-				basename = (basename) ? basename+1 : pathname;
-				if (fnmatch(exclude, basename, 0) == 0)
-					return to_exclude;
+				if (x->flags & EXC_FLAG_NOWILDCARD) {
+					if (!strcmp(exclude, basename))
+						return to_exclude;
+				} else if (x->flags & EXC_FLAG_ENDSWITH) {
+					if (x->patternlen - 1 <= pathlen &&
+                                            !strcmp(exclude + 1, pathname + pathlen - x->patternlen + 1))
+						return to_exclude;
+				} else {
+					if (fnmatch(exclude, basename, 0) == 0)
+						return to_exclude;
+				}
 			}
 			else {
 				/* match with FNM_PATHNAME:
@@ -246,9 +266,14 @@ static int excluded_1(const char *pathname,
 				    strncmp(pathname, x->base, baselen))
 				    continue;
 
-				if (fnmatch(exclude, pathname+baselen,
-					    FNM_PATHNAME) == 0)
-					return to_exclude;
+				if (x->flags & EXC_FLAG_NOWILDCARD) {
+					if (!strcmp(exclude, pathname + baselen))
+						return to_exclude;
+				} else {
+					if (fnmatch(exclude, pathname+baselen,
+						    FNM_PATHNAME) == 0)
+					    return to_exclude;
+				}
 			}
 		}
 	}
@@ -259,9 +284,11 @@ int excluded(struct dir_struct *dir, const char *pathname)
 {
 	int pathlen = strlen(pathname);
 	int st;
-
+	const char *basename = strrchr(pathname, '/');
+	basename = (basename) ? basename+1 : pathname;
+	
 	for (st = EXC_CMDL; st <= EXC_FILE; st++) {
-		switch (excluded_1(pathname, pathlen, &dir->exclude_list[st])) {
+		switch (excluded_1(pathname, pathlen, basename, &dir->exclude_list[st])) {
 		case 0:
 			return 0;
 		case 1:
diff --git a/dir.h b/dir.h
index a248a23..3ce8dbe 100644
--- a/dir.h
+++ b/dir.h
@@ -17,13 +17,20 @@ struct dir_entry {
 	char name[FLEX_ARRAY]; /* more */
 };
 
+#define EXC_FLAG_NODIR 1
+#define EXC_FLAG_NOWILDCARD 2
+#define EXC_FLAG_ENDSWITH 4
+
 struct exclude_list {
 	int nr;
 	int alloc;
 	struct exclude {
 		const char *pattern;
+		int patternlen;		   
 		const char *base;
 		int baselen;
+		int to_exclude;
+		int flags;
 	} **excludes;
 };
 
-- 
1.5.3.4.383.gd90a7

^ permalink raw reply related	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2007-10-30  7:46 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-10-29  7:45 [PATCH] Speedup scanning for excluded files Lars Knoll
2007-10-29  8:02 ` Pierre Habouzit
2007-10-29  8:59   ` Lars Knoll
2007-10-29 22:17     ` Junio C Hamano
2007-10-29 22:59     ` Morten Welinder
2007-10-30  0:00       ` Junio C Hamano
2007-10-30  7:28         ` Lars Knoll
2007-10-30  7:46           ` [PATCH v3] " Lars Knoll

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).