git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2] Add git-grep threads-num param
@ 2015-10-23  9:15 Victor Leschuk
  2015-10-23 22:40 ` Junio C Hamano
  0 siblings, 1 reply; 2+ messages in thread
From: Victor Leschuk @ 2015-10-23  9:15 UTC (permalink / raw)
  To: git; +Cc: Victor Leschuk

It's a follow up to "[PATCH] Add git-grep threads-num param":

Make number of git-grep worker threads a configuration parameter.
I have run several tests on systems with different number of CPU cores.
It appeared that the hard-coded number 8 lowers performance on both of my systems:
on my 4-core and 8-core systems the thread number of 4 worked about 20% faster than
default 8. So I think it is better to allow users tune this parameter.

Signed-off-by: Victor Leschuk <vleschuk@accesssoftek.com>
---
 Documentation/config.txt               |  4 ++++
 Documentation/git-grep.txt             |  4 ++++
 builtin/grep.c                         | 20 ++++++++++++--------
 contrib/completion/git-completion.bash |  1 +
 grep.c                                 | 11 +++++++++++
 grep.h                                 |  2 ++
 6 files changed, 34 insertions(+), 8 deletions(-)

diff --git a/Documentation/config.txt b/Documentation/config.txt
index 391a0c3..1c95587 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -1447,6 +1447,10 @@ grep.extendedRegexp::
 	option is ignored when the 'grep.patternType' option is set to a value
 	other than 'default'.
 
+grep.threads::
+	Number of grep worker threads, use it to tune up performance on
+	multicore machines. Default value is 8.
+
 gpg.program::
 	Use this custom program instead of "gpg" found on $PATH when
 	making or verifying a PGP signature. The program must support the
diff --git a/Documentation/git-grep.txt b/Documentation/git-grep.txt
index 4a44d6d..fbd4f83 100644
--- a/Documentation/git-grep.txt
+++ b/Documentation/git-grep.txt
@@ -22,6 +22,7 @@ SYNOPSIS
 	   [--color[=<when>] | --no-color]
 	   [--break] [--heading] [-p | --show-function]
 	   [-A <post-context>] [-B <pre-context>] [-C <context>]
+	   [--threads <num>]
 	   [-W | --function-context]
 	   [-f <file>] [-e] <pattern>
 	   [--and|--or|--not|(|)|-e <pattern>...]
@@ -220,6 +221,9 @@ OPTIONS
 	Show <num> leading lines, and place a line containing
 	`--` between contiguous groups of matches.
 
+--threads <num>::
+	Set number of worker threads to <num>. Default is 8.
+
 -W::
 --function-context::
 	Show the surrounding text from the previous line containing a
diff --git a/builtin/grep.c b/builtin/grep.c
index d04f440..3950725 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -27,8 +27,7 @@ static char const * const grep_usage[] = {
 static int use_threads = 1;
 
 #ifndef NO_PTHREADS
-#define THREADS 8
-static pthread_t threads[THREADS];
+static pthread_t *threads;
 
 /* We use one producer thread and THREADS consumer
  * threads. The producer adds struct work_items to 'todo' and the
@@ -206,7 +205,8 @@ static void start_threads(struct grep_opt *opt)
 		strbuf_init(&todo[i].out, 0);
 	}
 
-	for (i = 0; i < ARRAY_SIZE(threads); i++) {
+	threads = xcalloc(opt->num_threads, sizeof(pthread_t));
+	for (i = 0; i < opt->num_threads; i++) {
 		int err;
 		struct grep_opt *o = grep_opt_dup(opt);
 		o->output = strbuf_out;
@@ -220,7 +220,7 @@ static void start_threads(struct grep_opt *opt)
 	}
 }
 
-static int wait_all(void)
+static int wait_all(struct grep_opt *opt)
 {
 	int hit = 0;
 	int i;
@@ -238,12 +238,14 @@ static int wait_all(void)
 	pthread_cond_broadcast(&cond_add);
 	grep_unlock();
 
-	for (i = 0; i < ARRAY_SIZE(threads); i++) {
+	for (i = 0; i < opt->num_threads; i++) {
 		void *h;
 		pthread_join(threads[i], &h);
 		hit |= (int) (intptr_t) h;
 	}
 
+	free(threads);
+
 	pthread_mutex_destroy(&grep_mutex);
 	pthread_mutex_destroy(&grep_read_mutex);
 	pthread_mutex_destroy(&grep_attr_mutex);
@@ -256,7 +258,7 @@ static int wait_all(void)
 }
 #else /* !NO_PTHREADS */
 
-static int wait_all(void)
+static int wait_all(struct grep_opt *opt)
 {
 	return 0;
 }
@@ -702,6 +704,8 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
 			N_("show <n> context lines before matches")),
 		OPT_INTEGER('A', "after-context", &opt.post_context,
 			N_("show <n> context lines after matches")),
+		OPT_INTEGER(0, "threads", &opt.num_threads,
+			N_("use <n> worker threads")),
 		OPT_NUMBER_CALLBACK(&opt, N_("shortcut for -C NUM"),
 			context_callback),
 		OPT_BOOL('p', "show-function", &opt.funcname,
@@ -832,7 +836,7 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
 	}
 
 #ifndef NO_PTHREADS
-	if (list.nr || cached || online_cpus() == 1)
+	if (list.nr || cached || online_cpus() == 1 || opt.num_threads <= 1)
 		use_threads = 0;
 #else
 	use_threads = 0;
@@ -910,7 +914,7 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
 	}
 
 	if (use_threads)
-		hit |= wait_all();
+		hit |= wait_all(&opt);
 	if (hit && show_in_pager)
 		run_pager(&opt, prefix);
 	free_grep_patterns(&opt);
diff --git a/contrib/completion/git-completion.bash b/contrib/completion/git-completion.bash
index 482ca84..390d9c0 100644
--- a/contrib/completion/git-completion.bash
+++ b/contrib/completion/git-completion.bash
@@ -1310,6 +1310,7 @@ _git_grep ()
 			--full-name --line-number
 			--extended-regexp --basic-regexp --fixed-strings
 			--perl-regexp
+			--threads
 			--files-with-matches --name-only
 			--files-without-match
 			--max-depth
diff --git a/grep.c b/grep.c
index 7b2b96a..9914fe9 100644
--- a/grep.c
+++ b/grep.c
@@ -40,6 +40,7 @@ void init_grep_defaults(void)
 	color_set(opt->color_selected, "");
 	color_set(opt->color_sep, GIT_COLOR_CYAN);
 	opt->color = -1;
+	opt->num_threads = GREP_NUM_THREADS_DEFAULT;
 }
 
 static int parse_pattern_type_arg(const char *opt, const char *arg)
@@ -124,6 +125,14 @@ int grep_config(const char *var, const char *value, void *cb)
 			return config_error_nonbool(var);
 		return color_parse(value, color);
 	}
+
+	if (!strcmp(var, "grep.threads")) {
+		int threads = git_config_int(var, value);
+		if (threads < 0)
+			die("invalid number of threads specified (%d)", threads);
+		opt->num_threads = threads;
+		return 0;
+	}
 	return 0;
 }
 
@@ -150,6 +159,8 @@ void grep_init(struct grep_opt *opt, const char *prefix)
 	opt->pathname = def->pathname;
 	opt->regflags = def->regflags;
 	opt->relative = def->relative;
+	if(!opt->num_threads)
+		opt->num_threads = def->num_threads;
 
 	color_set(opt->color_context, def->color_context);
 	color_set(opt->color_filename, def->color_filename);
diff --git a/grep.h b/grep.h
index 95f197a..f05874c 100644
--- a/grep.h
+++ b/grep.h
@@ -132,6 +132,8 @@ struct grep_opt {
 	unsigned pre_context;
 	unsigned post_context;
 	unsigned last_shown;
+#define GREP_NUM_THREADS_DEFAULT 8
+	unsigned num_threads;
 	int show_hunk_mark;
 	int file_break;
 	int heading;
-- 
2.6.2.281.g1c71ee1.dirty

^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [PATCH v2] Add git-grep threads-num param
  2015-10-23  9:15 [PATCH v2] Add git-grep threads-num param Victor Leschuk
@ 2015-10-23 22:40 ` Junio C Hamano
  0 siblings, 0 replies; 2+ messages in thread
From: Junio C Hamano @ 2015-10-23 22:40 UTC (permalink / raw)
  To: Victor Leschuk; +Cc: git, Victor Leschuk

Please pay attention to your title.  It no longer matches what the
patch does.

It may also be beneficial to study recent titles and log messages
from other developers (you can find them in "git log --no-merges"
and "git shortlog --no-merges") and learn and imitate their format,
style and tone.  We want our log to tell a story in a consistent
voice, no matter who the authors of individual commits are.

Victor Leschuk <vleschuk@gmail.com> writes:

> It's a follow up to "[PATCH] Add git-grep threads-num param":

Do you think anybody wants to see this line in the output from "git
log" six months from now?  I doubt it.  The previous one will not be
committed to my tree anyway, so the readers would not know (nor
care) what other patch you are talking about.

> @@ -832,7 +836,7 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
>  	}
>  
>  #ifndef NO_PTHREADS
> -	if (list.nr || cached || online_cpus() == 1)
> +	if (list.nr || cached || online_cpus() == 1 || opt.num_threads <= 1)
>  		use_threads = 0;

This avoid --threads=0 to take the threading codepath and spawning
no threads, which would have happend in the previous patch.

But it makes me wonder if the logic should be more like this:
    
 - Because the code is not prepared to go multi-thread when
   searching in the object data (not working tree), we always
   disable threading if 'list' is not empty or 'cached' is given;
   otherwise

 - If the user explicitly said that she wants N threads, we use that
   many threads; otherwise

 - If there is only one CPU, we do not do multi-thread; otherwise

 - We use the default number of threads.

IOW, I'd suggest making opt.num_threads an "int" (not "unsigned"),
initialize it to -1 (unspecified), and then make this part more like
this, perhaps?

	if (!opt.num_threads)
        	use_threads = 0; /* the user tells us not to use threads */
	else if (list.nr || cached)
        	use_threads = 0; /* cannot multi-thread object lookup */
	else if (opt.num_threads >= 1)
		use_threads = 1; /* the user explicitly wants this many */
	else if (online_cpus() <= 1)
        	use_threads = 0;
	else {
        	use_threads = 1;
                opt.num_threads = GREP_NUM_THREADS_DEFAULT;
	}

Something like this code structure makes it very clear what needs to
be changed when we want to add some sort of auto-scaling (instead of
assigning the DEFAULT constant, you'd see how many cores you have,
how many files you will be grepping in, etc. and come up with a good
number dynamically).

> @@ -150,6 +159,8 @@ void grep_init(struct grep_opt *opt, const char *prefix)
>  	opt->pathname = def->pathname;
>  	opt->regflags = def->regflags;
>  	opt->relative = def->relative;
> +	if(!opt->num_threads)

You forgot a required SP between a keyword for a syntactic construct
and its open parenthesis.

Thanks.

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2015-10-23 22:41 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-10-23  9:15 [PATCH v2] Add git-grep threads-num param Victor Leschuk
2015-10-23 22:40 ` Junio C Hamano

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).