* [PATCH v3] Threaded grep
@ 2010-01-18 10:33 Fredrik Kuivinen
  2010-01-18 11:11 ` Johannes Sixt
                   ` (2 more replies)
  0 siblings, 3 replies; 10+ messages in thread
From: Fredrik Kuivinen @ 2010-01-18 10:33 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Linus Torvalds

Make git grep use threads when thread support is available.

The results below are best of five runs in the Linux repository (on a
box with two cores).

With the patch:

git grep qwerty
1.58user 0.55system 0:01.16elapsed 183%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+800outputs (0major+5774minor)pagefaults 0swaps

Without:

git grep qwerty
1.59user 0.43system 0:02.02elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+800outputs (0major+3716minor)pagefaults 0swaps


And with a pattern with quite a few matches:

With the patch:

$ /usr/bin/time git grep void
5.61user 0.56system 0:03.44elapsed 179%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+800outputs (0major+5587minor)pagefaults 0swaps

Without:

$ /usr/bin/time git grep void
5.36user 0.51system 0:05.87elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+800outputs (0major+3693minor)pagefaults 0swaps

In both cases the threading reduces the elapsed time by about 40%.

Signed-off-by: Fredrik Kuivinen <frekui@gmail.com>
---

The patch has been rebased on top of next and I believe that it is now
ready for inclusion. It is time to decide if the added code complexity
is worth the increased performance.

 builtin-grep.c |  325 +++++++++++++++++++++++++++++++++++++++++++++++++++++---
 grep.c         |  104 +++++++++++++++---
 grep.h         |    6 +
 3 files changed, 400 insertions(+), 35 deletions(-)

diff --git a/builtin-grep.c b/builtin-grep.c
index 24ae1ce..dc07e9e 100644
--- a/builtin-grep.c
+++ b/builtin-grep.c
@@ -15,11 +15,257 @@
 #include "grep.h"
 #include "quote.h"
 
+#ifndef NO_PTHREADS
+#include "thread-utils.h"
+#include <pthread.h>
+#endif
+
 static char const * const grep_usage[] = {
 	"git grep [options] [-e] <pattern> [<rev>...] [[--] path...]",
 	NULL
 };
 
+static int use_threads = 1;
+
+#ifndef NO_PTHREADS
+#define THREADS 8
+static pthread_t threads[THREADS];
+
+static void* load_file(const char *filename, size_t *sz);
+
+enum work_type {WORK_BUF, WORK_FILE};
+
+/* We use one producer thread and number_of_threads consumer
+   threads. The producer adds struct work_items to 'todo' and the
+   consumers pick work items from the same array. */
+struct work_item
+{
+	enum work_type type;
+	char *name;
+
+	/* if type == WORK_BUF, then 'buf' points to a buffer of size
+	   'size' otherwise type == WORK_FILE and 'buf' is a NUL
+	   terminated filename. */
+	char *buf;
+	size_t size;
+	char done;
+	struct strbuf out;
+};
+
+/* In the range [todo_done, todo_start) in 'todo' we have work_items
+   that have been or are processed by a consumer thread. We haven't
+   written the result for these to stdout yet.
+
+   The work_items in [todo_start, todo_end) are waiting to be picked
+   up by a consumer thread.
+
+   The ranges are modulo TODO_SIZE.
+*/
+#define TODO_SIZE 128
+static struct work_item todo[TODO_SIZE];
+static int todo_start = 0;
+static int todo_end = 0;
+static int todo_done = 0;
+
+/* Have all work items been added? */
+static int all_work_added = 0;
+
+/* This lock protects all the variables above. */
+static pthread_mutex_t grep_lock = PTHREAD_MUTEX_INITIALIZER;
+
+/* Signalled when a new work_item is added to todo. */
+static pthread_cond_t cond_add = PTHREAD_COND_INITIALIZER;
+
+/* Signalled when the result from one work_item is written to
+   stdout. */
+static pthread_cond_t cond_write = PTHREAD_COND_INITIALIZER;
+
+/* Signalled when we are finished with everything. */
+static pthread_cond_t cond_result = PTHREAD_COND_INITIALIZER;
+
+static void add_work(enum work_type type, char *name, char *buf, size_t size)
+{
+	pthread_mutex_lock(&grep_lock);
+
+	while ((todo_end+1) % ARRAY_SIZE(todo) == todo_done) {
+		pthread_cond_wait(&cond_write, &grep_lock);
+	}
+
+	todo[todo_end].type = type;
+	todo[todo_end].name = name;
+	todo[todo_end].buf = buf;
+	todo[todo_end].size = size;
+	todo[todo_end].done = 0;
+	strbuf_reset(&todo[todo_end].out);
+	todo_end = (todo_end + 1) % ARRAY_SIZE(todo);
+
+	pthread_mutex_unlock(&grep_lock);
+	pthread_cond_signal(&cond_add);
+}
+
+static struct work_item* get_work()
+{
+	struct work_item* ret;
+
+	pthread_mutex_lock(&grep_lock);
+	while (todo_start == todo_end && !all_work_added) {
+		pthread_cond_wait(&cond_add, &grep_lock);
+	}
+
+	if (todo_start == todo_end && all_work_added) {
+		ret = NULL;
+	} else {
+		ret = &todo[todo_start];
+		todo_start = (todo_start + 1) % ARRAY_SIZE(todo);
+	}
+	pthread_mutex_unlock(&grep_lock);
+	return ret;
+}
+
+/* This function takes ownership of 'name' and 'buf'. They will be
+   deallocated with free. */
+static int grep_buffer_async(struct grep_opt *opt, char *name, char *buf,
+			     size_t size)
+{
+	add_work(WORK_BUF, name, buf, size);
+	return 0;
+}
+
+static int grep_file_async(struct grep_opt *opt, char *name,
+			   const char *filename)
+{
+	add_work(WORK_FILE, name, (char*) filename, 0);
+	return 0;
+}
+
+static void work_done(struct work_item *w)
+{
+	int old_done;
+
+	pthread_mutex_lock(&grep_lock);
+	w->done = 1;
+	old_done = todo_done;
+	for(; todo[todo_done].done && todo_done != todo_start;
+	    todo_done = (todo_done+1) % ARRAY_SIZE(todo)) {
+		w = &todo[todo_done];
+		write_or_die(1, w->out.buf, w->out.len);
+		if (w->type == WORK_BUF)
+			free(w->buf);
+
+		free(w->name);
+	}
+
+	if (old_done != todo_done)
+		pthread_cond_signal(&cond_write);
+
+	if (all_work_added && todo_done == todo_end)
+		pthread_cond_signal(&cond_result);
+
+	pthread_mutex_unlock(&grep_lock);
+}
+
+static void* run(void *arg)
+{
+	int hit = 0;
+	struct grep_opt *opt = arg;
+
+	while (1) {
+		struct work_item *w = get_work();
+		if (!w)
+			break;
+
+		opt->output_priv = w;
+		if (w->type == WORK_BUF) {
+			hit |= grep_buffer(opt, w->name, w->buf, w->size);
+		} else {
+			size_t sz;
+			void* data = load_file(w->buf, &sz);
+			if (data) {
+				hit |= grep_buffer(opt, w->name, data, sz);
+				free(data);
+			}
+		}
+
+		work_done(w);
+	}
+
+	return (void*) (intptr_t) hit;
+}
+
+static void strbuf_out(struct grep_opt *opt, const void *buf, size_t size)
+{
+	struct work_item *w = opt->output_priv;
+	strbuf_add(&w->out, buf, size);
+}
+
+static void start_threads(struct grep_opt *opt)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(todo); i++) {
+		strbuf_init(&todo[i].out, 0);
+	}
+
+	for (i = 0; i < ARRAY_SIZE(threads); i++) {
+		struct grep_opt *o = grep_opt_dup(opt);
+		o->output = strbuf_out;
+		compile_grep_patterns(o);
+		int err = pthread_create(&threads[i], NULL, run, o);
+
+		if (err)
+			die("grep: failed to create thread: %s", strerror(err));
+	}
+}
+#endif /* !NO_PTHREADS */
+
+#ifndef NO_PTHREADS
+static int wait_all()
+{
+	int hit = 0;
+	int i;
+
+	pthread_mutex_lock(&grep_lock);
+	all_work_added = 1;
+
+	/* Wait until all work is done. */
+	while (todo_done != todo_end)
+		pthread_cond_wait(&cond_result, &grep_lock);
+
+	/* Wake up all the consumer threads so they can see that there
+	   is no more work to do. */
+	pthread_cond_broadcast(&cond_add);
+	pthread_mutex_unlock(&grep_lock);
+
+	for (i = 0; i < ARRAY_SIZE(threads); i++) {
+		void *h;
+		pthread_join(threads[i], &h);
+		hit |= (int) (intptr_t) h;
+	}
+
+	return hit;
+}
+#else
+static int wait_all()
+{
+	return 0;
+}
+#endif
+
+static int grep_buffer_internal(struct grep_opt *opt, char *name, char *buf,
+				size_t size)
+{
+#ifndef NO_PTHREADS
+	if (use_threads)
+		return grep_buffer_async(opt, name, buf, size);
+#endif
+	{
+		int hit = grep_buffer(opt, name, buf, size);
+		free(name);
+		free(buf);
+		return hit;
+	}
+}
+
 static int grep_config(const char *var, const char *value, void *cb)
 {
 	struct grep_opt *opt = cb;
@@ -159,21 +405,21 @@ static int grep_sha1(struct grep_opt *opt, const unsigned char *sha1, const char
 	if (opt->relative && opt->prefix_length) {
 		quote_path_relative(name + tree_name_len, -1, &pathbuf, opt->prefix);
 		strbuf_insert(&pathbuf, 0, name, tree_name_len);
-		name = pathbuf.buf;
+	} else {
+		strbuf_addstr(&pathbuf, name);
 	}
-	hit = grep_buffer(opt, name, data, size);
-	strbuf_release(&pathbuf);
-	free(data);
+
+	hit = grep_buffer_internal(opt, strbuf_detach(&pathbuf, NULL),
+				   data, size);
+
 	return hit;
 }
 
-static int grep_file(struct grep_opt *opt, const char *filename)
+static void* load_file(const char *filename, size_t *sz)
 {
 	struct stat st;
+	char* data;
 	int i;
-	char *data;
-	size_t sz;
-	struct strbuf buf = STRBUF_INIT;
 
 	if (lstat(filename, &st) < 0) {
 	err_ret:
@@ -183,24 +429,47 @@ static int grep_file(struct grep_opt *opt, const char *filename)
 	}
 	if (!S_ISREG(st.st_mode))
 		return 0;
-	sz = xsize_t(st.st_size);
+	*sz = xsize_t(st.st_size);
 	i = open(filename, O_RDONLY);
 	if (i < 0)
 		goto err_ret;
-	data = xmalloc(sz + 1);
-	if (st.st_size != read_in_full(i, data, sz)) {
+	data = xmalloc(*sz + 1);
+	data[*sz] = 0;
+	if (st.st_size != read_in_full(i, data, *sz)) {
 		error("'%s': short read %s", filename, strerror(errno));
 		close(i);
 		free(data);
 		return 0;
 	}
 	close(i);
+	return data;
+}
+
+static int grep_file(struct grep_opt *opt, const char *filename)
+{
+	char *data;
+	size_t sz;
+	struct strbuf buf = STRBUF_INIT;
+	char *name;
+
 	if (opt->relative && opt->prefix_length)
-		filename = quote_path_relative(filename, -1, &buf, opt->prefix);
-	i = grep_buffer(opt, filename, data, sz);
-	strbuf_release(&buf);
-	free(data);
-	return i;
+		quote_path_relative(filename, -1, &buf, opt->prefix);
+	else
+		strbuf_addstr(&buf, filename);
+	name = strbuf_detach(&buf, NULL);
+
+#ifndef NO_PTHREADS
+	if (use_threads) {
+		return grep_file_async(opt, name, filename);
+	} else
+#endif
+	{
+		data = load_file(filename, &sz);
+		if (!data)
+			return 0;
+
+		return grep_buffer_internal(opt, name, data, sz);
+	}
 }
 
 static int grep_cache(struct grep_opt *opt, const char **paths, int cached)
@@ -546,6 +815,20 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
 		opt.regflags |= REG_ICASE;
 	if ((opt.regflags != REG_NEWLINE) && opt.fixed)
 		die("cannot mix --fixed-strings and regexp");
+
+#ifndef NO_PTHREADS
+	if (!grep_threads_ok(&opt))
+		use_threads = 0;
+
+	if (online_cpus() == 1)
+		use_threads = 0;
+
+	if (use_threads)
+		start_threads(&opt);
+#else
+	use_threads = 0;
+#endif
+
 	compile_grep_patterns(&opt);
 
 	/* Check revs and then paths */
@@ -583,9 +866,14 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
 	}
 
 	if (!list.nr) {
+		int hit;
 		if (!cached)
 			setup_work_tree();
-		return !grep_cache(&opt, paths, cached);
+
+		hit = grep_cache(&opt, paths, cached);
+		if (use_threads)
+			hit |= wait_all();
+		return !hit;
 	}
 
 	if (cached)
@@ -597,6 +885,9 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
 		if (grep_object(&opt, paths, real_obj, list.objects[i].name))
 			hit = 1;
 	}
+
+	if (use_threads)
+		hit |= wait_all();
 	free_grep_patterns(&opt);
 	return !hit;
 }
diff --git a/grep.c b/grep.c
index 8e1f7de..05aab15 100644
--- a/grep.c
+++ b/grep.c
@@ -29,6 +29,28 @@ void append_grep_pattern(struct grep_opt *opt, const char *pat,
 	p->next = NULL;
 }
 
+struct grep_opt* grep_opt_dup(const struct grep_opt *opt)
+{
+	struct grep_pat *pat;
+	struct grep_opt *ret = xmalloc(sizeof(struct grep_opt));
+	*ret = *opt;
+
+	ret->pattern_list = NULL;
+	ret->pattern_tail = &ret->pattern_list;
+
+	for(pat = opt->pattern_list; pat != NULL; pat = pat->next)
+	{
+		if(pat->token == GREP_PATTERN_HEAD)
+			append_header_grep_pattern(ret, pat->field,
+						   pat->pattern);
+		else
+			append_grep_pattern(ret, pat->pattern, pat->origin,
+					    pat->no, pat->token);
+	}
+
+	return ret;
+}
+
 static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
 {
 	int err;
@@ -253,7 +275,8 @@ static int word_char(char ch)
 
 static void show_name(struct grep_opt *opt, const char *name)
 {
-	printf("%s%c", name, opt->null_following_name ? '\0' : '\n');
+	opt->output(opt, name, strlen(name));
+	opt->output(opt, opt->null_following_name ? "\0" : "\n", 1);
 }
 
 
@@ -490,24 +513,32 @@ static void show_line(struct grep_opt *opt, char *bol, char *eol,
 		      const char *name, unsigned lno, char sign)
 {
 	int rest = eol - bol;
+	char sign_str[1];
 
+	sign_str[0] = sign;
 	if (opt->pre_context || opt->post_context) {
 		if (opt->last_shown == 0) {
 			if (opt->show_hunk_mark)
-				fputs("--\n", stdout);
+				opt->output(opt, "--\n", 3);
 			else
 				opt->show_hunk_mark = 1;
 		} else if (lno > opt->last_shown + 1)
-			fputs("--\n", stdout);
+			opt->output(opt, "--\n", 3);
 	}
 	opt->last_shown = lno;
 
 	if (opt->null_following_name)
-		sign = '\0';
-	if (opt->pathname)
-		printf("%s%c", name, sign);
-	if (opt->linenum)
-		printf("%d%c", lno, sign);
+		sign_str[0] = '\0';
+	if (opt->pathname) {
+		opt->output(opt, name, strlen(name));
+		opt->output(opt, sign_str, 1);
+	}
+	if (opt->linenum) {
+		char buf[32];
+		snprintf(buf, sizeof(buf), "%d", lno);
+		opt->output(opt, buf, strlen(buf));
+		opt->output(opt, sign_str, 1);
+	}
 	if (opt->color) {
 		regmatch_t match;
 		enum grep_context ctx = GREP_CONTEXT_BODY;
@@ -518,18 +549,22 @@ static void show_line(struct grep_opt *opt, char *bol, char *eol,
 		while (next_match(opt, bol, eol, ctx, &match, eflags)) {
 			if (match.rm_so == match.rm_eo)
 				break;
-			printf("%.*s%s%.*s%s",
-			       (int)match.rm_so, bol,
-			       opt->color_match,
-			       (int)(match.rm_eo - match.rm_so), bol + match.rm_so,
-			       GIT_COLOR_RESET);
+
+			opt->output(opt, bol, match.rm_so);
+			opt->output(opt, opt->color_match,
+				strlen(opt->color_match));
+			opt->output(opt, bol + match.rm_so,
+				(int)(match.rm_eo - match.rm_so));
+			opt->output(opt, GIT_COLOR_RESET,
+				    strlen(GIT_COLOR_RESET));
 			bol += match.rm_eo;
 			rest -= match.rm_eo;
 			eflags = REG_NOTBOL;
 		}
 		*eol = ch;
 	}
-	printf("%.*s\n", rest, bol);
+	opt->output(opt, bol, rest);
+	opt->output(opt, "\n", 1);
 }
 
 static int match_funcname(struct grep_opt *opt, char *bol, char *eol)
@@ -667,6 +702,30 @@ static int look_ahead(struct grep_opt *opt,
 	return 0;
 }
 
+int grep_threads_ok(const struct grep_opt *opt)
+{
+	/* If this condition is true, then we may use the attribute
+	   machinery in grep_buffer_1. The attribute code is not
+	   thread safe, so we disable the use of threads. */
+	if (opt->funcname && !opt->unmatch_name_only && !opt->status_only &&
+	    !opt->name_only)
+		return 0;
+
+	/* If we are showing hunk marks, we should not do it for the
+	   first match. The synchronization problem we get for this
+	   constraint is not yet solved, so we disable threading in
+	   this case. */
+	if (opt->pre_context || opt->post_context)
+		return 0;
+
+	return 1;
+}
+
+static void std_output(struct grep_opt *opt, const void* buf, size_t size)
+{
+	fwrite(buf, size, 1, stdout);
+}
+
 static int grep_buffer_1(struct grep_opt *opt, const char *name,
 			 char *buf, unsigned long size, int collect_hits)
 {
@@ -682,6 +741,9 @@ static int grep_buffer_1(struct grep_opt *opt, const char *name,
 
 	opt->last_shown = 0;
 
+	if (!opt->output)
+		opt->output = std_output;
+
 	if (buffer_is_binary(buf, size)) {
 		switch (opt->binary) {
 		case GREP_BINARY_DEFAULT:
@@ -754,7 +816,9 @@ static int grep_buffer_1(struct grep_opt *opt, const char *name,
 			if (opt->status_only)
 				return 1;
 			if (binary_match_only) {
-				printf("Binary file %s matches\n", name);
+				opt->output(opt, "Binary file ", 12);
+				opt->output(opt, name, strlen(name));
+				opt->output(opt, " matches\n", 9);
 				return 1;
 			}
 			if (opt->name_only) {
@@ -810,9 +874,13 @@ static int grep_buffer_1(struct grep_opt *opt, const char *name,
 	 * which feels mostly useless but sometimes useful.  Maybe
 	 * make it another option?  For now suppress them.
 	 */
-	if (opt->count && count)
-		printf("%s%c%u\n", name,
-		       opt->null_following_name ? '\0' : ':', count);
+	if (opt->count && count) {
+		char buf[32];
+		opt->output(opt, name, strlen(name));
+		snprintf(buf, sizeof(buf), "%c%u\n",
+			opt->null_following_name ? '\0' : ':', count);
+		opt->output(opt, buf, strlen(buf));
+	}
 	return !!last_hit;
 }
 
diff --git a/grep.h b/grep.h
index 0c61b00..6517909 100644
--- a/grep.h
+++ b/grep.h
@@ -91,6 +91,9 @@ struct grep_opt {
 	unsigned last_shown;
 	int show_hunk_mark;
 	void *priv;
+
+	void (*output)(struct grep_opt*, const void* data, size_t size);
+	void *output_priv;
 };
 
 extern void append_grep_pattern(struct grep_opt *opt, const char *pat, const char *origin, int no, enum grep_pat_token t);
@@ -99,4 +102,7 @@ extern void compile_grep_patterns(struct grep_opt *opt);
 extern void free_grep_patterns(struct grep_opt *opt);
 extern int grep_buffer(struct grep_opt *opt, const char *name, char *buf, unsigned long size);
 
+extern struct grep_opt* grep_opt_dup(const struct grep_opt *opt);
+extern int grep_threads_ok(const struct grep_opt *opt);
+
 #endif


* Re: [PATCH v3] Threaded grep
  2010-01-18 10:33 [PATCH v3] Threaded grep Fredrik Kuivinen
@ 2010-01-18 11:11 ` Johannes Sixt
  2010-01-18 13:28   ` Fredrik Kuivinen
  2010-01-18 18:05 ` Linus Torvalds
       [not found] ` <7vmy0bhxn1.fsf@alter.siamese.dyndns.org>
  2 siblings, 1 reply; 10+ messages in thread
From: Johannes Sixt @ 2010-01-18 11:11 UTC (permalink / raw)
  To: Fredrik Kuivinen
  Cc: git, Junio C Hamano, Linus Torvalds, Andrzej K. Haczewski

Fredrik Kuivinen schrieb:
> The patch has been rebased on top of next and I believe that it is now
> ready for inclusion. It is time to decide if the added code complexity
> is worth the increased performance.

I also have a few nits to add to make this play better on Windows.

> +/* This lock protects all the variables above. */
> +static pthread_mutex_t grep_lock = PTHREAD_MUTEX_INITIALIZER;
> +
> +/* Signalled when a new work_item is added to todo. */
> +static pthread_cond_t cond_add = PTHREAD_COND_INITIALIZER;
> +
> +/* Signalled when the result from one work_item is written to
> +   stdout. */
> +static pthread_cond_t cond_write = PTHREAD_COND_INITIALIZER;
> +
> +/* Signalled when we are finished with everything. */
> +static pthread_cond_t cond_result = PTHREAD_COND_INITIALIZER;

Please do not use PTHREAD_MUTEX_INITIALIZER nor PTHREAD_COND_INITIALIZER;
call pthread_mutex_init and pthread_cond_init (and the corresponding
*_destroy!!) from the code.
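
In other words, something along these lines (only a sketch, reusing
the variable names from the patch; the helper names are made up):

	/* requires <pthread.h>; call once before starting and once
	   after joining the worker threads */
	static pthread_mutex_t grep_lock;
	static pthread_cond_t cond_add, cond_write, cond_result;

	static void grep_threads_setup(void)
	{
		pthread_mutex_init(&grep_lock, NULL);
		pthread_cond_init(&cond_add, NULL);
		pthread_cond_init(&cond_write, NULL);
		pthread_cond_init(&cond_result, NULL);
	}

	static void grep_threads_teardown(void)
	{
		pthread_cond_destroy(&cond_result);
		pthread_cond_destroy(&cond_write);
		pthread_cond_destroy(&cond_add);
		pthread_mutex_destroy(&grep_lock);
	}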

> +static void add_work(enum work_type type, char *name, char *buf, size_t size)
> +{
> +...
> +	pthread_mutex_unlock(&grep_lock);
> +	pthread_cond_signal(&cond_add);

Please swap these two lines, so that pthread_cond_signal() is called while
the lock is held.

> +	/* Wake up all the consumer threads so they can see that there
> +	   is no more work to do. */
> +	pthread_cond_broadcast(&cond_add);
> +	pthread_mutex_unlock(&grep_lock);

Ouch! We do not have pthread_cond_broadcast() on Windows, yet. This
shouldn't be a stopper for this patch, though. Perhaps Andrzej (cc:ed) can
implement it?

-- Hannes


* Re: [PATCH v3] Threaded grep
  2010-01-18 11:11 ` Johannes Sixt
@ 2010-01-18 13:28   ` Fredrik Kuivinen
  2010-01-18 13:45     ` Johannes Sixt
  0 siblings, 1 reply; 10+ messages in thread
From: Fredrik Kuivinen @ 2010-01-18 13:28 UTC (permalink / raw)
  To: Johannes Sixt; +Cc: git, Junio C Hamano, Linus Torvalds, Andrzej K. Haczewski

On Mon, Jan 18, 2010 at 12:11, Johannes Sixt <j.sixt@viscovery.net> wrote:
> Fredrik Kuivinen schrieb:
>> +/* This lock protects all the variables above. */
>> +static pthread_mutex_t grep_lock = PTHREAD_MUTEX_INITIALIZER;
>> +
>> +/* Signalled when a new work_item is added to todo. */
>> +static pthread_cond_t cond_add = PTHREAD_COND_INITIALIZER;
>> +
>> +/* Signalled when the result from one work_item is written to
>> +   stdout. */
>> +static pthread_cond_t cond_write = PTHREAD_COND_INITIALIZER;
>> +
>> +/* Signalled when we are finished with everything. */
>> +static pthread_cond_t cond_result = PTHREAD_COND_INITIALIZER;
>
> Please do not use PTHREAD_MUTEX_INITIALIZER nor PTHREAD_COND_INITIALIZER;
> call pthread_mutex_init and pthread_cond_init (and the corresponding
> *_destroy!!) from the code.

Ok. Will fix.

>> +static void add_work(enum work_type type, char *name, char *buf, size_t size)
>> +{
>> +...
>> +     pthread_mutex_unlock(&grep_lock);
>> +     pthread_cond_signal(&cond_add);
>
> Please swap these two lines, so that pthread_cond_signal() is called while
> the lock is held.

May I ask why? (just curious)


Thanks for your comments.

- Fredrik


* Re: [PATCH v3] Threaded grep
  2010-01-18 13:28   ` Fredrik Kuivinen
@ 2010-01-18 13:45     ` Johannes Sixt
  0 siblings, 0 replies; 10+ messages in thread
From: Johannes Sixt @ 2010-01-18 13:45 UTC (permalink / raw)
  To: Fredrik Kuivinen
  Cc: git, Junio C Hamano, Linus Torvalds, Andrzej K. Haczewski

Fredrik Kuivinen schrieb:
> On Mon, Jan 18, 2010 at 12:11, Johannes Sixt <j.sixt@viscovery.net> wrote:
>> Fredrik Kuivinen schrieb:
>>> +     pthread_mutex_unlock(&grep_lock);
>>> +     pthread_cond_signal(&cond_add);
>> Please swap these two lines, so that pthread_cond_signal() is called while
>> the lock is held.
> 
> May I ask why? (just curious)

Because our pthreads_cond_* functions on Windows are not POSIXly correct
and work reliably only when pthread_cond_signal() is called while the
mutex is held.
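
So the end of add_work() should look roughly like this (sketch):

	/* still inside add_work(): update the queue, then signal
	   while grep_lock is still held */
	todo_end = (todo_end + 1) % ARRAY_SIZE(todo);
	pthread_cond_signal(&cond_add);
	pthread_mutex_unlock(&grep_lock);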

-- Hannes


* Re: [PATCH v3] Threaded grep
  2010-01-18 10:33 [PATCH v3] Threaded grep Fredrik Kuivinen
  2010-01-18 11:11 ` Johannes Sixt
@ 2010-01-18 18:05 ` Linus Torvalds
  2010-01-19  7:34   ` Fredrik Kuivinen
       [not found] ` <7vmy0bhxn1.fsf@alter.siamese.dyndns.org>
  2 siblings, 1 reply; 10+ messages in thread
From: Linus Torvalds @ 2010-01-18 18:05 UTC (permalink / raw)
  To: Fredrik Kuivinen; +Cc: git, Junio C Hamano



On Mon, 18 Jan 2010, Fredrik Kuivinen wrote:
>
> Make git grep use threads when it is available.

Ok, this is much better.

On my machine (4 cores with HT, so 8 threads total), I get the 
following:

 [ Note: the --no-ext-grep is because I'm comparing with a git that has 
  the original grep optimization, but hasn't removed the external grep 
  functionality yet. I just used the same command line when comparing 
  to next+your patch, so don't mind it.

  Also, the three examples are chosen to be: "trivial single match", 
  "regex single match", and "trivial lots of matches" ]

Before (all best-of-five), with the non-threaded internal grep:

 - git grep --no-ext-grep qwerty

	real	0m0.311s
	user	0m0.164s
	sys	0m0.144s

 - git grep --no-ext-grep qwerty.*as

	real	0m0.555s
	user	0m0.416s
	sys	0m0.132s

 - git grep --no-ext-grep function

	real	0m0.461s
	user	0m0.276s
	sys	0m0.180s

After, with threading:

 - git grep --no-ext-grep qwerty

	real	0m0.152s
	user	0m0.788s
	sys	0m0.212s

 - git grep --no-ext-grep qwerty.*as

	real	0m0.148s
	user	0m0.724s
	sys	0m0.284s

 - git grep --no-ext-grep function

	real	0m0.241s
	user	0m1.436s
	sys	0m0.216s

so now it's a clear win in all cases. And as you'd expect, the "complex 
case with single output" is the one that wins most, since it's the one 
that should have the most CPU usage, with the least synchronization.

That said, it still does waste quite a bit of time doing this, and while 
it almost doubles performance in the last case too, it does so at the cost 
of quite a bit of overhead.

(Some overhead is natural, especially since I have HT. Running two threads 
on the same core does _not_ give double the performance, so the CPU time 
almost doubling - because the threads themselves run slower - is not 
unexpected. It's when the CPU time more than quadruples that I suspect 
that it could be improved a bit).

		Linus


* Re: [PATCH v3] Threaded grep
       [not found] ` <7vmy0bhxn1.fsf@alter.siamese.dyndns.org>
@ 2010-01-19  0:12   ` Fredrik Kuivinen
  2010-01-19  7:03     ` Johannes Sixt
  2010-01-25 22:47     ` Fredrik Kuivinen
  0 siblings, 2 replies; 10+ messages in thread
From: Fredrik Kuivinen @ 2010-01-19  0:12 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Linus Torvalds, Git Mailing List, Johannes Sixt

[I am adding git@vger to the Cc list as this may be interesting to
others as well.]

On Mon, Jan 18, 2010 at 23:10, Junio C Hamano <gitster@pobox.com> wrote:
> Fredrik Kuivinen <frekui@gmail.com> writes:
>
>> diff --git a/builtin-grep.c b/builtin-grep.c
>> index 24ae1ce..dc07e9e 100644
>> --- a/builtin-grep.c
>> +++ b/builtin-grep.c
>> @@ -15,11 +15,257 @@
>>  #include "grep.h"
>>  #include "quote.h"
>>
>> +#ifndef NO_PTHREADS
>> +#include "thread-utils.h"
>> +#include <pthread.h>
>> +#endif
>> +
>>  static char const * const grep_usage[] = {
>>       "git grep [options] [-e] <pattern> [<rev>...] [[--] path...]",
>>       NULL
>>  };
>>
>> +static int use_threads = 1;
>> +
>> +#ifndef NO_PTHREADS
>> +#define THREADS 8
>> +static pthread_t threads[THREADS];
>
> I had to wonder why builtin-pack-objects.c doesn't have an array like
> this.  It uses an array of structure each of which bundles per-thread
> work item data and pthread_t.  Would an arrangement like that make it
> easier to read for this patch as well?

From a cursory look at builtin-pack-objects.c I think it depends on
how you structure the work you are about to do.

In builtin-pack-objects.c the work seems to be divided evenly among
the threads at the start. When a thread doesn't have anything to do,
it steals work from some other thread.

In the current patch we instead add work to a FIFO from one thread and
then the consumer threads pick work from the FIFO. I think the current
structure suits the FIFO style quite well.

>> +static void* load_file(const char *filename, size_t *sz);
>
> Style: asterisks stick to identifiers, not types.  There are other
> places with the same issue in the patch, e.g.
>> +static struct work_item* get_work()
> that I'll omit from the rest of my comments.

Will fix. I looked carefully at the arguments, but forgot the return types.

>> +enum work_type {WORK_BUF, WORK_FILE};
>> +
>> +/* We use one producer thread and number_of_threads consumer
>> +   threads. The producer adds struct work_items to 'todo' and the
>> +   consumers pick work items from the same array. */
>
> Style:
>
>        /*
>         * We write multi line comments
>         * like this.
>         */

Will fix.

>> +#define TODO_SIZE 128
>
> How was this size tuned?  Can everybody (from teeny machines to beefy
> ones) use the same setting?

I simply increased it until I didn't see any more significant
performance improvements on the two boxes I have access to. As I only
have two boxes, it can most certainly be tuned better for machines
with significantly different characteristics. However, I think
that 128 is reasonable in the sense that it gives a nice improvement
for everyone with more than one core.

>> +static struct work_item todo[TODO_SIZE];
>> +static int todo_start = 0;
>
> We try not to explicitly initialize statics to 0 and let BSS segment
> take care of them.

Will fix.

>> +/* This lock protects all the variables above. */
>> +static pthread_mutex_t grep_lock = PTHREAD_MUTEX_INITIALIZER;
>
> J6t had comments on these initializers; do they also apply to
> builtin-pack-objects.c?

I believe so, but I do not know. Johannes?

>> +/* This function takes ownership of 'name' and 'buf'. They will be
>> +   deallocated with free. */
>> +static int grep_buffer_async(struct grep_opt *opt, char *name, char *buf,
>> +                          size_t size)
>> +{
>> +     add_work(WORK_BUF, name, buf, size);
>> +     return 0;
>> +}
>> +
>> +static int grep_file_async(struct grep_opt *opt, char *name,
>> +                        const char *filename)
>> +{
>> +     add_work(WORK_FILE, name, (char*) filename, 0);
>> +     return 0;
>> +}
>
> If I am reading the code correctly, it seems that you read quite many
> blobs in core and queue them in todo[] (up to the size limit of the
> array), and the worker bees in run() consumes them.  Shouldn't the
> rate of feeding blobs be limited in some way (other than just having a
> fixed TODO_SIZE, but somehow linked to the rate the worker bees finish
> their work) to avoid wasting memory like this?  One approach might be
> to queue only the blob SHA-1 not the data in work item queue just like
> you queue only the filename not the contents, and have the worker bee
> to read the blob into memory.

Hm, yes, that would be an improvement. One just has to serialize the
calls to read_sha1_file (as I don't think read_sha1_file can safely
be called from multiple threads simultaneously).
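
Something like this wrapper would probably do (untested sketch,
assuming the usual read_sha1_file() signature; the mutex would be
initialized together with the others):

	static pthread_mutex_t read_sha1_mutex;

	static void *locked_read_sha1_file(const unsigned char *sha1,
					   enum object_type *type,
					   unsigned long *size)
	{
		void *data;

		pthread_mutex_lock(&read_sha1_mutex);
		data = read_sha1_file(sha1, type, size);
		pthread_mutex_unlock(&read_sha1_mutex);
		return data;
	}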

>> ...
>> +static int grep_buffer_internal(struct grep_opt *opt, char *name, char *buf,
>> +                             size_t size)
>> +{
>> +#ifndef NO_PTHREADS
>> +     if (use_threads)
>> +             return grep_buffer_async(opt, name, buf, size);
>> +#endif
>> +     {
>> +             int hit = grep_buffer(opt, name, buf, size);
>> +             free(name);
>> +             free(buf);
>> +             return hit;
>> +     }
>> +}
>> +
>>  static int grep_config(const char *var, const char *value, void *cb)
>>  {
>>       struct grep_opt *opt = cb;
>> @@ -159,21 +405,21 @@ static int grep_sha1(struct grep_opt *opt, const unsigned char *sha1, const char
>>       if (opt->relative && opt->prefix_length) {
>>               quote_path_relative(name + tree_name_len, -1, &pathbuf, opt->prefix);
>>               strbuf_insert(&pathbuf, 0, name, tree_name_len);
>> -             name = pathbuf.buf;
>> +     } else {
>> +             strbuf_addstr(&pathbuf, name);
>>       }
>> -     hit = grep_buffer(opt, name, data, size);
>> -     strbuf_release(&pathbuf);
>> -     free(data);
>> +
>> +     hit = grep_buffer_internal(opt, strbuf_detach(&pathbuf, NULL),
>> +                                data, size);
>> +
>>       return hit;
>>  }
>
> This is quite a puzzling code.  Your grep_buffer_internal() returns
> the return value from grep_buffer_async() which always return 0 but
> then that 0 is returned to the caller as "we didn't see any hit".
> What happens if we start optimizing for a case where the end user is
> interested only in one bit of information "does it find anything, or
> is there absolutely no hits?" and wanted to add an early return
> codepath to grep_tree() and grep_cache() so that we say "Ah, we have
> seen a hit, so we don't have to look in others"?
>
> IOW, this patch makes the variable "hit" meaningless and confuses the
> readers.  This actually is the point that most worries me from longer
> term maintenance point of view.

The idea was to keep the current calling conventions as much as
possible, so that grep_cache and grep_tree could stay nearly
unchanged. So hit = 1 now means "we found something" and hit = 0 means
"we didn't find anything right now, but we may find something later
for this entry". The early return you sketched above will still
produce correct results, but it will not be an optimization for the
threaded code.

With the patch the return value from grep_file and grep_sha1 has to be
ORed with the return value from wait_all to determine if we found
something.

> How is the order of output controlled by the way?  I see you are
> keeping more than one threads from doing write_or_die() of the
> accumulated output at the same time with a mutex in work_done(), but
> if we have large files whose names come near the beginning and then a
> small file whose name come later in the input, we should defer the
> output of hits from the later smaller one before the earlier larger
> ones even if the worker bee assigned to the smaller one finishes while
> the other worker bees are still searching in the larger ones; it is
> not obvious to me where you are guaranteeing that ordering.

The output order is controlled by todo_done, todo_start, and work_item::done.

The rule is that the range [todo_done, todo_start) (modulo
ARRAY_SIZE(todo)) in "todo" contains the work_items that have been or
are being processed by a consumer thread. If a particular work_item
has been processed, then .done = 1, and otherwise .done = 0. When a
work_item is marked as done in "work_done" we traverse a prefix of
the range [todo_done, todo_start): we stop as soon as we see
.done = 0. We then set todo_done to the index of the first work_item
with .done = 0.

In your example todo_done = 0 and todo_start = 2. todo[0] represents a
large file and todo[1] represents a small file. If the small file gets
done first we set todo[1].done to 1 in work_done. As todo_done = 0 and
todo[0].done = 0 we will not write any output now. When work_done is
called for todo[0], we set todo[0].done to 1. We will then write the
output for todo[0] (as todo_done = 0 and todo[0].done = 1) and after
that we write the output for todo[1].
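
In code, the flush in work_done() boils down to walking that
completed prefix (same loop as in the patch, freeing omitted):

	/* grep_lock is held; flush every finished item at the
	   front of the queue, so output keeps submission order */
	while (todo_done != todo_start && todo[todo_done].done) {
		struct work_item *w = &todo[todo_done];
		write_or_die(1, w->out.buf, w->out.len);
		todo_done = (todo_done + 1) % ARRAY_SIZE(todo);
	}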

In the current patch the mutex grep_lock is held while the output is
written. I'm guessing that this unnecessary serialization is the
cause of the wasted CPU time Linus is seeing. (Strangely enough, I
haven't observed this phenomenon on the boxes I have tested it on.)

> I am wondering if this can somehow be made into a change with alot
> smaller inpact, in spirit similar to "preloading".  The higher level
> loop may walk the input (be it cache, tree, or directory), issuing one
> grep_file() or grep_sha1() at a time just like the existing code, but
> the thread-aware code adds "priming" phase that (1) populates a "work
> queue" with a very small memory footprint, and that (2) starts more
> than one underlying grep on different files and blobs (up to the
> number of threads you are allowed, like your "#define THREADS 8"), and
> that (3) upon completion of one "work queue" item, the work queue item
> is marked with its result.
>
> Then grep_file() and grep_sha1() _may_ notice that the work queue item
> hasn't completed, and would wait in such a case, or just emits the
> output produced earlier (could be much earlier) by the worker bee.
>
> The low-memory-footprint work queue could be implemented as a lazy
> one, and may be very different depending on how the input is created.
> If we are grepping in the index, it could be a small array of <array
> index in active_cache[], done-bit, result-bit, result-strbuf> with a
> single integer that is a pointer into the index to indicate up to
> which index entry the work queue has been populated.

I will have to think about this a bit more. It's getting late here.

- Fredrik


* Re: [PATCH v3] Threaded grep
  2010-01-19  0:12   ` Fredrik Kuivinen
@ 2010-01-19  7:03     ` Johannes Sixt
  2010-01-25 22:47     ` Fredrik Kuivinen
  1 sibling, 0 replies; 10+ messages in thread
From: Johannes Sixt @ 2010-01-19  7:03 UTC (permalink / raw)
  To: Fredrik Kuivinen; +Cc: Junio C Hamano, Linus Torvalds, Git Mailing List

Fredrik Kuivinen schrieb:
> On Mon, Jan 18, 2010 at 23:10, Junio C Hamano <gitster@pobox.com> wrote:
>> Fredrik Kuivinen <frekui@gmail.com> writes:
>>> +/* This lock protects all the variables above. */
>>> +static pthread_mutex_t grep_lock = PTHREAD_MUTEX_INITIALIZER;
>> J6t had comments on these initializers; do they also apply to
>> builtin-pack-objects.c?
> 
> I believe so, but I do not know. Johannes?

44626dc (MSVC: Windows-native implementation for subset of Pthreads API)
that is currently queued in pu makes the necessary adjustments to
builtin-pack-objects.c.

-- Hannes


* Re: [PATCH v3] Threaded grep
  2010-01-18 18:05 ` Linus Torvalds
@ 2010-01-19  7:34   ` Fredrik Kuivinen
  2010-01-19 15:41     ` Linus Torvalds
  0 siblings, 1 reply; 10+ messages in thread
From: Fredrik Kuivinen @ 2010-01-19  7:34 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: git, Junio C Hamano

On Mon, Jan 18, 2010 at 10:05:19AM -0800, Linus Torvalds wrote:
> On my machine (4 cores with HT, so 8 threads total), I get the 
> following:
> 
>  [ Note: the --no-ext-grep is because I'm comparing with a git that has 
>   the original grep optimization, but hasn't removed the external grep 
>   functionality yet. I just used the same command line when then comparing 
>   to next+your patch, so don't mind it.
> 
>   Also, the three examples are chosen to be: "trivial single match", 
>   "regex single match", and "trivial lots of matches" ]


You may be testing slightly different things: next has 885d211 (grep:
rip out pessimization to use fixmatch()), and bbc09c2 (grep: rip out
support for external grep) was added before it. So next+patch uses
regexec, while the version with support for external grep uses
strstr. IIRC strstr was a bit faster than regexec on your
box. However, this only explains a small part of the results you are
seeing.

> 
> Before (all best-of-five), with the non-threaded internal grep:
> 
>  - git grep --no-ext-grep qwerty
> 
> 	real	0m0.311s
> 	user	0m0.164s
> 	sys	0m0.144s
> 
>  - git grep --no-ext-grep qwerty.*as
> 
> 	real	0m0.555s
> 	user	0m0.416s
> 	sys	0m0.132s
> 
>  - git grep --no-ext-grep function
> 
> 	real	0m0.461s
> 	user	0m0.276s
> 	sys	0m0.180s
> 
> After, with threading:
> 
>  - git grep --no-ext-grep qwerty
> 
> 	real	0m0.152s
> 	user	0m0.788s
> 	sys	0m0.212s
> 
>  - git grep --no-ext-grep qwerty.*as
> 
> 	real	0m0.148s
> 	user	0m0.724s
> 	sys	0m0.284s
> 
>  - git grep --no-ext-grep function
> 
> 	real	0m0.241s
> 	user	0m1.436s
> 	sys	0m0.216s
> 
> so now it's a clear win in all cases. And as you'd expect, the "complex 
> case with single output" is the one that wins most, since it's the one 
> that should have the most CPU usage, with the least synchronization.
> 
> That said, it still does waste quite a bit of time doing this, and while 
> it almost doubles performance in the last case too, it does so at the cost 
> of quite a bit of overhead.
> 
> (Some overhead is natural, especially since I have HT. Running two threads 
> on the same core does _not_ give double the performance, so the CPU time 
> almost doubling - because the threads themselves run slower - is not 
> unexpected. It's when the CPU time more than quadruples that I suspect 
> that it could be improved a bit).


I haven't seen this overhead during my tests. But I'm _guessing_ that
the problem is that the mutex grep_lock is held while the result is
written to stdout. So while we are writing, no significant amount of
work can be done by any other thread. Here is a patch to fix this
(applies on top of v3).



diff --git a/builtin-grep.c b/builtin-grep.c
index 8fca7a6..422b0ec 100644
--- a/builtin-grep.c
+++ b/builtin-grep.c
@@ -139,21 +139,48 @@ static int grep_file_async(struct grep_opt *opt, char *name,
 	return 0;
 }
 
+/* Mark the work_item as done. The function guarantees that w->done is
+ * set to 1 and that if w is included in a prefix of the range
+ * [todo_done, todo_start) where each work_item has .done == 1, then
+ * w->out will eventually be written to stdout. The writing is done
+ * either by this thread or by the thread which has set 'writing' to
+ * 1.
+ */
 static void work_done(struct work_item *w)
 {
-	int old_done;
+	/* 1 if there is a thread which is writing data to stdout and
+	   0 otherwise. */
+	static int writing = 0;
+	int old_done, write_idx, write_to;
 
 	pthread_mutex_lock(&grep_lock);
 	w->done = 1;
 	old_done = todo_done;
-	for(; todo[todo_done].done && todo_done != todo_start;
-	    todo_done = (todo_done+1) % ARRAY_SIZE(todo)) {
-		w = &todo[todo_done];
-		write_or_die(1, w->out.buf, w->out.len);
-		if (w->type == WORK_BUF)
-			free(w->buf);
-
-		free(w->name);
+
+	/* We need a loop here if todo_start is changed while we write
+	 * some data. */
+	while (!writing && todo[todo_done].done && todo_done != todo_start) {
+		write_idx = todo_done;
+		write_to = todo_start;
+		writing = 1;
+
+		/* Unlock the mutex while we write the data. This is
+		 * safe as writing == 1 and hence there is at most one
+		 * thread which writes data at any time. */
+		pthread_mutex_unlock(&grep_lock);
+		for(; todo[write_idx].done && write_idx != write_to;
+		    write_idx = (write_idx+1) % ARRAY_SIZE(todo)) {
+			w = &todo[write_idx];
+			write_or_die(1, w->out.buf, w->out.len);
+			if (w->type == WORK_BUF)
+				free(w->buf);
+		
+			free(w->name);
+		}
+
+		pthread_mutex_lock(&grep_lock);
+		writing = 0;
+		todo_done = write_idx;
 	}
 
 	if (old_done != todo_done)


* Re: [PATCH v3] Threaded grep
  2010-01-19  7:34   ` Fredrik Kuivinen
@ 2010-01-19 15:41     ` Linus Torvalds
  0 siblings, 0 replies; 10+ messages in thread
From: Linus Torvalds @ 2010-01-19 15:41 UTC (permalink / raw)
  To: Fredrik Kuivinen; +Cc: git, Junio C Hamano



On Tue, 19 Jan 2010, Fredrik Kuivinen wrote:
> > 
> >  - git grep --no-ext-grep function
> > 
> > 	real	0m0.241s
> > 	user	0m1.436s
> > 	sys	0m0.216s
> 
> I haven't seen this overhead during my tests. But I'm _guessing_ that
> the problem is that the mutex grep_lock is held while the result is
> written to stdout. So when we are writing, no significant amount of
> work can be done by any thread. Here is a patch to fix this (applies
> on to of v3).

No, I get basically the same timings with this patch:

	real	0m0.239s
	user	0m1.372s
	sys	0m0.280s

(that _may_ be a slight real improvement, but it's more likely that
the change in user/sys time is just noise).

But I do think that you're right that there's a difference in my timings, 
and I am comparing to the one that uses strstr(). Your patches didn't have 
a "disable threads" option that I could see, so I just compared against my 
old numbers. 

So I guess a better example is to use a "complex" grep pattern like 
'function.*a' which also gets a lot of hits, and the threaded case does 
look much better in comparison. Pre-threaded:

	real	0m0.921s
	user	0m0.728s
	sys	0m0.184s

so it's less than 2x the CPU time, and a 3.85x real-time improvement. And 
that "less than 2x the CPU time" is the factor I'd expect from HT.

It's also worth noting that at least _some_ versions of glibc would 
actually use different versions of subroutines for the threaded vs 
non-threaded case, ie they'd avoid locking overhead when they know they 
aren't running with threads. So things like stdio actually got slower when 
you did any pthreads things, because suddenly glibc knew that it needed to 
do locking around the functions.

			Linus


* Re: [PATCH v3] Threaded grep
  2010-01-19  0:12   ` Fredrik Kuivinen
  2010-01-19  7:03     ` Johannes Sixt
@ 2010-01-25 22:47     ` Fredrik Kuivinen
  1 sibling, 0 replies; 10+ messages in thread
From: Fredrik Kuivinen @ 2010-01-25 22:47 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Linus Torvalds, Git Mailing List, Johannes Sixt

On Tue, Jan 19, 2010 at 01:12, Fredrik Kuivinen <frekui@gmail.com> wrote:
> On Mon, Jan 18, 2010 at 23:10, Junio C Hamano <gitster@pobox.com> wrote:
>>
>> I am wondering if this can somehow be made into a change with alot
>> smaller inpact, in spirit similar to "preloading".  The higher level
>> loop may walk the input (be it cache, tree, or directory), issuing one
>> grep_file() or grep_sha1() at a time just like the existing code, but
>> the thread-aware code adds "priming" phase that (1) populates a "work
>> queue" with a very small memory footprint, and that (2) starts more
>> than one underlying grep on different files and blobs (up to the
>> number of threads you are allowed, like your "#define THREADS 8"), and
>> that (3) upon completion of one "work queue" item, the work queue item
>> is marked with its result.
>>
>> Then grep_file() and grep_sha1() _may_ notice that the work queue item
>> hasn't completed, and would wait in such a case, or just emits the
>> output produced earlier (could be much earlier) by the worker bee.
>>
>> The low-memory-footprint work queue could be implemented as a lazy
>> one, and may be very different depending on how the input is created.
>> If we are grepping in the index, it could be a small array of <array
>> index in active_cache[], done-bit, result-bit, result-strbuf> with a
>> single integer that is a pointer into the index to indicate up to
>> which index entry the work queue has been populated.
>
> I will have to think about this a bit more. It's getting late here.

I will be sending v4 of the patch in a couple of minutes, but I want
to comment on this first.

Sure, it is probably possible to structure the code as you suggest.
However, I am not so sure that it will be a significantly smaller
change. I find the approach taken in the patch to be quite nice as the
threaded and non-threaded cases are fairly similar. There is a block
of code which deals with the threading, but that part is mostly
self-contained. In v4 I have not changed the overall approach to the
problem.

- Fredrik

