git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Stefan Beller <sbeller@google.com>
To: git@vger.kernel.org
Cc: gitster@pobox.com, jrnieder@gmail.com, hvoigt@hvoigt.net,
	jens.lehmann@web.de, Stefan Beller <sbeller@google.com>
Subject: [RFC PATCH 2/3] run-commands: add an async queue processor
Date: Thu, 20 Aug 2015 18:40:36 -0700	[thread overview]
Message-ID: <1440121237-24576-2-git-send-email-sbeller@google.com> (raw)
In-Reply-To: <1440121237-24576-1-git-send-email-sbeller@google.com>

This adds functionality to do work in parallel.

The whole life cycle of such a thread pool would look like

    struct task_queue * tq = create_task_queue(32); // no of threads
    for (...)
        add_task(tq, process_one_item_function, item); // non blocking
    ...
    int ret = finish_task_queue(tq); // blocks until all tasks are done
    if (ret)
        die ("Not all items were processed");

The caller must take care of handling the output.

Signed-off-by: Stefan Beller <sbeller@google.com>
---

I sent this a while ago to the list, no comments on it :(
The core functionality stayed the same, but I hope to have improved the naming and
location of the code.

The NO_PTHREADS fallback path is the only part that is still WIP.


 run-command.c | 212 ++++++++++++++++++++++++++++++++++++++++++++++++++++++----
 run-command.h |  30 +++++++++
 2 files changed, 230 insertions(+), 12 deletions(-)

diff --git a/run-command.c b/run-command.c
index 28e1d55..4029011 100644
--- a/run-command.c
+++ b/run-command.c
@@ -4,6 +4,21 @@
 #include "sigchain.h"
 #include "argv-array.h"
 
+#ifdef NO_PTHREADS
+
+#else
+
+#include "thread-utils.h"
+
+#include <pthread.h>
+#include <semaphore.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#endif
+
+#include "git-compat-util.h"
+
 void child_process_init(struct child_process *child)
 {
 	memset(child, 0, sizeof(*child));
@@ -668,6 +683,22 @@ int git_atexit(void (*handler)(void))
 
 #endif
 
+void setup_main_thread(void)
+{
+	if (!main_thread_set) {
+		/*
+		 * We assume that the first time that start_async is called
+		 * it is from the main thread.
+		 */
+		main_thread_set = 1;
+		main_thread = pthread_self();
+		pthread_key_create(&async_key, NULL);
+		pthread_key_create(&async_die_counter, NULL);
+		set_die_routine(die_async);
+		set_die_is_recursing_routine(async_die_is_recursing);
+	}
+}
+
 int start_async(struct async *async)
 {
 	int need_in, need_out;
@@ -740,18 +771,7 @@ int start_async(struct async *async)
 	else if (async->out)
 		close(async->out);
 #else
-	if (!main_thread_set) {
-		/*
-		 * We assume that the first time that start_async is called
-		 * it is from the main thread.
-		 */
-		main_thread_set = 1;
-		main_thread = pthread_self();
-		pthread_key_create(&async_key, NULL);
-		pthread_key_create(&async_die_counter, NULL);
-		set_die_routine(die_async);
-		set_die_is_recursing_routine(async_die_is_recursing);
-	}
+	setup_main_thread();
 
 	if (proc_in >= 0)
 		set_cloexec(proc_in);
@@ -852,3 +872,171 @@ int capture_command(struct child_process *cmd, struct strbuf *buf, size_t hint)
 	close(cmd->out);
 	return finish_command(cmd);
 }
+
+#ifndef NO_PTHREADS
+struct job_list {
+	int (*fct)(struct task_queue *aq, void *task);
+	void *task;
+	struct job_list *next;
+};
+#endif
+
+struct task_queue {
+#ifndef NO_PTHREADS
+	/*
+	 * To avoid deadlocks always acquire the semaphores with lowest priority
+	 * first; priorities are in descending order as listed.
+	 *
+	 * The `mutex` is a general purpose lock for modifying data in the task
+	 * queue, such as adding a new task or adding a return value from
+	 * an already run task.
+	 *
+	 * `workingcount` and `freecount` are opposing semaphores, the sum of
+	 * their values should equal `max_threads` at any time while the `mutex`
+	 * is available.
+	 */
+	sem_t mutex;
+	sem_t workingcount;
+	sem_t freecount;
+
+	pthread_t *threads;
+	unsigned max_threads;
+
+	struct job_list *first;
+	struct job_list *last;
+#endif
+	int early_return;
+};
+
+#ifndef NO_PTHREADS
+
+static void get_task(struct task_queue *aq,
+		     int (**fct)(struct task_queue *aq, void *task),
+		     void **task,
+		     int *early_return)
+{
+	struct job_list *job;
+
+	sem_wait(&aq->workingcount);
+	sem_wait(&aq->mutex);
+
+	if (!aq->first)
+		die("BUG: internal error with dequeuing jobs for threads");
+	job = aq->first;
+	*fct = job->fct;
+	*task = job->task;
+	aq->early_return |= *early_return;
+	*early_return = aq->early_return;
+	aq->first = job->next;
+	if (!aq->first)
+		aq->last = NULL;
+
+	sem_post(&aq->freecount);
+	sem_post(&aq->mutex);
+
+	free(job);
+}
+
+static void *dispatcher(void *args)
+{
+	void *task;
+	int (*fct)(struct task_queue *aq, void *data);
+	int early_return = 0;
+	struct task_queue *aq = args;
+
+	get_task(aq, &fct, &task, &early_return);
+	while (fct) {
+		early_return = fct(aq, task);
+		get_task(aq, &fct, &task, &early_return);
+	}
+
+	pthread_exit(0);
+}
+#endif
+
+struct task_queue *create_task_queue(unsigned max_threads)
+{
+	struct task_queue *aq = xmalloc(sizeof(*aq));
+
+#ifndef NO_PTHREADS
+	unsigned i;
+	if (!max_threads)
+		aq->max_threads = online_cpus();
+	else
+		aq->max_threads = max_threads;
+
+	sem_init(&aq->mutex, 0, 1);
+	sem_init(&aq->workingcount, 0, 0);
+	sem_init(&aq->freecount, 0, aq->max_threads);
+	aq->threads = xmalloc(aq->max_threads * sizeof(pthread_t));
+
+	aq->first = NULL;
+	aq->last = NULL;
+
+	for (i = 0; i < aq->max_threads; i++)
+		pthread_create(&aq->threads[i], 0, &dispatcher, aq);
+
+	setup_main_thread();
+#endif
+	aq->early_return = 0;
+
+	return aq;
+}
+
+void add_task(struct task_queue *aq,
+	      int (*fct)(struct task_queue *aq, void *task),
+	      void *task)
+{
+#ifndef NO_PTHREADS
+	struct job_list *job_list;
+
+	job_list = xmalloc(sizeof(*job_list));
+	job_list->task = task;
+	job_list->fct = fct;
+	job_list->next = NULL;
+
+	sem_wait(&aq->freecount);
+	sem_wait(&aq->mutex);
+
+	if (!aq->last) {
+		aq->last = job_list;
+		aq->first = aq->last;
+	} else {
+		aq->last->next = job_list;
+		aq->last = aq->last->next;
+	}
+
+	sem_post(&aq->workingcount);
+	sem_post(&aq->mutex);
+#else
+	if (fct && !aq->early_return)
+		aq->early_return = fct(aq, task);
+#endif
+}
+
+int finish_task_queue(struct task_queue *aq)
+{
+	int ret;
+#ifndef NO_PTHREADS
+	unsigned i;
+	for (i = 0; i < aq->max_threads; i++)
+		add_task(aq, NULL, NULL);
+
+	for (i = 0; i < aq->max_threads; i++)
+		pthread_join(aq->threads[i], 0);
+
+	sem_destroy(&aq->mutex);
+	sem_destroy(&aq->workingcount);
+	sem_destroy(&aq->freecount);
+
+	if (aq->first)
+		die("BUG: internal error with queuing jobs for threads");
+
+	free(aq->threads);
+#endif
+	ret = aq->early_return;
+
+	free(aq);
+	return ret;
+}
+
diff --git a/run-command.h b/run-command.h
index 5b4425a..c2cfd49 100644
--- a/run-command.h
+++ b/run-command.h
@@ -119,4 +119,34 @@ struct async {
 int start_async(struct async *async);
 int finish_async(struct async *async);
 
+/*
+ * Creates a struct `task_queue`, which holds a list of tasks. Up to
+ * `max_threads` threads are active to process the enqueued tasks
+ * in first-in-first-out order.
+ *
+ * If `max_threads` is zero the number of cores available will be used.
+ *
+ * Currently this only works in environments with pthreads, in other
+ * environments, the task will be processed sequentially in `add_task`.
+ */
+struct task_queue *create_task_queue(unsigned max_threads);
+
+/*
+ * The function and data are put into the task queue.
+ *
+ * The function `fct` must not be NULL, as that's used internally
+ * in `finish_task_queue` to signal shutdown. If the return code
+ * of `fct` is non-zero, the remaining tasks will stop eventually;
+ * the currently running parallel tasks will be flushed out.
+ */
+void add_task(struct task_queue *aq,
+	      int (*fct)(struct task_queue *aq, void *task),
+	      void *task);
+
+/*
+ * Waits for all tasks to be done and frees the object. The return code
+ * is zero if all enqueued tasks were processed.
+ */
+int finish_task_queue(struct task_queue *aq);
+
 #endif
-- 
2.5.0.264.g01b5c38.dirty

  reply	other threads:[~2015-08-21  1:40 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-08-21  1:40 [PATCH 1/3] submodule: implement `module_clone` as a builtin helper Stefan Beller
2015-08-21  1:40 ` Stefan Beller [this message]
2015-08-21 19:05   ` [RFC PATCH 2/3] run-commands: add an async queue processor Junio C Hamano
2015-08-21 19:44     ` Jeff King
2015-08-21 19:48       ` Stefan Beller
2015-08-21 19:51         ` Jeff King
2015-08-21 20:12           ` Stefan Beller
2015-08-21 20:41       ` Junio C Hamano
2015-08-21 23:40       ` Stefan Beller
2015-08-24 21:22         ` Junio C Hamano
2015-08-21 19:45     ` Stefan Beller
2015-08-21 20:47       ` Junio C Hamano
2015-08-21 20:56         ` Stefan Beller
2015-08-21  1:40 ` [WIP/PATCH 3/3] submodule: helper to run foreach in parallel Stefan Beller
2015-08-21 19:23   ` Junio C Hamano
2015-08-21 20:21     ` Stefan Beller

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1440121237-24576-2-git-send-email-sbeller@google.com \
    --to=sbeller@google.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=hvoigt@hvoigt.net \
    --cc=jens.lehmann@web.de \
    --cc=jrnieder@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).