[PATCH 1/2] Add support for multi threaded checkout

git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

From: James Pickens <james.e.pickens@intel.com>
To: git@vger.kernel.org
Cc: James Pickens <james.e.pickens@intel.com>
Subject: [PATCH 1/2] Add support for multi threaded checkout
Date: Thu, 18 Dec 2008 13:56:50 -0700	[thread overview]
Message-ID: <1229633811-3877-1-git-send-email-james.e.pickens@intel.com> (raw)
In-Reply-To: <3BA20DF9B35F384F8B7395B001EC3FB3265B2A01@azsmsx507.amr.corp.intel.com>

This speeds up operations like 'git clone' on NFS drives tremendously, but
slows down the same operations on local disks.

Partitioning the work and launching threads is done in unpack-trees.c.  The code
is mostly copied from preload-index.c.  The maximum number of threads is set to
8, which seemed to give a reasonable tradeoff between performance improvement on
NFS and degradation on local disks.

Some code was added to entry.c for serialization.  Most of the contents of
checkout_entry and write_entry are serialized, except writing the checked out
files to disk.
---
 entry.c        |   42 +++++++++++++++++---
 unpack-trees.c |  115 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 151 insertions(+), 6 deletions(-)

diff --git a/entry.c b/entry.c
index aa2ee46..764d2db 100644
--- a/entry.c
+++ b/entry.c
@@ -1,6 +1,21 @@
 #include "cache.h"
 #include "blob.h"
 
+#ifdef NO_PTHREADS
+
+#define checkout_lock()		(void)0
+#define checkout_unlock()	(void)0
+
+#else
+
+#include <pthread.h>
+
+static pthread_mutex_t checkout_mutex = PTHREAD_MUTEX_INITIALIZER;
+#define checkout_lock()		pthread_mutex_lock(&checkout_mutex)
+#define checkout_unlock()	pthread_mutex_unlock(&checkout_mutex)
+
+#endif
+
 static void create_directories(const char *path, const struct checkout *state)
 {
 	int len = strlen(path);
@@ -100,7 +115,7 @@ static void *read_blob_entry(struct cache_entry *ce, const char *path, unsigned
 
 static int write_entry(struct cache_entry *ce, char *path, const struct checkout *state, int to_tempfile)
 {
-	int fd;
+	int fd, retval;
 	long wrote;
 
 	switch (ce->ce_mode & S_IFMT) {
@@ -109,10 +124,15 @@ static int write_entry(struct cache_entry *ce, char *path, const struct checkout
 		unsigned long size;
 
 	case S_IFREG:
+		checkout_lock();
 		new = read_blob_entry(ce, path, &size);
-		if (!new)
-			return error("git checkout-index: unable to read sha1 file of %s (%s)",
+
+		if (!new) {
+			retval = error("git checkout-index: unable to read sha1 file of %s (%s)",
 				path, sha1_to_hex(ce->sha1));
+			checkout_unlock();
+			return retval;
+		}
 
 		/*
 		 * Convert from git internal format to working tree format
@@ -124,6 +144,7 @@ static int write_entry(struct cache_entry *ce, char *path, const struct checkout
 			new = strbuf_detach(&buf, &newsize);
 			size = newsize;
 		}
+		checkout_unlock();
 
 		if (to_tempfile) {
 			strcpy(path, ".merge_file_XXXXXX");
@@ -143,10 +164,17 @@ static int write_entry(struct cache_entry *ce, char *path, const struct checkout
 			return error("git checkout-index: unable to write file %s", path);
 		break;
 	case S_IFLNK:
+		checkout_lock();
 		new = read_blob_entry(ce, path, &size);
-		if (!new)
-			return error("git checkout-index: unable to read sha1 file of %s (%s)",
+
+		if (!new) {
+			retval = error("git checkout-index: unable to read sha1 file of %s (%s)",
 				path, sha1_to_hex(ce->sha1));
+			checkout_unlock();
+			return retval;
+		}
+		checkout_unlock();
+
 		if (to_tempfile || !has_symlinks) {
 			if (to_tempfile) {
 				strcpy(path, ".merge_link_XXXXXX");
@@ -192,7 +220,7 @@ static int write_entry(struct cache_entry *ce, char *path, const struct checkout
 
 int checkout_entry(struct cache_entry *ce, const struct checkout *state, char *topath)
 {
-	static char path[PATH_MAX + 1];
+	char path[PATH_MAX + 1];
 	struct stat st;
 	int len = state->base_dir_len;
 
@@ -229,6 +257,8 @@ int checkout_entry(struct cache_entry *ce, const struct checkout *state, char *t
 			return error("unable to unlink old '%s' (%s)", path, strerror(errno));
 	} else if (state->not_new)
 		return 0;
+	checkout_lock();
 	create_directories(path, state);
+	checkout_unlock();
 	return write_entry(ce, path, state, 0);
 }
diff --git a/unpack-trees.c b/unpack-trees.c
index 54f301d..30b9862 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -8,6 +8,10 @@
 #include "progress.h"
 #include "refs.h"
 
+#ifndef NO_PTHREADS
+#include <pthread.h>
+#endif
+
 /*
  * Error messages expected by scripts out of plumbing commands such as
  * read-tree.  Non-scripted Porcelain is not required to use these messages
@@ -85,6 +89,115 @@ static void unlink_entry(struct cache_entry *ce)
 }
 
 static struct checkout state;
+
+#ifdef NO_PTHREADS
+#define progress_lock()		(void)0
+#define progress_unlock()	(void)0
+
+static int threaded_checkout(struct index_state *index, int update, struct progress *prog, unsigned *prog_cnt)
+{
+	return 0; /* built without pthreads: do no work and report no errors */
+}
+
+#else
+
+#include <pthread.h>
+
+static pthread_mutex_t progress_mutex = PTHREAD_MUTEX_INITIALIZER;
+#define progress_lock()		pthread_mutex_lock(&progress_mutex)
+#define progress_unlock()	pthread_mutex_unlock(&progress_mutex)
+
+/*
+ * Mostly randomly chosen maximum thread counts: we
+ * cap the parallelism to 8 threads, and we want
+ * to have at least 500 files per thread for it to
+ * be worth starting a thread.
+ */
+#define MAX_PARALLEL (8)
+#define THREAD_COST (500)
+
+struct thread_data {
+	pthread_t pthread;		/* handle filled in by pthread_create() */
+	struct index_state *index;	/* index whose entries this thread walks */
+	struct checkout *state;		/* handed to checkout_entry() */
+	int update, offset, nr, errs;	/* checkout flag, slice start, slice length, OR-ed results */
+	struct progress *progress;	/* handed to display_progress() */
+	unsigned *progress_cnt;		/* counter incremented under progress_lock() */
+};
+
+/* Thread body: check out the CE_UPDATE entries in this thread's index slice. */
+static void *checkout_thread(void *_data)
+{
+	struct thread_data *p = _data;
+	struct index_state *index = p->index;
+	struct cache_entry **cep = index->cache + p->offset;
+	int nr = p->nr;
+
+	p->errs = 0;
+
+	/* Clamp the slice to the end of the index; the last one may be short. */
+	if (nr + p->offset > index->cache_nr)
+		nr = index->cache_nr - p->offset;
+
+	/* Pre-tested loop: a slice left empty by the clamp must touch no entry. */
+	for (; nr > 0; nr--) {
+		struct cache_entry *ce = *cep++;
+
+		if (!(ce->ce_flags & CE_UPDATE))
+			continue;
+		/* The progress counter is shared by all checkout threads. */
+		progress_lock();
+		display_progress(p->progress, ++(*p->progress_cnt));
+		progress_unlock();
+		ce->ce_flags &= ~CE_UPDATE;
+		if (p->update) {
+			p->errs |= checkout_entry(ce, p->state, NULL);
+			fflush(stdout);
+		}
+	}
+
+	return NULL;
+}
+
+/* Split the index into equal slices, one thread per slice; returns the OR of the threads' errors. */
+static int threaded_checkout(struct index_state *index, int update, struct progress *prog, unsigned *prog_cnt)
+{
+	struct thread_data data[MAX_PARALLEL];
+	int threads = index->cache_nr / THREAD_COST;
+	int work, i, errs = 0;
+
+	/* Fewer than THREAD_COST entries: not worth a thread, so do nothing. */
+	if (threads == 0)
+		return 0;
+	if (threads > MAX_PARALLEL)
+		threads = MAX_PARALLEL;
+
+	/* Entries per thread, rounded up so the slices cover the whole index. */
+	work = (index->cache_nr + threads - 1) / threads;
+	for (i = 0; i < threads; i++) {
+		struct thread_data *td = &data[i];
+
+		td->index = index;
+		td->offset = i * work;
+		td->nr = work;
+		td->state = &state;
+		td->update = update;
+		td->progress = prog;
+		td->progress_cnt = prog_cnt;
+		if (pthread_create(&td->pthread, NULL, checkout_thread, td))
+			die("unable to create threaded checkout");
+	}
+	for (i = 0; i < threads; i++) {
+		if (pthread_join(data[i].pthread, NULL))
+			die("unable to join threaded checkout");
+		errs |= data[i].errs;
+	}
+
+	return errs;
+}
+
+#endif
+
 static int check_updates(struct unpack_trees_options *o)
 {
 	unsigned cnt = 0, total = 0;
@@ -118,6 +231,8 @@ static int check_updates(struct unpack_trees_options *o)
 		}
 	}
 
+	errs |= threaded_checkout(index, o->update, progress, &cnt);
+
 	for (i = 0; i < index->cache_nr; i++) {
 		struct cache_entry *ce = index->cache[i];
 
-- 
1.6.0.4.1116.gc5d7

next prev parent reply	other threads:[~2008-12-18 20:58 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-12-18 20:51 [RFC PATCH 0/2] Add support for multi threaded checkout Pickens, James E
2008-12-18 20:56 ` James Pickens [this message]
2008-12-18 20:56   ` [PATCH 2/2] Add core.threadedcheckout config option James Pickens
2008-12-18 21:41   ` [PATCH 1/2] Add support for multi threaded checkout Linus Torvalds
2008-12-18 23:35     ` James Pickens
2008-12-19  0:13       ` Linus Torvalds
2008-12-18 21:02 ` [RFC PATCH 0/2] " Nicolas Pitre
2008-12-18 21:13   ` James Pickens
2008-12-18 21:16 ` Nicolas Morey-Chaisemartin
2008-12-18 21:42   ` James Pickens
2008-12-19  1:04   ` James Pickens

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:aa2ee46 dfblob:764d2db dfblob:54f301d dfblob:30b9862 )
 OR (
bs:"[PATCH 1/2] Add support for multi threaded checkout" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1229633811-3877-1-git-send-email-james.e.pickens@intel.com \
    --to=james.e.pickens@intel.com \
    --cc=git@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).