All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] index-pack: avoid false sharing on multicore systems
@ 2012-04-27 16:52 Nguyễn Thái Ngọc Duy
  0 siblings, 0 replies; only message in thread
From: Nguyễn Thái Ngọc Duy @ 2012-04-27 16:52 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy

thread_data[] is frequently accessed by all threads and it's
packed tight. If two threads running on two different processors,
updating one "struct thread_local" may invalidate the cache line that
also contains struct thread_local of the other thread, leading to
cache misses (aka false sharing [1])

Pad it properly. Make sure each "struct thread_local" is on a separate
cache line.

This only works with real processors though. Multithread on single
processor may be harmed by this change. Keep the tight layout in that
case.

[1] http://software.intel.com/en-us/articles/avoiding-and-identifying-false-sharing-among-threads/

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 My laptop has one physical core with hyperthread, so this patch does
 no good for me. I won't have access to a multicore system until the
 middle of next week, so I'm not sure if it actually helps at the
 moment.

 builtin/index-pack.c |   37 +++++++++++++++++++++++++++++++++----
 1 files changed, 33 insertions(+), 4 deletions(-)

diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index c1c3c81..598b18e 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -93,7 +93,8 @@ static int input_fd, output_fd, pack_fd;
 
 #ifndef NO_PTHREADS
 
-static struct thread_local *thread_data;
+static struct thread_local **thread_data;
+static char *thread_data_buffer;
 
 static pthread_mutex_t read_mutex;
 #define read_lock()		lock_mutex(&read_mutex)
@@ -126,11 +127,38 @@ static inline void unlock_mutex(pthread_mutex_t *mutex)
  */
 static void init_thread(void)
 {
+	int i, stripe;
+
 	init_recursive_mutex(&read_mutex);
 	pthread_mutex_init(&counter_mutex, NULL);
 	pthread_mutex_init(&work_mutex, NULL);
 	pthread_key_create(&key, NULL);
 	thread_data = xcalloc(nr_threads, sizeof(*thread_data));
+
+
+	if (online_cpus() && !getenv("GIT_ONE_CPU_CORE")) {
+		/*
+		 * A typical cache line is 64 byte long. Avoid false
+		 * sharing by putting thread data of each thread on a
+		 * separate cache line
+		 */
+		stripe = 64;
+		assert(sizeof(struct thread_local) <= stripe);
+		thread_data_buffer = xmalloc((nr_threads + 1) * stripe);
+#define ALIGN(x, stripe) (((x) + (stripe) - 1) & ~((stripe) - 1))
+		thread_data[0] = (struct thread_local *)ALIGN((unsigned int)thread_data_buffer, stripe);
+	} else {
+		/*
+		 * On the other hand, on single physical cpu, padding
+		 * wastes cache for nothing. Don't do it.
+		 */
+		stripe = sizeof(struct thread_local);
+		thread_data_buffer = xmalloc(nr_threads * stripe);
+		thread_data[0] = (struct thread_local *)thread_data_buffer;
+	}
+	memset(thread_data[0], 0, stripe * nr_threads);
+	for (i = 1; i < nr_threads; i++)
+		thread_data[i] = (struct thread_local*)((char*)(thread_data[0]) + i * stripe);
 	threads_active = 1;
 }
 
@@ -143,6 +171,7 @@ static void cleanup_thread(void)
 	pthread_mutex_destroy(&counter_mutex);
 	pthread_mutex_destroy(&work_mutex);
 	pthread_key_delete(key);
+	free(thread_data_buffer);
 	free(thread_data);
 }
 
@@ -890,13 +919,13 @@ static void parse_pack_objects(unsigned char *sha1)
 	if (nr_threads > 1 || getenv("GIT_FORCE_THREADS")) {
 		init_thread();
 		for (i = 0; i < nr_threads; i++) {
-			int ret = pthread_create(&thread_data[i].thread, NULL,
-						 threaded_second_pass, thread_data + i);
+			int ret = pthread_create(&thread_data[i]->thread, NULL,
+						 threaded_second_pass, thread_data[i]);
 			if (ret)
 				die("unable to create thread: %s", strerror(ret));
 		}
 		for (i = 0; i < nr_threads; i++)
-			pthread_join(thread_data[i].thread, NULL);
+			pthread_join(thread_data[i]->thread, NULL);
 
 		cleanup_thread();
 		return;
-- 
1.7.8.36.g69ee2

^ permalink raw reply related	[flat|nested] only message in thread

only message in thread, other threads:[~2012-04-27 16:56 UTC | newest]

Thread overview: (only message) (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-04-27 16:52 [PATCH] index-pack: avoid false sharing on multicore systems Nguyễn Thái Ngọc Duy

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.