All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] perf bench futex: Support parallel waker threads
@ 2015-05-08 18:37 Davidlohr Bueso
  2015-05-08 18:38 ` [PATCH 2/2] perf bench futex: Handle spurious wakeups Davidlohr Bueso
                   ` (2 more replies)
  0 siblings, 3 replies; 5+ messages in thread
From: Davidlohr Bueso @ 2015-05-08 18:37 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo, Ingo Molnar; +Cc: linux-kernel, dave, Davidlohr Bueso

The futex-wake benchmark only measures wakeups done within
a single process. While this has value in its own, it does
not really generate any hb->lock contention. A new benchmark
'wake-parallel' is added, by extending the futex-wake code
such that we can measure parallel waker threads. The program
output shows the avg per-thread latency in order to complete
its share of wakeups:

Run summary [PID 13474]: blocking on 512 threads (at [private] futex 0xa88668), 8 threads waking up 64 at a time.

[Run 1]: Avg per-thread latency (waking 64/512 threads) in 0.6230 ms (+-15.31%)
[Run 2]: Avg per-thread latency (waking 64/512 threads) in 0.5175 ms (+-29.95%)
[Run 3]: Avg per-thread latency (waking 64/512 threads) in 0.7578 ms (+-18.03%)
[Run 4]: Avg per-thread latency (waking 64/512 threads) in 0.8944 ms (+-12.54%)
[Run 5]: Avg per-thread latency (waking 64/512 threads) in 1.1204 ms (+-23.85%)
Avg per-thread latency (waking 64/512 threads) in 0.7826 ms (+-9.91%)

Naturally, different combinations of numbers of blocking and waker
threads will exhibit different information.

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
---
 tools/perf/Documentation/perf-bench.txt |   3 +
 tools/perf/bench/Build                  |   1 +
 tools/perf/bench/bench.h                |   2 +
 tools/perf/bench/futex-wake-parallel.c  | 294 ++++++++++++++++++++++++++++++++
 tools/perf/builtin-bench.c              |   1 +
 5 files changed, 301 insertions(+)
 create mode 100644 tools/perf/bench/futex-wake-parallel.c

diff --git a/tools/perf/Documentation/perf-bench.txt b/tools/perf/Documentation/perf-bench.txt
index f6480cb..bf3d064 100644
--- a/tools/perf/Documentation/perf-bench.txt
+++ b/tools/perf/Documentation/perf-bench.txt
@@ -210,6 +210,9 @@ Suite for evaluating hash tables.
 *wake*::
 Suite for evaluating wake calls.
 
+*wake-parallel*::
+Suite for evaluating parallel wake calls.
+
 *requeue*::
 Suite for evaluating requeue calls.
 
diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build
index 5ce9802..c3ab760 100644
--- a/tools/perf/bench/Build
+++ b/tools/perf/bench/Build
@@ -3,6 +3,7 @@ perf-y += sched-pipe.o
 perf-y += mem-memcpy.o
 perf-y += futex-hash.o
 perf-y += futex-wake.o
+perf-y += futex-wake-parallel.o
 perf-y += futex-requeue.o
 
 perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index 3c4dd44..70b2f71 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -33,6 +33,8 @@ extern int bench_mem_memcpy(int argc, const char **argv,
 extern int bench_mem_memset(int argc, const char **argv, const char *prefix);
 extern int bench_futex_hash(int argc, const char **argv, const char *prefix);
 extern int bench_futex_wake(int argc, const char **argv, const char *prefix);
+extern int bench_futex_wake_parallel(int argc, const char **argv,
+				     const char *prefix);
 extern int bench_futex_requeue(int argc, const char **argv, const char *prefix);
 
 #define BENCH_FORMAT_DEFAULT_STR	"default"
diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c
new file mode 100644
index 0000000..6d8c9fa
--- /dev/null
+++ b/tools/perf/bench/futex-wake-parallel.c
@@ -0,0 +1,294 @@
+/*
+ * Copyright (C) 2015 Davidlohr Bueso.
+ *
+ * Block a bunch of threads and let parallel waker threads wakeup an
+ * equal amount of them. The program output reflects the avg latency
+ * for each individual thread to service its share of work. Ultimately
+ * it can be used to measure futex_wake() changes.
+ */
+
+#include "../perf.h"
+#include "../util/util.h"
+#include "../util/stat.h"
+#include "../util/parse-options.h"
+#include "../util/header.h"
+#include "bench.h"
+#include "futex.h"
+
+#include <err.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <pthread.h>
+
+struct thread_data {
+	pthread_t worker;
+	unsigned int nwoken;
+	struct timeval runtime;
+};
+
+static unsigned int nwakes = 1;
+
+/* all threads will block on the same futex -- hash bucket chaos ;) */
+static u_int32_t futex = 0;
+
+static pthread_t *blocked_worker;
+static bool done = false, silent = false, fshared = false;
+static unsigned int nblocked_threads = 0, nwaking_threads = 0;
+static pthread_mutex_t thread_lock;
+static pthread_cond_t thread_parent, thread_worker;
+static struct stats waketime_stats, wakeup_stats;
+static unsigned int ncpus, threads_starting;
+static int futex_flag = 0;
+
+static const struct option options[] = {
+	OPT_UINTEGER('t', "threads", &nblocked_threads, "Specify amount of threads"),
+	OPT_UINTEGER('w', "nwakers", &nwaking_threads, "Specify amount of waking threads"),
+	OPT_BOOLEAN( 's', "silent",  &silent,   "Silent mode: do not display data/details"),
+	OPT_BOOLEAN( 'S', "shared",  &fshared,  "Use shared futexes instead of private ones"),
+	OPT_END()
+};
+
+static const char * const bench_futex_wake_parallel_usage[] = {
+	"perf bench futex wake-parallel <options>",
+	NULL
+};
+
+static void *waking_workerfn(void *arg)
+{
+	struct thread_data *waker = (struct thread_data *) arg;
+	struct timeval start, end;
+
+	gettimeofday(&start, NULL);
+
+	waker->nwoken = futex_wake(&futex, nwakes, futex_flag);
+	if (waker->nwoken != nwakes)
+		warnx("couldn't wakeup all tasks (%d/%d)",
+		      waker->nwoken, nwakes);
+
+	gettimeofday(&end, NULL);
+	timersub(&end, &start, &waker->runtime);
+
+	pthread_exit(NULL);
+	return NULL;
+}
+
+static void wakeup_threads(struct thread_data *td, pthread_attr_t thread_attr)
+{
+	unsigned int i;
+
+	pthread_attr_setdetachstate(&thread_attr, PTHREAD_CREATE_JOINABLE);
+
+	/* create and block all threads */
+	for (i = 0; i < nwaking_threads; i++) {
+		/*
+		 * Thread creation order will impact per-thread latency
+		 * as it will affect the order to acquire the hb spinlock.
+		 * For now let the scheduler decide.
+		 */
+		if (pthread_create(&td[i].worker, &thread_attr,
+				   waking_workerfn, (void *)&td[i]))
+			err(EXIT_FAILURE, "pthread_create");
+	}
+
+	for (i = 0; i < nwaking_threads; i++)
+		if (pthread_join(td[i].worker, NULL))
+			err(EXIT_FAILURE, "pthread_join");
+}
+
+static void *blocked_workerfn(void *arg __maybe_unused)
+{
+	pthread_mutex_lock(&thread_lock);
+	threads_starting--;
+	if (!threads_starting)
+		pthread_cond_signal(&thread_parent);
+	pthread_cond_wait(&thread_worker, &thread_lock);
+	pthread_mutex_unlock(&thread_lock);
+
+	while (1) { /* handle spurious wakeups */
+		if (futex_wait(&futex, 0, NULL, futex_flag) != EINTR)
+			break;
+	}
+
+	pthread_exit(NULL);
+	return NULL;
+}
+
+static void block_threads(pthread_t *w, pthread_attr_t thread_attr)
+{
+	cpu_set_t cpu;
+	unsigned int i;
+
+	threads_starting = nblocked_threads;
+
+	/* create and block all threads */
+	for (i = 0; i < nblocked_threads; i++) {
+		CPU_ZERO(&cpu);
+		CPU_SET(i % ncpus, &cpu);
+
+		if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu))
+			err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
+
+		if (pthread_create(&w[i], &thread_attr, blocked_workerfn, NULL))
+			err(EXIT_FAILURE, "pthread_create");
+	}
+}
+
+static void print_run(struct thread_data *waking_worker, unsigned int run_num)
+{
+	unsigned int i, wakeup_avg;
+	double waketime_avg, waketime_stddev;
+	struct stats __waketime_stats, __wakeup_stats;
+
+	init_stats(&__wakeup_stats);
+	init_stats(&__waketime_stats);
+
+	for (i = 0; i < nwaking_threads; i++) {
+		update_stats(&__waketime_stats, waking_worker[i].runtime.tv_usec);
+		update_stats(&__wakeup_stats, waking_worker[i].nwoken);
+	}
+
+	waketime_avg = avg_stats(&__waketime_stats);
+	waketime_stddev = stddev_stats(&__waketime_stats);
+	wakeup_avg = avg_stats(&__wakeup_stats);
+
+	printf("[Run %d]: Avg per-thread latency (waking %d/%d threads) "
+	       "in %.4f ms (+-%.2f%%)\n", run_num + 1, wakeup_avg,
+	       nblocked_threads, waketime_avg/1e3,
+	       rel_stddev_stats(waketime_stddev, waketime_avg));
+}
+
+static void print_summary(void)
+{
+	unsigned int wakeup_avg;
+	double waketime_avg, waketime_stddev;
+
+	waketime_avg = avg_stats(&waketime_stats);
+	waketime_stddev = stddev_stats(&waketime_stats);
+	wakeup_avg = avg_stats(&wakeup_stats);
+
+	printf("Avg per-thread latency (waking %d/%d threads) in %.4f ms (+-%.2f%%)\n",
+	       wakeup_avg,
+	       nblocked_threads,
+	       waketime_avg/1e3,
+	       rel_stddev_stats(waketime_stddev, waketime_avg));
+}
+
+
+static void do_run_stats(struct thread_data *waking_worker)
+{
+	unsigned int i;
+
+	for (i = 0; i < nwaking_threads; i++) {
+		update_stats(&waketime_stats, waking_worker[i].runtime.tv_usec);
+		update_stats(&wakeup_stats, waking_worker[i].nwoken);
+	}
+
+}
+
+static void toggle_done(int sig __maybe_unused,
+			siginfo_t *info __maybe_unused,
+			void *uc __maybe_unused)
+{
+	done = true;
+}
+
+int bench_futex_wake_parallel(int argc, const char **argv,
+			      const char *prefix __maybe_unused)
+{
+	int ret = 0;
+	unsigned int i, j;
+	struct sigaction act;
+	pthread_attr_t thread_attr;
+	struct thread_data *waking_worker;
+
+	argc = parse_options(argc, argv, options,
+			     bench_futex_wake_parallel_usage, 0);
+	if (argc) {
+		usage_with_options(bench_futex_wake_parallel_usage, options);
+		exit(EXIT_FAILURE);
+	}
+
+	sigfillset(&act.sa_mask);
+	act.sa_sigaction = toggle_done;
+	sigaction(SIGINT, &act, NULL);
+
+	ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+	if (!nblocked_threads)
+		nblocked_threads = ncpus;
+
+	/* some sanity checks */
+	if (nwaking_threads > nblocked_threads || !nwaking_threads)
+		nwaking_threads = nblocked_threads;
+
+	if (nblocked_threads % nwaking_threads)
+		errx(EXIT_FAILURE, "Must be perfectly divisible");
+	/*
+	 * Each thread will wakeup nwakes tasks in
+	 * a single futex_wait call.
+	 */
+	nwakes = nblocked_threads/nwaking_threads;
+
+	blocked_worker = calloc(nblocked_threads, sizeof(*blocked_worker));
+	if (!blocked_worker)
+		err(EXIT_FAILURE, "calloc");
+
+	if (!fshared)
+		futex_flag = FUTEX_PRIVATE_FLAG;
+
+	printf("Run summary [PID %d]: blocking on %d threads (at [%s] "
+	       "futex %p), %d threads waking up %d at a time.\n\n",
+	       getpid(), nblocked_threads, fshared ? "shared":"private",
+	       &futex, nwaking_threads, nwakes);
+
+	init_stats(&wakeup_stats);
+	init_stats(&waketime_stats);
+
+	pthread_attr_init(&thread_attr);
+	pthread_mutex_init(&thread_lock, NULL);
+	pthread_cond_init(&thread_parent, NULL);
+	pthread_cond_init(&thread_worker, NULL);
+
+	for (j = 0; j < bench_repeat && !done; j++) {
+		waking_worker = calloc(nwaking_threads, sizeof(*waking_worker));
+		if (!waking_worker)
+			err(EXIT_FAILURE, "calloc");
+
+		/* create, launch & block all threads */
+		block_threads(blocked_worker, thread_attr);
+
+		/* make sure all threads are already blocked */
+		pthread_mutex_lock(&thread_lock);
+		while (threads_starting)
+			pthread_cond_wait(&thread_parent, &thread_lock);
+		pthread_cond_broadcast(&thread_worker);
+		pthread_mutex_unlock(&thread_lock);
+
+		usleep(100000);
+
+		/* Ok, all threads are patiently blocked, start waking folks up */
+		wakeup_threads(waking_worker, thread_attr);
+
+		for (i = 0; i < nblocked_threads; i++) {
+			ret = pthread_join(blocked_worker[i], NULL);
+			if (ret)
+				err(EXIT_FAILURE, "pthread_join");
+		}
+
+		do_run_stats(waking_worker);
+		if (!silent)
+			print_run(waking_worker, j);
+
+		free(waking_worker);
+	}
+
+	/* cleanup & report results */
+	pthread_cond_destroy(&thread_parent);
+	pthread_cond_destroy(&thread_worker);
+	pthread_mutex_destroy(&thread_lock);
+	pthread_attr_destroy(&thread_attr);
+
+	print_summary();
+
+	free(blocked_worker);
+	return ret;
+}
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
index b9a56fa..b5314e4 100644
--- a/tools/perf/builtin-bench.c
+++ b/tools/perf/builtin-bench.c
@@ -58,6 +58,7 @@ static struct bench mem_benchmarks[] = {
 static struct bench futex_benchmarks[] = {
 	{ "hash",	"Benchmark for futex hash table",               bench_futex_hash	},
 	{ "wake",	"Benchmark for futex wake calls",               bench_futex_wake	},
+	{ "wake-parallel", "Benchmark for parallel futex wake calls",   bench_futex_wake_parallel },
 	{ "requeue",	"Benchmark for futex requeue calls",            bench_futex_requeue	},
 	{ "all",	"Test all futex benchmarks",			NULL			},
 	{ NULL,		NULL,						NULL			}
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 2/2] perf bench futex: Handle spurious wakeups
  2015-05-08 18:37 [PATCH 1/2] perf bench futex: Support parallel waker threads Davidlohr Bueso
@ 2015-05-08 18:38 ` Davidlohr Bueso
  2015-05-10  7:11   ` [tip:perf/core] " tip-bot for Davidlohr Bueso
  2015-05-08 19:24 ` [PATCH 1/2] perf bench futex: Support parallel waker threads Arnaldo Carvalho de Melo
  2015-05-10  7:10 ` [tip:perf/core] " tip-bot for Davidlohr Bueso
  2 siblings, 1 reply; 5+ messages in thread
From: Davidlohr Bueso @ 2015-05-08 18:38 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo, Ingo Molnar; +Cc: linux-kernel, dave, Davidlohr Bueso

Wrap futex_wait around a loop and catch for EINTR.
Either a spurious wakeup occurred or a signal interrupted
is, either way we need to block again.

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
---
 tools/perf/bench/futex-wake.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c
index 929f762..e5e41d3 100644
--- a/tools/perf/bench/futex-wake.c
+++ b/tools/perf/bench/futex-wake.c
@@ -60,7 +60,12 @@ static void *workerfn(void *arg __maybe_unused)
 	pthread_cond_wait(&thread_worker, &thread_lock);
 	pthread_mutex_unlock(&thread_lock);
 
-	futex_wait(&futex1, 0, NULL, futex_flag);
+	while (1) {
+		if (futex_wait(&futex1, 0, NULL, futex_flag) != EINTR)
+			break;
+	}
+
+	pthread_exit(NULL);
 	return NULL;
 }
 
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH 1/2] perf bench futex: Support parallel waker threads
  2015-05-08 18:37 [PATCH 1/2] perf bench futex: Support parallel waker threads Davidlohr Bueso
  2015-05-08 18:38 ` [PATCH 2/2] perf bench futex: Handle spurious wakeups Davidlohr Bueso
@ 2015-05-08 19:24 ` Arnaldo Carvalho de Melo
  2015-05-10  7:10 ` [tip:perf/core] " tip-bot for Davidlohr Bueso
  2 siblings, 0 replies; 5+ messages in thread
From: Arnaldo Carvalho de Melo @ 2015-05-08 19:24 UTC (permalink / raw)
  To: Davidlohr Bueso; +Cc: Ingo Molnar, linux-kernel, Davidlohr Bueso

Em Fri, May 08, 2015 at 11:37:59AM -0700, Davidlohr Bueso escreveu:
> The futex-wake benchmark only measures wakeups done within
> a single process. While this has value in its own, it does
> not really generate any hb->lock contention. A new benchmark
> 'wake-parallel' is added, by extending the futex-wake code
> such that we can measure parallel waker threads. The program
> output shows the avg per-thread latency in order to complete
> its share of wakeups:
> 
> Run summary [PID 13474]: blocking on 512 threads (at [private] futex 0xa88668), 8 threads waking up 64 at a time.

Thanks, applied both to perf/core.

- Arnaldo

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [tip:perf/core] perf bench futex: Support parallel waker threads
  2015-05-08 18:37 [PATCH 1/2] perf bench futex: Support parallel waker threads Davidlohr Bueso
  2015-05-08 18:38 ` [PATCH 2/2] perf bench futex: Handle spurious wakeups Davidlohr Bueso
  2015-05-08 19:24 ` [PATCH 1/2] perf bench futex: Support parallel waker threads Arnaldo Carvalho de Melo
@ 2015-05-10  7:10 ` tip-bot for Davidlohr Bueso
  2 siblings, 0 replies; 5+ messages in thread
From: tip-bot for Davidlohr Bueso @ 2015-05-10  7:10 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: mingo, dave, tglx, acme, dbueso, hpa, linux-kernel

Commit-ID:  d65817b4e707068c2dd3e002e87c2a0294aabc2c
Gitweb:     http://git.kernel.org/tip/d65817b4e707068c2dd3e002e87c2a0294aabc2c
Author:     Davidlohr Bueso <dave@stgolabs.net>
AuthorDate: Fri, 8 May 2015 11:37:59 -0700
Committer:  Arnaldo Carvalho de Melo <acme@redhat.com>
CommitDate: Fri, 8 May 2015 16:23:50 -0300

perf bench futex: Support parallel waker threads

The futex-wake benchmark only measures wakeups done within a single
process. While this has value in its own, it does not really generate
any hb->lock contention.

A new benchmark 'wake-parallel' is added, by extending the futex-wake
code such that we can measure parallel waker threads. The program output
shows the avg per-thread latency in order to complete its share of
wakeups:

Run summary [PID 13474]: blocking on 512 threads (at [private] futex 0xa88668), 8 threads waking up 64 at a time.

[Run 1]: Avg per-thread latency (waking 64/512 threads) in 0.6230 ms (+-15.31%)
[Run 2]: Avg per-thread latency (waking 64/512 threads) in 0.5175 ms (+-29.95%)
[Run 3]: Avg per-thread latency (waking 64/512 threads) in 0.7578 ms (+-18.03%)
[Run 4]: Avg per-thread latency (waking 64/512 threads) in 0.8944 ms (+-12.54%)
[Run 5]: Avg per-thread latency (waking 64/512 threads) in 1.1204 ms (+-23.85%)
Avg per-thread latency (waking 64/512 threads) in 0.7826 ms (+-9.91%)

Naturally, different combinations of numbers of blocking and waker
threads will exhibit different information.

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Davidlohr Bueso <dbueso@suse.de>
Link: http://lkml.kernel.org/r/1431110280-20231-1-git-send-email-dave@stgolabs.net
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-bench.txt |   3 +
 tools/perf/bench/Build                  |   1 +
 tools/perf/bench/bench.h                |   2 +
 tools/perf/bench/futex-wake-parallel.c  | 294 ++++++++++++++++++++++++++++++++
 tools/perf/builtin-bench.c              |   1 +
 5 files changed, 301 insertions(+)

diff --git a/tools/perf/Documentation/perf-bench.txt b/tools/perf/Documentation/perf-bench.txt
index f6480cb..bf3d064 100644
--- a/tools/perf/Documentation/perf-bench.txt
+++ b/tools/perf/Documentation/perf-bench.txt
@@ -210,6 +210,9 @@ Suite for evaluating hash tables.
 *wake*::
 Suite for evaluating wake calls.
 
+*wake-parallel*::
+Suite for evaluating parallel wake calls.
+
 *requeue*::
 Suite for evaluating requeue calls.
 
diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build
index 5ce9802..c3ab760 100644
--- a/tools/perf/bench/Build
+++ b/tools/perf/bench/Build
@@ -3,6 +3,7 @@ perf-y += sched-pipe.o
 perf-y += mem-memcpy.o
 perf-y += futex-hash.o
 perf-y += futex-wake.o
+perf-y += futex-wake-parallel.o
 perf-y += futex-requeue.o
 
 perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index 3c4dd44..70b2f71 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -33,6 +33,8 @@ extern int bench_mem_memcpy(int argc, const char **argv,
 extern int bench_mem_memset(int argc, const char **argv, const char *prefix);
 extern int bench_futex_hash(int argc, const char **argv, const char *prefix);
 extern int bench_futex_wake(int argc, const char **argv, const char *prefix);
+extern int bench_futex_wake_parallel(int argc, const char **argv,
+				     const char *prefix);
 extern int bench_futex_requeue(int argc, const char **argv, const char *prefix);
 
 #define BENCH_FORMAT_DEFAULT_STR	"default"
diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c
new file mode 100644
index 0000000..6d8c9fa
--- /dev/null
+++ b/tools/perf/bench/futex-wake-parallel.c
@@ -0,0 +1,294 @@
+/*
+ * Copyright (C) 2015 Davidlohr Bueso.
+ *
+ * Block a bunch of threads and let parallel waker threads wakeup an
+ * equal amount of them. The program output reflects the avg latency
+ * for each individual thread to service its share of work. Ultimately
+ * it can be used to measure futex_wake() changes.
+ */
+
+#include "../perf.h"
+#include "../util/util.h"
+#include "../util/stat.h"
+#include "../util/parse-options.h"
+#include "../util/header.h"
+#include "bench.h"
+#include "futex.h"
+
+#include <err.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <pthread.h>
+
+struct thread_data {
+	pthread_t worker;
+	unsigned int nwoken;
+	struct timeval runtime;
+};
+
+static unsigned int nwakes = 1;
+
+/* all threads will block on the same futex -- hash bucket chaos ;) */
+static u_int32_t futex = 0;
+
+static pthread_t *blocked_worker;
+static bool done = false, silent = false, fshared = false;
+static unsigned int nblocked_threads = 0, nwaking_threads = 0;
+static pthread_mutex_t thread_lock;
+static pthread_cond_t thread_parent, thread_worker;
+static struct stats waketime_stats, wakeup_stats;
+static unsigned int ncpus, threads_starting;
+static int futex_flag = 0;
+
+static const struct option options[] = {
+	OPT_UINTEGER('t', "threads", &nblocked_threads, "Specify amount of threads"),
+	OPT_UINTEGER('w', "nwakers", &nwaking_threads, "Specify amount of waking threads"),
+	OPT_BOOLEAN( 's', "silent",  &silent,   "Silent mode: do not display data/details"),
+	OPT_BOOLEAN( 'S', "shared",  &fshared,  "Use shared futexes instead of private ones"),
+	OPT_END()
+};
+
+static const char * const bench_futex_wake_parallel_usage[] = {
+	"perf bench futex wake-parallel <options>",
+	NULL
+};
+
+static void *waking_workerfn(void *arg)
+{
+	struct thread_data *waker = (struct thread_data *) arg;
+	struct timeval start, end;
+
+	gettimeofday(&start, NULL);
+
+	waker->nwoken = futex_wake(&futex, nwakes, futex_flag);
+	if (waker->nwoken != nwakes)
+		warnx("couldn't wakeup all tasks (%d/%d)",
+		      waker->nwoken, nwakes);
+
+	gettimeofday(&end, NULL);
+	timersub(&end, &start, &waker->runtime);
+
+	pthread_exit(NULL);
+	return NULL;
+}
+
+static void wakeup_threads(struct thread_data *td, pthread_attr_t thread_attr)
+{
+	unsigned int i;
+
+	pthread_attr_setdetachstate(&thread_attr, PTHREAD_CREATE_JOINABLE);
+
+	/* create and block all threads */
+	for (i = 0; i < nwaking_threads; i++) {
+		/*
+		 * Thread creation order will impact per-thread latency
+		 * as it will affect the order to acquire the hb spinlock.
+		 * For now let the scheduler decide.
+		 */
+		if (pthread_create(&td[i].worker, &thread_attr,
+				   waking_workerfn, (void *)&td[i]))
+			err(EXIT_FAILURE, "pthread_create");
+	}
+
+	for (i = 0; i < nwaking_threads; i++)
+		if (pthread_join(td[i].worker, NULL))
+			err(EXIT_FAILURE, "pthread_join");
+}
+
+static void *blocked_workerfn(void *arg __maybe_unused)
+{
+	pthread_mutex_lock(&thread_lock);
+	threads_starting--;
+	if (!threads_starting)
+		pthread_cond_signal(&thread_parent);
+	pthread_cond_wait(&thread_worker, &thread_lock);
+	pthread_mutex_unlock(&thread_lock);
+
+	while (1) { /* handle spurious wakeups */
+		if (futex_wait(&futex, 0, NULL, futex_flag) != EINTR)
+			break;
+	}
+
+	pthread_exit(NULL);
+	return NULL;
+}
+
+static void block_threads(pthread_t *w, pthread_attr_t thread_attr)
+{
+	cpu_set_t cpu;
+	unsigned int i;
+
+	threads_starting = nblocked_threads;
+
+	/* create and block all threads */
+	for (i = 0; i < nblocked_threads; i++) {
+		CPU_ZERO(&cpu);
+		CPU_SET(i % ncpus, &cpu);
+
+		if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu))
+			err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
+
+		if (pthread_create(&w[i], &thread_attr, blocked_workerfn, NULL))
+			err(EXIT_FAILURE, "pthread_create");
+	}
+}
+
+static void print_run(struct thread_data *waking_worker, unsigned int run_num)
+{
+	unsigned int i, wakeup_avg;
+	double waketime_avg, waketime_stddev;
+	struct stats __waketime_stats, __wakeup_stats;
+
+	init_stats(&__wakeup_stats);
+	init_stats(&__waketime_stats);
+
+	for (i = 0; i < nwaking_threads; i++) {
+		update_stats(&__waketime_stats, waking_worker[i].runtime.tv_usec);
+		update_stats(&__wakeup_stats, waking_worker[i].nwoken);
+	}
+
+	waketime_avg = avg_stats(&__waketime_stats);
+	waketime_stddev = stddev_stats(&__waketime_stats);
+	wakeup_avg = avg_stats(&__wakeup_stats);
+
+	printf("[Run %d]: Avg per-thread latency (waking %d/%d threads) "
+	       "in %.4f ms (+-%.2f%%)\n", run_num + 1, wakeup_avg,
+	       nblocked_threads, waketime_avg/1e3,
+	       rel_stddev_stats(waketime_stddev, waketime_avg));
+}
+
+static void print_summary(void)
+{
+	unsigned int wakeup_avg;
+	double waketime_avg, waketime_stddev;
+
+	waketime_avg = avg_stats(&waketime_stats);
+	waketime_stddev = stddev_stats(&waketime_stats);
+	wakeup_avg = avg_stats(&wakeup_stats);
+
+	printf("Avg per-thread latency (waking %d/%d threads) in %.4f ms (+-%.2f%%)\n",
+	       wakeup_avg,
+	       nblocked_threads,
+	       waketime_avg/1e3,
+	       rel_stddev_stats(waketime_stddev, waketime_avg));
+}
+
+
+static void do_run_stats(struct thread_data *waking_worker)
+{
+	unsigned int i;
+
+	for (i = 0; i < nwaking_threads; i++) {
+		update_stats(&waketime_stats, waking_worker[i].runtime.tv_usec);
+		update_stats(&wakeup_stats, waking_worker[i].nwoken);
+	}
+
+}
+
+static void toggle_done(int sig __maybe_unused,
+			siginfo_t *info __maybe_unused,
+			void *uc __maybe_unused)
+{
+	done = true;
+}
+
+int bench_futex_wake_parallel(int argc, const char **argv,
+			      const char *prefix __maybe_unused)
+{
+	int ret = 0;
+	unsigned int i, j;
+	struct sigaction act;
+	pthread_attr_t thread_attr;
+	struct thread_data *waking_worker;
+
+	argc = parse_options(argc, argv, options,
+			     bench_futex_wake_parallel_usage, 0);
+	if (argc) {
+		usage_with_options(bench_futex_wake_parallel_usage, options);
+		exit(EXIT_FAILURE);
+	}
+
+	sigfillset(&act.sa_mask);
+	act.sa_sigaction = toggle_done;
+	sigaction(SIGINT, &act, NULL);
+
+	ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+	if (!nblocked_threads)
+		nblocked_threads = ncpus;
+
+	/* some sanity checks */
+	if (nwaking_threads > nblocked_threads || !nwaking_threads)
+		nwaking_threads = nblocked_threads;
+
+	if (nblocked_threads % nwaking_threads)
+		errx(EXIT_FAILURE, "Must be perfectly divisible");
+	/*
+	 * Each thread will wakeup nwakes tasks in
+	 * a single futex_wait call.
+	 */
+	nwakes = nblocked_threads/nwaking_threads;
+
+	blocked_worker = calloc(nblocked_threads, sizeof(*blocked_worker));
+	if (!blocked_worker)
+		err(EXIT_FAILURE, "calloc");
+
+	if (!fshared)
+		futex_flag = FUTEX_PRIVATE_FLAG;
+
+	printf("Run summary [PID %d]: blocking on %d threads (at [%s] "
+	       "futex %p), %d threads waking up %d at a time.\n\n",
+	       getpid(), nblocked_threads, fshared ? "shared":"private",
+	       &futex, nwaking_threads, nwakes);
+
+	init_stats(&wakeup_stats);
+	init_stats(&waketime_stats);
+
+	pthread_attr_init(&thread_attr);
+	pthread_mutex_init(&thread_lock, NULL);
+	pthread_cond_init(&thread_parent, NULL);
+	pthread_cond_init(&thread_worker, NULL);
+
+	for (j = 0; j < bench_repeat && !done; j++) {
+		waking_worker = calloc(nwaking_threads, sizeof(*waking_worker));
+		if (!waking_worker)
+			err(EXIT_FAILURE, "calloc");
+
+		/* create, launch & block all threads */
+		block_threads(blocked_worker, thread_attr);
+
+		/* make sure all threads are already blocked */
+		pthread_mutex_lock(&thread_lock);
+		while (threads_starting)
+			pthread_cond_wait(&thread_parent, &thread_lock);
+		pthread_cond_broadcast(&thread_worker);
+		pthread_mutex_unlock(&thread_lock);
+
+		usleep(100000);
+
+		/* Ok, all threads are patiently blocked, start waking folks up */
+		wakeup_threads(waking_worker, thread_attr);
+
+		for (i = 0; i < nblocked_threads; i++) {
+			ret = pthread_join(blocked_worker[i], NULL);
+			if (ret)
+				err(EXIT_FAILURE, "pthread_join");
+		}
+
+		do_run_stats(waking_worker);
+		if (!silent)
+			print_run(waking_worker, j);
+
+		free(waking_worker);
+	}
+
+	/* cleanup & report results */
+	pthread_cond_destroy(&thread_parent);
+	pthread_cond_destroy(&thread_worker);
+	pthread_mutex_destroy(&thread_lock);
+	pthread_attr_destroy(&thread_attr);
+
+	print_summary();
+
+	free(blocked_worker);
+	return ret;
+}
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
index b9a56fa..b5314e4 100644
--- a/tools/perf/builtin-bench.c
+++ b/tools/perf/builtin-bench.c
@@ -58,6 +58,7 @@ static struct bench mem_benchmarks[] = {
 static struct bench futex_benchmarks[] = {
 	{ "hash",	"Benchmark for futex hash table",               bench_futex_hash	},
 	{ "wake",	"Benchmark for futex wake calls",               bench_futex_wake	},
+	{ "wake-parallel", "Benchmark for parallel futex wake calls",   bench_futex_wake_parallel },
 	{ "requeue",	"Benchmark for futex requeue calls",            bench_futex_requeue	},
 	{ "all",	"Test all futex benchmarks",			NULL			},
 	{ NULL,		NULL,						NULL			}

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [tip:perf/core] perf bench futex: Handle spurious wakeups
  2015-05-08 18:38 ` [PATCH 2/2] perf bench futex: Handle spurious wakeups Davidlohr Bueso
@ 2015-05-10  7:11   ` tip-bot for Davidlohr Bueso
  0 siblings, 0 replies; 5+ messages in thread
From: tip-bot for Davidlohr Bueso @ 2015-05-10  7:11 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: acme, dave, hpa, mingo, dbueso, tglx, linux-kernel

Commit-ID:  598adc5c9c1cfd3f154f6d9df72b38eda63e306e
Gitweb:     http://git.kernel.org/tip/598adc5c9c1cfd3f154f6d9df72b38eda63e306e
Author:     Davidlohr Bueso <dave@stgolabs.net>
AuthorDate: Fri, 8 May 2015 11:38:00 -0700
Committer:  Arnaldo Carvalho de Melo <acme@redhat.com>
CommitDate: Fri, 8 May 2015 16:24:02 -0300

perf bench futex: Handle spurious wakeups

Wrap futex_wait around a loop and catch for EINTR.

Either a spurious wakeup occurred or a signal interrupted is, either way
we need to block again.

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Cc: Davidlohr Bueso <dbueso@suse.de>
Link: http://lkml.kernel.org/r/1431110280-20231-2-git-send-email-dave@stgolabs.net
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/bench/futex-wake.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c
index 929f762..e5e41d3 100644
--- a/tools/perf/bench/futex-wake.c
+++ b/tools/perf/bench/futex-wake.c
@@ -60,7 +60,12 @@ static void *workerfn(void *arg __maybe_unused)
 	pthread_cond_wait(&thread_worker, &thread_lock);
 	pthread_mutex_unlock(&thread_lock);
 
-	futex_wait(&futex1, 0, NULL, futex_flag);
+	while (1) {
+		if (futex_wait(&futex1, 0, NULL, futex_flag) != EINTR)
+			break;
+	}
+
+	pthread_exit(NULL);
 	return NULL;
 }
 

^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2015-05-10  7:11 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-05-08 18:37 [PATCH 1/2] perf bench futex: Support parallel waker threads Davidlohr Bueso
2015-05-08 18:38 ` [PATCH 2/2] perf bench futex: Handle spurious wakeups Davidlohr Bueso
2015-05-10  7:11   ` [tip:perf/core] " tip-bot for Davidlohr Bueso
2015-05-08 19:24 ` [PATCH 1/2] perf bench futex: Support parallel waker threads Arnaldo Carvalho de Melo
2015-05-10  7:10 ` [tip:perf/core] " tip-bot for Davidlohr Bueso

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.