From: Arnaldo Carvalho de Melo <acme@kernel.org>
To: Ian Rogers <irogers@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>,
Ingo Molnar <mingo@redhat.com>,
Mark Rutland <mark.rutland@arm.com>,
Alexander Shishkin <alexander.shishkin@linux.intel.com>,
Jiri Olsa <jolsa@redhat.com>, Namhyung Kim <namhyung@kernel.org>,
Thomas Gleixner <tglx@linutronix.de>,
Andi Kleen <ak@linux.intel.com>,
linux-kernel@vger.kernel.org,
Stephane Eranian <eranian@google.com>
Subject: Re: [PATCH] perf bench: Add benchmark of find_next_bit
Date: Tue, 28 Jul 2020 08:51:52 -0300 [thread overview]
Message-ID: <20200728115152.GB3328@kernel.org> (raw)
In-Reply-To: <20200724071959.3110510-1-irogers@google.com>
Em Fri, Jul 24, 2020 at 12:19:59AM -0700, Ian Rogers escreveu:
> for_each_set_bit, or similar functions like for_each_cpu, may be hot
> within the kernel. If many bits were set then one could imagine on
> Intel a "bt" instruction with every bit may be faster than the function
> call and word length find_next_bit logic. Add a benchmark to measure
> this.
Thanks, applied.
- Arnaldo
> This benchmark on AMD rome and Intel skylakex shows "bt" is not a good
> option except for very small bitmaps.
>
> Signed-off-by: Ian Rogers <irogers@google.com>
> ---
> tools/perf/bench/Build | 1 +
> tools/perf/bench/bench.h | 1 +
> tools/perf/bench/find-bit-bench.c | 135 ++++++++++++++++++++++++++++++
> tools/perf/builtin-bench.c | 1 +
> 4 files changed, 138 insertions(+)
> create mode 100644 tools/perf/bench/find-bit-bench.c
>
> diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build
> index 768e408757a0..fb114bca3a8d 100644
> --- a/tools/perf/bench/Build
> +++ b/tools/perf/bench/Build
> @@ -10,6 +10,7 @@ perf-y += epoll-wait.o
> perf-y += epoll-ctl.o
> perf-y += synthesize.o
> perf-y += kallsyms-parse.o
> +perf-y += find-bit-bench.o
>
> perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-lib.o
> perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o
> diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
> index 61cae4966cae..3291b0ddddfe 100644
> --- a/tools/perf/bench/bench.h
> +++ b/tools/perf/bench/bench.h
> @@ -35,6 +35,7 @@ int bench_sched_messaging(int argc, const char **argv);
> int bench_sched_pipe(int argc, const char **argv);
> int bench_mem_memcpy(int argc, const char **argv);
> int bench_mem_memset(int argc, const char **argv);
> +int bench_mem_find_bit(int argc, const char **argv);
> int bench_futex_hash(int argc, const char **argv);
> int bench_futex_wake(int argc, const char **argv);
> int bench_futex_wake_parallel(int argc, const char **argv);
> diff --git a/tools/perf/bench/find-bit-bench.c b/tools/perf/bench/find-bit-bench.c
> new file mode 100644
> index 000000000000..1aadbd9d7e86
> --- /dev/null
> +++ b/tools/perf/bench/find-bit-bench.c
> @@ -0,0 +1,135 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Benchmark find_next_bit and related bit operations.
> + *
> + * Copyright 2020 Google LLC.
> + */
> +#include <stdlib.h>
> +#include "bench.h"
> +#include "../util/stat.h"
> +#include <linux/bitmap.h>
> +#include <linux/bitops.h>
> +#include <linux/time64.h>
> +#include <subcmd/parse-options.h>
> +
> +static unsigned int outer_iterations = 5;
> +static unsigned int inner_iterations = 100000;
> +
> +static const struct option options[] = {
> + OPT_UINTEGER('i', "outer-iterations", &outer_iterations,
> + "Number of outerer iterations used"),
> + OPT_UINTEGER('j', "inner-iterations", &inner_iterations,
> + "Number of outerer iterations used"),
> + OPT_END()
> +};
> +
> +static const char *const bench_usage[] = {
> + "perf bench mem find_bit <options>",
> + NULL
> +};
> +
> +static unsigned int accumulator;
> +static unsigned int use_of_val;
> +
> +static noinline void workload(int val)
> +{
> + use_of_val += val;
> + accumulator++;
> +}
> +
> +#if defined(__i386__) || defined(__x86_64__)
> +static bool asm_test_bit(long nr, const unsigned long *addr)
> +{
> + bool oldbit;
> +
> + asm volatile("bt %2,%1"
> + : "=@ccc" (oldbit)
> + : "m" (*(unsigned long *)addr), "Ir" (nr) : "memory");
> +
> + return oldbit;
> +}
> +#else
> +#define asm_test_bit test_bit
> +#endif
> +
> +static int do_for_each_set_bit(unsigned int num_bits)
> +{
> + unsigned long *to_test = bitmap_alloc(num_bits);
> + struct timeval start, end, diff;
> + u64 runtime_us;
> + struct stats fb_time_stats, tb_time_stats;
> + double time_average, time_stddev;
> + unsigned int bit, i, j;
> + unsigned int set_bits, skip;
> + unsigned int old;
> +
> + init_stats(&fb_time_stats);
> + init_stats(&tb_time_stats);
> +
> + for (set_bits = 1; set_bits <= num_bits; set_bits <<= 1) {
> + bitmap_zero(to_test, num_bits);
> + skip = num_bits / set_bits;
> + for (i = 0; i < num_bits; i += skip)
> + set_bit(i, to_test);
> +
> + for (i = 0; i < outer_iterations; i++) {
> + old = accumulator;
> + gettimeofday(&start, NULL);
> + for (j = 0; j < inner_iterations; j++) {
> + for_each_set_bit(bit, to_test, num_bits)
> + workload(bit);
> + }
> + gettimeofday(&end, NULL);
> + assert(old + (inner_iterations * set_bits) == accumulator);
> + timersub(&end, &start, &diff);
> + runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
> + update_stats(&fb_time_stats, runtime_us);
> +
> + old = accumulator;
> + gettimeofday(&start, NULL);
> + for (j = 0; j < inner_iterations; j++) {
> + for (bit = 0; bit < num_bits; bit++) {
> + if (asm_test_bit(bit, to_test))
> + workload(bit);
> + }
> + }
> + gettimeofday(&end, NULL);
> + assert(old + (inner_iterations * set_bits) == accumulator);
> + timersub(&end, &start, &diff);
> + runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
> + update_stats(&tb_time_stats, runtime_us);
> + }
> +
> + printf("%d operations %d bits set of %d bits\n",
> + inner_iterations, set_bits, num_bits);
> + time_average = avg_stats(&fb_time_stats);
> + time_stddev = stddev_stats(&fb_time_stats);
> + printf(" Average for_each_set_bit took: %.3f usec (+- %.3f usec)\n",
> + time_average, time_stddev);
> + time_average = avg_stats(&tb_time_stats);
> + time_stddev = stddev_stats(&tb_time_stats);
> + printf(" Average test_bit loop took: %.3f usec (+- %.3f usec)\n",
> + time_average, time_stddev);
> +
> + if (use_of_val == accumulator) /* Try to avoid compiler tricks. */
> + printf("\n");
> + }
> + bitmap_free(to_test);
> + return 0;
> +}
> +
> +int bench_mem_find_bit(int argc, const char **argv)
> +{
> + int err = 0, i;
> +
> + argc = parse_options(argc, argv, options, bench_usage, 0);
> + if (argc) {
> + usage_with_options(bench_usage, options);
> + exit(EXIT_FAILURE);
> + }
> +
> + for (i = 1; i <= 2048; i <<= 1)
> + do_for_each_set_bit(i);
> +
> + return err;
> +}
> diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
> index cad31b1d3438..690eee1120a7 100644
> --- a/tools/perf/builtin-bench.c
> +++ b/tools/perf/builtin-bench.c
> @@ -52,6 +52,7 @@ static struct bench sched_benchmarks[] = {
> static struct bench mem_benchmarks[] = {
> { "memcpy", "Benchmark for memcpy() functions", bench_mem_memcpy },
> { "memset", "Benchmark for memset() functions", bench_mem_memset },
> + { "find_bit", "Benchmark for find_bit() functions", bench_mem_find_bit },
> { "all", "Run all memory access benchmarks", NULL },
> { NULL, NULL, NULL }
> };
> --
> 2.28.0.rc0.142.g3c755180ce-goog
>
--
- Arnaldo
next prev parent reply other threads:[~2020-07-28 11:52 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-07-24 7:19 [PATCH] perf bench: Add benchmark of find_next_bit Ian Rogers
2020-07-24 14:45 ` Andi Kleen
2020-07-24 18:13 ` Ian Rogers
2020-07-28 11:51 ` Arnaldo Carvalho de Melo [this message]
2020-07-29 19:59 ` Arnaldo Carvalho de Melo
2020-07-29 20:44 ` Arnaldo Carvalho de Melo
2020-07-29 22:03 ` Ian Rogers
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200728115152.GB3328@kernel.org \
--to=acme@kernel.org \
--cc=ak@linux.intel.com \
--cc=alexander.shishkin@linux.intel.com \
--cc=eranian@google.com \
--cc=irogers@google.com \
--cc=jolsa@redhat.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mark.rutland@arm.com \
--cc=mingo@redhat.com \
--cc=namhyung@kernel.org \
--cc=peterz@infradead.org \
--cc=tglx@linutronix.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.