* [PATCH bpf-next] selftests/bpf: add multi-uprobe benchmarks
@ 2024-08-06 4:29 Andrii Nakryiko
2024-08-06 7:25 ` Jiri Olsa
2024-08-23 17:10 ` patchwork-bot+netdevbpf
0 siblings, 2 replies; 5+ messages in thread
From: Andrii Nakryiko @ 2024-08-06 4:29 UTC (permalink / raw)
To: bpf, ast, daniel, martin.lau; +Cc: andrii, kernel-team
Add multi-uprobe and multi-uretprobe benchmarks to bench tool.
Multi- and classic uprobes/uretprobes have different low-level
triggering code paths, so it's sometimes important to be able to
benchmark both flavors of uprobes/uretprobes.
Sample examples from my dev machine below. Single-threaded performance
almost doesn't differ, but with more parallel CPUs triggering the same
uprobe/uretprobe the difference grows. This might be due to [0], but
given the code is slightly different, there could be other sources of
slowdown.
Note, all these numbers will change due to ongoing work to improve
uprobe/uretprobe scalability (e.g., [1]), but having a benchmark like this
is useful for measurements and debugging nevertheless.
uprobe-nop ( 1 cpus): 1.020 ± 0.005M/s ( 1.020M/s/cpu)
uretprobe-nop ( 1 cpus): 0.515 ± 0.009M/s ( 0.515M/s/cpu)
uprobe-multi-nop ( 1 cpus): 1.036 ± 0.004M/s ( 1.036M/s/cpu)
uretprobe-multi-nop ( 1 cpus): 0.512 ± 0.005M/s ( 0.512M/s/cpu)
uprobe-nop ( 8 cpus): 3.481 ± 0.030M/s ( 0.435M/s/cpu)
uretprobe-nop ( 8 cpus): 2.222 ± 0.008M/s ( 0.278M/s/cpu)
uprobe-multi-nop ( 8 cpus): 3.769 ± 0.094M/s ( 0.471M/s/cpu)
uretprobe-multi-nop ( 8 cpus): 2.482 ± 0.007M/s ( 0.310M/s/cpu)
uprobe-nop (16 cpus): 2.968 ± 0.011M/s ( 0.185M/s/cpu)
uretprobe-nop (16 cpus): 1.870 ± 0.002M/s ( 0.117M/s/cpu)
uprobe-multi-nop (16 cpus): 3.541 ± 0.037M/s ( 0.221M/s/cpu)
uretprobe-multi-nop (16 cpus): 2.123 ± 0.026M/s ( 0.133M/s/cpu)
uprobe-nop (32 cpus): 2.524 ± 0.026M/s ( 0.079M/s/cpu)
uretprobe-nop (32 cpus): 1.572 ± 0.003M/s ( 0.049M/s/cpu)
uprobe-multi-nop (32 cpus): 2.717 ± 0.003M/s ( 0.085M/s/cpu)
uretprobe-multi-nop (32 cpus): 1.687 ± 0.007M/s ( 0.053M/s/cpu)
[0] https://lore.kernel.org/linux-trace-kernel/20240805202803.1813090-1-andrii@kernel.org/
[1] https://lore.kernel.org/linux-trace-kernel/20240731214256.3588718-1-andrii@kernel.org/
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
---
tools/testing/selftests/bpf/bench.c | 12 +++
.../selftests/bpf/benchs/bench_trigger.c | 81 +++++++++++++++----
.../selftests/bpf/progs/trigger_bench.c | 7 ++
3 files changed, 85 insertions(+), 15 deletions(-)
diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index 90dc3aca32bd..1bd403a5ef7b 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -520,6 +520,12 @@ extern const struct bench bench_trig_uprobe_push;
extern const struct bench bench_trig_uretprobe_push;
extern const struct bench bench_trig_uprobe_ret;
extern const struct bench bench_trig_uretprobe_ret;
+extern const struct bench bench_trig_uprobe_multi_nop;
+extern const struct bench bench_trig_uretprobe_multi_nop;
+extern const struct bench bench_trig_uprobe_multi_push;
+extern const struct bench bench_trig_uretprobe_multi_push;
+extern const struct bench bench_trig_uprobe_multi_ret;
+extern const struct bench bench_trig_uretprobe_multi_ret;
extern const struct bench bench_rb_libbpf;
extern const struct bench bench_rb_custom;
@@ -574,6 +580,12 @@ static const struct bench *benchs[] = {
&bench_trig_uretprobe_push,
&bench_trig_uprobe_ret,
&bench_trig_uretprobe_ret,
+ &bench_trig_uprobe_multi_nop,
+ &bench_trig_uretprobe_multi_nop,
+ &bench_trig_uprobe_multi_push,
+ &bench_trig_uretprobe_multi_push,
+ &bench_trig_uprobe_multi_ret,
+ &bench_trig_uretprobe_multi_ret,
/* ringbuf/perfbuf benchmarks */
&bench_rb_libbpf,
&bench_rb_custom,
diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c
index 4b05539f167d..a220545a3238 100644
--- a/tools/testing/selftests/bpf/benchs/bench_trigger.c
+++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c
@@ -332,7 +332,7 @@ static void *uprobe_producer_ret(void *input)
return NULL;
}
-static void usetup(bool use_retprobe, void *target_addr)
+static void usetup(bool use_retprobe, bool use_multi, void *target_addr)
{
size_t uprobe_offset;
struct bpf_link *link;
@@ -346,7 +346,10 @@ static void usetup(bool use_retprobe, void *target_addr)
exit(1);
}
- bpf_program__set_autoload(ctx.skel->progs.bench_trigger_uprobe, true);
+ if (use_multi)
+ bpf_program__set_autoload(ctx.skel->progs.bench_trigger_uprobe_multi, true);
+ else
+ bpf_program__set_autoload(ctx.skel->progs.bench_trigger_uprobe, true);
err = trigger_bench__load(ctx.skel);
if (err) {
@@ -355,16 +358,28 @@ static void usetup(bool use_retprobe, void *target_addr)
}
uprobe_offset = get_uprobe_offset(target_addr);
- link = bpf_program__attach_uprobe(ctx.skel->progs.bench_trigger_uprobe,
- use_retprobe,
- -1 /* all PIDs */,
- "/proc/self/exe",
- uprobe_offset);
+ if (use_multi) {
+ LIBBPF_OPTS(bpf_uprobe_multi_opts, opts,
+ .retprobe = use_retprobe,
+ .cnt = 1,
+ .offsets = &uprobe_offset,
+ );
+ link = bpf_program__attach_uprobe_multi(
+ ctx.skel->progs.bench_trigger_uprobe_multi,
+ -1 /* all PIDs */, "/proc/self/exe", NULL, &opts);
+ ctx.skel->links.bench_trigger_uprobe_multi = link;
+ } else {
+ link = bpf_program__attach_uprobe(ctx.skel->progs.bench_trigger_uprobe,
+ use_retprobe,
+ -1 /* all PIDs */,
+ "/proc/self/exe",
+ uprobe_offset);
+ ctx.skel->links.bench_trigger_uprobe = link;
+ }
if (!link) {
- fprintf(stderr, "failed to attach uprobe!\n");
+ fprintf(stderr, "failed to attach %s!\n", use_multi ? "multi-uprobe" : "uprobe");
exit(1);
}
- ctx.skel->links.bench_trigger_uprobe = link;
}
static void usermode_count_setup(void)
@@ -374,32 +389,62 @@ static void usermode_count_setup(void)
static void uprobe_nop_setup(void)
{
- usetup(false, &uprobe_target_nop);
+ usetup(false, false /* !use_multi */, &uprobe_target_nop);
}
static void uretprobe_nop_setup(void)
{
- usetup(true, &uprobe_target_nop);
+ usetup(true, false /* !use_multi */, &uprobe_target_nop);
}
static void uprobe_push_setup(void)
{
- usetup(false, &uprobe_target_push);
+ usetup(false, false /* !use_multi */, &uprobe_target_push);
}
static void uretprobe_push_setup(void)
{
- usetup(true, &uprobe_target_push);
+ usetup(true, false /* !use_multi */, &uprobe_target_push);
}
static void uprobe_ret_setup(void)
{
- usetup(false, &uprobe_target_ret);
+ usetup(false, false /* !use_multi */, &uprobe_target_ret);
}
static void uretprobe_ret_setup(void)
{
- usetup(true, &uprobe_target_ret);
+ usetup(true, false /* !use_multi */, &uprobe_target_ret);
+}
+
+static void uprobe_multi_nop_setup(void)
+{
+ usetup(false, true /* use_multi */, &uprobe_target_nop);
+}
+
+static void uretprobe_multi_nop_setup(void)
+{
+ usetup(true, true /* use_multi */, &uprobe_target_nop);
+}
+
+static void uprobe_multi_push_setup(void)
+{
+ usetup(false, true /* use_multi */, &uprobe_target_push);
+}
+
+static void uretprobe_multi_push_setup(void)
+{
+ usetup(true, true /* use_multi */, &uprobe_target_push);
+}
+
+static void uprobe_multi_ret_setup(void)
+{
+ usetup(false, true /* use_multi */, &uprobe_target_ret);
+}
+
+static void uretprobe_multi_ret_setup(void)
+{
+ usetup(true, true /* use_multi */, &uprobe_target_ret);
}
const struct bench bench_trig_syscall_count = {
@@ -454,3 +499,9 @@ BENCH_TRIG_USERMODE(uprobe_ret, ret, "uprobe-ret");
BENCH_TRIG_USERMODE(uretprobe_nop, nop, "uretprobe-nop");
BENCH_TRIG_USERMODE(uretprobe_push, push, "uretprobe-push");
BENCH_TRIG_USERMODE(uretprobe_ret, ret, "uretprobe-ret");
+BENCH_TRIG_USERMODE(uprobe_multi_nop, nop, "uprobe-multi-nop");
+BENCH_TRIG_USERMODE(uprobe_multi_push, push, "uprobe-multi-push");
+BENCH_TRIG_USERMODE(uprobe_multi_ret, ret, "uprobe-multi-ret");
+BENCH_TRIG_USERMODE(uretprobe_multi_nop, nop, "uretprobe-multi-nop");
+BENCH_TRIG_USERMODE(uretprobe_multi_push, push, "uretprobe-multi-push");
+BENCH_TRIG_USERMODE(uretprobe_multi_ret, ret, "uretprobe-multi-ret");
diff --git a/tools/testing/selftests/bpf/progs/trigger_bench.c b/tools/testing/selftests/bpf/progs/trigger_bench.c
index 2619ed193c65..044a6d78923e 100644
--- a/tools/testing/selftests/bpf/progs/trigger_bench.c
+++ b/tools/testing/selftests/bpf/progs/trigger_bench.c
@@ -32,6 +32,13 @@ int bench_trigger_uprobe(void *ctx)
return 0;
}
+SEC("?uprobe.multi")
+int bench_trigger_uprobe_multi(void *ctx)
+{
+ inc_counter();
+ return 0;
+}
+
const volatile int batch_iters = 0;
SEC("?raw_tp")
--
2.43.5
^ permalink raw reply related [flat|nested] 5+ messages in thread* Re: [PATCH bpf-next] selftests/bpf: add multi-uprobe benchmarks
2024-08-06 4:29 [PATCH bpf-next] selftests/bpf: add multi-uprobe benchmarks Andrii Nakryiko
@ 2024-08-06 7:25 ` Jiri Olsa
2024-08-06 17:30 ` Andrii Nakryiko
2024-08-23 17:10 ` patchwork-bot+netdevbpf
1 sibling, 1 reply; 5+ messages in thread
From: Jiri Olsa @ 2024-08-06 7:25 UTC (permalink / raw)
To: Andrii Nakryiko; +Cc: bpf, ast, daniel, martin.lau, kernel-team
On Mon, Aug 05, 2024 at 09:29:35PM -0700, Andrii Nakryiko wrote:
> Add multi-uprobe and multi-uretprobe benchmarks to bench tool.
> Multi- and classic uprobes/uretprobes have different low-level
> triggering code paths, so it's sometimes important to be able to
> benchmark both flavors of uprobes/uretprobes.
>
> Sample examples from my dev machine below. Single-threaded performance
> almost doesn't differ, but with more parallel CPUs triggering the same
> uprobe/uretprobe the difference grows. This might be due to [0], but
> given the code is slightly different, there could be other sources of
> slowdown.
>
> Note, all these numbers will change due to ongoing work to improve
> uprobe/uretprobe scalability (e.g., [1]), but having benchmark like this
> is useful for measurements and debugging nevertheless.
>
> uprobe-nop ( 1 cpus): 1.020 ± 0.005M/s ( 1.020M/s/cpu)
> uretprobe-nop ( 1 cpus): 0.515 ± 0.009M/s ( 0.515M/s/cpu)
> uprobe-multi-nop ( 1 cpus): 1.036 ± 0.004M/s ( 1.036M/s/cpu)
> uretprobe-multi-nop ( 1 cpus): 0.512 ± 0.005M/s ( 0.512M/s/cpu)
>
> uprobe-nop ( 8 cpus): 3.481 ± 0.030M/s ( 0.435M/s/cpu)
> uretprobe-nop ( 8 cpus): 2.222 ± 0.008M/s ( 0.278M/s/cpu)
> uprobe-multi-nop ( 8 cpus): 3.769 ± 0.094M/s ( 0.471M/s/cpu)
> uretprobe-multi-nop ( 8 cpus): 2.482 ± 0.007M/s ( 0.310M/s/cpu)
>
> uprobe-nop (16 cpus): 2.968 ± 0.011M/s ( 0.185M/s/cpu)
> uretprobe-nop (16 cpus): 1.870 ± 0.002M/s ( 0.117M/s/cpu)
> uprobe-multi-nop (16 cpus): 3.541 ± 0.037M/s ( 0.221M/s/cpu)
> uretprobe-multi-nop (16 cpus): 2.123 ± 0.026M/s ( 0.133M/s/cpu)
>
> uprobe-nop (32 cpus): 2.524 ± 0.026M/s ( 0.079M/s/cpu)
> uretprobe-nop (32 cpus): 1.572 ± 0.003M/s ( 0.049M/s/cpu)
> uprobe-multi-nop (32 cpus): 2.717 ± 0.003M/s ( 0.085M/s/cpu)
> uretprobe-multi-nop (32 cpus): 1.687 ± 0.007M/s ( 0.053M/s/cpu)
nice, do you have script for this output?
we could add it to benchs/run_bench_uprobes.sh
lgtm
Acked-by: Jiri Olsa <jolsa@kernel.org>
jirka
>
> [0] https://lore.kernel.org/linux-trace-kernel/20240805202803.1813090-1-andrii@kernel.org/
> [1] https://lore.kernel.org/linux-trace-kernel/20240731214256.3588718-1-andrii@kernel.org/
>
> Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
> ---
> tools/testing/selftests/bpf/bench.c | 12 +++
> .../selftests/bpf/benchs/bench_trigger.c | 81 +++++++++++++++----
> .../selftests/bpf/progs/trigger_bench.c | 7 ++
> 3 files changed, 85 insertions(+), 15 deletions(-)
>
> diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
> index 90dc3aca32bd..1bd403a5ef7b 100644
> --- a/tools/testing/selftests/bpf/bench.c
> +++ b/tools/testing/selftests/bpf/bench.c
> @@ -520,6 +520,12 @@ extern const struct bench bench_trig_uprobe_push;
> extern const struct bench bench_trig_uretprobe_push;
> extern const struct bench bench_trig_uprobe_ret;
> extern const struct bench bench_trig_uretprobe_ret;
> +extern const struct bench bench_trig_uprobe_multi_nop;
> +extern const struct bench bench_trig_uretprobe_multi_nop;
> +extern const struct bench bench_trig_uprobe_multi_push;
> +extern const struct bench bench_trig_uretprobe_multi_push;
> +extern const struct bench bench_trig_uprobe_multi_ret;
> +extern const struct bench bench_trig_uretprobe_multi_ret;
SNIP
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH bpf-next] selftests/bpf: add multi-uprobe benchmarks
2024-08-06 7:25 ` Jiri Olsa
@ 2024-08-06 17:30 ` Andrii Nakryiko
2024-08-23 17:02 ` Alexei Starovoitov
0 siblings, 1 reply; 5+ messages in thread
From: Andrii Nakryiko @ 2024-08-06 17:30 UTC (permalink / raw)
To: Jiri Olsa; +Cc: Andrii Nakryiko, bpf, ast, daniel, martin.lau, kernel-team
On Tue, Aug 6, 2024 at 12:25 AM Jiri Olsa <olsajiri@gmail.com> wrote:
>
> On Mon, Aug 05, 2024 at 09:29:35PM -0700, Andrii Nakryiko wrote:
> > Add multi-uprobe and multi-uretprobe benchmarks to bench tool.
> > Multi- and classic uprobes/uretprobes have different low-level
> > triggering code paths, so it's sometimes important to be able to
> > benchmark both flavors of uprobes/uretprobes.
> >
> > Sample examples from my dev machine below. Single-threaded performance
> > almost doesn't differ, but with more parallel CPUs triggering the same
> > uprobe/uretprobe the difference grows. This might be due to [0], but
> > given the code is slightly different, there could be other sources of
> > slowdown.
> >
> > Note, all these numbers will change due to ongoing work to improve
> > uprobe/uretprobe scalability (e.g., [1]), but having benchmark like this
> > is useful for measurements and debugging nevertheless.
> >
> > uprobe-nop ( 1 cpus): 1.020 ± 0.005M/s ( 1.020M/s/cpu)
> > uretprobe-nop ( 1 cpus): 0.515 ± 0.009M/s ( 0.515M/s/cpu)
> > uprobe-multi-nop ( 1 cpus): 1.036 ± 0.004M/s ( 1.036M/s/cpu)
> > uretprobe-multi-nop ( 1 cpus): 0.512 ± 0.005M/s ( 0.512M/s/cpu)
> >
> > uprobe-nop ( 8 cpus): 3.481 ± 0.030M/s ( 0.435M/s/cpu)
> > uretprobe-nop ( 8 cpus): 2.222 ± 0.008M/s ( 0.278M/s/cpu)
> > uprobe-multi-nop ( 8 cpus): 3.769 ± 0.094M/s ( 0.471M/s/cpu)
> > uretprobe-multi-nop ( 8 cpus): 2.482 ± 0.007M/s ( 0.310M/s/cpu)
> >
> > uprobe-nop (16 cpus): 2.968 ± 0.011M/s ( 0.185M/s/cpu)
> > uretprobe-nop (16 cpus): 1.870 ± 0.002M/s ( 0.117M/s/cpu)
> > uprobe-multi-nop (16 cpus): 3.541 ± 0.037M/s ( 0.221M/s/cpu)
> > uretprobe-multi-nop (16 cpus): 2.123 ± 0.026M/s ( 0.133M/s/cpu)
> >
> > uprobe-nop (32 cpus): 2.524 ± 0.026M/s ( 0.079M/s/cpu)
> > uretprobe-nop (32 cpus): 1.572 ± 0.003M/s ( 0.049M/s/cpu)
> > uprobe-multi-nop (32 cpus): 2.717 ± 0.003M/s ( 0.085M/s/cpu)
> > uretprobe-multi-nop (32 cpus): 1.687 ± 0.007M/s ( 0.053M/s/cpu)
>
> nice, do you have script for this output?
> we could add it to benchs/run_bench_uprobes.sh
>
I keep tuning those scripts to my own needs, so I'm not sure if it's
worth adding all of them to selftests. It's very similar to what we
already have, but see the exact script below:
#!/bin/bash
set -eufo pipefail
for p in 1 8 16 32; do
for i in uprobe-nop uretprobe-nop uprobe-multi-nop uretprobe-multi-nop; do
summary=$(sudo ./bench -w1 -d3 -p$p -a trig-$i | tail -n1)
total=$(echo "$summary" | cut -d'(' -f1 | cut -d' ' -f3-)
percpu=$(echo "$summary" | cut -d'(' -f2 | cut -d')' -f1 | cut -d'/' -f1)
printf "%-21s (%2d cpus): %s (%s/s/cpu)\n" $i $p "$total" "$percpu"
done
echo
done
> lgtm
>
> Acked-by: Jiri Olsa <jolsa@kernel.org>
>
> jirka
>
> >
> > [0] https://lore.kernel.org/linux-trace-kernel/20240805202803.1813090-1-andrii@kernel.org/
> > [1] https://lore.kernel.org/linux-trace-kernel/20240731214256.3588718-1-andrii@kernel.org/
> >
> > Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
> > ---
> > tools/testing/selftests/bpf/bench.c | 12 +++
> > .../selftests/bpf/benchs/bench_trigger.c | 81 +++++++++++++++----
> > .../selftests/bpf/progs/trigger_bench.c | 7 ++
> > 3 files changed, 85 insertions(+), 15 deletions(-)
> >
> > diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
> > index 90dc3aca32bd..1bd403a5ef7b 100644
> > --- a/tools/testing/selftests/bpf/bench.c
> > +++ b/tools/testing/selftests/bpf/bench.c
> > @@ -520,6 +520,12 @@ extern const struct bench bench_trig_uprobe_push;
> > extern const struct bench bench_trig_uretprobe_push;
> > extern const struct bench bench_trig_uprobe_ret;
> > extern const struct bench bench_trig_uretprobe_ret;
> > +extern const struct bench bench_trig_uprobe_multi_nop;
> > +extern const struct bench bench_trig_uretprobe_multi_nop;
> > +extern const struct bench bench_trig_uprobe_multi_push;
> > +extern const struct bench bench_trig_uretprobe_multi_push;
> > +extern const struct bench bench_trig_uprobe_multi_ret;
> > +extern const struct bench bench_trig_uretprobe_multi_ret;
>
> SNIP
^ permalink raw reply [flat|nested] 5+ messages in thread* Re: [PATCH bpf-next] selftests/bpf: add multi-uprobe benchmarks
2024-08-06 17:30 ` Andrii Nakryiko
@ 2024-08-23 17:02 ` Alexei Starovoitov
0 siblings, 0 replies; 5+ messages in thread
From: Alexei Starovoitov @ 2024-08-23 17:02 UTC (permalink / raw)
To: Andrii Nakryiko
Cc: Jiri Olsa, Andrii Nakryiko, bpf, Alexei Starovoitov,
Daniel Borkmann, Martin KaFai Lau, Kernel Team
On Tue, Aug 6, 2024 at 10:31 AM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> On Tue, Aug 6, 2024 at 12:25 AM Jiri Olsa <olsajiri@gmail.com> wrote:
> >
> > On Mon, Aug 05, 2024 at 09:29:35PM -0700, Andrii Nakryiko wrote:
> > > Add multi-uprobe and multi-uretprobe benchmarks to bench tool.
> > > Multi- and classic uprobes/uretprobes have different low-level
> > > triggering code paths, so it's sometimes important to be able to
> > > benchmark both flavors of uprobes/uretprobes.
> > >
> > > Sample examples from my dev machine below. Single-threaded performance
> > > almost doesn't differ, but with more parallel CPUs triggering the same
> > > uprobe/uretprobe the difference grows. This might be due to [0], but
> > > given the code is slightly different, there could be other sources of
> > > slowdown.
> > >
> > > Note, all these numbers will change due to ongoing work to improve
> > > uprobe/uretprobe scalability (e.g., [1]), but having benchmark like this
> > > is useful for measurements and debugging nevertheless.
> > >
> > > uprobe-nop ( 1 cpus): 1.020 ± 0.005M/s ( 1.020M/s/cpu)
> > > uretprobe-nop ( 1 cpus): 0.515 ± 0.009M/s ( 0.515M/s/cpu)
> > > uprobe-multi-nop ( 1 cpus): 1.036 ± 0.004M/s ( 1.036M/s/cpu)
> > > uretprobe-multi-nop ( 1 cpus): 0.512 ± 0.005M/s ( 0.512M/s/cpu)
> > >
> > > uprobe-nop ( 8 cpus): 3.481 ± 0.030M/s ( 0.435M/s/cpu)
> > > uretprobe-nop ( 8 cpus): 2.222 ± 0.008M/s ( 0.278M/s/cpu)
> > > uprobe-multi-nop ( 8 cpus): 3.769 ± 0.094M/s ( 0.471M/s/cpu)
> > > uretprobe-multi-nop ( 8 cpus): 2.482 ± 0.007M/s ( 0.310M/s/cpu)
> > >
> > > uprobe-nop (16 cpus): 2.968 ± 0.011M/s ( 0.185M/s/cpu)
> > > uretprobe-nop (16 cpus): 1.870 ± 0.002M/s ( 0.117M/s/cpu)
> > > uprobe-multi-nop (16 cpus): 3.541 ± 0.037M/s ( 0.221M/s/cpu)
> > > uretprobe-multi-nop (16 cpus): 2.123 ± 0.026M/s ( 0.133M/s/cpu)
> > >
> > > uprobe-nop (32 cpus): 2.524 ± 0.026M/s ( 0.079M/s/cpu)
> > > uretprobe-nop (32 cpus): 1.572 ± 0.003M/s ( 0.049M/s/cpu)
> > > uprobe-multi-nop (32 cpus): 2.717 ± 0.003M/s ( 0.085M/s/cpu)
> > > uretprobe-multi-nop (32 cpus): 1.687 ± 0.007M/s ( 0.053M/s/cpu)
> >
> > nice, do you have script for this output?
> > we could add it to benchs/run_bench_uprobes.sh
> >
>
> I keep tuning those scripts to my own needs, so I'm not sure if it's
> worth adding all of them to selftests. It's very similar to what we
> already have, but see the exact script below:
>
> #!/bin/bash
>
> set -eufo pipefail
>
> for p in 1 8 16 32; do
> for i in uprobe-nop uretprobe-nop uprobe-multi-nop uretprobe-multi-nop; do
> summary=$(sudo ./bench -w1 -d3 -p$p -a trig-$i | tail -n1)
> total=$(echo "$summary" | cut -d'(' -f1 | cut -d' ' -f3-)
> percpu=$(echo "$summary" | cut -d'(' -f2 | cut -d')' -f1 | cut -d'/' -f1)
> printf "%-21s (%2d cpus): %s (%s/s/cpu)\n" $i $p "$total" "$percpu"
> done
> echo
> done
Added this script to commit log while applying.
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH bpf-next] selftests/bpf: add multi-uprobe benchmarks
2024-08-06 4:29 [PATCH bpf-next] selftests/bpf: add multi-uprobe benchmarks Andrii Nakryiko
2024-08-06 7:25 ` Jiri Olsa
@ 2024-08-23 17:10 ` patchwork-bot+netdevbpf
1 sibling, 0 replies; 5+ messages in thread
From: patchwork-bot+netdevbpf @ 2024-08-23 17:10 UTC (permalink / raw)
To: Andrii Nakryiko; +Cc: bpf, ast, daniel, martin.lau, kernel-team
Hello:
This patch was applied to bpf/bpf-next.git (master)
by Alexei Starovoitov <ast@kernel.org>:
On Mon, 5 Aug 2024 21:29:35 -0700 you wrote:
> Add multi-uprobe and multi-uretprobe benchmarks to bench tool.
> Multi- and classic uprobes/uretprobes have different low-level
> triggering code paths, so it's sometimes important to be able to
> benchmark both flavors of uprobes/uretprobes.
>
> Sample examples from my dev machine below. Single-threaded performance
> almost doesn't differ, but with more parallel CPUs triggering the same
> uprobe/uretprobe the difference grows. This might be due to [0], but
> given the code is slightly different, there could be other sources of
> slowdown.
>
> [...]
Here is the summary with links:
- [bpf-next] selftests/bpf: add multi-uprobe benchmarks
https://git.kernel.org/bpf/bpf-next/c/f727b13dbea1
You are awesome, thank you!
--
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2024-08-23 17:10 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-08-06 4:29 [PATCH bpf-next] selftests/bpf: add multi-uprobe benchmarks Andrii Nakryiko
2024-08-06 7:25 ` Jiri Olsa
2024-08-06 17:30 ` Andrii Nakryiko
2024-08-23 17:02 ` Alexei Starovoitov
2024-08-23 17:10 ` patchwork-bot+netdevbpf
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox