* [PATCH bpf-next] selftests/bpf: extend uprobe/uretprobe triggering benchmarks
@ 2024-03-01 21:45 Andrii Nakryiko
2024-03-04 13:50 ` patchwork-bot+netdevbpf
2024-03-04 13:52 ` Daniel Borkmann
0 siblings, 2 replies; 3+ messages in thread
From: Andrii Nakryiko @ 2024-03-01 21:45 UTC (permalink / raw)
To: bpf, ast, daniel, martin.lau; +Cc: andrii, kernel-team
Settle on three "flavors" of uprobe/uretprobe, installed on different
kinds of instruction: nop, push, and ret. All three are testing
different internal code paths emulating or single-stepping instructions,
so are interesting to compare and benchmark separately.
To ensure `push rbp` instruction we ensure that uprobe_target_push() is
not a leaf function by calling (global __weak) noop function and
returning something afterwards (if we don't do that, compiler will just
do a tail call optimization).
Also, we need to make sure that compiler isn't skipping frame pointer
generation, so let's add `-fno-omit-frame-pointers` to Makefile.
Just to give an idea of where we currently stand in terms of relative
performance of different uprobe/uretprobe cases vs a cheap syscall
(getpgid()) baseline, here are results from my local machine:
$ benchs/run_bench_uprobes.sh
base : 1.561 ± 0.020M/s
uprobe-nop : 0.947 ± 0.007M/s
uprobe-push : 0.951 ± 0.004M/s
uprobe-ret : 0.443 ± 0.007M/s
uretprobe-nop : 0.471 ± 0.013M/s
uretprobe-push : 0.483 ± 0.004M/s
uretprobe-ret : 0.306 ± 0.007M/s
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
---
tools/testing/selftests/bpf/Makefile | 2 +-
tools/testing/selftests/bpf/bench.c | 20 +--
.../selftests/bpf/benchs/bench_trigger.c | 118 ++++++++++++------
.../selftests/bpf/benchs/run_bench_uprobes.sh | 9 ++
4 files changed, 103 insertions(+), 46 deletions(-)
create mode 100755 tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index a38a3001527c..eccba9f3a77d 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -34,7 +34,7 @@ LIBELF_CFLAGS := $(shell $(PKG_CONFIG) libelf --cflags 2>/dev/null)
LIBELF_LIBS := $(shell $(PKG_CONFIG) libelf --libs 2>/dev/null || echo -lelf)
CFLAGS += -g $(OPT_FLAGS) -rdynamic \
- -Wall -Werror \
+ -Wall -Werror -fno-omit-frame-pointer \
$(GENFLAGS) $(SAN_CFLAGS) $(LIBELF_CFLAGS) \
-I$(CURDIR) -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) \
-I$(TOOLSINCDIR) -I$(APIDIR) -I$(OUTPUT)
diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index 73ce11b0547d..c632811f9171 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -499,10 +499,12 @@ extern const struct bench bench_trig_fentry;
extern const struct bench bench_trig_fentry_sleep;
extern const struct bench bench_trig_fmodret;
extern const struct bench bench_trig_uprobe_base;
-extern const struct bench bench_trig_uprobe_with_nop;
-extern const struct bench bench_trig_uretprobe_with_nop;
-extern const struct bench bench_trig_uprobe_without_nop;
-extern const struct bench bench_trig_uretprobe_without_nop;
+extern const struct bench bench_trig_uprobe_nop;
+extern const struct bench bench_trig_uretprobe_nop;
+extern const struct bench bench_trig_uprobe_push;
+extern const struct bench bench_trig_uretprobe_push;
+extern const struct bench bench_trig_uprobe_ret;
+extern const struct bench bench_trig_uretprobe_ret;
extern const struct bench bench_rb_libbpf;
extern const struct bench bench_rb_custom;
extern const struct bench bench_pb_libbpf;
@@ -541,10 +543,12 @@ static const struct bench *benchs[] = {
&bench_trig_fentry_sleep,
&bench_trig_fmodret,
&bench_trig_uprobe_base,
- &bench_trig_uprobe_with_nop,
- &bench_trig_uretprobe_with_nop,
- &bench_trig_uprobe_without_nop,
- &bench_trig_uretprobe_without_nop,
+ &bench_trig_uprobe_nop,
+ &bench_trig_uretprobe_nop,
+ &bench_trig_uprobe_push,
+ &bench_trig_uretprobe_push,
+ &bench_trig_uprobe_ret,
+ &bench_trig_uretprobe_ret,
&bench_rb_libbpf,
&bench_rb_custom,
&bench_pb_libbpf,
diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c
index dbd362771d6a..a8ed8afff0b3 100644
--- a/tools/testing/selftests/bpf/benchs/bench_trigger.c
+++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c
@@ -113,12 +113,25 @@ static void trigger_fmodret_setup(void)
* GCC doesn't generate stack setup preample for these functions due to them
* having no input arguments and doing nothing in the body.
*/
-__weak void uprobe_target_with_nop(void)
+__weak void uprobe_target_nop(void)
{
asm volatile ("nop");
}
-__weak void uprobe_target_without_nop(void)
+__weak void opaque_noop_func()
+{
+}
+
+__weak int uprobe_target_push(void)
+{
+ /* overhead of function call is negligible compared to uprobe
+ * triggering, so this shouldn't affect benchmark results much
+ */
+ opaque_noop_func();
+ return 1;
+}
+
+__weak void uprobe_target_ret(void)
{
asm volatile ("");
}
@@ -126,27 +139,34 @@ __weak void uprobe_target_without_nop(void)
static void *uprobe_base_producer(void *input)
{
while (true) {
- uprobe_target_with_nop();
+ uprobe_target_nop();
atomic_inc(&base_hits.value);
}
return NULL;
}
-static void *uprobe_producer_with_nop(void *input)
+static void *uprobe_producer_nop(void *input)
+{
+ while (true)
+ uprobe_target_nop();
+ return NULL;
+}
+
+static void *uprobe_producer_push(void *input)
{
while (true)
- uprobe_target_with_nop();
+ uprobe_target_push();
return NULL;
}
-static void *uprobe_producer_without_nop(void *input)
+static void *uprobe_producer_ret(void *input)
{
while (true)
- uprobe_target_without_nop();
+ uprobe_target_ret();
return NULL;
}
-static void usetup(bool use_retprobe, bool use_nop)
+static void usetup(bool use_retprobe, void *target_addr)
{
size_t uprobe_offset;
struct bpf_link *link;
@@ -159,11 +179,7 @@ static void usetup(bool use_retprobe, bool use_nop)
exit(1);
}
- if (use_nop)
- uprobe_offset = get_uprobe_offset(&uprobe_target_with_nop);
- else
- uprobe_offset = get_uprobe_offset(&uprobe_target_without_nop);
-
+ uprobe_offset = get_uprobe_offset(target_addr);
link = bpf_program__attach_uprobe(ctx.skel->progs.bench_trigger_uprobe,
use_retprobe,
-1 /* all PIDs */,
@@ -176,24 +192,34 @@ static void usetup(bool use_retprobe, bool use_nop)
ctx.skel->links.bench_trigger_uprobe = link;
}
-static void uprobe_setup_with_nop(void)
+static void uprobe_setup_nop(void)
+{
+ usetup(false, &uprobe_target_nop);
+}
+
+static void uretprobe_setup_nop(void)
+{
+ usetup(true, &uprobe_target_nop);
+}
+
+static void uprobe_setup_push(void)
{
- usetup(false, true);
+ usetup(false, &uprobe_target_push);
}
-static void uretprobe_setup_with_nop(void)
+static void uretprobe_setup_push(void)
{
- usetup(true, true);
+ usetup(true, &uprobe_target_push);
}
-static void uprobe_setup_without_nop(void)
+static void uprobe_setup_ret(void)
{
- usetup(false, false);
+ usetup(false, &uprobe_target_ret);
}
-static void uretprobe_setup_without_nop(void)
+static void uretprobe_setup_ret(void)
{
- usetup(true, false);
+ usetup(true, &uprobe_target_ret);
}
const struct bench bench_trig_base = {
@@ -274,37 +300,55 @@ const struct bench bench_trig_uprobe_base = {
.report_final = hits_drops_report_final,
};
-const struct bench bench_trig_uprobe_with_nop = {
- .name = "trig-uprobe-with-nop",
- .setup = uprobe_setup_with_nop,
- .producer_thread = uprobe_producer_with_nop,
+const struct bench bench_trig_uprobe_nop = {
+ .name = "trig-uprobe-nop",
+ .setup = uprobe_setup_nop,
+ .producer_thread = uprobe_producer_nop,
+ .measure = trigger_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_trig_uretprobe_nop = {
+ .name = "trig-uretprobe-nop",
+ .setup = uretprobe_setup_nop,
+ .producer_thread = uprobe_producer_nop,
+ .measure = trigger_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_trig_uprobe_push = {
+ .name = "trig-uprobe-push",
+ .setup = uprobe_setup_push,
+ .producer_thread = uprobe_producer_push,
.measure = trigger_measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
};
-const struct bench bench_trig_uretprobe_with_nop = {
- .name = "trig-uretprobe-with-nop",
- .setup = uretprobe_setup_with_nop,
- .producer_thread = uprobe_producer_with_nop,
+const struct bench bench_trig_uretprobe_push = {
+ .name = "trig-uretprobe-push",
+ .setup = uretprobe_setup_push,
+ .producer_thread = uprobe_producer_push,
.measure = trigger_measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
};
-const struct bench bench_trig_uprobe_without_nop = {
- .name = "trig-uprobe-without-nop",
- .setup = uprobe_setup_without_nop,
- .producer_thread = uprobe_producer_without_nop,
+const struct bench bench_trig_uprobe_ret = {
+ .name = "trig-uprobe-ret",
+ .setup = uprobe_setup_ret,
+ .producer_thread = uprobe_producer_ret,
.measure = trigger_measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
};
-const struct bench bench_trig_uretprobe_without_nop = {
- .name = "trig-uretprobe-without-nop",
- .setup = uretprobe_setup_without_nop,
- .producer_thread = uprobe_producer_without_nop,
+const struct bench bench_trig_uretprobe_ret = {
+ .name = "trig-uretprobe-ret",
+ .setup = uretprobe_setup_ret,
+ .producer_thread = uprobe_producer_ret,
.measure = trigger_measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh b/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh
new file mode 100755
index 000000000000..9bdcc74e03a4
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+set -eufo pipefail
+
+for i in base {uprobe,uretprobe}-{nop,push,ret}
+do
+ summary=$(sudo ./bench -w2 -d5 -a trig-$i | tail -n1 | cut -d'(' -f1 | cut -d' ' -f3-)
+ printf "%-15s: %s\n" $i "$summary"
+done
--
2.43.0
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH bpf-next] selftests/bpf: extend uprobe/uretprobe triggering benchmarks
2024-03-01 21:45 [PATCH bpf-next] selftests/bpf: extend uprobe/uretprobe triggering benchmarks Andrii Nakryiko
@ 2024-03-04 13:50 ` patchwork-bot+netdevbpf
2024-03-04 13:52 ` Daniel Borkmann
1 sibling, 0 replies; 3+ messages in thread
From: patchwork-bot+netdevbpf @ 2024-03-04 13:50 UTC (permalink / raw)
To: Andrii Nakryiko; +Cc: bpf, ast, daniel, martin.lau, kernel-team
Hello:
This patch was applied to bpf/bpf-next.git (master)
by Daniel Borkmann <daniel@iogearbox.net>:
On Fri, 1 Mar 2024 13:45:51 -0800 you wrote:
> Settle on three "flavors" of uprobe/uretprobe, installed on different
> kinds of instruction: nop, push, and ret. All three are testing
> different internal code paths emulating or single-stepping instructions,
> so are interesting to compare and benchmark separately.
>
> To ensure `push rbp` instruction we ensure that uprobe_target_push() is
> not a leaf function by calling (global __weak) noop function and
> returning something afterwards (if we don't do that, compiler will just
> do a tail call optimization).
>
> [...]
Here is the summary with links:
- [bpf-next] selftests/bpf: extend uprobe/uretprobe triggering benchmarks
https://git.kernel.org/bpf/bpf-next/c/8f79870ec8a9
You are awesome, thank you!
--
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH bpf-next] selftests/bpf: extend uprobe/uretprobe triggering benchmarks
2024-03-01 21:45 [PATCH bpf-next] selftests/bpf: extend uprobe/uretprobe triggering benchmarks Andrii Nakryiko
2024-03-04 13:50 ` patchwork-bot+netdevbpf
@ 2024-03-04 13:52 ` Daniel Borkmann
1 sibling, 0 replies; 3+ messages in thread
From: Daniel Borkmann @ 2024-03-04 13:52 UTC (permalink / raw)
To: Andrii Nakryiko, bpf, ast, martin.lau; +Cc: kernel-team
On 3/1/24 10:45 PM, Andrii Nakryiko wrote:
[...]
> diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c
> index dbd362771d6a..a8ed8afff0b3 100644
> --- a/tools/testing/selftests/bpf/benchs/bench_trigger.c
> +++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c
> @@ -113,12 +113,25 @@ static void trigger_fmodret_setup(void)
> * GCC doesn't generate stack setup preample for these functions due to them
> * having no input arguments and doing nothing in the body.
> */
> -__weak void uprobe_target_with_nop(void)
> +__weak void uprobe_target_nop(void)
> {
> asm volatile ("nop");
> }
>
> -__weak void uprobe_target_without_nop(void)
> +__weak void opaque_noop_func()
> +{
> +}
Nice work and super useful to have! I fixed up the void while applying.
Thanks,
Daniel
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2024-03-04 13:52 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-03-01 21:45 [PATCH bpf-next] selftests/bpf: extend uprobe/uretprobe triggering benchmarks Andrii Nakryiko
2024-03-04 13:50 ` patchwork-bot+netdevbpf
2024-03-04 13:52 ` Daniel Borkmann
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox