Netdev List
 help / color / mirror / Atom feed
From: Jesper Dangaard Brouer <hawk@kernel.org>
To: matt@readmodwrite.com, bpf@vger.kernel.org,
	Alexei Starovoitov <ast@kernel.org>
Cc: Jesper Dangaard Brouer <hawk@kernel.org>,
	mfleming@cloudflare.com, Daniel Borkmann <borkmann@iogearbox.net>,
	netdev@vger.kernel.org, kernel-team@cloudflare.com
Subject: [PATCH RFC bpf-next] selftests/bpf: Extend bench for LPM trie with noop and baseline
Date: Wed, 13 Aug 2025 17:29:35 +0200	[thread overview]
Message-ID: <175509897596.2755384.18413775753563966331.stgit@firesoul> (raw)
In-Reply-To: <20250722150152.1158205-1-matt@readmodwrite.com>

This patch extends [0] with some adjustments [1].
 [0] https://lore.kernel.org/all/20250722150152.1158205-1-matt@readmodwrite.com/
 [1] https://github.com/xdp-project/xdp-project/blob/main/areas/bench/patches/bench-lpm-trie-V3-adjusted.patch

The 'noop' bench measures the overhead of the harness itself, i.e. the
bpf_prog_test_run invocation that calls bpf_loop with 10000 (NR_LOOPS)
iterations from the lpm_producer loop.

CPU: AMD EPYC 9684X
sudo ./bench lpm-trie-noop  --nr_entries=1 --producers=1 --affinity
Setting up benchmark 'lpm-trie-noop'...
Benchmark 'lpm-trie-noop' started.
Iter   0 ( 42.501us): hits   74.567M/s ( 74.567M/prod)
Iter   1 ( -5.155us): hits   74.630M/s ( 74.630M/prod)
Iter   2 (  0.123us): hits   74.620M/s ( 74.620M/prod)
Iter   3 ( -7.127us): hits   74.611M/s ( 74.611M/prod)
Iter   4 (  7.334us): hits   74.609M/s ( 74.609M/prod)
Iter   5 (  0.163us): hits   74.620M/s ( 74.620M/prod)
Iter   6 (  0.213us): hits   74.610M/s ( 74.610M/prod)
Summary: throughput   74.617 ± 0.008 M ops/s ( 74.617M ops/prod), latency   13.402 ns/op

The baseline measures the overhead of getting a random number
and taking the modulo, which can be used as a baseline comparison
against lpm-trie-lookup and lpm-trie-update.

sudo ./bench lpm-trie-baseline  --nr_entries=1 --producers=1 --affinity
Setting up benchmark 'lpm-trie-baseline'...
Benchmark 'lpm-trie-baseline' started.
Iter   0 ( 44.996us): hits   36.308M/s ( 36.308M/prod)
Iter   1 ( -1.535us): hits   36.330M/s ( 36.330M/prod)
Iter   2 ( -3.919us): hits   36.310M/s ( 36.310M/prod)
Iter   3 ( -1.004us): hits   36.330M/s ( 36.330M/prod)
Iter   4 ( -1.476us): hits   36.320M/s ( 36.320M/prod)
Iter   5 (  0.468us): hits   36.330M/s ( 36.330M/prod)
Iter   6 ( -0.304us): hits   36.330M/s ( 36.330M/prod)
Summary: throughput   36.325 ± 0.008 M ops/s ( 36.325M ops/prod), latency   27.529 ns/op

Thus, the overhead of bpf_get_prandom_u32() is 14.1 nanoseconds.

Signed-off-by: Jesper Dangaard Brouer <hawk@kernel.org>
---
 tools/testing/selftests/bpf/bench.c                |    4 ++
 .../selftests/bpf/benchs/bench_lpm_trie_map.c      |   40 +++++++++++++++++++-
 tools/testing/selftests/bpf/progs/lpm_trie_bench.c |   31 ++++++++++++++--
 3 files changed, 70 insertions(+), 5 deletions(-)

diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index fd15f60fd5a8..8a41aec89479 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -560,6 +560,8 @@ extern const struct bench bench_htab_mem;
 extern const struct bench bench_crypto_encrypt;
 extern const struct bench bench_crypto_decrypt;
 extern const struct bench bench_sockmap;
+extern const struct bench bench_lpm_trie_noop;
+extern const struct bench bench_lpm_trie_baseline;
 extern const struct bench bench_lpm_trie_lookup;
 extern const struct bench bench_lpm_trie_update;
 extern const struct bench bench_lpm_trie_delete;
@@ -631,6 +633,8 @@ static const struct bench *benchs[] = {
 	&bench_crypto_encrypt,
 	&bench_crypto_decrypt,
 	&bench_sockmap,
+	&bench_lpm_trie_noop,
+	&bench_lpm_trie_baseline,
 	&bench_lpm_trie_lookup,
 	&bench_lpm_trie_update,
 	&bench_lpm_trie_delete,
diff --git a/tools/testing/selftests/bpf/benchs/bench_lpm_trie_map.c b/tools/testing/selftests/bpf/benchs/bench_lpm_trie_map.c
index 32a46c2402ea..4e0f12e359ba 100644
--- a/tools/testing/selftests/bpf/benchs/bench_lpm_trie_map.c
+++ b/tools/testing/selftests/bpf/benchs/bench_lpm_trie_map.c
@@ -87,7 +87,7 @@ static void __lpm_validate(void)
 	};
 }
 
-enum { OP_LOOKUP = 1, OP_UPDATE, OP_DELETE, OP_FREE };
+enum { OP_NOOP=0, OP_BASELINE, OP_LOOKUP, OP_UPDATE, OP_DELETE, OP_FREE };
 
 static void lpm_delete_validate(void)
 {
@@ -175,6 +175,18 @@ static void lpm_setup(void)
 	fill_map(fd);
 }
 
+static void lpm_noop_setup(void)
+{
+	__lpm_setup();
+	ctx.bench->bss->op = OP_NOOP;
+}
+
+static void lpm_baseline_setup(void)
+{
+	__lpm_setup();
+	ctx.bench->bss->op = OP_BASELINE;
+}
+
 static void lpm_lookup_setup(void)
 {
 	lpm_setup();
@@ -208,7 +220,7 @@ static void lpm_measure(struct bench_res *res)
 	res->duration_ns = atomic_swap(&ctx.bench->bss->duration_ns, 0);
 }
 
-/* For LOOKUP, UPDATE, and DELETE */
+/* For NOOP, BASELINE, LOOKUP, UPDATE, and DELETE */
 static void *lpm_producer(void *unused __always_unused)
 {
 	int err;
@@ -310,6 +322,30 @@ static void free_ops_report_final(struct bench_res res[], int res_cnt)
 	       latency / lat_divisor / env.producer_cnt, unit);
 }
 
+/* noop bench measures harness-overhead */
+const struct bench bench_lpm_trie_noop = {
+	.name = "lpm-trie-noop",
+	.argp = &bench_lpm_trie_map_argp,
+	.validate = __lpm_validate,
+	.setup = lpm_noop_setup,
+	.producer_thread = lpm_producer,
+	.measure = lpm_measure,
+	.report_progress = ops_report_progress,
+	.report_final = ops_report_final,
+};
+
+/* baseline overhead for lookup and update */
+const struct bench bench_lpm_trie_baseline = {
+	.name = "lpm-trie-baseline",
+	.argp = &bench_lpm_trie_map_argp,
+	.validate = __lpm_validate,
+	.setup = lpm_baseline_setup,
+	.producer_thread = lpm_producer,
+	.measure = lpm_measure,
+	.report_progress = ops_report_progress,
+	.report_final = ops_report_final,
+};
+
 const struct bench bench_lpm_trie_lookup = {
 	.name = "lpm-trie-lookup",
 	.argp = &bench_lpm_trie_map_argp,
diff --git a/tools/testing/selftests/bpf/progs/lpm_trie_bench.c b/tools/testing/selftests/bpf/progs/lpm_trie_bench.c
index 522e1cbef490..e4a5cecd6560 100644
--- a/tools/testing/selftests/bpf/progs/lpm_trie_bench.c
+++ b/tools/testing/selftests/bpf/progs/lpm_trie_bench.c
@@ -6,6 +6,7 @@
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_core_read.h>
 #include "bpf_misc.h"
+#include "bpf_atomic.h"
 
 #define BPF_OBJ_NAME_LEN 16U
 #define MAX_ENTRIES 100000000
@@ -84,12 +85,30 @@ int BPF_PROG(trie_free_exit, struct work_struct *work)
 	return 0;
 }
 
-static void gen_random_key(struct trie_key *key)
+static __always_inline void gen_random_key(struct trie_key *key)
 {
 	key->prefixlen = prefixlen;
 	key->data = bpf_get_prandom_u32() % nr_entries;
 }
 
+static int noop(__u32 index, __u32 *unused)
+{
+	return 0;
+}
+
+static int baseline(__u32 index, __u32 *unused)
+{
+	struct trie_key key;
+	__s64 blackbox;
+
+	gen_random_key(&key);
+	/* Avoid compiler optimizing out the modulo */
+	barrier_var(blackbox);
+	blackbox = READ_ONCE(key.data);
+
+	return 0;
+}
+
 static int lookup(__u32 index, __u32 *unused)
 {
 	struct trie_key key;
@@ -148,13 +167,19 @@ int BPF_PROG(run_bench)
 	start = bpf_ktime_get_ns();
 
 	switch (op) {
+	case 0:
+		loops = bpf_loop(NR_LOOPS, noop, NULL, 0);
+		break;
 	case 1:
-		loops = bpf_loop(NR_LOOPS, lookup, NULL, 0);
+		loops = bpf_loop(NR_LOOPS, baseline, NULL, 0);
 		break;
 	case 2:
-		loops = bpf_loop(NR_LOOPS, update, NULL, 0);
+		loops = bpf_loop(NR_LOOPS, lookup, NULL, 0);
 		break;
 	case 3:
+		loops = bpf_loop(NR_LOOPS, update, NULL, 0);
+		break;
+	case 4:
 		loops = bpf_loop(NR_LOOPS, delete, &need_refill, 0);
 		break;
 	default:



      parent reply	other threads:[~2025-08-13 15:29 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-07-22 15:01 [PATCH bpf-next v3] selftests/bpf: Add LPM trie microbenchmarks Matt Fleming
2025-07-28 14:34 ` Alexei Starovoitov
2025-07-29 13:56   ` Matt Fleming
2025-07-31 16:41     ` Alexei Starovoitov
2025-08-08 14:21       ` Matt Fleming
2025-08-08 16:42         ` Alexei Starovoitov
2025-08-13 16:59   ` Jesper Dangaard Brouer
2025-08-08 15:51 ` Jesper Dangaard Brouer
2025-08-13 15:29 ` Jesper Dangaard Brouer [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=175509897596.2755384.18413775753563966331.stgit@firesoul \
    --to=hawk@kernel.org \
    --cc=ast@kernel.org \
    --cc=borkmann@iogearbox.net \
    --cc=bpf@vger.kernel.org \
    --cc=kernel-team@cloudflare.com \
    --cc=matt@readmodwrite.com \
    --cc=mfleming@cloudflare.com \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox