From: Jiayuan Chen <jiayuan.chen@linux.dev>
To: bpf@vger.kernel.org, john.fastabend@gmail.com, jakub@cloudflare.com
Cc: Jiayuan Chen <jiayuan.chen@linux.dev>,
"David S. Miller" <davem@davemloft.net>,
Eric Dumazet <edumazet@google.com>,
Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>,
Simon Horman <horms@kernel.org>,
Kuniyuki Iwashima <kuniyu@google.com>,
Willem de Bruijn <willemb@google.com>,
David Ahern <dsahern@kernel.org>,
Neal Cardwell <ncardwell@google.com>,
Andrii Nakryiko <andrii@kernel.org>,
Eduard Zingerman <eddyz87@gmail.com>,
Alexei Starovoitov <ast@kernel.org>,
Daniel Borkmann <daniel@iogearbox.net>,
Martin KaFai Lau <martin.lau@linux.dev>,
Song Liu <song@kernel.org>,
Yonghong Song <yonghong.song@linux.dev>,
KP Singh <kpsingh@kernel.org>,
Stanislav Fomichev <sdf@fomichev.me>, Hao Luo <haoluo@google.com>,
Jiri Olsa <jolsa@kernel.org>, Shuah Khan <shuah@kernel.org>,
Jiapeng Chong <jiapeng.chong@linux.alibaba.com>,
Ihor Solodrai <ihor.solodrai@linux.dev>,
Michal Luczaj <mhal@rbox.co>,
netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
linux-kselftest@vger.kernel.org
Subject: [PATCH bpf-next v1 7/7] selftests/bpf: add splice option to sockmap benchmark
Date: Wed, 4 Mar 2026 14:33:58 +0800 [thread overview]
Message-ID: <20260304063643.14581-8-jiayuan.chen@linux.dev> (raw)
In-Reply-To: <20260304063643.14581-1-jiayuan.chen@linux.dev>
Add --splice option to bench_sockmap that uses splice(2) instead of
read(2) in the consumer path. A global pipe is created once during
setup and reused across iterations to avoid per-call pipe creation
overhead.
When --splice is enabled, the consumer splices data from the socket
into the pipe, then reads from the pipe into the user buffer. The
socket is set to O_NONBLOCK to prevent tcp_splice_read() from
blocking indefinitely, as it only checks sock->file->f_flags for
non-blocking mode, ignoring SPLICE_F_NONBLOCK.
Also increase SO_RCVBUF to 16MB to avoid sk_psock_backlog being
throttled by the default sk_rcvbuf limit, and add a --verify option
to enable data correctness checking (disabled by default for
benchmark accuracy).
Benchmark results with rx-verdict-ingress mode (loopback, 8 CPUs):
read(2): ~4292 MB/s
splice(2) + zero-copy: ~4270 MB/s
splice(2) + always-copy: ~2770 MB/s
Zero-copy splice achieves near-parity with read(2), while the
always-copy fallback is ~35% slower.
Usage:
# Steer softirqs to CPU 7 (rps_cpus takes a hex CPU bitmask; 0x80 = CPU 7)
# to avoid contending with the producer CPU
echo 80 > /sys/class/net/lo/queues/rx-0/rps_cpus
# Raise the receive buffer ceiling so the benchmark can set 16MB rcvbuf
sysctl -w net.core.rmem_max=16777216
# Run the benchmark
./bench sockmap --rx-verdict-ingress --splice -c 2 -p 1 -a -d 30
Signed-off-by: Jiayuan Chen <jiayuan.chen@linux.dev>
---
.../selftests/bpf/benchs/bench_sockmap.c | 57 ++++++++++++++++++-
1 file changed, 56 insertions(+), 1 deletion(-)
diff --git a/tools/testing/selftests/bpf/benchs/bench_sockmap.c b/tools/testing/selftests/bpf/benchs/bench_sockmap.c
index cfc072aa7fff..ffcf5ad8cafa 100644
--- a/tools/testing/selftests/bpf/benchs/bench_sockmap.c
+++ b/tools/testing/selftests/bpf/benchs/bench_sockmap.c
@@ -7,6 +7,9 @@
#include <sys/sendfile.h>
#include <arpa/inet.h>
#include <fcntl.h>
+#include <unistd.h>
+#include <sched.h>
+#include <sys/syscall.h>
#include <argp.h>
#include "bench.h"
#include "bench_sockmap_prog.skel.h"
@@ -46,6 +49,8 @@ enum SOCKMAP_ARG_FLAG {
ARG_CTL_RX_STRP,
ARG_CONSUMER_DELAY_TIME,
ARG_PRODUCER_DURATION,
+ ARG_CTL_SPLICE,
+ ARG_CTL_VERIFY,
};
#define TXMODE_NORMAL() \
@@ -110,6 +115,9 @@ static struct socmap_ctx {
int delay_consumer;
int prod_run_time;
int strp_size;
+ bool use_splice;
+ bool verify;
+ int pipefd[2];
} ctx = {
.prod_send = 0,
.user_read = 0,
@@ -119,6 +127,9 @@ static struct socmap_ctx {
.delay_consumer = 0,
.prod_run_time = 0,
.strp_size = 0,
+ .use_splice = false,
+ .verify = false,
+ .pipefd = {-1, -1},
};
static void bench_sockmap_prog_destroy(void)
@@ -130,6 +141,11 @@ static void bench_sockmap_prog_destroy(void)
close(ctx.fds[i]);
}
+ if (ctx.pipefd[0] >= 0)
+ close(ctx.pipefd[0]);
+ if (ctx.pipefd[1] >= 0)
+ close(ctx.pipefd[1]);
+
bench_sockmap_prog__destroy(ctx.skel);
}
@@ -320,6 +336,7 @@ static int setup_tx_sockmap(void)
static void setup(void)
{
+ int rcvbuf = 16 * 1024 * 1024;
int err;
ctx.skel = bench_sockmap_prog__open_and_load();
@@ -350,6 +367,18 @@ static void setup(void)
goto err;
}
+ if (ctx.use_splice) {
+ if (pipe(ctx.pipefd)) {
+ fprintf(stderr, "pipe error:%d\n", errno);
+ goto err;
+ }
+ }
+
+ setsockopt(ctx.c2, SOL_SOCKET, SO_RCVBUF, &rcvbuf, sizeof(rcvbuf));
+
+ if (ctx.use_splice)
+ set_non_block(ctx.c2, true);
+
return;
err:
@@ -368,6 +397,8 @@ static void measure(struct bench_res *res)
static void verify_data(int *check_pos, char *buf, int rcv)
{
+ if (!ctx.verify)
+ return;
for (int i = 0 ; i < rcv; i++) {
if (buf[i] != snd_data[(*check_pos) % DATA_REPEAT_SIZE]) {
fprintf(stderr, "verify data fail");
@@ -388,6 +419,9 @@ static void *consumer(void *input)
char *buf = malloc(recv_buf_size);
int delay_read = ctx.delay_consumer;
+ printf("cons[%d] started, tid=%ld cpu=%d\n",
+ tid, syscall(SYS_gettid), sched_getcpu());
+
if (!buf) {
fprintf(stderr, "fail to init read buffer");
return NULL;
@@ -419,7 +453,15 @@ static void *consumer(void *input)
}
/* read real endpoint by consumer 0 */
atomic_inc(&ctx.read_calls);
- rcv = read(ctx.c2, buf, recv_buf_size);
+ if (ctx.use_splice) {
+ rcv = splice(ctx.c2, NULL, ctx.pipefd[1],
+ NULL, recv_buf_size,
+ SPLICE_F_NONBLOCK);
+ if (rcv > 0)
+ rcv = read(ctx.pipefd[0], buf, rcv);
+ } else {
+ rcv = read(ctx.c2, buf, recv_buf_size);
+ }
if (rcv < 0 && errno != EAGAIN) {
fprintf(stderr, "%s fail to read c2 %d\n", __func__, errno);
return NULL;
@@ -440,6 +482,9 @@ static void *producer(void *input)
int target;
FILE *file;
+ printf("prod started, tid=%ld cpu=%d\n",
+ syscall(SYS_gettid), sched_getcpu());
+
file = tmpfile();
if (!file) {
fprintf(stderr, "create file for sendfile");
@@ -554,6 +599,10 @@ static const struct argp_option opts[] = {
"delay consumer start"},
{ "producer-duration", ARG_PRODUCER_DURATION, "SEC", 0,
"producer duration"},
+ { "splice", ARG_CTL_SPLICE, NULL, 0,
+ "use splice instead of read for consumer"},
+ { "verify", ARG_CTL_VERIFY, NULL, 0,
+ "verify received data correctness"},
{},
};
@@ -572,6 +621,12 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
case ARG_CTL_RX_STRP:
ctx.strp_size = strtol(arg, NULL, 10);
break;
+ case ARG_CTL_SPLICE:
+ ctx.use_splice = true;
+ break;
+ case ARG_CTL_VERIFY:
+ ctx.verify = true;
+ break;
default:
return ARGP_ERR_UNKNOWN;
}
--
2.43.0
prev parent reply other threads:[~2026-03-04 6:40 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-03-04 6:33 [PATCH bpf-next v1 0/7] bpf/sockmap: add splice support for tcp_bpf Jiayuan Chen
2026-03-04 6:33 ` [PATCH bpf-next v1 1/7] net: add splice_read to struct proto and set it in tcp_prot/tcpv6_prot Jiayuan Chen
2026-03-04 6:33 ` [PATCH bpf-next v1 2/7] inet: add inet_splice_read() and use it in inet_stream_ops/inet6_stream_ops Jiayuan Chen
2026-03-04 6:33 ` [PATCH bpf-next v1 3/7] tcp_bpf: refactor recvmsg with read actor abstraction Jiayuan Chen
2026-03-04 7:14 ` bot+bpf-ci
2026-03-04 6:33 ` [PATCH bpf-next v1 4/7] tcp_bpf: add splice_read support for sockmap Jiayuan Chen
2026-03-04 7:27 ` bot+bpf-ci
2026-03-04 6:33 ` [PATCH bpf-next v1 5/7] tcp_bpf: optimize splice_read with zero-copy for non-slab pages Jiayuan Chen
2026-03-04 6:33 ` [PATCH bpf-next v1 6/7] selftests/bpf: add splice_read tests for sockmap Jiayuan Chen
2026-03-06 17:25 ` Mykyta Yatsenko
2026-03-04 6:33 ` Jiayuan Chen [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260304063643.14581-8-jiayuan.chen@linux.dev \
--to=jiayuan.chen@linux.dev \
--cc=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=davem@davemloft.net \
--cc=dsahern@kernel.org \
--cc=eddyz87@gmail.com \
--cc=edumazet@google.com \
--cc=haoluo@google.com \
--cc=horms@kernel.org \
--cc=ihor.solodrai@linux.dev \
--cc=jakub@cloudflare.com \
--cc=jiapeng.chong@linux.alibaba.com \
--cc=john.fastabend@gmail.com \
--cc=jolsa@kernel.org \
--cc=kpsingh@kernel.org \
--cc=kuba@kernel.org \
--cc=kuniyu@google.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-kselftest@vger.kernel.org \
--cc=martin.lau@linux.dev \
--cc=mhal@rbox.co \
--cc=ncardwell@google.com \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=sdf@fomichev.me \
--cc=shuah@kernel.org \
--cc=song@kernel.org \
--cc=willemb@google.com \
--cc=yonghong.song@linux.dev \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.