From: Jiayuan Chen <jiayuan.chen@linux.dev>
To: bpf@vger.kernel.org, john.fastabend@gmail.com, jakub@cloudflare.com
Cc: Jiayuan Chen <jiayuan.chen@linux.dev>,
"David S. Miller" <davem@davemloft.net>,
Eric Dumazet <edumazet@google.com>,
Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>,
Simon Horman <horms@kernel.org>,
Kuniyuki Iwashima <kuniyu@google.com>,
Willem de Bruijn <willemb@google.com>,
David Ahern <dsahern@kernel.org>,
Neal Cardwell <ncardwell@google.com>,
Andrii Nakryiko <andrii@kernel.org>,
Eduard Zingerman <eddyz87@gmail.com>,
Alexei Starovoitov <ast@kernel.org>,
Daniel Borkmann <daniel@iogearbox.net>,
Martin KaFai Lau <martin.lau@linux.dev>,
Song Liu <song@kernel.org>,
Yonghong Song <yonghong.song@linux.dev>,
KP Singh <kpsingh@kernel.org>,
Stanislav Fomichev <sdf@fomichev.me>, Hao Luo <haoluo@google.com>,
Jiri Olsa <jolsa@kernel.org>, Shuah Khan <shuah@kernel.org>,
Jiapeng Chong <jiapeng.chong@linux.alibaba.com>,
Ihor Solodrai <ihor.solodrai@linux.dev>,
Michal Luczaj <mhal@rbox.co>,
netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
linux-kselftest@vger.kernel.org
Subject: [PATCH bpf-next v1 7/7] selftests/bpf: add splice option to sockmap benchmark
Date: Wed, 4 Mar 2026 14:33:58 +0800 [thread overview]
Message-ID: <20260304063643.14581-8-jiayuan.chen@linux.dev> (raw)
In-Reply-To: <20260304063643.14581-1-jiayuan.chen@linux.dev>
Add --splice option to bench_sockmap that uses splice(2) instead of
read(2) in the consumer path. A global pipe is created once during
setup and reused across iterations to avoid per-call pipe creation
overhead.
When --splice is enabled, the consumer splices data from the socket
into the pipe, then reads from the pipe into the user buffer. The
socket is set to O_NONBLOCK to prevent tcp_splice_read() from
blocking indefinitely, because tcp_splice_read() only checks
sock->file->f_flags for non-blocking mode and ignores
SPLICE_F_NONBLOCK.
Also increase SO_RCVBUF to 16MB to avoid sk_psock_backlog being
throttled by the default sk_rcvbuf limit, and add a --verify option
that enables data correctness checking (disabled by default for
benchmark accuracy). The producer and consumer threads additionally
print their tid and current CPU at startup.
Benchmark results with rx-verdict-ingress mode (loopback, 8 CPUs):
read(2): ~4292 MB/s
splice(2) + zero-copy: ~4270 MB/s
splice(2) + always-copy: ~2770 MB/s
Zero-copy splice achieves near-parity with read(2), while the
always-copy fallback is ~35% slower.
Usage:
# Steer softirqs to CPU 7 to avoid contending with the producer CPU
echo 80 > /sys/class/net/lo/queues/rx-0/rps_cpus
# Raise the receive buffer ceiling so the benchmark can set 16MB rcvbuf
sysctl -w net.core.rmem_max=16777216
# Run the benchmark
./bench sockmap --rx-verdict-ingress --splice -c 2 -p 1 -a -d 30
Signed-off-by: Jiayuan Chen <jiayuan.chen@linux.dev>
---
.../selftests/bpf/benchs/bench_sockmap.c | 57 ++++++++++++++++++-
1 file changed, 56 insertions(+), 1 deletion(-)
diff --git a/tools/testing/selftests/bpf/benchs/bench_sockmap.c b/tools/testing/selftests/bpf/benchs/bench_sockmap.c
index cfc072aa7fff..ffcf5ad8cafa 100644
--- a/tools/testing/selftests/bpf/benchs/bench_sockmap.c
+++ b/tools/testing/selftests/bpf/benchs/bench_sockmap.c
@@ -7,6 +7,9 @@
#include <sys/sendfile.h>
#include <arpa/inet.h>
#include <fcntl.h>
+#include <unistd.h>
+#include <sched.h>
+#include <sys/syscall.h>
#include <argp.h>
#include "bench.h"
#include "bench_sockmap_prog.skel.h"
@@ -46,6 +49,8 @@ enum SOCKMAP_ARG_FLAG {
ARG_CTL_RX_STRP,
ARG_CONSUMER_DELAY_TIME,
ARG_PRODUCER_DURATION,
+ ARG_CTL_SPLICE,
+ ARG_CTL_VERIFY,
};
#define TXMODE_NORMAL() \
@@ -110,6 +115,9 @@ static struct socmap_ctx {
int delay_consumer;
int prod_run_time;
int strp_size;
+ bool use_splice;
+ bool verify;
+ int pipefd[2];
} ctx = {
.prod_send = 0,
.user_read = 0,
@@ -119,6 +127,9 @@ static struct socmap_ctx {
.delay_consumer = 0,
.prod_run_time = 0,
.strp_size = 0,
+ .use_splice = false,
+ .verify = false,
+ .pipefd = {-1, -1},
};
static void bench_sockmap_prog_destroy(void)
@@ -130,6 +141,11 @@ static void bench_sockmap_prog_destroy(void)
close(ctx.fds[i]);
}
+ if (ctx.pipefd[0] >= 0)
+ close(ctx.pipefd[0]);
+ if (ctx.pipefd[1] >= 0)
+ close(ctx.pipefd[1]);
+
bench_sockmap_prog__destroy(ctx.skel);
}
@@ -320,6 +336,7 @@ static int setup_tx_sockmap(void)
static void setup(void)
{
+ int rcvbuf = 16 * 1024 * 1024;
int err;
ctx.skel = bench_sockmap_prog__open_and_load();
@@ -350,6 +367,18 @@ static void setup(void)
goto err;
}
+ if (ctx.use_splice) {
+ if (pipe(ctx.pipefd)) {
+ fprintf(stderr, "pipe error:%d\n", errno);
+ goto err;
+ }
+ }
+
+ setsockopt(ctx.c2, SOL_SOCKET, SO_RCVBUF, &rcvbuf, sizeof(rcvbuf));
+
+ if (ctx.use_splice)
+ set_non_block(ctx.c2, true);
+
return;
err:
@@ -368,6 +397,8 @@ static void measure(struct bench_res *res)
static void verify_data(int *check_pos, char *buf, int rcv)
{
+ if (!ctx.verify)
+ return;
for (int i = 0 ; i < rcv; i++) {
if (buf[i] != snd_data[(*check_pos) % DATA_REPEAT_SIZE]) {
fprintf(stderr, "verify data fail");
@@ -388,6 +419,9 @@ static void *consumer(void *input)
char *buf = malloc(recv_buf_size);
int delay_read = ctx.delay_consumer;
+ printf("cons[%d] started, tid=%ld cpu=%d\n",
+ tid, syscall(SYS_gettid), sched_getcpu());
+
if (!buf) {
fprintf(stderr, "fail to init read buffer");
return NULL;
@@ -419,7 +453,15 @@ static void *consumer(void *input)
}
/* read real endpoint by consumer 0 */
atomic_inc(&ctx.read_calls);
- rcv = read(ctx.c2, buf, recv_buf_size);
+ if (ctx.use_splice) {
+ rcv = splice(ctx.c2, NULL, ctx.pipefd[1],
+ NULL, recv_buf_size,
+ SPLICE_F_NONBLOCK);
+ if (rcv > 0)
+ rcv = read(ctx.pipefd[0], buf, rcv);
+ } else {
+ rcv = read(ctx.c2, buf, recv_buf_size);
+ }
if (rcv < 0 && errno != EAGAIN) {
fprintf(stderr, "%s fail to read c2 %d\n", __func__, errno);
return NULL;
@@ -440,6 +482,9 @@ static void *producer(void *input)
int target;
FILE *file;
+ printf("prod started, tid=%ld cpu=%d\n",
+ syscall(SYS_gettid), sched_getcpu());
+
file = tmpfile();
if (!file) {
fprintf(stderr, "create file for sendfile");
@@ -554,6 +599,10 @@ static const struct argp_option opts[] = {
"delay consumer start"},
{ "producer-duration", ARG_PRODUCER_DURATION, "SEC", 0,
"producer duration"},
+ { "splice", ARG_CTL_SPLICE, NULL, 0,
+ "use splice instead of read for consumer"},
+ { "verify", ARG_CTL_VERIFY, NULL, 0,
+ "verify received data correctness"},
{},
};
@@ -572,6 +621,12 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
case ARG_CTL_RX_STRP:
ctx.strp_size = strtol(arg, NULL, 10);
break;
+ case ARG_CTL_SPLICE:
+ ctx.use_splice = true;
+ break;
+ case ARG_CTL_VERIFY:
+ ctx.verify = true;
+ break;
default:
return ARGP_ERR_UNKNOWN;
}
--
2.43.0
prev parent reply other threads:[~2026-03-04 6:40 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-03-04 6:33 [PATCH bpf-next v1 0/7] bpf/sockmap: add splice support for tcp_bpf Jiayuan Chen
2026-03-04 6:33 ` [PATCH bpf-next v1 1/7] net: add splice_read to struct proto and set it in tcp_prot/tcpv6_prot Jiayuan Chen
2026-03-04 6:33 ` [PATCH bpf-next v1 2/7] inet: add inet_splice_read() and use it in inet_stream_ops/inet6_stream_ops Jiayuan Chen
2026-03-04 6:33 ` [PATCH bpf-next v1 3/7] tcp_bpf: refactor recvmsg with read actor abstraction Jiayuan Chen
2026-03-04 7:14 ` bot+bpf-ci
2026-03-04 6:33 ` [PATCH bpf-next v1 4/7] tcp_bpf: add splice_read support for sockmap Jiayuan Chen
2026-03-04 7:27 ` bot+bpf-ci
2026-03-04 6:33 ` [PATCH bpf-next v1 5/7] tcp_bpf: optimize splice_read with zero-copy for non-slab pages Jiayuan Chen
2026-03-04 6:33 ` [PATCH bpf-next v1 6/7] selftests/bpf: add splice_read tests for sockmap Jiayuan Chen
2026-03-06 17:25 ` Mykyta Yatsenko
2026-03-04 6:33 ` Jiayuan Chen [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260304063643.14581-8-jiayuan.chen@linux.dev \
--to=jiayuan.chen@linux.dev \
--cc=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=davem@davemloft.net \
--cc=dsahern@kernel.org \
--cc=eddyz87@gmail.com \
--cc=edumazet@google.com \
--cc=haoluo@google.com \
--cc=horms@kernel.org \
--cc=ihor.solodrai@linux.dev \
--cc=jakub@cloudflare.com \
--cc=jiapeng.chong@linux.alibaba.com \
--cc=john.fastabend@gmail.com \
--cc=jolsa@kernel.org \
--cc=kpsingh@kernel.org \
--cc=kuba@kernel.org \
--cc=kuniyu@google.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-kselftest@vger.kernel.org \
--cc=martin.lau@linux.dev \
--cc=mhal@rbox.co \
--cc=ncardwell@google.com \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=sdf@fomichev.me \
--cc=shuah@kernel.org \
--cc=song@kernel.org \
--cc=willemb@google.com \
--cc=yonghong.song@linux.dev \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.