public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Jiayuan Chen <jiayuan.chen@linux.dev>
To: bpf@vger.kernel.org, john.fastabend@gmail.com, jakub@cloudflare.com
Cc: Jiayuan Chen <jiayuan.chen@linux.dev>,
	"David S. Miller" <davem@davemloft.net>,
	Eric Dumazet <edumazet@google.com>,
	Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>,
	Simon Horman <horms@kernel.org>,
	Kuniyuki Iwashima <kuniyu@google.com>,
	Willem de Bruijn <willemb@google.com>,
	David Ahern <dsahern@kernel.org>,
	Neal Cardwell <ncardwell@google.com>,
	Andrii Nakryiko <andrii@kernel.org>,
	Eduard Zingerman <eddyz87@gmail.com>,
	Alexei Starovoitov <ast@kernel.org>,
	Daniel Borkmann <daniel@iogearbox.net>,
	Martin KaFai Lau <martin.lau@linux.dev>,
	Song Liu <song@kernel.org>,
	Yonghong Song <yonghong.song@linux.dev>,
	KP Singh <kpsingh@kernel.org>,
	Stanislav Fomichev <sdf@fomichev.me>, Hao Luo <haoluo@google.com>,
	Jiri Olsa <jolsa@kernel.org>, Shuah Khan <shuah@kernel.org>,
	Jiapeng Chong <jiapeng.chong@linux.alibaba.com>,
	Ihor Solodrai <ihor.solodrai@linux.dev>,
	Michal Luczaj <mhal@rbox.co>,
	netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-kselftest@vger.kernel.org
Subject: [PATCH bpf-next v1 7/7] selftests/bpf: add splice option to sockmap benchmark
Date: Wed,  4 Mar 2026 14:33:58 +0800	[thread overview]
Message-ID: <20260304063643.14581-8-jiayuan.chen@linux.dev> (raw)
In-Reply-To: <20260304063643.14581-1-jiayuan.chen@linux.dev>

Add a --splice option to bench_sockmap that uses splice(2) instead of
read(2) in the consumer path. A global pipe is created once during
setup and reused across iterations to avoid per-call pipe creation
overhead.

When --splice is enabled, the consumer splices data from the socket
into the pipe, then reads from the pipe into the user buffer. The
socket is set to O_NONBLOCK to prevent tcp_splice_read() from
blocking indefinitely, as it only checks sock->file->f_flags for
non-blocking mode, ignoring SPLICE_F_NONBLOCK.

Also increase SO_RCVBUF to 16MB to avoid sk_psock_backlog being
throttled by the default sk_rcvbuf limit, and add a --verify option
that enables data correctness checking (disabled by default for
benchmark accuracy). The producer and consumer threads now also print
their tid and current CPU when they start.

Benchmark results with rx-verdict-ingress mode (loopback, 8 CPUs):

  read(2):                  ~4292 MB/s
  splice(2) + zero-copy:    ~4270 MB/s
  splice(2) + always-copy:  ~2770 MB/s

Zero-copy splice achieves near-parity with read(2), while the
always-copy fallback is ~35% slower.

Usage:
  # Steer softirqs to CPU 7 to avoid contending with the producer CPU
  echo 80 > /sys/class/net/lo/queues/rx-0/rps_cpus
  # Raise the receive buffer ceiling so the benchmark can set 16MB rcvbuf
  sysctl -w net.core.rmem_max=16777216
  # Run the benchmark
  ./bench sockmap --rx-verdict-ingress --splice -c 2 -p 1 -a -d 30

Signed-off-by: Jiayuan Chen <jiayuan.chen@linux.dev>
---
 .../selftests/bpf/benchs/bench_sockmap.c      | 57 ++++++++++++++++++-
 1 file changed, 56 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/benchs/bench_sockmap.c b/tools/testing/selftests/bpf/benchs/bench_sockmap.c
index cfc072aa7fff..ffcf5ad8cafa 100644
--- a/tools/testing/selftests/bpf/benchs/bench_sockmap.c
+++ b/tools/testing/selftests/bpf/benchs/bench_sockmap.c
@@ -7,6 +7,9 @@
 #include <sys/sendfile.h>
 #include <arpa/inet.h>
 #include <fcntl.h>
+#include <unistd.h>
+#include <sched.h>
+#include <sys/syscall.h>
 #include <argp.h>
 #include "bench.h"
 #include "bench_sockmap_prog.skel.h"
@@ -46,6 +49,8 @@ enum SOCKMAP_ARG_FLAG {
 	ARG_CTL_RX_STRP,
 	ARG_CONSUMER_DELAY_TIME,
 	ARG_PRODUCER_DURATION,
+	ARG_CTL_SPLICE,
+	ARG_CTL_VERIFY,
 };
 
 #define TXMODE_NORMAL()				\
@@ -110,6 +115,9 @@ static struct socmap_ctx {
 	int		delay_consumer;
 	int		prod_run_time;
 	int		strp_size;
+	bool		use_splice;
+	bool		verify;
+	int		pipefd[2];
 } ctx = {
 	.prod_send	= 0,
 	.user_read	= 0,
@@ -119,6 +127,9 @@ static struct socmap_ctx {
 	.delay_consumer = 0,
 	.prod_run_time	= 0,
 	.strp_size	= 0,
+	.use_splice	= false,
+	.verify		= false,
+	.pipefd		= {-1, -1},
 };
 
 static void bench_sockmap_prog_destroy(void)
@@ -130,6 +141,11 @@ static void bench_sockmap_prog_destroy(void)
 			close(ctx.fds[i]);
 	}
 
+	if (ctx.pipefd[0] >= 0)
+		close(ctx.pipefd[0]);
+	if (ctx.pipefd[1] >= 0)
+		close(ctx.pipefd[1]);
+
 	bench_sockmap_prog__destroy(ctx.skel);
 }
 
@@ -320,6 +336,7 @@ static int setup_tx_sockmap(void)
 
 static void setup(void)
 {
+	int rcvbuf = 16 * 1024 * 1024;
 	int err;
 
 	ctx.skel = bench_sockmap_prog__open_and_load();
@@ -350,6 +367,18 @@ static void setup(void)
 		goto err;
 	}
 
+	if (ctx.use_splice) {
+		if (pipe(ctx.pipefd)) {
+			fprintf(stderr, "pipe error:%d\n", errno);
+			goto err;
+		}
+	}
+
+	setsockopt(ctx.c2, SOL_SOCKET, SO_RCVBUF, &rcvbuf, sizeof(rcvbuf));
+
+	if (ctx.use_splice)
+		set_non_block(ctx.c2, true);
+
 	return;
 
 err:
@@ -368,6 +397,8 @@ static void measure(struct bench_res *res)
 
 static void verify_data(int *check_pos, char *buf, int rcv)
 {
+	if (!ctx.verify)
+		return;
 	for (int i = 0 ; i < rcv; i++) {
 		if (buf[i] != snd_data[(*check_pos) % DATA_REPEAT_SIZE]) {
 			fprintf(stderr, "verify data fail");
@@ -388,6 +419,9 @@ static void *consumer(void *input)
 	char *buf = malloc(recv_buf_size);
 	int delay_read = ctx.delay_consumer;
 
+	printf("cons[%d] started, tid=%ld cpu=%d\n",
+	       tid, syscall(SYS_gettid), sched_getcpu());
+
 	if (!buf) {
 		fprintf(stderr, "fail to init read buffer");
 		return NULL;
@@ -419,7 +453,15 @@ static void *consumer(void *input)
 			}
 			/* read real endpoint by consumer 0 */
 			atomic_inc(&ctx.read_calls);
-			rcv = read(ctx.c2, buf, recv_buf_size);
+			if (ctx.use_splice) {
+				rcv = splice(ctx.c2, NULL, ctx.pipefd[1],
+					     NULL, recv_buf_size,
+					     SPLICE_F_NONBLOCK);
+				if (rcv > 0)
+					rcv = read(ctx.pipefd[0], buf, rcv);
+			} else {
+				rcv = read(ctx.c2, buf, recv_buf_size);
+			}
 			if (rcv < 0 && errno != EAGAIN) {
 				fprintf(stderr, "%s fail to read c2 %d\n", __func__, errno);
 				return NULL;
@@ -440,6 +482,9 @@ static void *producer(void *input)
 	int target;
 	FILE *file;
 
+	printf("prod started, tid=%ld cpu=%d\n",
+	       syscall(SYS_gettid), sched_getcpu());
+
 	file = tmpfile();
 	if (!file) {
 		fprintf(stderr, "create file for sendfile");
@@ -554,6 +599,10 @@ static const struct argp_option opts[] = {
 		"delay consumer start"},
 	{ "producer-duration", ARG_PRODUCER_DURATION, "SEC", 0,
 		"producer duration"},
+	{ "splice", ARG_CTL_SPLICE, NULL, 0,
+		"use splice instead of read for consumer"},
+	{ "verify", ARG_CTL_VERIFY, NULL, 0,
+		"verify received data correctness"},
 	{},
 };
 
@@ -572,6 +621,12 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
 	case ARG_CTL_RX_STRP:
 		ctx.strp_size = strtol(arg, NULL, 10);
 		break;
+	case ARG_CTL_SPLICE:
+		ctx.use_splice = true;
+		break;
+	case ARG_CTL_VERIFY:
+		ctx.verify = true;
+		break;
 	default:
 		return ARGP_ERR_UNKNOWN;
 	}
-- 
2.43.0


      parent reply	other threads:[~2026-03-04  6:40 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-03-04  6:33 [PATCH bpf-next v1 0/7] bpf/sockmap: add splice support for tcp_bpf Jiayuan Chen
2026-03-04  6:33 ` [PATCH bpf-next v1 1/7] net: add splice_read to struct proto and set it in tcp_prot/tcpv6_prot Jiayuan Chen
2026-03-04  6:33 ` [PATCH bpf-next v1 2/7] inet: add inet_splice_read() and use it in inet_stream_ops/inet6_stream_ops Jiayuan Chen
2026-03-04  6:33 ` [PATCH bpf-next v1 3/7] tcp_bpf: refactor recvmsg with read actor abstraction Jiayuan Chen
2026-03-04  7:14   ` bot+bpf-ci
2026-03-04  6:33 ` [PATCH bpf-next v1 4/7] tcp_bpf: add splice_read support for sockmap Jiayuan Chen
2026-03-04  7:27   ` bot+bpf-ci
2026-03-04  6:33 ` [PATCH bpf-next v1 5/7] tcp_bpf: optimize splice_read with zero-copy for non-slab pages Jiayuan Chen
2026-03-04  6:33 ` [PATCH bpf-next v1 6/7] selftests/bpf: add splice_read tests for sockmap Jiayuan Chen
2026-03-06 17:25   ` Mykyta Yatsenko
2026-03-04  6:33 ` Jiayuan Chen [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260304063643.14581-8-jiayuan.chen@linux.dev \
    --to=jiayuan.chen@linux.dev \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=davem@davemloft.net \
    --cc=dsahern@kernel.org \
    --cc=eddyz87@gmail.com \
    --cc=edumazet@google.com \
    --cc=haoluo@google.com \
    --cc=horms@kernel.org \
    --cc=ihor.solodrai@linux.dev \
    --cc=jakub@cloudflare.com \
    --cc=jiapeng.chong@linux.alibaba.com \
    --cc=john.fastabend@gmail.com \
    --cc=jolsa@kernel.org \
    --cc=kpsingh@kernel.org \
    --cc=kuba@kernel.org \
    --cc=kuniyu@google.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-kselftest@vger.kernel.org \
    --cc=martin.lau@linux.dev \
    --cc=mhal@rbox.co \
    --cc=ncardwell@google.com \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=sdf@fomichev.me \
    --cc=shuah@kernel.org \
    --cc=song@kernel.org \
    --cc=willemb@google.com \
    --cc=yonghong.song@linux.dev \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox