public inbox for linux-doc@vger.kernel.org
 help / color / mirror / Atom feed
From: "Clément Léger" <cleger@meta.com>
To: <io-uring@vger.kernel.org>,
	Pavel Begunkov <asml.silence@gmail.com>,
	"Jens Axboe" <axboe@kernel.dk>
Cc: "Clément Léger" <cleger@meta.com>,
	linux-doc@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-kselftest@vger.kernel.org, netdev@vger.kernel.org,
	"David S. Miller" <davem@davemloft.net>,
	"Eric Dumazet" <edumazet@google.com>,
	"Jakub Kicinski" <kuba@kernel.org>,
	"Paolo Abeni" <pabeni@redhat.com>,
	"Simon Horman" <horms@kernel.org>,
	"Jonathan Corbet" <corbet@lwn.net>,
	"Shuah Khan" <skhan@linuxfoundation.org>,
	"Vishwanath Seshagiri" <vishs@fb.com>
Subject: [PATCH 5/5] selftests: iou-zcrx: add notification and stats test for zcrx
Date: Wed, 22 Apr 2026 04:25:16 -0700	[thread overview]
Message-ID: <20260422112522.3316660-6-cleger@meta.com> (raw)
In-Reply-To: <20260422112522.3316660-1-cleger@meta.com>

Add a selftest to verify that ZCRX notification are properly delivered
to userspace and that the shared-memory notification stats (copy_count,
copy_bytes) are correctly incremented when zero-copy RX falls back to
copying or when it runs out of buffers.

The test registers a notification descriptor during
IORING_REGISTER_ZCRX_IFQ with a stats region placed after the refill
queue entries. A new -n flag verifies that the copy fallback is
triggered and -b/-a flags allows to check for out of buffer
notification.

To reliably trigger copy fallback, the Python test uses a new
single_no_flow() setup variant that configures tcp-data-split and RSS
but without ethtool flow rule. Without flow steering, traffic arrives
on non-zcrx queues as regular pages, forcing the kernel copy-fallback
path in io_zcrx_copy_frag().

Out-of-buffer notification is verified by using a smaller receive area
and by avoiding recycling the buffers so that the kernel runs out of
buffer quickly.

Signed-off-by: Clément Léger <cleger@meta.com>
---
 .../selftests/drivers/net/hw/iou-zcrx.c       | 112 ++++++++++++++++--
 .../selftests/drivers/net/hw/iou-zcrx.py      |  49 +++++++-
 2 files changed, 149 insertions(+), 12 deletions(-)

diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.c b/tools/testing/selftests/drivers/net/hw/iou-zcrx.c
index 240d13dbc54e..3c95e6460c24 100644
--- a/tools/testing/selftests/drivers/net/hw/iou-zcrx.c
+++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.c
@@ -52,7 +52,27 @@ struct t_io_uring_zcrx_ifq_reg {
 	struct io_uring_zcrx_offsets offsets;
 	__u32	zcrx_id;
 	__u32	rx_buf_len;
-	__u64	__resv[3];
+	__u64	notif_desc;
+	__u64	__resv[2];
+};
+
+#define ZCRX_NOTIF_NO_BUFFERS		(1 << 0)
+#define ZCRX_NOTIF_COPY			(1 << 1)
+#define ZCRX_NOTIF_DESC_FLAG_STATS	(1 << 0)
+
+#define NOTIF_USER_DATA			3
+
+struct t_zcrx_notification_desc {
+	__u64	user_data;
+	__u32	type_mask;
+	__u32	flags;
+	__u64	stats_offset;
+	__u64	__resv2[9];
+};
+
+struct t_io_uring_zcrx_notif_stats {
+	__u64	copy_count;
+	__u64	copy_bytes;
 };
 
 static long page_size;
@@ -84,7 +104,10 @@ static int cfg_oneshot_recvs;
 static int cfg_send_size = SEND_SIZE;
 static struct sockaddr_in6 cfg_addr;
 static unsigned int cfg_rx_buf_len;
+static size_t cfg_area_size;
 static bool cfg_dry_run;
+static bool cfg_copy_fallback;
+static bool cfg_no_buffers;
 
 static char *payload;
 static void *area_ptr;
@@ -95,6 +118,8 @@ static unsigned long area_token;
 static int connfd;
 static bool stop;
 static size_t received;
+static unsigned int notif_received_mask;
+static size_t notif_stats_offset;
 
 static unsigned long gettimeofday_ms(void)
 {
@@ -142,6 +167,7 @@ static void setup_zcrx(struct io_uring *ring)
 {
 	unsigned int ifindex;
 	unsigned int rq_entries = 4096;
+	size_t area_size = cfg_area_size ? cfg_area_size : AREA_SIZE;
 	int ret;
 
 	ifindex = if_nametoindex(cfg_ifname);
@@ -150,7 +176,7 @@ static void setup_zcrx(struct io_uring *ring)
 
 	if (cfg_rx_buf_len && cfg_rx_buf_len != page_size) {
 		area_ptr = mmap(NULL,
-				AREA_SIZE,
+				area_size,
 				PROT_READ | PROT_WRITE,
 				MAP_ANONYMOUS | MAP_PRIVATE |
 				MAP_HUGETLB | MAP_HUGE_2MB,
@@ -162,7 +188,7 @@ static void setup_zcrx(struct io_uring *ring)
 		}
 	} else {
 		area_ptr = mmap(NULL,
-				AREA_SIZE,
+				area_size,
 				PROT_READ | PROT_WRITE,
 				MAP_ANONYMOUS | MAP_PRIVATE,
 				0,
@@ -172,6 +198,12 @@ static void setup_zcrx(struct io_uring *ring)
 	}
 
 	ring_size = get_refill_ring_size(rq_entries);
+
+	if (cfg_copy_fallback) {
+		notif_stats_offset = ring_size;
+		ring_size += ALIGN_UP(sizeof(struct t_io_uring_zcrx_notif_stats), page_size);
+	}
+
 	ring_ptr = mmap(NULL,
 			ring_size,
 			PROT_READ | PROT_WRITE,
@@ -187,10 +219,11 @@ static void setup_zcrx(struct io_uring *ring)
 
 	struct io_uring_zcrx_area_reg area_reg = {
 		.addr = (__u64)(unsigned long)area_ptr,
-		.len = AREA_SIZE,
+		.len = area_size,
 		.flags = 0,
 	};
 
+	struct t_zcrx_notification_desc notif_desc;
 	struct t_io_uring_zcrx_ifq_reg reg = {
 		.if_idx = ifindex,
 		.if_rxq = cfg_queue_id,
@@ -200,11 +233,32 @@ static void setup_zcrx(struct io_uring *ring)
 		.rx_buf_len = cfg_rx_buf_len,
 	};
 
+	if (cfg_copy_fallback || cfg_no_buffers) {
+		__u32 type_mask = 0;
+
+		if (cfg_copy_fallback)
+			type_mask = ZCRX_NOTIF_COPY;
+		if (cfg_no_buffers)
+			type_mask = ZCRX_NOTIF_NO_BUFFERS;
+
+		memset(&notif_desc, 0, sizeof(notif_desc));
+		notif_desc.user_data = NOTIF_USER_DATA;
+		notif_desc.type_mask = type_mask;
+		if (cfg_copy_fallback) {
+			notif_desc.flags = ZCRX_NOTIF_DESC_FLAG_STATS;
+			notif_desc.stats_offset = notif_stats_offset;
+		}
+		reg.notif_desc = (__u64)(unsigned long)&notif_desc;
+	}
+
 	ret = io_uring_register_ifq(ring, (void *)&reg);
 	if (cfg_rx_buf_len && (ret == -EINVAL || ret == -EOPNOTSUPP ||
 			       ret == -ERANGE)) {
 		printf("Large chunks are not supported %i\n", ret);
 		exit(SKIP_CODE);
+	} else if ((cfg_copy_fallback || cfg_no_buffers) && ret == -EINVAL) {
+		printf("Notifications not supported %i\n", ret);
+		exit(SKIP_CODE);
 	} else if (ret) {
 		error(1, 0, "io_uring_register_ifq(): %d", ret);
 	}
@@ -304,10 +358,13 @@ static void process_recvzc(struct io_uring *ring, struct io_uring_cqe *cqe)
 	}
 	received += n;
 
-	rqe = &rq_ring.rqes[(rq_ring.rq_tail & rq_mask)];
-	rqe->off = (rcqe->off & ~IORING_ZCRX_AREA_MASK) | area_token;
-	rqe->len = cqe->res;
-	io_uring_smp_store_release(rq_ring.ktail, ++rq_ring.rq_tail);
+	/* Skip ring refill so that we ran out of buffers quickly */
+	if (!cfg_no_buffers) {
+		rqe = &rq_ring.rqes[(rq_ring.rq_tail & rq_mask)];
+		rqe->off = (rcqe->off & ~IORING_ZCRX_AREA_MASK) | area_token;
+		rqe->len = cqe->res;
+		io_uring_smp_store_release(rq_ring.ktail, ++rq_ring.rq_tail);
+	}
 }
 
 static void server_loop(struct io_uring *ring)
@@ -324,8 +381,15 @@ static void server_loop(struct io_uring *ring)
 			process_accept(ring, cqe);
 		else if (cqe->user_data == 2)
 			process_recvzc(ring, cqe);
-		else
+		else if ((cfg_copy_fallback || cfg_no_buffers) &&
+			 cqe->user_data == NOTIF_USER_DATA) {
+			notif_received_mask |= cqe->res;
+			if (cfg_no_buffers &&
+			    (cqe->res & ZCRX_NOTIF_NO_BUFFERS))
+				stop = true;
+		} else {
 			error(1, 0, "unknown cqe");
+		}
 		count++;
 	}
 	io_uring_cq_advance(ring, count);
@@ -374,6 +438,23 @@ static void run_server(void)
 
 	if (!stop)
 		error(1, 0, "test failed\n");
+
+	if (cfg_copy_fallback) {
+		struct t_io_uring_zcrx_notif_stats *stats =
+			(void *)((char *)ring_ptr + notif_stats_offset);
+
+		if (!(notif_received_mask & ZCRX_NOTIF_COPY))
+			error(1, 0, "expected copy fallback notification");
+		if (!IO_URING_READ_ONCE(stats->copy_count))
+			error(1, 0, "expected copy_count > 0");
+		if (!IO_URING_READ_ONCE(stats->copy_bytes))
+			error(1, 0, "expected copy_bytes > 0");
+	}
+
+	if (cfg_no_buffers) {
+		if (!(notif_received_mask & ZCRX_NOTIF_NO_BUFFERS))
+			error(1, 0, "expected no-buffers notification");
+	}
 }
 
 static void run_client(void)
@@ -425,7 +506,7 @@ static void parse_opts(int argc, char **argv)
 		usage(argv[0]);
 	cfg_payload_len = max_payload_len;
 
-	while ((c = getopt(argc, argv, "sch:p:l:i:q:o:z:x:d")) != -1) {
+	while ((c = getopt(argc, argv, "sch:p:l:i:q:o:z:x:a:dnb")) != -1) {
 		switch (c) {
 		case 's':
 			if (cfg_client)
@@ -466,8 +547,19 @@ static void parse_opts(int argc, char **argv)
 		case 'd':
 			cfg_dry_run = true;
 			break;
+		case 'n':
+			cfg_copy_fallback = true;
+			break;
+		case 'b':
+			cfg_no_buffers = true;
+			break;
+		case 'a':
+			cfg_area_size = strtoul(optarg, NULL, 0) * page_size;
+			break;
 		}
 	}
+	if (cfg_copy_fallback && cfg_no_buffers)
+		error(1, 0, "Pass one of -n or -b");
 
 	if (cfg_server && addr)
 		error(1, 0, "Receiver cannot have -h specified");
diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py
index e81724cb5542..f7f1cbff5959 100755
--- a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py
+++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py
@@ -41,7 +41,9 @@ def set_flow_rule_rss(cfg, rss_ctx_id):
     return int(values)
 
 
-def single(cfg):
+def single_no_flow(cfg):
+    """Like single() but without a flow rule."""
+
     channels = cfg.ethnl.channels_get({'header': {'dev-index': cfg.ifindex}})
     channels = channels['combined-count']
     if channels < 2:
@@ -65,6 +67,9 @@ def single(cfg):
     ethtool(f"-X {cfg.ifname} equal {cfg.target}")
     defer(ethtool, f"-X {cfg.ifname} default")
 
+def single(cfg):
+    single_no_flow(cfg)
+
     flow_rule_id = set_flow_rule(cfg)
     defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}")
 
@@ -130,6 +135,26 @@ def test_zcrx_oneshot(cfg, setup) -> None:
         cmd(tx_cmd, host=cfg.remote)
 
 
+@ksft_variants([
+    KsftNamedVariant("single", single_no_flow),
+])
+def test_zcrx_notif(cfg, setup) -> None:
+    """Test zcrx copy fallback notification.
+
+    Omits the flow rule so traffic arrives on non-zcrx queues as regular
+    pages, forcing the kernel copy-fallback path. Asserts that the
+    ZCRX_NOTIF_COPY notification CQE is delivered."""
+
+    cfg.require_ipver('6')
+
+    setup(cfg)
+    rx_cmd = f"{cfg.bin_local} -s -p {cfg.port} -i {cfg.ifname} -q {cfg.target} -n"
+    tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {cfg.port} -l 12840"
+    with bkg(rx_cmd, exit_wait=True):
+        wait_port_listen(cfg.port, proto="tcp")
+        cmd(tx_cmd, host=cfg.remote)
+
+
 def test_zcrx_large_chunks(cfg) -> None:
     """Test zcrx with large buffer chunks."""
 
@@ -157,6 +182,25 @@ def test_zcrx_large_chunks(cfg) -> None:
         cmd(tx_cmd, host=cfg.remote)
 
 
+@ksft_variants([
+    KsftNamedVariant("single", single),
+])
+def test_zcrx_notif_no_buffers(cfg, setup) -> None:
+    """Test zcrx out-of-buffer notification.
+
+    Skips buffer refill so the pool is quickly exhausted, triggering
+    a ZCRX_NOTIF_NO_BUFFERS notification CQE."""
+
+    cfg.require_ipver('6')
+
+    setup(cfg)
+    rx_cmd = f"{cfg.bin_local} -s -p {cfg.port} -i {cfg.ifname} -q {cfg.target} -b -a 64"
+    tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {cfg.port} -l 12840"
+    with bkg(rx_cmd, exit_wait=True):
+        wait_port_listen(cfg.port, proto="tcp")
+        cmd(tx_cmd, host=cfg.remote, fail=False)
+
+
 def main() -> None:
     with NetDrvEpEnv(__file__) as cfg:
         cfg.bin_local = path.abspath(path.dirname(__file__) + "/../../../drivers/net/hw/iou-zcrx")
@@ -166,7 +210,8 @@ def main() -> None:
         cfg.netnl = NetdevFamily()
         cfg.port = rand_port()
         ksft_run(globs=globals(), cases=[test_zcrx, test_zcrx_oneshot,
-                                        test_zcrx_large_chunks], args=(cfg, ))
+                                        test_zcrx_large_chunks, test_zcrx_notif,
+                                        test_zcrx_notif_no_buffers], args=(cfg, ))
     ksft_exit()
 
 
-- 
2.52.0


      parent reply	other threads:[~2026-04-22 11:30 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-04-22 11:25 [PATCH 0/5] io_uring/zcrx: add CQE based notifications and stats reporting Clément Léger
2026-04-22 11:25 ` [PATCH 1/5] io_uring/zcrx: notify user when out of buffers Clément Léger
2026-04-22 11:25 ` [PATCH 2/5] io_uring/zcrx: notify user on frag copy fallback Clément Léger
2026-04-22 11:25 ` [PATCH 3/5] io_uring/zcrx: add shared-memory notification statistics Clément Léger
2026-04-22 11:25 ` [PATCH 4/5] Documentation: networking: document zcrx notifications and statistics Clément Léger
2026-04-22 11:25 ` Clément Léger [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260422112522.3316660-6-cleger@meta.com \
    --to=cleger@meta.com \
    --cc=asml.silence@gmail.com \
    --cc=axboe@kernel.dk \
    --cc=corbet@lwn.net \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=horms@kernel.org \
    --cc=io-uring@vger.kernel.org \
    --cc=kuba@kernel.org \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-kselftest@vger.kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=skhan@linuxfoundation.org \
    --cc=vishs@fb.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox