From: "Clément Léger" <cleger@meta.com>
To: <io-uring@vger.kernel.org>,
Pavel Begunkov <asml.silence@gmail.com>,
"Jens Axboe" <axboe@kernel.dk>
Cc: "Clément Léger" <cleger@meta.com>,
linux-doc@vger.kernel.org, linux-kernel@vger.kernel.org,
linux-kselftest@vger.kernel.org, netdev@vger.kernel.org,
"David S. Miller" <davem@davemloft.net>,
"Eric Dumazet" <edumazet@google.com>,
"Jakub Kicinski" <kuba@kernel.org>,
"Paolo Abeni" <pabeni@redhat.com>,
"Simon Horman" <horms@kernel.org>,
"Jonathan Corbet" <corbet@lwn.net>,
"Shuah Khan" <skhan@linuxfoundation.org>,
"Vishwanath Seshagiri" <vishs@fb.com>
Subject: [PATCH 3/5] io_uring/zcrx: add shared-memory notification statistics
Date: Wed, 22 Apr 2026 04:25:14 -0700 [thread overview]
Message-ID: <20260422112522.3316660-4-cleger@meta.com> (raw)
In-Reply-To: <20260422112522.3316660-1-cleger@meta.com>
Add support for an optional stats struct embedded in the refill queue
region, allowing userspace to monitor copy-fallback events (number of
copies and bytes copied) in real time.
Userspace queries the stats struct size and alignment via
IO_URING_QUERY_ZCRX_NOTIF (notif_stats_size / notif_stats_off_alignment),
then sets ZCRX_NOTIF_DESC_FLAG_STATS and provides a stats_offset in
zcrx_notification_desc pointing to a location within the refill queue
region, at or after the end of the rqe array.
The kernel zeroes the counters at registration time and then updates them
in place with READ_ONCE()/WRITE_ONCE() on every copy-fallback event.
Signed-off-by: Clément Léger <cleger@meta.com>
---
include/uapi/linux/io_uring/query.h | 12 +++++++
include/uapi/linux/io_uring/zcrx.h | 15 +++++++--
io_uring/query.c | 14 ++++++++
io_uring/zcrx.c | 50 +++++++++++++++++++++++++++--
io_uring/zcrx.h | 1 +
5 files changed, 88 insertions(+), 4 deletions(-)
diff --git a/include/uapi/linux/io_uring/query.h b/include/uapi/linux/io_uring/query.h
index 95500759cc13..738c35c7d05c 100644
--- a/include/uapi/linux/io_uring/query.h
+++ b/include/uapi/linux/io_uring/query.h
@@ -23,6 +23,7 @@ enum {
IO_URING_QUERY_OPCODES = 0,
IO_URING_QUERY_ZCRX = 1,
IO_URING_QUERY_SCQ = 2,
+ IO_URING_QUERY_ZCRX_NOTIF = 3,
__IO_URING_QUERY_MAX,
};
@@ -62,6 +63,17 @@ struct io_uring_query_zcrx {
__u64 __resv2;
};
+struct io_uring_query_zcrx_notif {
+ /* Bitmask of supported ZCRX_NOTIF_* flags */
+ __u32 notif_flags;
+ /* Size of struct io_uring_zcrx_notif_stats, in bytes */
+ __u32 notif_stats_size;
+ /* Required alignment of the stats struct within the region (i.e. of stats_offset) */
+ __u32 notif_stats_off_alignment;
+ __u32 resv1;
+ __u64 __resv2[10];
+};
+
struct io_uring_query_scq {
/* The SQ/CQ rings header size */
__u64 hdr_size;
diff --git a/include/uapi/linux/io_uring/zcrx.h b/include/uapi/linux/io_uring/zcrx.h
index e0c0079626c8..ae9bbca3004c 100644
--- a/include/uapi/linux/io_uring/zcrx.h
+++ b/include/uapi/linux/io_uring/zcrx.h
@@ -73,11 +73,22 @@ enum zcrx_notification_type {
ZCRX_NOTIF_COPY = 1 << 1
};
+enum zcrx_notification_desc_flags {
+ /* If set, zcrx_notification_desc::stats_offset locates a struct io_uring_zcrx_notif_stats */
+ ZCRX_NOTIF_DESC_FLAG_STATS = 1 << 0,
+};
+
+struct io_uring_zcrx_notif_stats {
+ __u64 copy_count; /* cumulative count of successful copy-fallback frag copies */
+ __u64 copy_bytes; /* cumulative bytes copied by the copy fallback */
+};
+
struct zcrx_notification_desc {
__u64 user_data;
__u32 type_mask;
- __u32 __resv1;
- __u64 __resv2[10];
+ __u32 flags; /* see enum zcrx_notification_desc_flags */
+ __u64 stats_offset; /* offset from the beginning of refill ring region for stats */
+ __u64 __resv2[9];
};
/*
diff --git a/io_uring/query.c b/io_uring/query.c
index c1704d088374..3591106e139d 100644
--- a/io_uring/query.c
+++ b/io_uring/query.c
@@ -9,6 +9,7 @@
union io_query_data {
struct io_uring_query_opcode opcodes;
struct io_uring_query_zcrx zcrx;
+ struct io_uring_query_zcrx_notif zcrx_notif;
struct io_uring_query_scq scq;
};
@@ -44,6 +45,16 @@ static ssize_t io_query_zcrx(union io_query_data *data)
return sizeof(*e);
}
+/*
+ * Fill in the reply for IO_URING_QUERY_ZCRX_NOTIF.
+ *
+ * Reports the notification type mask and the size and required alignment of
+ * struct io_uring_zcrx_notif_stats. The whole struct is reported as the reply
+ * (sizeof(*e) is returned), so it is built with a designated initialiser:
+ * resv1 and __resv2 are then zero instead of keeping whatever the buffer
+ * held before the call.
+ *
+ * Returns the number of valid bytes in @data.
+ */
+static ssize_t io_query_zcrx_notif(union io_query_data *data)
+{
+	struct io_uring_query_zcrx_notif *e = &data->zcrx_notif;
+
+	*e = (struct io_uring_query_zcrx_notif) {
+		.notif_flags = ZCRX_NOTIF_TYPE_MASK,
+		.notif_stats_size = sizeof(struct io_uring_zcrx_notif_stats),
+		.notif_stats_off_alignment =
+			__alignof__(struct io_uring_zcrx_notif_stats),
+	};
+	return sizeof(*e);
+}
+
static ssize_t io_query_scq(union io_query_data *data)
{
struct io_uring_query_scq *e = &data->scq;
@@ -83,6 +94,9 @@ static int io_handle_query_entry(union io_query_data *data, void __user *uhdr,
case IO_URING_QUERY_ZCRX:
ret = io_query_zcrx(data);
break;
+ case IO_URING_QUERY_ZCRX_NOTIF:
+ ret = io_query_zcrx_notif(data);
+ break;
case IO_URING_QUERY_SCQ:
ret = io_query_scq(data);
break;
diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index 732e585aa13a..c61f94fb14c3 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -414,6 +414,7 @@ static void io_free_rbuf_ring(struct io_zcrx_ifq *ifq)
io_free_region(ifq->user, &ifq->rq_region);
ifq->rq.ring = NULL;
ifq->rq.rqes = NULL;
+ ifq->notif_stats = NULL;
}
static void io_zcrx_free_area(struct io_zcrx_ifq *ifq,
@@ -841,6 +842,33 @@ static int zcrx_register_netdev(struct io_zcrx_ifq *ifq,
return ret;
}
+/*
+ * Validate the user-supplied placement of the stats struct and, on success,
+ * start using it.
+ *
+ * @ifq:   queue whose refill-queue region (ifq->rq_region) must contain the
+ *         stats struct
+ * @reg:   registration arguments; offsets.rqes and rq_entries give the end of
+ *         the rqe array
+ * @notif: notification descriptor carrying stats_offset
+ *
+ * The stats struct must be suitably aligned, must start at or after the end
+ * of the rqe array and must fit entirely inside the region.
+ *
+ * Returns 0 after pointing ifq->notif_stats at the struct and zeroing it,
+ * -EINVAL for a misaligned offset, or -ERANGE when the struct would overlap
+ * the rqes, the end computation overflows, or it does not fit in the region.
+ */
+static int zcrx_validate_notif_stats(struct io_zcrx_ifq *ifq,
+				     const struct io_uring_zcrx_ifq_reg *reg,
+				     const struct zcrx_notification_desc *notif)
+{
+	/*
+	 * Keep all offsets in 64 bits. stats_offset is a __u64 supplied by
+	 * userspace; narrowing it to size_t would drop the upper bits on
+	 * 32-bit kernels and let an out-of-range offset pass the checks below.
+	 */
+	u64 stats_off = notif->stats_offset;
+	u64 used, end;
+
+	used = reg->offsets.rqes +
+	       (u64)sizeof(struct io_uring_zcrx_rqe) * reg->rq_entries;
+
+	if (!IS_ALIGNED(stats_off, __alignof__(struct io_uring_zcrx_notif_stats)))
+		return -EINVAL;
+	if (stats_off < used)
+		return -ERANGE;
+	if (check_add_overflow(stats_off,
+			       (u64)sizeof(struct io_uring_zcrx_notif_stats),
+			       &end))
+		return -ERANGE;
+	if (end > io_region_size(&ifq->rq_region))
+		return -ERANGE;
+
+	/* stats_off is now known to lie inside the region, so it fits size_t. */
+	ifq->notif_stats = io_region_get_ptr(&ifq->rq_region) + (size_t)stats_off;
+	memset(ifq->notif_stats, 0, sizeof(*ifq->notif_stats));
+
+	return 0;
+}
+
int io_register_zcrx(struct io_ring_ctx *ctx,
struct io_uring_zcrx_ifq_reg __user *arg)
{
@@ -894,7 +922,9 @@ int io_register_zcrx(struct io_ring_ctx *ctx,
return -EFAULT;
if (notif.type_mask & ~ZCRX_NOTIF_TYPE_MASK)
return -EINVAL;
- if (notif.__resv1 || !mem_is_zero(&notif.__resv2, sizeof(notif.__resv2)))
+ if (notif.flags & ~ZCRX_NOTIF_DESC_FLAG_STATS)
+ return -EINVAL;
+ if (!mem_is_zero(&notif.__resv2, sizeof(notif.__resv2)))
return -EINVAL;
ifq = io_zcrx_ifq_alloc(ctx);
@@ -925,6 +955,12 @@ int io_register_zcrx(struct io_ring_ctx *ctx,
if (ret)
goto err;
+ if (notif.flags & ZCRX_NOTIF_DESC_FLAG_STATS) {
+ ret = zcrx_validate_notif_stats(ifq, &reg, &notif);
+ if (ret)
+ goto err;
+ }
+
ifq->kern_readable = !(area.flags & IORING_ZCRX_AREA_DMABUF);
if (!(reg.flags & ZCRX_REG_NODEV)) {
@@ -1133,6 +1169,11 @@ static void zcrx_notif_tw(struct io_tw_req tw_req, io_tw_token_t tw)
kfree_rcu(req, rcu_head);
}
+/*
+ * Add @v to the statistics counter at @p.
+ *
+ * The update is a READ_ONCE() load followed by a WRITE_ONCE() store of the
+ * sum; it is not an atomic read-modify-write.
+ * NOTE(review): this presumably relies on a single writer per ifq - confirm,
+ * otherwise concurrent updates can be lost.
+ */
+static void zcrx_stat_add(__u64 *p, s64 v)
+{
+ WRITE_ONCE(*p, READ_ONCE(*p) + v);
+}
+
static void zcrx_send_notif(struct io_zcrx_ifq *ifq, u32 type_mask)
{
gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN | __GFP_ZERO;
@@ -1513,8 +1554,13 @@ static int io_zcrx_copy_frag(struct io_kiocb *req, struct io_zcrx_ifq *ifq,
int ret;
ret = io_zcrx_copy_chunk(req, ifq, page, off + skb_frag_off(frag), len);
- if (ret > 0)
+ if (ret > 0) {
+ if (ifq->notif_stats) {
+ zcrx_stat_add(&ifq->notif_stats->copy_count, 1);
+ zcrx_stat_add(&ifq->notif_stats->copy_bytes, ret);
+ }
zcrx_send_notif(ifq, ZCRX_NOTIF_COPY);
+ }
return ret;
}
diff --git a/io_uring/zcrx.h b/io_uring/zcrx.h
index 1bd63adaa711..0dcf486ff530 100644
--- a/io_uring/zcrx.h
+++ b/io_uring/zcrx.h
@@ -82,6 +82,7 @@ struct io_zcrx_ifq {
u32 allowed_notif_mask;
u32 fired_notifs;
u64 notif_data;
+ struct io_uring_zcrx_notif_stats *notif_stats;
};
#if defined(CONFIG_IO_URING_ZCRX)
--
2.52.0
next prev parent reply other threads:[~2026-04-22 11:29 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-04-22 11:25 [PATCH 0/5] io_uring/zcrx: add CQE based notifications and stats reporting Clément Léger
2026-04-22 11:25 ` [PATCH 1/5] io_uring/zcrx: notify user when out of buffers Clément Léger
2026-05-12 10:59 ` Pavel Begunkov
2026-05-15 10:26 ` Pavel Begunkov
2026-04-22 11:25 ` [PATCH 2/5] io_uring/zcrx: notify user on frag copy fallback Clément Léger
2026-05-12 11:02 ` Pavel Begunkov
2026-04-22 11:25 ` Clément Léger [this message]
2026-04-22 11:25 ` [PATCH 4/5] Documentation: networking: document zcrx notifications and statistics Clément Léger
2026-04-22 11:25 ` [PATCH 5/5] selftests: iou-zcrx: add notification and stats test for zcrx Clément Léger
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260422112522.3316660-4-cleger@meta.com \
--to=cleger@meta.com \
--cc=asml.silence@gmail.com \
--cc=axboe@kernel.dk \
--cc=corbet@lwn.net \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=horms@kernel.org \
--cc=io-uring@vger.kernel.org \
--cc=kuba@kernel.org \
--cc=linux-doc@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-kselftest@vger.kernel.org \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=skhan@linuxfoundation.org \
--cc=vishs@fb.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.