From: Pavel Begunkov <asml.silence@gmail.com>
To: io-uring@vger.kernel.org
Cc: asml.silence@gmail.com
Subject: [zcrx-next 2/2] io_uring/zcrx: allow synchronous buffer return
Date: Sun, 17 Aug 2025 23:44:58 +0100
Message-ID: <3f915dbb730c2a8bdaccfb83f1208ed931a998be.1755468077.git.asml.silence@gmail.com>
In-Reply-To: <cover.1755468077.git.asml.silence@gmail.com>
Returning buffers via the refill ring is performant and convenient,
but it becomes a problem if the user misconfigures the ring size and
the ring fills up. Add a synchronous way to return buffers back to
the page pool via a new register opcode, IORING_REGISTER_ZCRX_REFILL.
It processes the user's rqe array in batches and returns the number
of entries consumed, and is meant to serve as a reliable slow path
for refilling.
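
As an illustration only (not part of the patch): a minimal userspace
sketch of the slow path. It assumes an already registered zcrx
instance and a uapi header with the additions below; the helper name
zcrx_refill_sync is made up here, and since no liburing wrapper is
assumed, the raw io_uring_register(2) syscall is used directly:

	#include <linux/io_uring.h>
	#include <string.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	/*
	 * Synchronously return nr buffers described by rqes. Returns the
	 * number of entries the kernel consumed, or -1 with errno set.
	 */
	static int zcrx_refill_sync(int ring_fd, __u32 zcrx_id,
				    struct io_uring_zcrx_rqe *rqes, __u32 nr)
	{
		struct io_uring_zcrx_refill zr;

		memset(&zr, 0, sizeof(zr));	/* __resv must be zero */
		zr.zcrx_id = zcrx_id;
		zr.nr_entries = nr;
		zr.rqes = (__u64)(unsigned long)rqes;

		/* nr_args must be 0 for IORING_REGISTER_ZCRX_REFILL */
		return syscall(__NR_io_uring_register, ring_fd,
			       IORING_REGISTER_ZCRX_REFILL, &zr, 0);
	}

A return value smaller than nr means a later batch could not be
copied; the caller can retry the remaining entries.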
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 include/uapi/linux/io_uring.h | 10 ++++++
 io_uring/register.c           |  3 ++
 io_uring/zcrx.c               | 66 ++++++++++++++++++++++++++++++++++++++
 io_uring/zcrx.h               |  7 ++++
 4 files changed, 86 insertions(+)
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 6957dc539d83..97b206df4cc1 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -665,6 +665,9 @@ enum io_uring_register_op {
IORING_REGISTER_MEM_REGION = 34,
+ /* return zcrx buffers back into circulation */
+ IORING_REGISTER_ZCRX_REFILL = 35,
+
/* this goes last */
IORING_REGISTER_LAST,
@@ -1046,6 +1049,13 @@ struct io_uring_zcrx_ifq_reg {
__u64 __resv[3];
};
+struct io_uring_zcrx_refill {
+	__u32	zcrx_id;	/* id of the zcrx instance to refill */
+	__u32	nr_entries;	/* number of entries in the rqes array */
+	__u64	rqes;		/* pointer to an array of struct io_uring_zcrx_rqe */
+	__u64	__resv[2];	/* must be zeroed */
+};
+
#ifdef __cplusplus
}
#endif
diff --git a/io_uring/register.c b/io_uring/register.c
index a59589249fce..5155ea627f65 100644
--- a/io_uring/register.c
+++ b/io_uring/register.c
@@ -835,6 +835,9 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
break;
ret = io_register_mem_region(ctx, arg);
break;
+ case IORING_REGISTER_ZCRX_REFILL:
+ ret = io_zcrx_return_bufs(ctx, arg, nr_args);
+ break;
default:
ret = -EINVAL;
break;
diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index d510ebc3d382..4540e5cd7430 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -922,6 +922,72 @@ static const struct memory_provider_ops io_uring_pp_zc_ops = {
.uninstall = io_pp_uninstall,
};
+#define IO_ZCRX_MAX_SYS_REFILL_BUFS (1 << 16)
+#define IO_ZCRX_SYS_REFILL_BATCH 32
+
+static void io_return_buffers(struct io_zcrx_ifq *ifq,
+ struct io_uring_zcrx_rqe *rqes, unsigned nr)
+{
+ int i;
+
+ for (i = 0; i < nr; i++) {
+ struct net_iov *niov;
+		netmem_ref netmem;
+		bool put;
+
+ if (!io_parse_rqe(&rqes[i], ifq, &niov))
+ continue;
+
+		scoped_guard(spinlock_bh, &ifq->rq_lock)
+			put = io_zcrx_put_niov_uref(niov);
+		/* the user held no reference on this entry, skip it */
+		if (!put)
+			continue;
+
+ netmem = net_iov_to_netmem(niov);
+ if (!page_pool_unref_and_test(netmem))
+ continue;
+ io_zcrx_return_niov(niov);
+ }
+}
+
+int io_zcrx_return_bufs(struct io_ring_ctx *ctx,
+ void __user *arg, unsigned nr_arg)
+{
+ struct io_uring_zcrx_rqe rqes[IO_ZCRX_SYS_REFILL_BATCH];
+ struct io_uring_zcrx_rqe __user *urqes;
+ struct io_uring_zcrx_refill zr;
+ struct io_zcrx_ifq *ifq;
+ unsigned nr, i;
+
+ if (nr_arg)
+ return -EINVAL;
+ if (copy_from_user(&zr, arg, sizeof(zr)))
+ return -EFAULT;
+ if (!zr.nr_entries || zr.nr_entries > IO_ZCRX_MAX_SYS_REFILL_BUFS)
+ return -EINVAL;
+ if (!mem_is_zero(&zr.__resv, sizeof(zr.__resv)))
+ return -EINVAL;
+
+ ifq = xa_load(&ctx->zcrx_ctxs, zr.zcrx_id);
+ if (!ifq)
+ return -EINVAL;
+ nr = zr.nr_entries;
+ urqes = u64_to_user_ptr(zr.rqes);
+
+ for (i = 0; i < nr;) {
+ unsigned batch = min(nr - i, IO_ZCRX_SYS_REFILL_BATCH);
+
+		if (copy_from_user(rqes, urqes + i, batch * sizeof(*rqes)))
+ return i ? i : -EFAULT;
+ io_return_buffers(ifq, rqes, batch);
+
+ i += batch;
+ cond_resched();
+ }
+ return nr;
+}
+
static bool io_zcrx_queue_cqe(struct io_kiocb *req, struct net_iov *niov,
struct io_zcrx_ifq *ifq, int off, int len)
{
diff --git a/io_uring/zcrx.h b/io_uring/zcrx.h
index a48871b5adad..33ef61503092 100644
--- a/io_uring/zcrx.h
+++ b/io_uring/zcrx.h
@@ -63,6 +63,8 @@ struct io_zcrx_ifq {
};
#if defined(CONFIG_IO_URING_ZCRX)
+int io_zcrx_return_bufs(struct io_ring_ctx *ctx,
+ void __user *arg, unsigned nr_arg);
int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
struct io_uring_zcrx_ifq_reg __user *arg);
void io_unregister_zcrx_ifqs(struct io_ring_ctx *ctx);
@@ -95,6 +97,11 @@ static inline struct io_mapped_region *io_zcrx_get_region(struct io_ring_ctx *ct
{
return NULL;
}
+static inline int io_zcrx_return_bufs(struct io_ring_ctx *ctx,
+ void __user *arg, unsigned nr_arg)
+{
+ return -EOPNOTSUPP;
+}
#endif
int io_recvzc(struct io_kiocb *req, unsigned int issue_flags);
--
2.49.0