Linux io-uring development
 help / color / mirror / Atom feed
* [PATCH] io_uring/kbuf: retain pbuf ring list for deferred rw commits
@ 2026-05-09 17:09 Shuvam Pandey
  0 siblings, 0 replies; only message in thread
From: Shuvam Pandey @ 2026-05-09 17:09 UTC (permalink / raw)
  To: axboe; +Cc: io-uring, linux-kernel

io_uring can defer committing a provided buffer ring selection until
request completion, since incremental buffer rings need the final
completion length.

For rw requests that complete asynchronously, deferred completion paths
can later call io_put_kbuf(..., NULL). If the selected io_buffer_list is
not retained across that point, io_kbuf_commit() is skipped and the
provided buffer ring head is not advanced.

Retain a reference to the selected io_buffer_list while commit is
deferred, and use it from the deferred put/recycle paths.

Signed-off-by: Shuvam Pandey <shuvampandey1@gmail.com>
---
 include/linux/io_uring_types.h | 10 +++++++++-
 io_uring/io_uring.c            |  1 +
 io_uring/kbuf.c                | 28 +++++++++++++++++++++++++++-
 io_uring/kbuf.h                | 20 +++++++++++++++++++-
 4 files changed, 56 insertions(+), 3 deletions(-)

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 244392026c6d5e430bbac97b0328aba37a0c2770..8a477736595197ac833ae283b0446533e2353ccf 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -8,6 +8,7 @@
 #include <linux/llist.h>
 #include <uapi/linux/io_uring.h>
 
+struct io_buffer_list;
 struct iou_loop_params;
 struct io_uring_bpf_ops;
 
@@ -94,7 +95,8 @@ struct io_mapped_region {
  * struct io_kiocb. For legacy/classic provided buffers, keeping a reference
  * across execution contexts are fine. But for ring provided buffers, the
  * list may go away as soon as ->uring_lock is dropped. As the io_kiocb
- * persists, it's better to just keep the buffer local for those cases.
+ * persists, it's better to just keep the buffer local for those cases,
+ * unless the request has taken its own explicit lifetime reference.
  */
 struct io_br_sel {
 	struct io_buffer_list *buf_list;
@@ -738,6 +740,12 @@ struct io_kiocb {
 	union {
 		/* stores selected buf, valid IFF REQ_F_BUFFER_SELECTED is set */
 		struct io_buffer	*kbuf;
+		/*
+		 * Stores selected provided buffer ring list for deferred
+		 * commit, valid for REQ_F_BUFFER_RING requests with
+		 * REQ_F_BUFFERS_COMMIT set.
+		 */
+		struct io_buffer_list	*buf_list;
 
 		struct io_rsrc_node	*buf_node;
 	};
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 4ed998d60c09cb0706d23ce5e9a5c9da5e282607..c251d43d061c166cc040d4be379a41fe718f53cd 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -1733,6 +1733,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
 	req->tctx = current->io_uring;
 	req->cancel_seq_set = false;
 	req->async_data = NULL;
+	req->buf_list = NULL;
 
 	if (unlikely(opcode >= IORING_OP_LAST)) {
 		req->opcode = 0;
diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c
index 63061aa1cab945c2b8be220760e69d0426ebf104..84d4f892a5f33d90406dc26db5f73c269a51510d 100644
--- a/io_uring/kbuf.c
+++ b/io_uring/kbuf.c
@@ -220,6 +220,14 @@ static struct io_br_sel io_ring_buffer_select(struct io_kiocb *req, size_t *len,
 			req->flags |= REQ_F_BUF_MORE;
 		sel.buf_list = NULL;
 	}
+	if ((req->flags & REQ_F_BUFFERS_COMMIT) && sel.buf_list) {
+		if (WARN_ON_ONCE(req->buf_list && req->buf_list != bl))
+			return sel;
+		if (!req->buf_list) {
+			io_get_bl(bl);
+			req->buf_list = bl;
+		}
+	}
 	return sel;
 }
 
@@ -407,6 +415,7 @@ static inline bool __io_put_kbuf_ring(struct io_kiocb *req,
 unsigned int __io_put_kbufs(struct io_kiocb *req, struct io_buffer_list *bl,
 			    int len, int nbufs)
 {
+	struct io_buffer_list *stored_bl;
 	unsigned int ret;
 
 	ret = IORING_CQE_F_BUFFER | (req->buf_index << IORING_CQE_BUFFER_SHIFT);
@@ -416,8 +425,17 @@ unsigned int __io_put_kbufs(struct io_kiocb *req, struct io_buffer_list *bl,
 		return ret;
 	}
 
+	stored_bl = req->buf_list;
+	if (!bl)
+		bl = stored_bl;
+	else if (stored_bl && WARN_ON_ONCE(stored_bl != bl))
+		bl = stored_bl;
 	if (!__io_put_kbuf_ring(req, bl, len, nbufs))
 		ret |= IORING_CQE_F_BUF_MORE;
+	if (stored_bl) {
+		req->buf_list = NULL;
+		io_put_bl(req->ctx, stored_bl);
+	}
 	return ret;
 }
 
@@ -442,7 +460,7 @@ static int io_remove_buffers_legacy(struct io_ring_ctx *ctx,
 	return i;
 }
 
-static void io_put_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl)
+static void __io_put_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl)
 {
 	if (bl->flags & IOBL_BUF_RING)
 		io_free_region(ctx->user, &bl->region);
@@ -452,6 +470,12 @@ static void io_put_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl)
 	kfree(bl);
 }
 
+void io_put_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl)
+{
+	if (refcount_dec_and_test(&bl->refs))
+		__io_put_bl(ctx, bl);
+}
+
 void io_destroy_buffers(struct io_ring_ctx *ctx)
 {
 	struct io_buffer_list *bl;
@@ -579,6 +603,7 @@ static int __io_manage_buffers_legacy(struct io_kiocb *req,
 		bl = kzalloc_obj(*bl, GFP_KERNEL_ACCOUNT);
 		if (!bl)
 			return -ENOMEM;
+		refcount_set(&bl->refs, 1);
 
 		INIT_LIST_HEAD(&bl->buf_list);
 		ret = io_buffer_add_list(req->ctx, bl, p->bgid);
@@ -652,6 +677,7 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
 	bl = kzalloc_obj(*bl, GFP_KERNEL_ACCOUNT);
 	if (!bl)
 		return -ENOMEM;
+	refcount_set(&bl->refs, 1);
 
 	mmap_offset = (unsigned long)reg.bgid << IORING_OFF_PBUF_SHIFT;
 	ring_size = flex_array_size(br, bufs, reg.ring_entries);
diff --git a/io_uring/kbuf.h b/io_uring/kbuf.h
index 401773e1ef805eb46054a60ce226a3dcf41cf504..933eb2f16a820724c96104c59f26188971a85b13 100644
--- a/io_uring/kbuf.h
+++ b/io_uring/kbuf.h
@@ -3,6 +3,7 @@
 #define IOU_KBUF_H
 
 #include <uapi/linux/io_uring.h>
+#include <linux/refcount.h>
 #include <linux/io_uring_types.h>
 
 enum {
@@ -23,6 +24,7 @@ struct io_buffer_list {
 	};
 	/* count of classic/legacy buffers in buffer list */
 	int nbufs;
+	refcount_t refs;
 
 	__u16 bgid;
 
@@ -81,6 +83,13 @@ int io_manage_buffers_legacy(struct io_kiocb *req, unsigned int issue_flags);
 
 int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg);
 int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg);
+void io_put_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl);
+
+static inline void io_get_bl(struct io_buffer_list *bl)
+{
+	refcount_inc(&bl->refs);
+}
+
 int io_register_pbuf_status(struct io_ring_ctx *ctx, void __user *arg);
 
 bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags);
@@ -97,8 +106,17 @@ struct io_mapped_region *io_pbuf_get_region(struct io_ring_ctx *ctx,
 static inline bool io_kbuf_recycle_ring(struct io_kiocb *req,
 					struct io_buffer_list *bl)
 {
+	struct io_buffer_list *stored_bl = req->buf_list;
+
+	if (stored_bl) {
+		WARN_ON_ONCE(bl && bl != stored_bl);
+		req->buf_list = NULL;
+		req->flags &= ~(REQ_F_BUFFER_RING | REQ_F_BUFFERS_COMMIT);
+		io_put_bl(req->ctx, stored_bl);
+		return true;
+	}
 	if (bl) {
-		req->flags &= ~(REQ_F_BUFFER_RING|REQ_F_BUFFERS_COMMIT);
+		req->flags &= ~(REQ_F_BUFFER_RING | REQ_F_BUFFERS_COMMIT);
 		return true;
 	}
 	return false;

^ permalink raw reply related	[flat|nested] only message in thread

only message in thread, other threads:[~2026-05-09 17:09 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-05-09 17:09 [PATCH] io_uring/kbuf: retain pbuf ring list for deferred rw commits Shuvam Pandey

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox