From: Shuvam Pandey <shuvampandey1@gmail.com>
To: axboe@kernel.dk
Cc: io-uring@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: [PATCH] io_uring/kbuf: retain pbuf ring list for deferred rw commits
Date: Sat, 9 May 2026 22:54:47 +0545 [thread overview]
Message-ID: <20260509170947.67188-1-shuvampandey1@gmail.com> (raw)
io_uring can defer committing a provided buffer ring selection until
request completion, since incremental buffer rings need the final
completion length to know how far to advance the ring head.
For rw requests that complete asynchronously, deferred completion paths
can later call io_put_kbuf(..., NULL). If the selected io_buffer_list is
not retained across that point, io_kbuf_commit() is skipped and the
provided buffer ring head is not advanced.
Fix this by retaining a reference to the selected io_buffer_list while
commit is deferred, and using it from the deferred put/recycle paths.
Signed-off-by: Shuvam Pandey <shuvampandey1@gmail.com>
---
include/linux/io_uring_types.h | 10 +++++++++-
io_uring/io_uring.c | 1 +
io_uring/kbuf.c | 28 +++++++++++++++++++++++++++-
io_uring/kbuf.h | 20 +++++++++++++++++++-
4 files changed, 56 insertions(+), 3 deletions(-)
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 244392026c6d5e430bbac97b0328aba37a0c2770..8a477736595197ac833ae283b0446533e2353ccf 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -8,6 +8,7 @@
#include <linux/llist.h>
#include <uapi/linux/io_uring.h>
+struct io_buffer_list;
struct iou_loop_params;
struct io_uring_bpf_ops;
@@ -94,7 +95,8 @@ struct io_mapped_region {
* struct io_kiocb. For legacy/classic provided buffers, keeping a reference
* across execution contexts are fine. But for ring provided buffers, the
* list may go away as soon as ->uring_lock is dropped. As the io_kiocb
- * persists, it's better to just keep the buffer local for those cases.
+ * persists, it's better to just keep the buffer local for those cases,
+ * unless the request has taken its own explicit lifetime reference.
*/
struct io_br_sel {
struct io_buffer_list *buf_list;
@@ -738,6 +740,12 @@ struct io_kiocb {
union {
/* stores selected buf, valid IFF REQ_F_BUFFER_SELECTED is set */
struct io_buffer *kbuf;
+ /*
+ * Stores selected provided buffer ring list for deferred
+ * commit, valid for REQ_F_BUFFER_RING requests with
+ * REQ_F_BUFFERS_COMMIT set.
+ */
+ struct io_buffer_list *buf_list;
struct io_rsrc_node *buf_node;
};
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 4ed998d60c09cb0706d23ce5e9a5c9da5e282607..c251d43d061c166cc040d4be379a41fe718f53cd 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -1733,6 +1733,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
req->tctx = current->io_uring;
req->cancel_seq_set = false;
req->async_data = NULL;
+ req->buf_list = NULL;
if (unlikely(opcode >= IORING_OP_LAST)) {
req->opcode = 0;
diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c
index 63061aa1cab945c2b8be220760e69d0426ebf104..84d4f892a5f33d90406dc26db5f73c269a51510d 100644
--- a/io_uring/kbuf.c
+++ b/io_uring/kbuf.c
@@ -220,6 +220,14 @@ static struct io_br_sel io_ring_buffer_select(struct io_kiocb *req, size_t *len,
req->flags |= REQ_F_BUF_MORE;
sel.buf_list = NULL;
}
+ if ((req->flags & REQ_F_BUFFERS_COMMIT) && sel.buf_list) {
+ if (WARN_ON_ONCE(req->buf_list && req->buf_list != bl))
+ return sel;
+ if (!req->buf_list) {
+ io_get_bl(bl);
+ req->buf_list = bl;
+ }
+ }
return sel;
}
@@ -407,6 +415,7 @@ static inline bool __io_put_kbuf_ring(struct io_kiocb *req,
unsigned int __io_put_kbufs(struct io_kiocb *req, struct io_buffer_list *bl,
int len, int nbufs)
{
+ struct io_buffer_list *stored_bl;
unsigned int ret;
ret = IORING_CQE_F_BUFFER | (req->buf_index << IORING_CQE_BUFFER_SHIFT);
@@ -416,8 +425,17 @@ unsigned int __io_put_kbufs(struct io_kiocb *req, struct io_buffer_list *bl,
return ret;
}
+ stored_bl = req->buf_list;
+ if (!bl)
+ bl = stored_bl;
+ else if (stored_bl && WARN_ON_ONCE(stored_bl != bl))
+ bl = stored_bl;
if (!__io_put_kbuf_ring(req, bl, len, nbufs))
ret |= IORING_CQE_F_BUF_MORE;
+ if (stored_bl) {
+ req->buf_list = NULL;
+ io_put_bl(req->ctx, stored_bl);
+ }
return ret;
}
@@ -442,7 +460,7 @@ static int io_remove_buffers_legacy(struct io_ring_ctx *ctx,
return i;
}
-static void io_put_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl)
+static void __io_put_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl)
{
if (bl->flags & IOBL_BUF_RING)
io_free_region(ctx->user, &bl->region);
@@ -452,6 +470,12 @@ static void io_put_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl)
kfree(bl);
}
+void io_put_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl)
+{
+ if (refcount_dec_and_test(&bl->refs))
+ __io_put_bl(ctx, bl);
+}
+
void io_destroy_buffers(struct io_ring_ctx *ctx)
{
struct io_buffer_list *bl;
@@ -579,6 +603,7 @@ static int __io_manage_buffers_legacy(struct io_kiocb *req,
bl = kzalloc_obj(*bl, GFP_KERNEL_ACCOUNT);
if (!bl)
return -ENOMEM;
+ refcount_set(&bl->refs, 1);
INIT_LIST_HEAD(&bl->buf_list);
ret = io_buffer_add_list(req->ctx, bl, p->bgid);
@@ -652,6 +677,7 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
bl = kzalloc_obj(*bl, GFP_KERNEL_ACCOUNT);
if (!bl)
return -ENOMEM;
+ refcount_set(&bl->refs, 1);
mmap_offset = (unsigned long)reg.bgid << IORING_OFF_PBUF_SHIFT;
ring_size = flex_array_size(br, bufs, reg.ring_entries);
diff --git a/io_uring/kbuf.h b/io_uring/kbuf.h
index 401773e1ef805eb46054a60ce226a3dcf41cf504..933eb2f16a820724c96104c59f26188971a85b13 100644
--- a/io_uring/kbuf.h
+++ b/io_uring/kbuf.h
@@ -3,6 +3,7 @@
#define IOU_KBUF_H
#include <uapi/linux/io_uring.h>
+#include <linux/refcount.h>
#include <linux/io_uring_types.h>
enum {
@@ -23,6 +24,7 @@ struct io_buffer_list {
};
/* count of classic/legacy buffers in buffer list */
int nbufs;
+ refcount_t refs;
__u16 bgid;
@@ -81,6 +83,13 @@ int io_manage_buffers_legacy(struct io_kiocb *req, unsigned int issue_flags);
int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg);
int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg);
+void io_put_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl);
+
+static inline void io_get_bl(struct io_buffer_list *bl)
+{
+ refcount_inc(&bl->refs);
+}
+
int io_register_pbuf_status(struct io_ring_ctx *ctx, void __user *arg);
bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags);
@@ -97,8 +106,17 @@ struct io_mapped_region *io_pbuf_get_region(struct io_ring_ctx *ctx,
static inline bool io_kbuf_recycle_ring(struct io_kiocb *req,
struct io_buffer_list *bl)
{
+ struct io_buffer_list *stored_bl = req->buf_list;
+
+ if (stored_bl) {
+ WARN_ON_ONCE(bl && bl != stored_bl);
+ req->buf_list = NULL;
+ req->flags &= ~(REQ_F_BUFFER_RING | REQ_F_BUFFERS_COMMIT);
+ io_put_bl(req->ctx, stored_bl);
+ return true;
+ }
if (bl) {
- req->flags &= ~(REQ_F_BUFFER_RING|REQ_F_BUFFERS_COMMIT);
+ req->flags &= ~(REQ_F_BUFFER_RING | REQ_F_BUFFERS_COMMIT);
return true;
}
return false;
reply other threads:[~2026-05-09 17:09 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260509170947.67188-1-shuvampandey1@gmail.com \
--to=shuvampandey1@gmail.com \
--cc=axboe@kernel.dk \
--cc=io-uring@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox