Re: FAILED: patch "[PATCH] io_uring/kbuf: flag partial buffer mappings" failed to apply to 6.12-stable tree
From: Jens Axboe @ 2025-06-29 19:20 UTC (permalink / raw)
To: gregkh; +Cc: stable
[-- Attachment #1: Type: text/plain, Size: 870 bytes --]
On 6/29/25 6:42 AM, gregkh@linuxfoundation.org wrote:
>
> The patch below does not apply to the 6.12-stable tree.
> If someone wants it applied there, or to any other stable or longterm
> tree, then please email the backport, including the original git commit
> id to <stable@vger.kernel.org>.
>
> To reproduce the conflict and resubmit, you may use the following commands:
>
> git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
> git checkout FETCH_HEAD
> git cherry-pick -x 178b8ff66ff827c41b4fa105e9aabb99a0b5c537
> # <resolve conflicts, build, test, etc.>
> git commit -s
> git send-email --to '<stable@vger.kernel.org>' --in-reply-to '2025062921-froth-singing-509c@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
6.12-stable is missing a few backports here. Here's this one, with the
prior ones added first.
--
Jens Axboe
[-- Attachment #2: 0006-io_uring-kbuf-flag-partial-buffer-mappings.patch --]
[-- Type: text/x-patch, Size: 5003 bytes --]
From 29305dedb17704599efaa5c3ee6b1bee7dc80fd8 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Thu, 26 Jun 2025 12:17:48 -0600
Subject: [PATCH 6/6] io_uring/kbuf: flag partial buffer mappings
A previous commit stopped mapping further buffers for a non-incremental
ring during bundle peeking, but depending on where in the process the
peek happened, it would not necessarily prevent a retry by the user.
That can create gaps in the received/read data.
Add struct buf_sel_arg->partial_map, which can pass this information
back. The networking side can then map that to internal state and use it
to gate retry as well.
Since this necessitates a new flag, change io_sr_msg->retry to a
retry_flags member, and store both the retry and partial map condition
in there.
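As a rough sketch of the resulting flow (using the names introduced
below; not a literal excerpt from the diff):

  /* io_recv_buf_select(): latch a capped peek into the request */
  ret = io_buffers_peek(req, &arg);
  if (arg.partial_map)
          sr->retry_flags |= IO_SR_MSG_PARTIAL_MAP;

  /* io_recv_finish(): any set retry_flags bit now blocks the
   * "append to this bundle" retry, so a partially mapped bundle is
   * completed rather than retried into, closing the data-gap window. */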
Cc: stable@vger.kernel.org
Fixes: 26ec15e4b0c1 ("io_uring/kbuf: don't truncate end buffer for multiple buffer peeks")
Signed-off-by: Jens Axboe <axboe@kernel.dk>
(cherry picked from commit 178b8ff66ff827c41b4fa105e9aabb99a0b5c537)
---
io_uring/kbuf.c | 1 +
io_uring/kbuf.h | 1 +
io_uring/net.c | 23 +++++++++++++++--------
3 files changed, 17 insertions(+), 8 deletions(-)
diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c
index c9289597522f..9bd27deeee6f 100644
--- a/io_uring/kbuf.c
+++ b/io_uring/kbuf.c
@@ -263,6 +263,7 @@ static int io_ring_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg,
if (len > arg->max_len) {
len = arg->max_len;
if (!(bl->flags & IOBL_INC)) {
+ arg->partial_map = 1;
if (iov != arg->iovs)
break;
buf->len = len;
diff --git a/io_uring/kbuf.h b/io_uring/kbuf.h
index 36aadfe5ac00..2586a292dfb9 100644
--- a/io_uring/kbuf.h
+++ b/io_uring/kbuf.h
@@ -61,6 +61,7 @@ struct buf_sel_arg {
size_t max_len;
unsigned short nr_iovs;
unsigned short mode;
+ unsigned short partial_map;
};
void __user *io_buffer_select(struct io_kiocb *req, size_t *len,
diff --git a/io_uring/net.c b/io_uring/net.c
index 5db69a45dac5..0116cfaec848 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -76,13 +76,18 @@ struct io_sr_msg {
/* initialised and used only by !msg send variants */
u16 addr_len;
u16 buf_group;
- bool retry;
+ unsigned short retry_flags;
void __user *addr;
void __user *msg_control;
/* used only for send zerocopy */
struct io_kiocb *notif;
};
+enum sr_retry_flags {
+ IO_SR_MSG_RETRY = 1,
+ IO_SR_MSG_PARTIAL_MAP = 2,
+};
+
/*
* Number of times we'll try and do receives if there's more data. If we
* exceed this limit, then add us to the back of the queue and retry from
@@ -204,7 +209,7 @@ static inline void io_mshot_prep_retry(struct io_kiocb *req,
req->flags &= ~REQ_F_BL_EMPTY;
sr->done_io = 0;
- sr->retry = false;
+ sr->retry_flags = 0;
sr->len = 0; /* get from the provided buffer */
req->buf_index = sr->buf_group;
}
@@ -411,7 +416,7 @@ int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
sr->done_io = 0;
- sr->retry = false;
+ sr->retry_flags = 0;
if (req->opcode == IORING_OP_SEND) {
if (READ_ONCE(sqe->__pad3[0]))
@@ -783,7 +788,7 @@ int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
sr->done_io = 0;
- sr->retry = false;
+ sr->retry_flags = 0;
if (unlikely(sqe->file_index || sqe->addr2))
return -EINVAL;
@@ -856,7 +861,7 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
cflags |= io_put_kbufs(req, this_ret, io_bundle_nbufs(kmsg, this_ret),
issue_flags);
- if (sr->retry)
+ if (sr->retry_flags & IO_SR_MSG_RETRY)
cflags = req->cqe.flags | (cflags & CQE_F_MASK);
/* bundle with no more immediate buffers, we're done */
if (req->flags & REQ_F_BL_EMPTY)
@@ -865,12 +870,12 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
* If more is available AND it was a full transfer, retry and
* append to this one
*/
- if (!sr->retry && kmsg->msg.msg_inq > 1 && this_ret > 0 &&
+ if (!sr->retry_flags && kmsg->msg.msg_inq > 1 && this_ret > 0 &&
!iov_iter_count(&kmsg->msg.msg_iter)) {
req->cqe.flags = cflags & ~CQE_F_MASK;
sr->len = kmsg->msg.msg_inq;
sr->done_io += this_ret;
- sr->retry = true;
+ sr->retry_flags |= IO_SR_MSG_RETRY;
return false;
}
} else {
@@ -1123,6 +1128,8 @@ static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg
kmsg->free_iov = arg.iovs;
req->flags |= REQ_F_NEED_CLEANUP;
}
+ if (arg.partial_map)
+ sr->retry_flags |= IO_SR_MSG_PARTIAL_MAP;
/* special case 1 vec, can be a fast path */
if (ret == 1) {
@@ -1252,7 +1259,7 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
struct io_kiocb *notif;
zc->done_io = 0;
- zc->retry = false;
+ zc->retry_flags = 0;
req->flags |= REQ_F_POLL_NO_LAZY;
if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3)))
--
2.50.0
[-- Attachment #3: 0005-io_uring-net-mark-iov-as-dynamically-allocated-even-.patch --]
[-- Type: text/x-patch, Size: 1811 bytes --]
From c4e101eab9014e6174d9042c3dc7ff80ce22b889 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Wed, 25 Jun 2025 10:17:06 -0600
Subject: [PATCH 5/6] io_uring/net: mark iov as dynamically allocated even for
single segments
Commit 9a709b7e98e6fa51600b5f2d24c5068efa6d39de upstream.
A bigger array of vecs could have been allocated, but
io_ring_buffers_peek() may still have decided to cap the mapped range
depending on how much data was available. Hence don't rely on the
segment count to know whether the request should be marked as needing
cleanup; always check upfront whether the iov array differs from the
fast_iov array.
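To make the leak concrete, a hypothetical sequence (segment counts
invented for illustration):

  /* io_ring_buffers_peek() kmalloc()s room for 4 iovecs, but caps the
   * mapping at 1 segment because only one buffer's worth of data is
   * available, so it returns 1.  Previously the ret == 1 fast path
   * returned before the allocation check ran: REQ_F_NEED_CLEANUP was
   * never set and the allocated array leaked.  Checking arg.iovs
   * against the fast_iov/free_iov pointers before the fast path marks
   * the request for cleanup whenever the peek allocated, regardless of
   * the segment count. */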
Fixes: 26ec15e4b0c1 ("io_uring/kbuf: don't truncate end buffer for multiple buffer peeks")
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
io_uring/net.c | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/io_uring/net.c b/io_uring/net.c
index d18b751eb812..5db69a45dac5 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -1118,6 +1118,12 @@ static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg
if (unlikely(ret < 0))
return ret;
+ if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->free_iov) {
+ kmsg->free_iov_nr = ret;
+ kmsg->free_iov = arg.iovs;
+ req->flags |= REQ_F_NEED_CLEANUP;
+ }
+
/* special case 1 vec, can be a fast path */
if (ret == 1) {
sr->buf = arg.iovs[0].iov_base;
@@ -1126,11 +1132,6 @@ static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg
}
iov_iter_init(&kmsg->msg.msg_iter, ITER_DEST, arg.iovs, ret,
arg.out_len);
- if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->free_iov) {
- kmsg->free_iov_nr = ret;
- kmsg->free_iov = arg.iovs;
- req->flags |= REQ_F_NEED_CLEANUP;
- }
} else {
void __user *buf;
--
2.50.0
[-- Attachment #4: 0004-io_uring-net-always-use-current-transfer-count-for-b.patch --]
[-- Type: text/x-patch, Size: 1490 bytes --]
From b66423f9c952d70f4c8130da3b9fc2be68db52cc Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Fri, 20 Jun 2025 07:41:21 -0600
Subject: [PATCH 4/6] io_uring/net: always use current transfer count for
buffer put
A previous fix corrected the retry condition for when to continue a
current bundle, but it missed that the current (not the total) transfer
count also applies to the buffer put. Otherwise, for incrementally
consumed buffer rings, repeated completions on the same request may end
up over-consuming buffers.
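A worked example with invented numbers:

  /* A bundle recv banked 3072 bytes on an earlier attempt
   * (sr->done_io == 3072) and this attempt adds 1024, so on completion
   * *ret == 4096 while:
   *
   *     this_ret = *ret - sr->done_io;    (== 1024)
   *
   * Putting buffers for *ret (4096) releases space the earlier pass
   * already accounted for; with an incrementally consumed ring, each
   * such completion would over-consume by sr->done_io bytes. */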
Reported-by: Roy Tang (ErgoniaTrading) <royonia@ergonia.io>
Cc: stable@vger.kernel.org
Fixes: 3a08988123c8 ("io_uring/net: only retry recv bundle for a full transfer")
Link: https://github.com/axboe/liburing/issues/1423
Signed-off-by: Jens Axboe <axboe@kernel.dk>
(cherry picked from commit 51a4598ad5d9eb6be4ec9ba65bbfdf0ac302eb2e)
---
io_uring/net.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/io_uring/net.c b/io_uring/net.c
index 60bba37d0e0c..d18b751eb812 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -854,7 +854,7 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
if (sr->flags & IORING_RECVSEND_BUNDLE) {
size_t this_ret = *ret - sr->done_io;
- cflags |= io_put_kbufs(req, *ret, io_bundle_nbufs(kmsg, this_ret),
+ cflags |= io_put_kbufs(req, this_ret, io_bundle_nbufs(kmsg, this_ret),
issue_flags);
if (sr->retry)
cflags = req->cqe.flags | (cflags & CQE_F_MASK);
--
2.50.0
[-- Attachment #5: 0003-io_uring-net-only-consider-msg_inq-if-larger-than-1.patch --]
[-- Type: text/x-patch, Size: 1938 bytes --]
From 8861bd9b328862a49e1cc613bb70eba48d6ce0c8 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Wed, 28 May 2025 13:45:44 -0600
Subject: [PATCH 3/6] io_uring/net: only consider msg_inq if larger than 1
Commit 2c7f023219966777be0687e15b57689894304cd3 upstream.
Currently retry and general validity of msg_inq is gated on it being
larger than zero, but it's entirely possible for this to be slightly
inaccurate. In particular, if FIN is received, it'll return 1.
Just use larger than 1 as the check. This covers both the FIN case, and
at the same time, it doesn't make much sense to retry a recv immediately
if there's even just a single 1 byte of valid data in the socket.
Leave the SOCK_NONEMPTY flagging when larger than 0 still, as an app may
use that for the final receive.
Cc: stable@vger.kernel.org
Reported-by: Christian Mazakas <christian.mazakas@gmail.com>
Fixes: 7c71a0af81ba ("io_uring/net: improve recv bundles")
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
io_uring/net.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/io_uring/net.c b/io_uring/net.c
index 0df6876f70be..60bba37d0e0c 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -865,7 +865,7 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
* If more is available AND it was a full transfer, retry and
* append to this one
*/
- if (!sr->retry && kmsg->msg.msg_inq > 0 && this_ret > 0 &&
+ if (!sr->retry && kmsg->msg.msg_inq > 1 && this_ret > 0 &&
!iov_iter_count(&kmsg->msg.msg_iter)) {
req->cqe.flags = cflags & ~CQE_F_MASK;
sr->len = kmsg->msg.msg_inq;
@@ -1111,7 +1111,7 @@ static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg
arg.mode |= KBUF_MODE_FREE;
}
- if (kmsg->msg.msg_inq > 0)
+ if (kmsg->msg.msg_inq > 1)
arg.max_len = min_not_zero(sr->len, kmsg->msg.msg_inq);
ret = io_buffers_peek(req, &arg);
--
2.50.0
[-- Attachment #6: 0002-io_uring-net-only-retry-recv-bundle-for-a-full-trans.patch --]
[-- Type: text/x-patch, Size: 2242 bytes --]
From 010c5e8c46373dfba92fbe264ac0dc407fcf38db Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Wed, 21 May 2025 18:51:49 -0600
Subject: [PATCH 2/6] io_uring/net: only retry recv bundle for a full transfer
Commit 3a08988123c868dbfdd054541b1090fb891fa49e upstream.
If a shorter-than-expected transfer was seen, a partial buffer will have
been filled. For that case it isn't sane to attempt to fill more into
the bundle before posting a completion, as that would cause a gap in
the received data.
Check whether the iterator has hit zero, and only allow a bundle
operation to continue if that is the case.
Also ensure that when putting finished buffers, only the current transfer
is accounted. Otherwise too many buffers may be put for a short transfer.
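A worked example with invented sizes:

  /* Two 1KB buffers are mapped for one bundle recv but only 700 bytes
   * arrive, so iov_iter_count(&kmsg->msg.msg_iter) == 2048 - 700 == 1348.
   * With this patch the retry is skipped and a 700-byte completion is
   * posted; only when the count hits 0 (a full 2048-byte transfer) may
   * the bundle be continued and appended to. */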
Link: https://github.com/axboe/liburing/issues/1409
Cc: stable@vger.kernel.org
Fixes: 7c71a0af81ba ("io_uring/net: improve recv bundles")
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
io_uring/net.c | 14 ++++++++++----
1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/io_uring/net.c b/io_uring/net.c
index 38f74dc5b9e7..0df6876f70be 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -852,18 +852,24 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
cflags |= IORING_CQE_F_SOCK_NONEMPTY;
if (sr->flags & IORING_RECVSEND_BUNDLE) {
- cflags |= io_put_kbufs(req, *ret, io_bundle_nbufs(kmsg, *ret),
+ size_t this_ret = *ret - sr->done_io;
+
+ cflags |= io_put_kbufs(req, *ret, io_bundle_nbufs(kmsg, this_ret),
issue_flags);
if (sr->retry)
cflags = req->cqe.flags | (cflags & CQE_F_MASK);
/* bundle with no more immediate buffers, we're done */
if (req->flags & REQ_F_BL_EMPTY)
goto finish;
- /* if more is available, retry and append to this one */
- if (!sr->retry && kmsg->msg.msg_inq > 0 && *ret > 0) {
+ /*
+ * If more is available AND it was a full transfer, retry and
+ * append to this one
+ */
+ if (!sr->retry && kmsg->msg.msg_inq > 0 && this_ret > 0 &&
+ !iov_iter_count(&kmsg->msg.msg_iter)) {
req->cqe.flags = cflags & ~CQE_F_MASK;
sr->len = kmsg->msg.msg_inq;
- sr->done_io += *ret;
+ sr->done_io += this_ret;
sr->retry = true;
return false;
}
--
2.50.0
[-- Attachment #7: 0001-io_uring-net-improve-recv-bundles.patch --]
[-- Type: text/x-patch, Size: 4348 bytes --]
From 3a5ac5f9a18ac9a80cdcee755a88b9ba8db90e3c Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Sat, 8 Feb 2025 10:50:34 -0700
Subject: [PATCH 1/6] io_uring/net: improve recv bundles
Commit 7c71a0af81ba72de9b2c501065e4e718aba9a271 upstream.
Recv bundles are currently only supported for multishot receives, and
they always post at least 2 CQEs if more data is available than a single
buffer will hold. This happens because the initial bundle recv does a
single buffer, then receives the rest of what is in the socket as a
followup. As shown by a test program, if 1k buffers are provided and 32k
is available to receive in the socket, you'd get the following
completions:
bundle=1, mshot=0
cqe res 1024
cqe res 1024
[...]
cqe res 1024
bundle=1, mshot=1
cqe res 1024
cqe res 31744
where bundle=1 && mshot=0 will post 32 1k completions, and bundle=1 &&
mshot=1 will post a 1k completion and then a 31k completion.
To support bundle recv without multishot, it's possible to simply retry
the recv immediately and post a single completion, rather than splitting
it into two. With the below patch, the same test looks as follows:
bundle=1, mshot=0
cqe res 32768
bundle=1, mshot=1
cqe res 32768
where mshot=0 now works fine for bundles, and both of them post just a
single 32k completion rather than splitting it into separate completions.
Posting fewer completions is always a nice win, and not needing
multishot for proper bundle efficiency is nice for cases that can't
necessarily use multishot.
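For context, a minimal userspace sketch of arming such a bundle recv
with liburing; it assumes a provided-buffer ring has already been
registered as group 0, error handling is omitted, and the helper name
is invented for illustration:

  #include <liburing.h>

  static void arm_bundle_recv(struct io_uring *ring, int sockfd)
  {
          struct io_uring_sqe *sqe = io_uring_get_sqe(ring);

          /* len == 0: the kernel picks buffers from the provided ring */
          io_uring_prep_recv(sqe, sockfd, NULL, 0, 0);
          sqe->flags |= IOSQE_BUFFER_SELECT;      /* use provided buffers */
          sqe->buf_group = 0;                     /* buffer group id */
          sqe->ioprio |= IORING_RECVSEND_BUNDLE;  /* bundle the receive */
          io_uring_submit(ring);
  }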
Reported-by: Norman Maurer <norman_maurer@apple.com>
Link: https://lore.kernel.org/r/184f9f92-a682-4205-a15d-89e18f664502@kernel.dk
Fixes: 2f9c9515bdfd ("io_uring/net: support bundles for recv")
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
io_uring/net.c | 18 ++++++++++++++++++
1 file changed, 18 insertions(+)
diff --git a/io_uring/net.c b/io_uring/net.c
index 384915d931b7..38f74dc5b9e7 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -76,6 +76,7 @@ struct io_sr_msg {
/* initialised and used only by !msg send variants */
u16 addr_len;
u16 buf_group;
+ bool retry;
void __user *addr;
void __user *msg_control;
/* used only for send zerocopy */
@@ -203,6 +204,7 @@ static inline void io_mshot_prep_retry(struct io_kiocb *req,
req->flags &= ~REQ_F_BL_EMPTY;
sr->done_io = 0;
+ sr->retry = false;
sr->len = 0; /* get from the provided buffer */
req->buf_index = sr->buf_group;
}
@@ -409,6 +411,7 @@ int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
sr->done_io = 0;
+ sr->retry = false;
if (req->opcode == IORING_OP_SEND) {
if (READ_ONCE(sqe->__pad3[0]))
@@ -780,6 +783,7 @@ int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
sr->done_io = 0;
+ sr->retry = false;
if (unlikely(sqe->file_index || sqe->addr2))
return -EINVAL;
@@ -828,6 +832,9 @@ int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return io_recvmsg_prep_setup(req);
}
+/* bits to clear in old and inherit in new cflags on bundle retry */
+#define CQE_F_MASK (IORING_CQE_F_SOCK_NONEMPTY|IORING_CQE_F_MORE)
+
/*
* Finishes io_recv and io_recvmsg.
*
@@ -847,9 +854,19 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
if (sr->flags & IORING_RECVSEND_BUNDLE) {
cflags |= io_put_kbufs(req, *ret, io_bundle_nbufs(kmsg, *ret),
issue_flags);
+ if (sr->retry)
+ cflags = req->cqe.flags | (cflags & CQE_F_MASK);
/* bundle with no more immediate buffers, we're done */
if (req->flags & REQ_F_BL_EMPTY)
goto finish;
+ /* if more is available, retry and append to this one */
+ if (!sr->retry && kmsg->msg.msg_inq > 0 && *ret > 0) {
+ req->cqe.flags = cflags & ~CQE_F_MASK;
+ sr->len = kmsg->msg.msg_inq;
+ sr->done_io += *ret;
+ sr->retry = true;
+ return false;
+ }
} else {
cflags |= io_put_kbuf(req, *ret, issue_flags);
}
@@ -1228,6 +1245,7 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
struct io_kiocb *notif;
zc->done_io = 0;
+ zc->retry = false;
req->flags |= REQ_F_POLL_NO_LAZY;
if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3)))
--
2.50.0