io-uring.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH V4] io_uring: uring_cmd: add multishot support
@ 2025-08-20 15:40 Ming Lei
  2025-08-20 19:17 ` Jens Axboe
  0 siblings, 1 reply; 3+ messages in thread
From: Ming Lei @ 2025-08-20 15:40 UTC (permalink / raw)
  To: Jens Axboe, io-uring, Pavel Begunkov; +Cc: Caleb Sander Mateos, Ming Lei

Add UAPI flag IORING_URING_CMD_MULTISHOT for supporting multishot
uring_cmd operations with provided buffer.

This enables drivers to post multiple completion events from a single
uring_cmd submission, which is useful for:

- Notifying userspace of device events (e.g., interrupt handling)
- Supporting devices with multiple event sources (e.g., multi-queue devices)
- Avoiding the need for device poll() support when events originate
  from multiple sources device-wide

The implementation adds two new APIs:
- io_uring_cmd_select_buffer(): selects a buffer from the provided
  buffer group for multishot uring_cmd
- io_uring_mshot_cmd_post_cqe(): posts a CQE after event data is
  pushed to the provided buffer

Multishot uring_cmd must be used with buffer select (IOSQE_BUFFER_SELECT)
and is mutually exclusive with IORING_URING_CMD_FIXED for now.

The ublk driver will be the first user of this functionality:

	https://github.com/ming1/linux/commits/ublk-devel/

Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
V4:
- add io_do_buffer_select() check in io_uring_cmd_select_buffer()
- comments that the two APIs should work together for committing buffer
  upfront(Jens)

V3:
- enhance buffer select check(Jens)

V2:
- Fixed static inline return type
- Updated UAPI comments: Clarified that IORING_URING_CMD_MULTISHOT must be used with buffer select
- Refactored validation checks: Moved the mutual exclusion checks into the individual flag validation
sections for better code organization
- Added missing req_set_fail(): Added the missing failure handling in io_uring_mshot_cmd_post_cqe
- Improved commit message: Rewrote the commit message to be clearer, more technical, and better explain
the use cases and API changes

 include/linux/io_uring/cmd.h  | 28 ++++++++++++
 include/uapi/linux/io_uring.h |  6 ++-
 io_uring/opdef.c              |  1 +
 io_uring/uring_cmd.c          | 81 ++++++++++++++++++++++++++++++++++-
 4 files changed, 114 insertions(+), 2 deletions(-)

diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h
index cfa6d0c0c322..72832757f8ef 100644
--- a/include/linux/io_uring/cmd.h
+++ b/include/linux/io_uring/cmd.h
@@ -70,6 +70,22 @@ void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd,
 /* Execute the request from a blocking context */
 void io_uring_cmd_issue_blocking(struct io_uring_cmd *ioucmd);
 
+/*
+ * Select a buffer from the provided buffer group for multishot uring_cmd.
+ * Returns the selected buffer address and size.
+ */
+int io_uring_cmd_select_buffer(struct io_uring_cmd *ioucmd,
+			       unsigned buf_group,
+			       void **buf, size_t *len,
+			       unsigned int issue_flags);
+
+/*
+ * Complete a multishot uring_cmd event. This will post a CQE to the completion
+ * queue and update the provided buffer.
+ */
+bool io_uring_mshot_cmd_post_cqe(struct io_uring_cmd *ioucmd,
+				 ssize_t ret, unsigned int issue_flags);
+
 #else
 static inline int
 io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
@@ -102,6 +118,18 @@ static inline void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd,
 static inline void io_uring_cmd_issue_blocking(struct io_uring_cmd *ioucmd)
 {
 }
+static inline int io_uring_cmd_select_buffer(struct io_uring_cmd *ioucmd,
+				unsigned buf_group,
+				void **buf, size_t *len,
+				unsigned int issue_flags)
+{
+	return -EOPNOTSUPP;
+}
+static inline bool io_uring_mshot_cmd_post_cqe(struct io_uring_cmd *ioucmd,
+				ssize_t ret, unsigned int issue_flags)
+{
+	return true;
+}
 #endif
 
 /*
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 6957dc539d83..1e935f8901c5 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -298,9 +298,13 @@ enum io_uring_op {
  * sqe->uring_cmd_flags		top 8bits aren't available for userspace
  * IORING_URING_CMD_FIXED	use registered buffer; pass this flag
  *				along with setting sqe->buf_index.
+ * IORING_URING_CMD_MULTISHOT	must be used with buffer select, like other
+ *				multishot commands. Not compatible with
+ *				IORING_URING_CMD_FIXED, for now.
  */
 #define IORING_URING_CMD_FIXED	(1U << 0)
-#define IORING_URING_CMD_MASK	IORING_URING_CMD_FIXED
+#define IORING_URING_CMD_MULTISHOT	(1U << 1)
+#define IORING_URING_CMD_MASK	(IORING_URING_CMD_FIXED | IORING_URING_CMD_MULTISHOT)
 
 
 /*
diff --git a/io_uring/opdef.c b/io_uring/opdef.c
index 9568785810d9..932319633eac 100644
--- a/io_uring/opdef.c
+++ b/io_uring/opdef.c
@@ -413,6 +413,7 @@ const struct io_issue_def io_issue_defs[] = {
 #endif
 	},
 	[IORING_OP_URING_CMD] = {
+		.buffer_select		= 1,
 		.needs_file		= 1,
 		.plug			= 1,
 		.iopoll			= 1,
diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
index 053bac89b6c0..babb6a4b3542 100644
--- a/io_uring/uring_cmd.c
+++ b/io_uring/uring_cmd.c
@@ -11,6 +11,7 @@
 #include "io_uring.h"
 #include "alloc_cache.h"
 #include "rsrc.h"
+#include "kbuf.h"
 #include "uring_cmd.h"
 #include "poll.h"
 
@@ -194,8 +195,21 @@ int io_uring_cmd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	if (ioucmd->flags & ~IORING_URING_CMD_MASK)
 		return -EINVAL;
 
-	if (ioucmd->flags & IORING_URING_CMD_FIXED)
+	if (ioucmd->flags & IORING_URING_CMD_FIXED) {
+		if (ioucmd->flags & IORING_URING_CMD_MULTISHOT)
+			return -EINVAL;
 		req->buf_index = READ_ONCE(sqe->buf_index);
+	}
+
+	if (ioucmd->flags & IORING_URING_CMD_MULTISHOT) {
+		if (ioucmd->flags & IORING_URING_CMD_FIXED)
+			return -EINVAL;
+		if (!(req->flags & REQ_F_BUFFER_SELECT))
+			return -EINVAL;
+	} else {
+		if (req->flags & REQ_F_BUFFER_SELECT)
+			return -EINVAL;
+	}
 
 	ioucmd->cmd_op = READ_ONCE(sqe->cmd_op);
 
@@ -251,6 +265,11 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags)
 	}
 
 	ret = file->f_op->uring_cmd(ioucmd, issue_flags);
+	if (ioucmd->flags & IORING_URING_CMD_MULTISHOT) {
+		if (ret >= 0)
+			return IOU_ISSUE_SKIP_COMPLETE;
+		io_kbuf_recycle(req, issue_flags);
+	}
 	if (ret == -EAGAIN) {
 		ioucmd->flags |= IORING_URING_CMD_REISSUE;
 		return ret;
@@ -333,3 +352,63 @@ bool io_uring_cmd_post_mshot_cqe32(struct io_uring_cmd *cmd,
 		return false;
 	return io_req_post_cqe32(req, cqe);
 }
+
+/*
+ * Work with io_uring_mshot_cmd_post_cqe() together for committing the
+ * provided buffer upfront
+ *
+ * The two must be paired, and both must be called within the same
+ * uring_cmd submission context.
+ */
+int io_uring_cmd_select_buffer(struct io_uring_cmd *ioucmd,
+			       unsigned buf_group,
+			       void __user **buf, size_t *len,
+			       unsigned int issue_flags)
+{
+	struct io_kiocb *req = cmd_to_io_kiocb(ioucmd);
+	void __user *ubuf;
+
+	if (!(ioucmd->flags & IORING_URING_CMD_MULTISHOT))
+		return -EINVAL;
+
+	if (WARN_ON_ONCE(!io_do_buffer_select(req)))
+		return -EINVAL;
+
+	ubuf = io_buffer_select(req, len, buf_group, issue_flags);
+	if (!ubuf)
+		return -ENOBUFS;
+
+	*buf = ubuf;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(io_uring_cmd_select_buffer);
+
+/*
+ * Return true if this multishot uring_cmd needs to be completed, otherwise
+ * the event CQE is posted successfully.
+ *
+ * This function must be paired with io_uring_cmd_select_buffer(), and both
+ * must be called within the same uring_cmd submission context.
+ */
+bool io_uring_mshot_cmd_post_cqe(struct io_uring_cmd *ioucmd,
+				 ssize_t ret, unsigned int issue_flags)
+{
+	struct io_kiocb *req = cmd_to_io_kiocb(ioucmd);
+	unsigned int cflags = 0;
+
+	if (!(ioucmd->flags & IORING_URING_CMD_MULTISHOT))
+		return true;
+
+	if (ret > 0) {
+		cflags = io_put_kbuf(req, ret, issue_flags);
+		if (io_req_post_cqe(req, ret, cflags | IORING_CQE_F_MORE))
+			return false;
+	}
+
+	io_kbuf_recycle(req, issue_flags);
+	if (ret < 0)
+		req_set_fail(req);
+	io_req_set_res(req, ret, cflags);
+	return true;
+}
+EXPORT_SYMBOL_GPL(io_uring_mshot_cmd_post_cqe);
-- 
2.47.0


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH V4] io_uring: uring_cmd: add multishot support
  2025-08-20 15:40 [PATCH V4] io_uring: uring_cmd: add multishot support Ming Lei
@ 2025-08-20 19:17 ` Jens Axboe
  2025-08-21  2:24   ` Jens Axboe
  0 siblings, 1 reply; 3+ messages in thread
From: Jens Axboe @ 2025-08-20 19:17 UTC (permalink / raw)
  To: Ming Lei, io-uring, Pavel Begunkov; +Cc: Caleb Sander Mateos

On 8/20/25 9:40 AM, Ming Lei wrote:
> Add UAPI flag IORING_URING_CMD_MULTISHOT for supporting multishot
> uring_cmd operations with provided buffer.
> 
> This enables drivers to post multiple completion events from a single
> uring_cmd submission, which is useful for:
> 
> - Notifying userspace of device events (e.g., interrupt handling)
> - Supporting devices with multiple event sources (e.g., multi-queue devices)
> - Avoiding the need for device poll() support when events originate
>   from multiple sources device-wide
> 
> The implementation adds two new APIs:
> - io_uring_cmd_select_buffer(): selects a buffer from the provided
>   buffer group for multishot uring_cmd
> - io_uring_mshot_cmd_post_cqe(): posts a CQE after event data is
>   pushed to the provided buffer
> 
> Multishot uring_cmd must be used with buffer select (IOSQE_BUFFER_SELECT)
> and is mutually exclusive with IORING_URING_CMD_FIXED for now.
> 
> The ublk driver will be the first user of this functionality:
> 
> 	https://github.com/ming1/linux/commits/ublk-devel/
> 
> Signed-off-by: Ming Lei <ming.lei@redhat.com>
> ---
> V4:
> - add io_do_buffer_select() check in io_uring_cmd_select_buffer(()
> - comments that the two APIs should work together for committing buffer
>   upfront(Jens)
> 
> V3:
> - enhance buffer select check(Jens)
> 
> V2:
> - Fixed static inline return type
> - Updated UAPI comments: Clarified that IORING_URING_CMD_MULTISHOT must be used with buffer select
> - Refactored validation checks: Moved the mutual exclusion checks into the individual flag validation
> sections for better code organization
> - Added missing req_set_fail(): Added the missing failure handling in io_uring_mshot_cmd_post_cqe
> - Improved commit message: Rewrote the commit message to be clearer, more technical, and better explain
> the use cases and API changes
> 
>  include/linux/io_uring/cmd.h  | 28 ++++++++++++
>  include/uapi/linux/io_uring.h |  6 ++-
>  io_uring/opdef.c              |  1 +
>  io_uring/uring_cmd.c          | 81 ++++++++++++++++++++++++++++++++++-
>  4 files changed, 114 insertions(+), 2 deletions(-)
> 
> diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h
> index cfa6d0c0c322..72832757f8ef 100644
> --- a/include/linux/io_uring/cmd.h
> +++ b/include/linux/io_uring/cmd.h
> @@ -70,6 +70,22 @@ void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd,
>  /* Execute the request from a blocking context */
>  void io_uring_cmd_issue_blocking(struct io_uring_cmd *ioucmd);
>  
> +/*
> + * Select a buffer from the provided buffer group for multishot uring_cmd.
> + * Returns the selected buffer address and size.
> + */
> +int io_uring_cmd_select_buffer(struct io_uring_cmd *ioucmd,
> +			       unsigned buf_group,
> +			       void **buf, size_t *len,
> +			       unsigned int issue_flags);
> +
> +/*
> + * Complete a multishot uring_cmd event. This will post a CQE to the completion
> + * queue and update the provided buffer.
> + */
> +bool io_uring_mshot_cmd_post_cqe(struct io_uring_cmd *ioucmd,
> +				 ssize_t ret, unsigned int issue_flags);
> +
>  #else
>  static inline int
>  io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
> @@ -102,6 +118,18 @@ static inline void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd,
>  static inline void io_uring_cmd_issue_blocking(struct io_uring_cmd *ioucmd)
>  {
>  }
> +static inline int io_uring_cmd_select_buffer(struct io_uring_cmd *ioucmd,
> +				unsigned buf_group,
> +				void **buf, size_t *len,
> +				unsigned int issue_flags)
> +{
> +	return -EOPNOTSUPP;
> +}
> +static inline bool io_uring_mshot_cmd_post_cqe(struct io_uring_cmd *ioucmd,
> +				ssize_t ret, unsigned int issue_flags)
> +{
> +	return true;
> +}
>  #endif
>  
>  /*
> diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
> index 6957dc539d83..1e935f8901c5 100644
> --- a/include/uapi/linux/io_uring.h
> +++ b/include/uapi/linux/io_uring.h
> @@ -298,9 +298,13 @@ enum io_uring_op {
>   * sqe->uring_cmd_flags		top 8bits aren't available for userspace
>   * IORING_URING_CMD_FIXED	use registered buffer; pass this flag
>   *				along with setting sqe->buf_index.
> + * IORING_URING_CMD_MULTISHOT	must be used with buffer select, like other
> + *				multishot commands. Not compatible with
> + *				IORING_URING_CMD_FIXED, for now.
>   */
>  #define IORING_URING_CMD_FIXED	(1U << 0)
> -#define IORING_URING_CMD_MASK	IORING_URING_CMD_FIXED
> +#define IORING_URING_CMD_MULTISHOT	(1U << 1)
> +#define IORING_URING_CMD_MASK	(IORING_URING_CMD_FIXED | IORING_URING_CMD_MULTISHOT)
>  
>  
>  /*
> diff --git a/io_uring/opdef.c b/io_uring/opdef.c
> index 9568785810d9..932319633eac 100644
> --- a/io_uring/opdef.c
> +++ b/io_uring/opdef.c
> @@ -413,6 +413,7 @@ const struct io_issue_def io_issue_defs[] = {
>  #endif
>  	},
>  	[IORING_OP_URING_CMD] = {
> +		.buffer_select		= 1,
>  		.needs_file		= 1,
>  		.plug			= 1,
>  		.iopoll			= 1,
> diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
> index 053bac89b6c0..babb6a4b3542 100644
> --- a/io_uring/uring_cmd.c
> +++ b/io_uring/uring_cmd.c
> @@ -11,6 +11,7 @@
>  #include "io_uring.h"
>  #include "alloc_cache.h"
>  #include "rsrc.h"
> +#include "kbuf.h"
>  #include "uring_cmd.h"
>  #include "poll.h"
>  
> @@ -194,8 +195,21 @@ int io_uring_cmd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
>  	if (ioucmd->flags & ~IORING_URING_CMD_MASK)
>  		return -EINVAL;
>  
> -	if (ioucmd->flags & IORING_URING_CMD_FIXED)
> +	if (ioucmd->flags & IORING_URING_CMD_FIXED) {
> +		if (ioucmd->flags & IORING_URING_CMD_MULTISHOT)
> +			return -EINVAL;
>  		req->buf_index = READ_ONCE(sqe->buf_index);
> +	}
> +
> +	if (ioucmd->flags & IORING_URING_CMD_MULTISHOT) {
> +		if (ioucmd->flags & IORING_URING_CMD_FIXED)
> +			return -EINVAL;
> +		if (!(req->flags & REQ_F_BUFFER_SELECT))
> +			return -EINVAL;
> +	} else {
> +		if (req->flags & REQ_F_BUFFER_SELECT)
> +			return -EINVAL;
> +	}
>  
>  	ioucmd->cmd_op = READ_ONCE(sqe->cmd_op);
>  
> @@ -251,6 +265,11 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags)
>  	}
>  
>  	ret = file->f_op->uring_cmd(ioucmd, issue_flags);
> +	if (ioucmd->flags & IORING_URING_CMD_MULTISHOT) {
> +		if (ret >= 0)
> +			return IOU_ISSUE_SKIP_COMPLETE;
> +		io_kbuf_recycle(req, issue_flags);
> +	}
>  	if (ret == -EAGAIN) {
>  		ioucmd->flags |= IORING_URING_CMD_REISSUE;
>  		return ret;
> @@ -333,3 +352,63 @@ bool io_uring_cmd_post_mshot_cqe32(struct io_uring_cmd *cmd,
>  		return false;
>  	return io_req_post_cqe32(req, cqe);
>  }
> +
> +/*
> + * Work with io_uring_mshot_cmd_post_cqe() together for committing the
> + * provided buffer upfront
> + *
> + * The two must be paired, and both must be called within the same
> + * uring_cmd submission context.
> + */
> +int io_uring_cmd_select_buffer(struct io_uring_cmd *ioucmd,
> +			       unsigned buf_group,
> +			       void __user **buf, size_t *len,
> +			       unsigned int issue_flags)
> +{
> +	struct io_kiocb *req = cmd_to_io_kiocb(ioucmd);
> +	void __user *ubuf;
> +
> +	if (!(ioucmd->flags & IORING_URING_CMD_MULTISHOT))
> +		return -EINVAL;
> +
> +	if (WARN_ON_ONCE(!io_do_buffer_select(req)))
> +		return -EINVAL;
> +
> +	ubuf = io_buffer_select(req, len, buf_group, issue_flags);
> +	if (!ubuf)
> +		return -ENOBUFS;
> +
> +	*buf = ubuf;
> +	return 0;
> +}
> +EXPORT_SYMBOL_GPL(io_uring_cmd_select_buffer);
> +
> +/*
> + * Return true if this multishot uring_cmd needs to be completed, otherwise
> + * the event CQE is posted successfully.
> + *
> + * This function must be paired with io_uring_cmd_select_buffer(), and both
> + * must be called within the same uring_cmd submission context.
> + */
> +bool io_uring_mshot_cmd_post_cqe(struct io_uring_cmd *ioucmd,
> +				 ssize_t ret, unsigned int issue_flags)
> +{
> +	struct io_kiocb *req = cmd_to_io_kiocb(ioucmd);
> +	unsigned int cflags = 0;
> +
> +	if (!(ioucmd->flags & IORING_URING_CMD_MULTISHOT))
> +		return true;
> +
> +	if (ret > 0) {
> +		cflags = io_put_kbuf(req, ret, issue_flags);
> +		if (io_req_post_cqe(req, ret, cflags | IORING_CQE_F_MORE))
> +			return false;
> +	}
> +
> +	io_kbuf_recycle(req, issue_flags);
> +	if (ret < 0)
> +		req_set_fail(req);
> +	io_req_set_res(req, ret, cflags);
> +	return true;
> +}
> +EXPORT_SYMBOL_GPL(io_uring_mshot_cmd_post_cqe);

I posted the series I talked about, which wraps this in struct
io_br_sel:

https://lore.kernel.org/io-uring/20250820182601.442933-1-axboe@kernel.dk/T/#m1001f151693606f7a73b48ab939be5caf8d182c3

and with that in mind, I don't think the CQE posting is the place to do
it. Even before that series, I don't think it's the right spot. I'd keep
that as just posting the CQE.

In any case, on top of the current patches, you'd do something ala:

struct io_br_sel sel;

sel = io_uring_cmd_select_buffer(cmd, group, &len, issue_flags);

and sel.addr would be your buffer on return, and sel would hold the
buffer_list (or have it committed already and NULL) after the fact.

Then once you're ready to do so, call:

cflags = io_put_kbuf(req, ret, sel.buf_list, issue_flags);
io_uring_mshot_cmd_post_cqe(cmd, ret, cflags);

in that same context. As mentioned, I'd keep that outside of the CQE
posting, leaving that as:

bool io_uring_mshot_cmd_post_cqe(struct io_uring_cmd *ioucmd,
				 ssize_t ret, unsigned int cflags)
{
	struct io_kiocb *req = cmd_to_io_kiocb(ioucmd);

	if (!(ioucmd->flags & IORING_URING_CMD_MULTISHOT))
		return true;

	if (ret > 0) {
		if (io_req_post_cqe(req, ret, cflags | IORING_CQE_F_MORE))
			return false;
	}

	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, cflags);
	return true;
}

With that, it should be completely clear that the usage of
io_buffer_list is always sane, and it forces you to do the right thing
wrt how the buffer is retrieved and committed. This avoids any issues
with the current patch in terms of buffer list UAF, and makes it so that
future users of this helper is also forced to get it right rather than
need to understand all the minute details around that.

Hope that makes sense...

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH V4] io_uring: uring_cmd: add multishot support
  2025-08-20 19:17 ` Jens Axboe
@ 2025-08-21  2:24   ` Jens Axboe
  0 siblings, 0 replies; 3+ messages in thread
From: Jens Axboe @ 2025-08-21  2:24 UTC (permalink / raw)
  To: Ming Lei, io-uring, Pavel Begunkov; +Cc: Caleb Sander Mateos

On 8/20/25 1:17 PM, Jens Axboe wrote:
> On 8/20/25 9:40 AM, Ming Lei wrote:
>> Add UAPI flag IORING_URING_CMD_MULTISHOT for supporting multishot
>> uring_cmd operations with provided buffer.
>>
>> This enables drivers to post multiple completion events from a single
>> uring_cmd submission, which is useful for:
>>
>> - Notifying userspace of device events (e.g., interrupt handling)
>> - Supporting devices with multiple event sources (e.g., multi-queue devices)
>> - Avoiding the need for device poll() support when events originate
>>   from multiple sources device-wide
>>
>> The implementation adds two new APIs:
>> - io_uring_cmd_select_buffer(): selects a buffer from the provided
>>   buffer group for multishot uring_cmd
>> - io_uring_mshot_cmd_post_cqe(): posts a CQE after event data is
>>   pushed to the provided buffer
>>
>> Multishot uring_cmd must be used with buffer select (IOSQE_BUFFER_SELECT)
>> and is mutually exclusive with IORING_URING_CMD_FIXED for now.
>>
>> The ublk driver will be the first user of this functionality:
>>
>> 	https://github.com/ming1/linux/commits/ublk-devel/
>>
>> Signed-off-by: Ming Lei <ming.lei@redhat.com>
>> ---
>> V4:
>> - add io_do_buffer_select() check in io_uring_cmd_select_buffer(()
>> - comments that the two APIs should work together for committing buffer
>>   upfront(Jens)
>>
>> V3:
>> - enhance buffer select check(Jens)
>>
>> V2:
>> - Fixed static inline return type
>> - Updated UAPI comments: Clarified that IORING_URING_CMD_MULTISHOT must be used with buffer select
>> - Refactored validation checks: Moved the mutual exclusion checks into the individual flag validation
>> sections for better code organization
>> - Added missing req_set_fail(): Added the missing failure handling in io_uring_mshot_cmd_post_cqe
>> - Improved commit message: Rewrote the commit message to be clearer, more technical, and better explain
>> the use cases and API changes
>>
>>  include/linux/io_uring/cmd.h  | 28 ++++++++++++
>>  include/uapi/linux/io_uring.h |  6 ++-
>>  io_uring/opdef.c              |  1 +
>>  io_uring/uring_cmd.c          | 81 ++++++++++++++++++++++++++++++++++-
>>  4 files changed, 114 insertions(+), 2 deletions(-)
>>
>> diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h
>> index cfa6d0c0c322..72832757f8ef 100644
>> --- a/include/linux/io_uring/cmd.h
>> +++ b/include/linux/io_uring/cmd.h
>> @@ -70,6 +70,22 @@ void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd,
>>  /* Execute the request from a blocking context */
>>  void io_uring_cmd_issue_blocking(struct io_uring_cmd *ioucmd);
>>  
>> +/*
>> + * Select a buffer from the provided buffer group for multishot uring_cmd.
>> + * Returns the selected buffer address and size.
>> + */
>> +int io_uring_cmd_select_buffer(struct io_uring_cmd *ioucmd,
>> +			       unsigned buf_group,
>> +			       void **buf, size_t *len,
>> +			       unsigned int issue_flags);
>> +
>> +/*
>> + * Complete a multishot uring_cmd event. This will post a CQE to the completion
>> + * queue and update the provided buffer.
>> + */
>> +bool io_uring_mshot_cmd_post_cqe(struct io_uring_cmd *ioucmd,
>> +				 ssize_t ret, unsigned int issue_flags);
>> +
>>  #else
>>  static inline int
>>  io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
>> @@ -102,6 +118,18 @@ static inline void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd,
>>  static inline void io_uring_cmd_issue_blocking(struct io_uring_cmd *ioucmd)
>>  {
>>  }
>> +static inline int io_uring_cmd_select_buffer(struct io_uring_cmd *ioucmd,
>> +				unsigned buf_group,
>> +				void **buf, size_t *len,
>> +				unsigned int issue_flags)
>> +{
>> +	return -EOPNOTSUPP;
>> +}
>> +static inline bool io_uring_mshot_cmd_post_cqe(struct io_uring_cmd *ioucmd,
>> +				ssize_t ret, unsigned int issue_flags)
>> +{
>> +	return true;
>> +}
>>  #endif
>>  
>>  /*
>> diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
>> index 6957dc539d83..1e935f8901c5 100644
>> --- a/include/uapi/linux/io_uring.h
>> +++ b/include/uapi/linux/io_uring.h
>> @@ -298,9 +298,13 @@ enum io_uring_op {
>>   * sqe->uring_cmd_flags		top 8bits aren't available for userspace
>>   * IORING_URING_CMD_FIXED	use registered buffer; pass this flag
>>   *				along with setting sqe->buf_index.
>> + * IORING_URING_CMD_MULTISHOT	must be used with buffer select, like other
>> + *				multishot commands. Not compatible with
>> + *				IORING_URING_CMD_FIXED, for now.
>>   */
>>  #define IORING_URING_CMD_FIXED	(1U << 0)
>> -#define IORING_URING_CMD_MASK	IORING_URING_CMD_FIXED
>> +#define IORING_URING_CMD_MULTISHOT	(1U << 1)
>> +#define IORING_URING_CMD_MASK	(IORING_URING_CMD_FIXED | IORING_URING_CMD_MULTISHOT)
>>  
>>  
>>  /*
>> diff --git a/io_uring/opdef.c b/io_uring/opdef.c
>> index 9568785810d9..932319633eac 100644
>> --- a/io_uring/opdef.c
>> +++ b/io_uring/opdef.c
>> @@ -413,6 +413,7 @@ const struct io_issue_def io_issue_defs[] = {
>>  #endif
>>  	},
>>  	[IORING_OP_URING_CMD] = {
>> +		.buffer_select		= 1,
>>  		.needs_file		= 1,
>>  		.plug			= 1,
>>  		.iopoll			= 1,
>> diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
>> index 053bac89b6c0..babb6a4b3542 100644
>> --- a/io_uring/uring_cmd.c
>> +++ b/io_uring/uring_cmd.c
>> @@ -11,6 +11,7 @@
>>  #include "io_uring.h"
>>  #include "alloc_cache.h"
>>  #include "rsrc.h"
>> +#include "kbuf.h"
>>  #include "uring_cmd.h"
>>  #include "poll.h"
>>  
>> @@ -194,8 +195,21 @@ int io_uring_cmd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
>>  	if (ioucmd->flags & ~IORING_URING_CMD_MASK)
>>  		return -EINVAL;
>>  
>> -	if (ioucmd->flags & IORING_URING_CMD_FIXED)
>> +	if (ioucmd->flags & IORING_URING_CMD_FIXED) {
>> +		if (ioucmd->flags & IORING_URING_CMD_MULTISHOT)
>> +			return -EINVAL;
>>  		req->buf_index = READ_ONCE(sqe->buf_index);
>> +	}
>> +
>> +	if (ioucmd->flags & IORING_URING_CMD_MULTISHOT) {
>> +		if (ioucmd->flags & IORING_URING_CMD_FIXED)
>> +			return -EINVAL;
>> +		if (!(req->flags & REQ_F_BUFFER_SELECT))
>> +			return -EINVAL;
>> +	} else {
>> +		if (req->flags & REQ_F_BUFFER_SELECT)
>> +			return -EINVAL;
>> +	}
>>  
>>  	ioucmd->cmd_op = READ_ONCE(sqe->cmd_op);
>>  
>> @@ -251,6 +265,11 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags)
>>  	}
>>  
>>  	ret = file->f_op->uring_cmd(ioucmd, issue_flags);
>> +	if (ioucmd->flags & IORING_URING_CMD_MULTISHOT) {
>> +		if (ret >= 0)
>> +			return IOU_ISSUE_SKIP_COMPLETE;
>> +		io_kbuf_recycle(req, issue_flags);
>> +	}
>>  	if (ret == -EAGAIN) {
>>  		ioucmd->flags |= IORING_URING_CMD_REISSUE;
>>  		return ret;
>> @@ -333,3 +352,63 @@ bool io_uring_cmd_post_mshot_cqe32(struct io_uring_cmd *cmd,
>>  		return false;
>>  	return io_req_post_cqe32(req, cqe);
>>  }
>> +
>> +/*
>> + * Work with io_uring_mshot_cmd_post_cqe() together for committing the
>> + * provided buffer upfront
>> + *
>> + * The two must be paired, and both must be called within the same
>> + * uring_cmd submission context.
>> + */
>> +int io_uring_cmd_select_buffer(struct io_uring_cmd *ioucmd,
>> +			       unsigned buf_group,
>> +			       void __user **buf, size_t *len,
>> +			       unsigned int issue_flags)
>> +{
>> +	struct io_kiocb *req = cmd_to_io_kiocb(ioucmd);
>> +	void __user *ubuf;
>> +
>> +	if (!(ioucmd->flags & IORING_URING_CMD_MULTISHOT))
>> +		return -EINVAL;
>> +
>> +	if (WARN_ON_ONCE(!io_do_buffer_select(req)))
>> +		return -EINVAL;
>> +
>> +	ubuf = io_buffer_select(req, len, buf_group, issue_flags);
>> +	if (!ubuf)
>> +		return -ENOBUFS;
>> +
>> +	*buf = ubuf;
>> +	return 0;
>> +}
>> +EXPORT_SYMBOL_GPL(io_uring_cmd_select_buffer);
>> +
>> +/*
>> + * Return true if this multishot uring_cmd needs to be completed, otherwise
>> + * the event CQE is posted successfully.
>> + *
>> + * This function must be paired with io_uring_cmd_select_buffer(), and both
>> + * must be called within the same uring_cmd submission context.
>> + */
>> +bool io_uring_mshot_cmd_post_cqe(struct io_uring_cmd *ioucmd,
>> +				 ssize_t ret, unsigned int issue_flags)
>> +{
>> +	struct io_kiocb *req = cmd_to_io_kiocb(ioucmd);
>> +	unsigned int cflags = 0;
>> +
>> +	if (!(ioucmd->flags & IORING_URING_CMD_MULTISHOT))
>> +		return true;
>> +
>> +	if (ret > 0) {
>> +		cflags = io_put_kbuf(req, ret, issue_flags);
>> +		if (io_req_post_cqe(req, ret, cflags | IORING_CQE_F_MORE))
>> +			return false;
>> +	}
>> +
>> +	io_kbuf_recycle(req, issue_flags);
>> +	if (ret < 0)
>> +		req_set_fail(req);
>> +	io_req_set_res(req, ret, cflags);
>> +	return true;
>> +}
>> +EXPORT_SYMBOL_GPL(io_uring_mshot_cmd_post_cqe);
> 
> I posted the series I talked about, which wraps this in struct
> io_br_sel:
> 
> https://lore.kernel.org/io-uring/20250820182601.442933-1-axboe@kernel.dk/T/#m1001f151693606f7a73b48ab939be5caf8d182c3
> 
> and with that in mind, I don't think the CQE posting is the place to do
> it. Even before that series, I don't think it's the right spot. I'd keep
> that as just posting the CQE.
> 
> In any case, on top of the current patches, you'd do something ala:
> 
> struct io_br_sel sel;
> 
> sel = io_uring_cmd_select_buffer(cmd, group, &len, issue_flags);
> 
> and sel.addr would be your buffer on return, and sel would hold the
> buffer_list (or have it committed already and NULL) after the fact.
> sel.
> 
> Then once you're ready to do so, call:
> 
> cflags = io_put_kbuf(req, ret, sel.buf_list, issue_flags);
> io_uring_mshot_cmd_post_cqe(cmd, ret, cflags);
> 
> in that same context. As mentioned, I'd keep that outside of the CQE
> posting, leaving that as:
> 
> bool io_uring_mshot_cmd_post_cqe(struct io_uring_cmd *ioucmd,
> 				 ssize_t ret, unsigned int cflags)
> {
> 	struct io_kiocb *req = cmd_to_io_kiocb(ioucmd);
> 
> 	if (!(ioucmd->flags & IORING_URING_CMD_MULTISHOT))
> 		return true;
> 
> 	if (ret > 0) {
> 		if (io_req_post_cqe(req, ret, cflags | IORING_CQE_F_MORE))
> 			return false;
> 	}
> 
> 	if (ret < 0)
> 		req_set_fail(req);
> 	io_req_set_res(req, ret, cflags);
> 	return true;
> }
> 
> With that, it should be completely clear that the usage of
> io_buffer_list is always sane, and it forces you to do the right thing
> wrt how the buffer is retrieved and committed. This avoids any issues
> with the current patch in terms of buffer list UAF, and makes it so that
> future users of this helper is also forced to get it right rather than
> need to understand all the minute details around that.
> 
> Hope that makes sense...

Something like this incremental on top of the current tree, where
obviously the kbuf.h io_br_sel move to io_uring_types.h is a separate
patch.

That:

- Gets rid of the recycle in the issue path. You can't recycle there,
  have the caller that selected the buffer do the recycling there, if
  needed.

- Naming io_uring_cmd_buffer_select() rather than
  io_uring_cmd_select_buffer() to closer follow the internal API.

- io_uring_cmd_buffer_select() will now return with a NULL ->addr,
  caller should just -ENOBUFS at that point. More of a stylistic
  choice...


diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h
index 72832757f8ef..856d343b8e2a 100644
--- a/include/linux/io_uring/cmd.h
+++ b/include/linux/io_uring/cmd.h
@@ -74,17 +74,16 @@ void io_uring_cmd_issue_blocking(struct io_uring_cmd *ioucmd);
  * Select a buffer from the provided buffer group for multishot uring_cmd.
  * Returns the selected buffer address and size.
  */
-int io_uring_cmd_select_buffer(struct io_uring_cmd *ioucmd,
-			       unsigned buf_group,
-			       void **buf, size_t *len,
-			       unsigned int issue_flags);
+struct io_br_sel io_uring_cmd_buffer_select(struct io_uring_cmd *ioucmd,
+					    unsigned buf_group, size_t *len,
+					    unsigned int issue_flags);
 
 /*
  * Complete a multishot uring_cmd event. This will post a CQE to the completion
  * queue and update the provided buffer.
  */
 bool io_uring_mshot_cmd_post_cqe(struct io_uring_cmd *ioucmd,
-				 ssize_t ret, unsigned int issue_flags);
+				 struct io_br_sel *sel, unsigned int issue_flags);
 
 #else
 static inline int
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 1d33984611bc..472a1e1383a4 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -85,6 +85,24 @@ struct io_mapped_region {
 	unsigned		flags;
 };
 
+/*
+ * Return value from io_buffer_list selection, to avoid stashing it in
+ * struct io_kiocb. For legacy/classic provided buffers, keeping a reference
+ * across execution contexts are fine. But for ring provided buffers, the
+ * list may go away as soon as ->uring_lock is dropped. As the io_kiocb
+ * persists, it's better to just keep the buffer local for those cases.
+ */
+struct io_br_sel {
+	struct io_buffer_list *buf_list;
+	/*
+	 * Some selection parts return the user address, others return an error.
+	 */
+	union {
+		void __user *addr;
+		ssize_t val;
+	};
+};
+
 /*
  * Arbitrary limit, can be raised if need be
  */
diff --git a/io_uring/kbuf.h b/io_uring/kbuf.h
index 32f73adbe1e9..ada382ff38d7 100644
--- a/io_uring/kbuf.h
+++ b/io_uring/kbuf.h
@@ -62,24 +62,6 @@ struct buf_sel_arg {
 	unsigned short partial_map;
 };
 
-/*
- * Return value from io_buffer_list selection, to avoid stashing it in
- * struct io_kiocb. For legacy/classic provided buffers, keeping a reference
- * across execution contexts are fine. But for ring provided buffers, the
- * list may go away as soon as ->uring_lock is dropped. As the io_kiocb
- * persists, it's better to just keep the buffer local for those cases.
- */
-struct io_br_sel {
-	struct io_buffer_list *buf_list;
-	/*
-	 * Some selection parts return the user address, others return an error.
-	 */
-	union {
-		void __user *addr;
-		ssize_t val;
-	};
-};
-
 struct io_br_sel io_buffer_select(struct io_kiocb *req, size_t *len,
 				  unsigned buf_group, unsigned int issue_flags);
 int io_buffers_select(struct io_kiocb *req, struct buf_sel_arg *arg,
diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
index babb6a4b3542..688fd1fad2a8 100644
--- a/io_uring/uring_cmd.c
+++ b/io_uring/uring_cmd.c
@@ -268,7 +268,6 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags)
 	if (ioucmd->flags & IORING_URING_CMD_MULTISHOT) {
 		if (ret >= 0)
 			return IOU_ISSUE_SKIP_COMPLETE;
-		io_kbuf_recycle(req, issue_flags);
 	}
 	if (ret == -EAGAIN) {
 		ioucmd->flags |= IORING_URING_CMD_REISSUE;
@@ -360,28 +359,21 @@ bool io_uring_cmd_post_mshot_cqe32(struct io_uring_cmd *cmd,
  * The two must be paired, and both must be called within the same
  * uring_cmd submission context.
  */
-int io_uring_cmd_select_buffer(struct io_uring_cmd *ioucmd,
-			       unsigned buf_group,
-			       void __user **buf, size_t *len,
-			       unsigned int issue_flags)
+struct io_br_sel io_uring_cmd_buffer_select(struct io_uring_cmd *ioucmd,
+					    unsigned buf_group, size_t *len,
+					    unsigned int issue_flags)
 {
 	struct io_kiocb *req = cmd_to_io_kiocb(ioucmd);
-	void __user *ubuf;
 
 	if (!(ioucmd->flags & IORING_URING_CMD_MULTISHOT))
-		return -EINVAL;
+		return (struct io_br_sel) { .val = -EINVAL };
 
 	if (WARN_ON_ONCE(!io_do_buffer_select(req)))
-		return -EINVAL;
-
-	ubuf = io_buffer_select(req, len, buf_group, issue_flags);
-	if (!ubuf)
-		return -ENOBUFS;
+		return (struct io_br_sel) { .val = -EINVAL };
 
-	*buf = ubuf;
-	return 0;
+	return io_buffer_select(req, len, buf_group, issue_flags);
 }
-EXPORT_SYMBOL_GPL(io_uring_cmd_select_buffer);
+EXPORT_SYMBOL_GPL(io_uring_cmd_buffer_select);
 
 /*
  * Return true if this multishot uring_cmd needs to be completed, otherwise
@@ -391,7 +383,7 @@ EXPORT_SYMBOL_GPL(io_uring_cmd_select_buffer);
  * must be called within the same uring_cmd submission context.
  */
 bool io_uring_mshot_cmd_post_cqe(struct io_uring_cmd *ioucmd,
-				 ssize_t ret, unsigned int issue_flags)
+				 struct io_br_sel *sel, unsigned int issue_flags)
 {
 	struct io_kiocb *req = cmd_to_io_kiocb(ioucmd);
 	unsigned int cflags = 0;
@@ -399,16 +391,16 @@ bool io_uring_mshot_cmd_post_cqe(struct io_uring_cmd *ioucmd,
 	if (!(ioucmd->flags & IORING_URING_CMD_MULTISHOT))
 		return true;
 
-	if (ret > 0) {
-		cflags = io_put_kbuf(req, ret, issue_flags);
-		if (io_req_post_cqe(req, ret, cflags | IORING_CQE_F_MORE))
+	if (sel->val > 0) {
+		cflags = io_put_kbuf(req, sel->val, sel->buf_list);
+		if (io_req_post_cqe(req, sel->val, cflags | IORING_CQE_F_MORE))
 			return false;
 	}
 
-	io_kbuf_recycle(req, issue_flags);
-	if (ret < 0)
+	io_kbuf_recycle(req, sel->buf_list, issue_flags);
+	if (sel->val < 0)
 		req_set_fail(req);
-	io_req_set_res(req, ret, cflags);
+	io_req_set_res(req, sel->val, cflags);
 	return true;
 }
 EXPORT_SYMBOL_GPL(io_uring_mshot_cmd_post_cqe);

-- 
Jens Axboe

^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2025-08-21  2:24 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-08-20 15:40 [PATCH V4] io_uring: uring_cmd: add multishot support Ming Lei
2025-08-20 19:17 ` Jens Axboe
2025-08-21  2:24   ` Jens Axboe

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).