* [PATCH V5 1/6] ublk: convert to refcount_t
2025-05-20 4:54 [PATCH V5 0/6] ublk: support to register bvec buffer automatically Ming Lei
@ 2025-05-20 4:54 ` Ming Lei
2025-05-20 4:54 ` [PATCH V5 2/6] ublk: prepare for supporting to register request buffer automatically Ming Lei
` (5 subsequent siblings)
6 siblings, 0 replies; 13+ messages in thread
From: Ming Lei @ 2025-05-20 4:54 UTC (permalink / raw)
To: Jens Axboe, linux-block; +Cc: Uday Shankar, Caleb Sander Mateos, Ming Lei
Convert the per-request kref to refcount_t to prepare for registering bvec
buffers automatically, which needs the reference counter initialized to 2.
kref doesn't provide such an interface, hence the switch to refcount_t.
Reviewed-by: Caleb Sander Mateos <csander@purestorage.com>
Suggested-by: Caleb Sander Mateos <csander@purestorage.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
drivers/block/ublk_drv.c | 19 +++++--------------
1 file changed, 5 insertions(+), 14 deletions(-)
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index cb612151e9a1..ae2f47dc8224 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -79,7 +79,7 @@
UBLK_PARAM_TYPE_DMA_ALIGN | UBLK_PARAM_TYPE_SEGMENT)
struct ublk_rq_data {
- struct kref ref;
+ refcount_t ref;
};
struct ublk_uring_cmd_pdu {
@@ -488,7 +488,6 @@ static blk_status_t ublk_setup_iod_zoned(struct ublk_queue *ubq,
#endif
static inline void __ublk_complete_rq(struct request *req);
-static void ublk_complete_rq(struct kref *ref);
static dev_t ublk_chr_devt;
static const struct class ublk_chr_class = {
@@ -648,7 +647,7 @@ static inline void ublk_init_req_ref(const struct ublk_queue *ubq,
if (ublk_need_req_ref(ubq)) {
struct ublk_rq_data *data = blk_mq_rq_to_pdu(req);
- kref_init(&data->ref);
+ refcount_set(&data->ref, 1);
}
}
@@ -658,7 +657,7 @@ static inline bool ublk_get_req_ref(const struct ublk_queue *ubq,
if (ublk_need_req_ref(ubq)) {
struct ublk_rq_data *data = blk_mq_rq_to_pdu(req);
- return kref_get_unless_zero(&data->ref);
+ return refcount_inc_not_zero(&data->ref);
}
return true;
@@ -670,7 +669,8 @@ static inline void ublk_put_req_ref(const struct ublk_queue *ubq,
if (ublk_need_req_ref(ubq)) {
struct ublk_rq_data *data = blk_mq_rq_to_pdu(req);
- kref_put(&data->ref, ublk_complete_rq);
+ if (refcount_dec_and_test(&data->ref))
+ __ublk_complete_rq(req);
} else {
__ublk_complete_rq(req);
}
@@ -1122,15 +1122,6 @@ static inline void __ublk_complete_rq(struct request *req)
blk_mq_end_request(req, res);
}
-static void ublk_complete_rq(struct kref *ref)
-{
- struct ublk_rq_data *data = container_of(ref, struct ublk_rq_data,
- ref);
- struct request *req = blk_mq_rq_from_pdu(data);
-
- __ublk_complete_rq(req);
-}
-
static void ublk_complete_io_cmd(struct ublk_io *io, struct request *req,
int res, unsigned issue_flags)
{
--
2.47.0
^ permalink raw reply related [flat|nested] 13+ messages in thread
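A note on why kref could not simply be kept: kref_init() always starts the
counter at 1, while refcount_set() accepts an arbitrary initial value, which
the auto buffer registration patches below need for their two-owner model.
A minimal sketch of that model, using hypothetical demo_* names (only the
refcount_*() calls are real kernel API):

#include <linux/refcount.h>

struct demo_rq_data {
	refcount_t ref;
};

/* stand-in for __ublk_complete_rq() in the patch above */
static void demo_complete_request(struct demo_rq_data *data)
{
}

/* start with two owners: the request itself and the registered buffer */
static void demo_init_two_refs(struct demo_rq_data *data)
{
	refcount_set(&data->ref, 2);	/* kref_init() can only start at 1 */
}

/* each owner drops one reference; the last put completes the request */
static void demo_put_ref(struct demo_rq_data *data)
{
	if (refcount_dec_and_test(&data->ref))
		demo_complete_request(data);
}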
* [PATCH V5 2/6] ublk: prepare for supporting to register request buffer automatically
2025-05-20 4:54 [PATCH V5 0/6] ublk: support to register bvec buffer automatically Ming Lei
2025-05-20 4:54 ` [PATCH V5 1/6] ublk: convert to refcount_t Ming Lei
@ 2025-05-20 4:54 ` Ming Lei
2025-05-20 15:59 ` Jens Axboe
2025-05-20 17:40 ` Caleb Sander Mateos
2025-05-20 4:54 ` [PATCH V5 3/6] ublk: register buffer to local io_uring with provided buf index via UBLK_F_AUTO_BUF_REG Ming Lei
` (4 subsequent siblings)
6 siblings, 2 replies; 13+ messages in thread
From: Ming Lei @ 2025-05-20 4:54 UTC (permalink / raw)
To: Jens Axboe, linux-block; +Cc: Uday Shankar, Caleb Sander Mateos, Ming Lei
UBLK_F_SUPPORT_ZERO_COPY requires the ublk server to issue an explicit buffer
register/unregister uring_cmd for each IO. This is not only inefficient, it
also introduces a dependency between the buffer consumer and the buffer
register/unregister uring_cmds; see tools/testing/selftests/ublk/stripe.c,
in which backing file IO has to be issued one by one via IOSQE_IO_LINK.

Prepare for adding the feature UBLK_F_AUTO_BUF_REG to address this zero-copy
limitation:

- register the request buffer automatically in the ublk uring_cmd's io_uring
  context before delivering the io command to the ublk server

- unregister the request buffer automatically from the ublk uring_cmd's
  io_uring context when completing the request

- io_uring will unregister the buffer automatically when the ring is
  exiting, so we needn't worry about an unexpected exit

To use this feature, the ublk server has to create one sparse buffer table.
Reviewed-by: Caleb Sander Mateos <csander@purestorage.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
drivers/block/ublk_drv.c | 70 ++++++++++++++++++++++++++++++++++++----
1 file changed, 64 insertions(+), 6 deletions(-)
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index ae2f47dc8224..3e56a9d267fb 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -133,6 +133,14 @@ struct ublk_uring_cmd_pdu {
*/
#define UBLK_IO_FLAG_NEED_GET_DATA 0x08
+/*
+ * request buffer is registered automatically, so we have to unregister it
+ * before completing this request.
+ *
+ * io_uring will unregister buffer automatically for us during exiting.
+ */
+#define UBLK_IO_FLAG_AUTO_BUF_REG 0x10
+
/* atomic RW with ubq->cancel_lock */
#define UBLK_IO_FLAG_CANCELED 0x80000000
@@ -205,6 +213,7 @@ struct ublk_params_header {
__u32 types;
};
+static void ublk_io_release(void *priv);
static void ublk_stop_dev_unlocked(struct ublk_device *ub);
static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq);
static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub,
@@ -619,6 +628,11 @@ static inline bool ublk_support_zero_copy(const struct ublk_queue *ubq)
return ubq->flags & UBLK_F_SUPPORT_ZERO_COPY;
}
+static inline bool ublk_support_auto_buf_reg(const struct ublk_queue *ubq)
+{
+ return false;
+}
+
static inline bool ublk_support_user_copy(const struct ublk_queue *ubq)
{
return ubq->flags & UBLK_F_USER_COPY;
@@ -626,7 +640,8 @@ static inline bool ublk_support_user_copy(const struct ublk_queue *ubq)
static inline bool ublk_need_map_io(const struct ublk_queue *ubq)
{
- return !ublk_support_user_copy(ubq) && !ublk_support_zero_copy(ubq);
+ return !ublk_support_user_copy(ubq) && !ublk_support_zero_copy(ubq) &&
+ !ublk_support_auto_buf_reg(ubq);
}
static inline bool ublk_need_req_ref(const struct ublk_queue *ubq)
@@ -637,8 +652,13 @@ static inline bool ublk_need_req_ref(const struct ublk_queue *ubq)
*
* for zero copy, request buffer need to be registered to io_uring
* buffer table, so reference is needed
+ *
+ * For auto buffer register, ublk server still may issue
+ * UBLK_IO_COMMIT_AND_FETCH_REQ before one registered buffer is used up,
+ * so reference is required too.
*/
- return ublk_support_user_copy(ubq) || ublk_support_zero_copy(ubq);
+ return ublk_support_user_copy(ubq) || ublk_support_zero_copy(ubq) ||
+ ublk_support_auto_buf_reg(ubq);
}
static inline void ublk_init_req_ref(const struct ublk_queue *ubq,
@@ -1155,6 +1175,35 @@ static inline void __ublk_abort_rq(struct ublk_queue *ubq,
blk_mq_end_request(rq, BLK_STS_IOERR);
}
+static bool ublk_auto_buf_reg(struct request *req, struct ublk_io *io,
+ unsigned int issue_flags)
+{
+ struct ublk_rq_data *data = blk_mq_rq_to_pdu(req);
+ int ret;
+
+ ret = io_buffer_register_bvec(io->cmd, req, ublk_io_release, 0,
+ issue_flags);
+ if (ret) {
+ blk_mq_end_request(req, BLK_STS_IOERR);
+ return false;
+ }
+ /* one extra reference is dropped by ublk_io_release */
+ refcount_set(&data->ref, 2);
+ io->flags |= UBLK_IO_FLAG_AUTO_BUF_REG;
+ return true;
+}
+
+static bool ublk_prep_auto_buf_reg(struct ublk_queue *ubq,
+ struct request *req, struct ublk_io *io,
+ unsigned int issue_flags)
+{
+ if (ublk_support_auto_buf_reg(ubq) && ublk_rq_has_data(req))
+ return ublk_auto_buf_reg(req, io, issue_flags);
+
+ ublk_init_req_ref(ubq, req);
+ return true;
+}
+
static bool ublk_start_io(const struct ublk_queue *ubq, struct request *req,
struct ublk_io *io)
{
@@ -1180,7 +1229,6 @@ static bool ublk_start_io(const struct ublk_queue *ubq, struct request *req,
mapped_bytes >> 9;
}
- ublk_init_req_ref(ubq, req);
return true;
}
@@ -1226,7 +1274,8 @@ static void ublk_dispatch_req(struct ublk_queue *ubq,
if (!ublk_start_io(ubq, req, io))
return;
- ublk_complete_io_cmd(io, req, UBLK_IO_RES_OK, issue_flags);
+ if (ublk_prep_auto_buf_reg(ubq, req, io, issue_flags))
+ ublk_complete_io_cmd(io, req, UBLK_IO_RES_OK, issue_flags);
}
static void ublk_cmd_tw_cb(struct io_uring_cmd *cmd,
@@ -1994,7 +2043,8 @@ static int ublk_fetch(struct io_uring_cmd *cmd, struct ublk_queue *ubq,
static int ublk_commit_and_fetch(const struct ublk_queue *ubq,
struct ublk_io *io, struct io_uring_cmd *cmd,
- const struct ublksrv_io_cmd *ub_cmd)
+ const struct ublksrv_io_cmd *ub_cmd,
+ unsigned int issue_flags)
{
struct request *req = io->req;
@@ -2014,6 +2064,14 @@ static int ublk_commit_and_fetch(const struct ublk_queue *ubq,
return -EINVAL;
}
+ if (ublk_support_auto_buf_reg(ubq)) {
+ if (io->flags & UBLK_IO_FLAG_AUTO_BUF_REG) {
+ WARN_ON_ONCE(io_buffer_unregister_bvec(cmd, 0,
+ issue_flags));
+ io->flags &= ~UBLK_IO_FLAG_AUTO_BUF_REG;
+ }
+ }
+
ublk_fill_io_cmd(io, cmd, ub_cmd->addr);
/* now this cmd slot is owned by ublk driver */
@@ -2110,7 +2168,7 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
goto out;
break;
case UBLK_IO_COMMIT_AND_FETCH_REQ:
- ret = ublk_commit_and_fetch(ubq, io, cmd, ub_cmd);
+ ret = ublk_commit_and_fetch(ubq, io, cmd, ub_cmd, issue_flags);
if (ret)
goto out;
break;
--
2.47.0
^ permalink raw reply related [flat|nested] 13+ messages in thread
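To see why this removes the IOSQE_IO_LINK dependency called out above: with
plain UBLK_F_SUPPORT_ZERO_COPY the server has to chain register -> backing
IO -> unregister SQEs, while with auto registration only the backing IO
remains. A hedged userspace sketch of that remaining SQE, modeled on the
liburing-based selftest code added later in this series (`ring`,
`backing_fd`, `iod` and `tag` are illustrative names):

/* the driver registered the request buffer before delivering the command */
struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);

io_uring_prep_read_fixed(sqe, backing_fd, NULL /* registered bvec buffer */,
			 iod->nr_sectors << 9, iod->start_sector << 9,
			 tag /* fixed buffer index chosen at fetch time */);
sqe->user_data = tag;
io_uring_submit(&ring);

No unregister SQE and no link flag are needed; the buffer is unregistered
automatically when the ublk server commits the result.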
* Re: [PATCH V5 2/6] ublk: prepare for supporting to register request buffer automatically
2025-05-20 4:54 ` [PATCH V5 2/6] ublk: prepare for supporting to register request buffer automatically Ming Lei
@ 2025-05-20 15:59 ` Jens Axboe
2025-05-20 17:40 ` Caleb Sander Mateos
1 sibling, 0 replies; 13+ messages in thread
From: Jens Axboe @ 2025-05-20 15:59 UTC (permalink / raw)
To: Ming Lei, linux-block; +Cc: Uday Shankar, Caleb Sander Mateos
On 5/19/25 10:54 PM, Ming Lei wrote:
> @@ -2014,6 +2064,14 @@ static int ublk_commit_and_fetch(const struct ublk_queue *ubq,
> return -EINVAL;
> }
>
> + if (ublk_support_auto_buf_reg(ubq)) {
> + if (io->flags & UBLK_IO_FLAG_AUTO_BUF_REG) {
> + WARN_ON_ONCE(io_buffer_unregister_bvec(cmd, 0,
> + issue_flags));
> + io->flags &= ~UBLK_IO_FLAG_AUTO_BUF_REG;
> + }
> + }
> +
Debug or WARN_ON_ONCE() statements with side effects are generally not a
good idea, imho. It would be cleaner to do:
ret = io_buffer_unregister_bvec(cmd, 0, issue_flags);
WARN_ON_ONCE(ret);
and ditto for the next patch that then updates it.
But pretty minor, not worth a respin. This series looks ready to me at
this point.
--
Jens Axboe
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH V5 2/6] ublk: prepare for supporting to register request buffer automatically
2025-05-20 4:54 ` [PATCH V5 2/6] ublk: prepare for supporting to register request buffer automatically Ming Lei
2025-05-20 15:59 ` Jens Axboe
@ 2025-05-20 17:40 ` Caleb Sander Mateos
2025-05-21 1:42 ` Ming Lei
1 sibling, 1 reply; 13+ messages in thread
From: Caleb Sander Mateos @ 2025-05-20 17:40 UTC (permalink / raw)
To: Ming Lei; +Cc: Jens Axboe, linux-block, Uday Shankar
On Mon, May 19, 2025 at 9:55 PM Ming Lei <ming.lei@redhat.com> wrote:
>
> [...]
> @@ -2014,6 +2064,14 @@ static int ublk_commit_and_fetch(const struct ublk_queue *ubq,
> return -EINVAL;
> }
>
> + if (ublk_support_auto_buf_reg(ubq)) {
> + if (io->flags & UBLK_IO_FLAG_AUTO_BUF_REG) {
> + WARN_ON_ONCE(io_buffer_unregister_bvec(cmd, 0,
> + issue_flags));
Since the io_ring_ctx is determined from the io_uring_cmd, this only
works if the UBLK_IO_COMMIT_AND_FETCH_REQ is submitted to the same
io_uring as the previous UBLK_IO_(COMMIT_AND_)FETCH_REQ for the ublk
I/O. It would be good to document that. And I would probably drop the
WARN_ON_ONCE() here, since it can be triggered from userspace.
Otherwise,
Reviewed-by: Caleb Sander Mateos <csander@purestorage.com>
> + io->flags &= ~UBLK_IO_FLAG_AUTO_BUF_REG;
> + }
> + }
> [...]
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH V5 2/6] ublk: prepare for supporting to register request buffer automatically
2025-05-20 17:40 ` Caleb Sander Mateos
@ 2025-05-21 1:42 ` Ming Lei
0 siblings, 0 replies; 13+ messages in thread
From: Ming Lei @ 2025-05-21 1:42 UTC (permalink / raw)
To: Caleb Sander Mateos; +Cc: Jens Axboe, linux-block, Uday Shankar
On Tue, May 20, 2025 at 10:40:01AM -0700, Caleb Sander Mateos wrote:
> On Mon, May 19, 2025 at 9:55 PM Ming Lei <ming.lei@redhat.com> wrote:
> >
> > [...]
> > @@ -2014,6 +2064,14 @@ static int ublk_commit_and_fetch(const struct ublk_queue *ubq,
> > return -EINVAL;
> > }
> >
> > + if (ublk_support_auto_buf_reg(ubq)) {
> > + if (io->flags & UBLK_IO_FLAG_AUTO_BUF_REG) {
> > + WARN_ON_ONCE(io_buffer_unregister_bvec(cmd, 0,
> > + issue_flags));
>
> Since the io_ring_ctx is determined from the io_uring_cmd, this only
> works if the UBLK_IO_COMMIT_AND_FETCH_REQ is submitted to the same
> io_uring as the previous UBLK_IO_(COMMIT_AND_)FETCH_REQ for the ublk
> I/O. It would be good to document that. And I would probably drop the
> WARN_ON_ONCE() here, since it can be triggered from userspace.
>
> Otherwise,
> Reviewed-by: Caleb Sander Mateos <csander@purestorage.com>
Thanks for the review!
Yeah, I thought about this yesterday too while working on TASK_NEUTRAL and
IO_MIGRATION; I will send a fix later that tracks the context id.
Thanks,
Ming
^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH V5 3/6] ublk: register buffer to local io_uring with provided buf index via UBLK_F_AUTO_BUF_REG
2025-05-20 4:54 [PATCH V5 0/6] ublk: support to register bvec buffer automatically Ming Lei
2025-05-20 4:54 ` [PATCH V5 1/6] ublk: convert to refcount_t Ming Lei
2025-05-20 4:54 ` [PATCH V5 2/6] ublk: prepare for supporting to register request buffer automatically Ming Lei
@ 2025-05-20 4:54 ` Ming Lei
2025-05-20 17:40 ` Caleb Sander Mateos
2025-05-20 4:54 ` [PATCH V5 4/6] ublk: support UBLK_AUTO_BUF_REG_FALLBACK Ming Lei
` (3 subsequent siblings)
6 siblings, 1 reply; 13+ messages in thread
From: Ming Lei @ 2025-05-20 4:54 UTC (permalink / raw)
To: Jens Axboe, linux-block; +Cc: Uday Shankar, Caleb Sander Mateos, Ming Lei
Add UBLK_F_AUTO_BUF_REG to support registering the request buffer
automatically in the local io_uring context with a provided buffer index.

Add the UAPI structure `struct ublk_auto_buf_reg` for holding the user
parameters of automatic buffer registration; its reserved fields leave room
for future extension, such as adding an io_ring FD field for registering
the buffer in an external io_uring.

`struct ublk_auto_buf_reg` is populated from the ublk uring_cmd's sqe->addr,
and all existing ublk commands are data-less, so it is fine to reuse
sqe->addr for this purpose.
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
drivers/block/ublk_drv.c | 56 ++++++++++++++++++++++++++----
include/uapi/linux/ublk_cmd.h | 64 +++++++++++++++++++++++++++++++++++
2 files changed, 113 insertions(+), 7 deletions(-)
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index 3e56a9d267fb..1aabc655652b 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -66,7 +66,8 @@
| UBLK_F_USER_COPY \
| UBLK_F_ZONED \
| UBLK_F_USER_RECOVERY_FAIL_IO \
- | UBLK_F_UPDATE_SIZE)
+ | UBLK_F_UPDATE_SIZE \
+ | UBLK_F_AUTO_BUF_REG)
#define UBLK_F_ALL_RECOVERY_FLAGS (UBLK_F_USER_RECOVERY \
| UBLK_F_USER_RECOVERY_REISSUE \
@@ -80,6 +81,9 @@
struct ublk_rq_data {
refcount_t ref;
+
+ /* for auto-unregister buffer in case of UBLK_F_AUTO_BUF_REG */
+ u16 buf_index;
};
struct ublk_uring_cmd_pdu {
@@ -101,6 +105,9 @@ struct ublk_uring_cmd_pdu {
* setup in ublk uring_cmd handler
*/
struct ublk_queue *ubq;
+
+ struct ublk_auto_buf_reg buf;
+
u16 tag;
};
@@ -630,7 +637,7 @@ static inline bool ublk_support_zero_copy(const struct ublk_queue *ubq)
static inline bool ublk_support_auto_buf_reg(const struct ublk_queue *ubq)
{
- return false;
+ return ubq->flags & UBLK_F_AUTO_BUF_REG;
}
static inline bool ublk_support_user_copy(const struct ublk_queue *ubq)
@@ -1178,17 +1185,20 @@ static inline void __ublk_abort_rq(struct ublk_queue *ubq,
static bool ublk_auto_buf_reg(struct request *req, struct ublk_io *io,
unsigned int issue_flags)
{
+ struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(io->cmd);
struct ublk_rq_data *data = blk_mq_rq_to_pdu(req);
int ret;
- ret = io_buffer_register_bvec(io->cmd, req, ublk_io_release, 0,
- issue_flags);
+ ret = io_buffer_register_bvec(io->cmd, req, ublk_io_release,
+ pdu->buf.index, issue_flags);
if (ret) {
blk_mq_end_request(req, BLK_STS_IOERR);
return false;
}
/* one extra reference is dropped by ublk_io_release */
refcount_set(&data->ref, 2);
+ /* store buffer index in request payload */
+ data->buf_index = pdu->buf.index;
io->flags |= UBLK_IO_FLAG_AUTO_BUF_REG;
return true;
}
@@ -1952,6 +1962,18 @@ static inline void ublk_prep_cancel(struct io_uring_cmd *cmd,
io_uring_cmd_mark_cancelable(cmd, issue_flags);
}
+static inline int ublk_set_auto_buf_reg(struct io_uring_cmd *cmd)
+{
+ struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
+
+ pdu->buf = ublk_sqe_addr_to_auto_buf_reg(READ_ONCE(cmd->sqe->addr));
+
+ if (pdu->buf.reserved0 || pdu->buf.reserved1)
+ return -EINVAL;
+
+ return 0;
+}
+
static void ublk_io_release(void *priv)
{
struct request *rq = priv;
@@ -2034,6 +2056,12 @@ static int ublk_fetch(struct io_uring_cmd *cmd, struct ublk_queue *ubq,
goto out;
}
+ if (ublk_support_auto_buf_reg(ubq)) {
+ ret = ublk_set_auto_buf_reg(cmd);
+ if (ret)
+ return ret;
+ }
+
ublk_fill_io_cmd(io, cmd, buf_addr);
ublk_mark_io_ready(ub, ubq);
out:
@@ -2065,11 +2093,20 @@ static int ublk_commit_and_fetch(const struct ublk_queue *ubq,
}
if (ublk_support_auto_buf_reg(ubq)) {
+ int ret;
+
if (io->flags & UBLK_IO_FLAG_AUTO_BUF_REG) {
- WARN_ON_ONCE(io_buffer_unregister_bvec(cmd, 0,
+ struct ublk_rq_data *data = blk_mq_rq_to_pdu(req);
+
+ WARN_ON_ONCE(io_buffer_unregister_bvec(cmd,
+ data->buf_index,
issue_flags));
io->flags &= ~UBLK_IO_FLAG_AUTO_BUF_REG;
}
+
+ ret = ublk_set_auto_buf_reg(cmd);
+ if (ret)
+ return ret;
}
ublk_fill_io_cmd(io, cmd, ub_cmd->addr);
@@ -2791,8 +2828,11 @@ static int ublk_ctrl_add_dev(const struct ublksrv_ctrl_cmd *header)
* For USER_COPY, we depends on userspace to fill request
* buffer by pwrite() to ublk char device, which can't be
* used for unprivileged device
+ *
+ * Same with zero copy or auto buffer register.
*/
- if (info.flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY))
+ if (info.flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY |
+ UBLK_F_AUTO_BUF_REG))
return -EINVAL;
}
@@ -2850,7 +2890,8 @@ static int ublk_ctrl_add_dev(const struct ublksrv_ctrl_cmd *header)
UBLK_F_URING_CMD_COMP_IN_TASK;
/* GET_DATA isn't needed any more with USER_COPY or ZERO COPY */
- if (ub->dev_info.flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY))
+ if (ub->dev_info.flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY |
+ UBLK_F_AUTO_BUF_REG))
ub->dev_info.flags &= ~UBLK_F_NEED_GET_DATA;
/*
@@ -3377,6 +3418,7 @@ static int __init ublk_init(void)
BUILD_BUG_ON((u64)UBLKSRV_IO_BUF_OFFSET +
UBLKSRV_IO_BUF_TOTAL_SIZE < UBLKSRV_IO_BUF_OFFSET);
+ BUILD_BUG_ON(sizeof(struct ublk_auto_buf_reg) != 8);
init_waitqueue_head(&ublk_idr_wq);
diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h
index be5c6c6b16e0..f6f516b1223b 100644
--- a/include/uapi/linux/ublk_cmd.h
+++ b/include/uapi/linux/ublk_cmd.h
@@ -219,6 +219,29 @@
*/
#define UBLK_F_UPDATE_SIZE (1ULL << 10)
+/*
+ * request buffer is registered automatically to uring_cmd's io_uring
+ * context before delivering this io command to ublk server, meantime
+ * it is un-registered automatically when completing this io command.
+ *
+ * For using this feature:
+ *
+ * - ublk server has to create sparse buffer table
+ *
+ * - ublk server passes auto buf register data via uring_cmd's sqe->addr,
+ * `struct ublk_auto_buf_reg` is populated from sqe->addr, please see
+ * the definition of ublk_sqe_addr_to_auto_buf_reg()
+ *
+ * - pass buffer index from `ublk_auto_buf_reg.index`
+ *
+ * - all reserved fields in `ublk_auto_buf_reg` need to be zeroed
+ *
+ * This way avoids extra cost from two uring_cmd, but also simplifies backend
+ * implementation, such as, the dependency on IO_REGISTER_IO_BUF and
+ * IO_UNREGISTER_IO_BUF becomes not necessary.
+ */
+#define UBLK_F_AUTO_BUF_REG (1ULL << 11)
+
/* device state */
#define UBLK_S_DEV_DEAD 0
#define UBLK_S_DEV_LIVE 1
@@ -339,6 +362,47 @@ static inline __u32 ublksrv_get_flags(const struct ublksrv_io_desc *iod)
return iod->op_flags >> 8;
}
+struct ublk_auto_buf_reg {
+ /* index for registering the delivered request buffer */
+ __u16 index;
+ __u16 reserved0;
+
+ /*
+ * io_ring FD can be passed via the reserve field in future for
+ * supporting to register io buffer to external io_uring
+ */
+ __u32 reserved1;
+};
+
+/*
+ * For UBLK_F_AUTO_BUF_REG, auto buffer register data is carried via
+ * uring_cmd's sqe->addr:
+ *
+ * - bit0 ~ bit15: buffer index
+ * - bit24 ~ bit31: reserved0
+ * - bit32 ~ bit63: reserved1
+ */
+static inline struct ublk_auto_buf_reg ublk_sqe_addr_to_auto_buf_reg(
+ __u64 sqe_addr)
+{
+ struct ublk_auto_buf_reg reg = {
+ .index = sqe_addr & 0xffff,
+ .reserved0 = (sqe_addr >> 16) & 0xffff,
+ .reserved1 = sqe_addr >> 32,
+ };
+
+ return reg;
+}
+
+static inline __u64
+ublk_auto_buf_reg_to_sqe_addr(const struct ublk_auto_buf_reg *buf)
+{
+ __u64 addr = buf->index | (__u64)buf->reserved0 << 16 |
+ (__u64)buf->reserved1 << 32;
+
+ return addr;
+}
+
/* issued to ublk driver via /dev/ublkcN */
struct ublksrv_io_cmd {
__u16 q_id;
--
2.47.0
^ permalink raw reply related [flat|nested] 13+ messages in thread
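The sqe->addr encoding defined above is easiest to see with a worked value:
buffer index 7 with zeroed reserved fields encodes to the literal 0x7. A
minimal sketch built on the new UAPI helpers (patch 5 adds an equivalent
helper to the selftests); demo_set_sqe_buf_index() is a hypothetical name:

/* the low 16 bits of sqe->addr carry the index, the rest must be zero */
static void demo_set_sqe_buf_index(struct io_uring_sqe *sqe, __u16 buf_idx)
{
	struct ublk_auto_buf_reg reg = {
		.index = buf_idx,	/* e.g. 7 encodes to sqe->addr == 0x7 */
	};

	sqe->addr = ublk_auto_buf_reg_to_sqe_addr(&reg);
}

The driver side decodes this back with ublk_sqe_addr_to_auto_buf_reg() and
rejects the command with -EINVAL if either reserved field is non-zero.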
* Re: [PATCH V5 3/6] ublk: register buffer to local io_uring with provided buf index via UBLK_F_AUTO_BUF_REG
2025-05-20 4:54 ` [PATCH V5 3/6] ublk: register buffer to local io_uring with provided buf index via UBLK_F_AUTO_BUF_REG Ming Lei
@ 2025-05-20 17:40 ` Caleb Sander Mateos
0 siblings, 0 replies; 13+ messages in thread
From: Caleb Sander Mateos @ 2025-05-20 17:40 UTC (permalink / raw)
To: Ming Lei; +Cc: Jens Axboe, linux-block, Uday Shankar
On Mon, May 19, 2025 at 9:55 PM Ming Lei <ming.lei@redhat.com> wrote:
>
> [...]
> @@ -2034,6 +2056,12 @@ static int ublk_fetch(struct io_uring_cmd *cmd, struct ublk_queue *ubq,
> goto out;
> }
>
> + if (ublk_support_auto_buf_reg(ubq)) {
> + ret = ublk_set_auto_buf_reg(cmd);
> + if (ret)
> + return ret;
This should be goto out; to ensure the ub->mutex is released.
Otherwise, this looks good to me.
> [...]
> @@ -339,6 +362,47 @@ static inline __u32 ublksrv_get_flags(const struct ublksrv_io_desc *iod)
> return iod->op_flags >> 8;
> }
>
> +struct ublk_auto_buf_reg {
> + /* index for registering the delivered request buffer */
> + __u16 index;
> + __u16 reserved0;
nit: alignment
> +
> + /*
> + * io_ring FD can be passed via the reserve field in future for
> + * supporting to register io buffer to external io_uring
> + */
> + __u32 reserved1;
> +};
> +
> +/*
> + * For UBLK_F_AUTO_BUF_REG, auto buffer register data is carried via
> + * uring_cmd's sqe->addr:
> + *
> + * - bit0 ~ bit15: buffer index
> + * - bit24 ~ bit31: reserved0
"bit24" should be "bit16" here. It would change to "bit24" in the next
patch adding UBLK_AUTO_BUF_REG_FALLBACK.
> + * - bit32 ~ bit63: reserved1
> + */
> +static inline struct ublk_auto_buf_reg ublk_sqe_addr_to_auto_buf_reg(
> + __u64 sqe_addr)
> +{
> + struct ublk_auto_buf_reg reg = {
> + .index = sqe_addr & 0xffff,
> + .reserved0 = (sqe_addr >> 16) & 0xffff,
I think I suggested this before, but the masking with 0xffff is unnecessary.
Best,
Caleb
> [...]
^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH V5 4/6] ublk: support UBLK_AUTO_BUF_REG_FALLBACK
2025-05-20 4:54 [PATCH V5 0/6] ublk: support to register bvec buffer automatically Ming Lei
` (2 preceding siblings ...)
2025-05-20 4:54 ` [PATCH V5 3/6] ublk: register buffer to local io_uring with provided buf index via UBLK_F_AUTO_BUF_REG Ming Lei
@ 2025-05-20 4:54 ` Ming Lei
2025-05-20 17:40 ` Caleb Sander Mateos
2025-05-20 4:54 ` [PATCH V5 5/6] selftests: ublk: support UBLK_F_AUTO_BUF_REG Ming Lei
` (2 subsequent siblings)
6 siblings, 1 reply; 13+ messages in thread
From: Ming Lei @ 2025-05-20 4:54 UTC (permalink / raw)
To: Jens Axboe, linux-block; +Cc: Uday Shankar, Caleb Sander Mateos, Ming Lei
For UBLK_F_AUTO_BUF_REG, the buffer is registered in the uring_cmd's
io_uring context automatically with the provided buffer index. The user may
provide a wrong buffer index, or the specified buffer slot may already be
taken by the application.

Add UBLK_AUTO_BUF_REG_FALLBACK to support falling back when automatic
buffer registration fails: the uring_cmd is completed and the registration
failure is reported to the ublk server via the new UBLK_IO_F_NEED_REG_BUF
flag, so the ublk server can still register the buffer from userspace.

This provides a reliable way to support automatic buffer registration.
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
drivers/block/ublk_drv.c | 16 ++++++++++++++
include/uapi/linux/ublk_cmd.h | 39 ++++++++++++++++++++++++++++++++---
2 files changed, 52 insertions(+), 3 deletions(-)
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index 1aabc655652b..2474788ef263 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -1182,6 +1182,16 @@ static inline void __ublk_abort_rq(struct ublk_queue *ubq,
blk_mq_end_request(rq, BLK_STS_IOERR);
}
+static void ublk_auto_buf_reg_fallback(struct request *req, struct ublk_io *io)
+{
+ const struct ublk_queue *ubq = req->mq_hctx->driver_data;
+ struct ublksrv_io_desc *iod = ublk_get_iod(ubq, req->tag);
+ struct ublk_rq_data *data = blk_mq_rq_to_pdu(req);
+
+ iod->op_flags |= UBLK_IO_F_NEED_REG_BUF;
+ refcount_set(&data->ref, 1);
+}
+
static bool ublk_auto_buf_reg(struct request *req, struct ublk_io *io,
unsigned int issue_flags)
{
@@ -1192,6 +1202,10 @@ static bool ublk_auto_buf_reg(struct request *req, struct ublk_io *io,
ret = io_buffer_register_bvec(io->cmd, req, ublk_io_release,
pdu->buf.index, issue_flags);
if (ret) {
+ if (pdu->buf.flags & UBLK_AUTO_BUF_REG_FALLBACK) {
+ ublk_auto_buf_reg_fallback(req, io);
+ return true;
+ }
blk_mq_end_request(req, BLK_STS_IOERR);
return false;
}
@@ -1971,6 +1985,8 @@ static inline int ublk_set_auto_buf_reg(struct io_uring_cmd *cmd)
if (pdu->buf.reserved0 || pdu->buf.reserved1)
return -EINVAL;
+ if (pdu->buf.flags & ~UBLK_AUTO_BUF_REG_F_MASK)
+ return -EINVAL;
return 0;
}
diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h
index f6f516b1223b..c4b9942697fc 100644
--- a/include/uapi/linux/ublk_cmd.h
+++ b/include/uapi/linux/ublk_cmd.h
@@ -236,9 +236,16 @@
*
* - all reserved fields in `ublk_auto_buf_reg` need to be zeroed
*
+ * - pass flags from `ublk_auto_buf_reg.flags` if needed
+ *
* This way avoids extra cost from two uring_cmd, but also simplifies backend
* implementation, such as, the dependency on IO_REGISTER_IO_BUF and
* IO_UNREGISTER_IO_BUF becomes not necessary.
+ *
+ * If wrong data or flags are provided, both IO_FETCH_REQ and
+ * IO_COMMIT_AND_FETCH_REQ are failed, for the latter, the ublk IO request
+ * won't be completed until new IO_COMMIT_AND_FETCH_REQ command is issued
+ * successfully
*/
#define UBLK_F_AUTO_BUF_REG (1ULL << 11)
@@ -328,6 +335,17 @@ struct ublksrv_ctrl_dev_info {
#define UBLK_IO_F_FUA (1U << 13)
#define UBLK_IO_F_NOUNMAP (1U << 15)
#define UBLK_IO_F_SWAP (1U << 16)
+/*
+ * For UBLK_F_AUTO_BUF_REG & UBLK_AUTO_BUF_REG_FALLBACK only.
+ *
+ * This flag is set if auto buffer register is failed & ublk server passes
+ * UBLK_AUTO_BUF_REG_FALLBACK, and ublk server need to register buffer
+ * manually for handling the delivered IO command if this flag is observed
+ *
+ * ublk server has to check this flag if UBLK_AUTO_BUF_REG_FALLBACK is
+ * passed in.
+ */
+#define UBLK_IO_F_NEED_REG_BUF (1U << 17)
/*
* io cmd is described by this structure, and stored in share memory, indexed
@@ -362,10 +380,23 @@ static inline __u32 ublksrv_get_flags(const struct ublksrv_io_desc *iod)
return iod->op_flags >> 8;
}
+/*
+ * If this flag is set, fallback by completing the uring_cmd and setting
+ * `UBLK_IO_F_NEED_REG_BUF` in case of auto-buf-register failure;
+ * otherwise the client ublk request is failed silently
+ *
+ * If ublk server passes this flag, it has to check if UBLK_IO_F_NEED_REG_BUF
+ * is set in `ublksrv_io_desc.op_flags`. If UBLK_IO_F_NEED_REG_BUF is set,
+ * ublk server needs to register io buffer manually for handling IO command.
+ */
+#define UBLK_AUTO_BUF_REG_FALLBACK (1 << 0)
+#define UBLK_AUTO_BUF_REG_F_MASK UBLK_AUTO_BUF_REG_FALLBACK
+
struct ublk_auto_buf_reg {
/* index for registering the delivered request buffer */
__u16 index;
- __u16 reserved0;
+ __u8 flags;
+ __u8 reserved0;
/*
* io_ring FD can be passed via the reserve field in future for
@@ -379,6 +410,7 @@ struct ublk_auto_buf_reg {
* uring_cmd's sqe->addr:
*
* - bit0 ~ bit15: buffer index
+ * - bit16 ~ bit23: flags
* - bit24 ~ bit31: reserved0
* - bit32 ~ bit63: reserved1
*/
@@ -387,7 +419,8 @@ static inline struct ublk_auto_buf_reg ublk_sqe_addr_to_auto_buf_reg(
{
struct ublk_auto_buf_reg reg = {
.index = sqe_addr & 0xffff,
- .reserved0 = (sqe_addr >> 16) & 0xffff,
+ .flags = (sqe_addr >> 16) & 0xff,
+ .reserved0 = (sqe_addr >> 24) & 0xff,
.reserved1 = sqe_addr >> 32,
};
@@ -397,7 +430,7 @@ static inline struct ublk_auto_buf_reg ublk_sqe_addr_to_auto_buf_reg(
static inline __u64
ublk_auto_buf_reg_to_sqe_addr(const struct ublk_auto_buf_reg *buf)
{
- __u64 addr = buf->index | (__u64)buf->reserved0 << 16 |
+ __u64 addr = buf->index | (__u64)buf->flags << 16 | (__u64)buf->reserved0 << 24 |
(__u64)buf->reserved1 << 32;
return addr;
--
2.47.0
^ permalink raw reply related [flat|nested] 13+ messages in thread
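From the server side, opting in and handling the fallback looks roughly
like the sketch below (demo_register_buf_manually() is a hypothetical
wrapper around the pre-existing UBLK_U_IO_REGISTER_IO_BUF uring_cmd):

/* opt in: pass the fallback flag in ublk_auto_buf_reg.flags */
struct ublk_auto_buf_reg reg = {
	.index = tag,
	.flags = UBLK_AUTO_BUF_REG_FALLBACK,
};
sqe->addr = ublk_auto_buf_reg_to_sqe_addr(&reg);

/* later, when the io command for 'tag' is delivered and 'iod' is read */
if (iod->op_flags & UBLK_IO_F_NEED_REG_BUF) {
	/*
	 * auto registration failed (bad index, slot already in use, ...);
	 * register the request buffer manually before issuing backing IO
	 */
	demo_register_buf_manually(q, tag);
}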
* Re: [PATCH V5 4/6] ublk: support UBLK_AUTO_BUF_REG_FALLBACK
2025-05-20 4:54 ` [PATCH V5 4/6] ublk: support UBLK_AUTO_BUF_REG_FALLBACK Ming Lei
@ 2025-05-20 17:40 ` Caleb Sander Mateos
0 siblings, 0 replies; 13+ messages in thread
From: Caleb Sander Mateos @ 2025-05-20 17:40 UTC (permalink / raw)
To: Ming Lei; +Cc: Jens Axboe, linux-block, Uday Shankar
On Mon, May 19, 2025 at 9:55 PM Ming Lei <ming.lei@redhat.com> wrote:
>
> [...]
> @@ -1182,6 +1182,16 @@ static inline void __ublk_abort_rq(struct ublk_queue *ubq,
> blk_mq_end_request(rq, BLK_STS_IOERR);
> }
>
> +static void ublk_auto_buf_reg_fallback(struct request *req, struct ublk_io *io)
struct ublk_io *io appears unused.
> [...]
> diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h
> index f6f516b1223b..c4b9942697fc 100644
> --- a/include/uapi/linux/ublk_cmd.h
> +++ b/include/uapi/linux/ublk_cmd.h
> @@ -236,9 +236,16 @@
> *
> * - all reserved fields in `ublk_auto_buf_reg` need to be zeroed
> *
> + * - pass flags from `ublk_auto_buf_reg.flags` if needed
> + *
> * This way avoids extra cost from two uring_cmd, but also simplifies backend
> * implementation, such as, the dependency on IO_REGISTER_IO_BUF and
> * IO_UNREGISTER_IO_BUF becomes not necessary.
> + *
> + * If wrong data or flags are provided, both IO_FETCH_REQ and
> + * IO_COMMIT_AND_FETCH_REQ are failed, for the latter, the ublk IO request
> + * won't be completed until new IO_COMMIT_AND_FETCH_REQ command is issued
> + * successfully
This part of the comment belongs in the previous commit adding
UBLK_F_AUTO_BUF_REG, right? It doesn't seem specific to
UBLK_AUTO_BUF_REG_FALLBACK.
Otherwise,
Reviewed-by: Caleb Sander Mateos <csander@purestorage.com>
> [...]
^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH V5 5/6] selftests: ublk: support UBLK_F_AUTO_BUF_REG
2025-05-20 4:54 [PATCH V5 0/6] ublk: support to register bvec buffer automatically Ming Lei
` (3 preceding siblings ...)
2025-05-20 4:54 ` [PATCH V5 4/6] ublk: support UBLK_AUTO_BUF_REG_FALLBACK Ming Lei
@ 2025-05-20 4:54 ` Ming Lei
2025-05-20 4:54 ` [PATCH V5 6/6] selftests: ublk: add test for covering UBLK_AUTO_BUF_REG_FALLBACK Ming Lei
2025-05-20 16:25 ` [PATCH V5 0/6] ublk: support to register bvec buffer automatically Jens Axboe
6 siblings, 0 replies; 13+ messages in thread
From: Ming Lei @ 2025-05-20 4:54 UTC (permalink / raw)
To: Jens Axboe, linux-block; +Cc: Uday Shankar, Caleb Sander Mateos, Ming Lei
Enable UBLK_F_AUTO_BUF_REG support in the ublk utility via the `--auto_zc`
argument; meanwhile, support this feature in the null, loop and stripe
target code.

Add functional test generic_08 to cover the basic UBLK_F_AUTO_BUF_REG
feature, and cover UBLK_F_AUTO_BUF_REG in the stress_03, stress_04 and
stress_05 tests too.

'fio/t/io_uring -p0 /dev/ublkb0' shows that F_AUTO_BUF_REG can improve
IOPS by 50% compared with F_SUPPORT_ZERO_COPY in my test VM.
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
tools/testing/selftests/ublk/Makefile | 2 +
tools/testing/selftests/ublk/file_backed.c | 12 ++++--
tools/testing/selftests/ublk/kublk.c | 30 +++++++++++--
tools/testing/selftests/ublk/kublk.h | 7 +++
tools/testing/selftests/ublk/null.c | 43 ++++++++++++++-----
tools/testing/selftests/ublk/stripe.c | 21 ++++-----
.../testing/selftests/ublk/test_generic_08.sh | 32 ++++++++++++++
.../testing/selftests/ublk/test_stress_03.sh | 6 +++
.../testing/selftests/ublk/test_stress_04.sh | 6 +++
.../testing/selftests/ublk/test_stress_05.sh | 8 ++++
10 files changed, 138 insertions(+), 29 deletions(-)
create mode 100755 tools/testing/selftests/ublk/test_generic_08.sh
diff --git a/tools/testing/selftests/ublk/Makefile b/tools/testing/selftests/ublk/Makefile
index e2e7b1e52a06..14d574ac142c 100644
--- a/tools/testing/selftests/ublk/Makefile
+++ b/tools/testing/selftests/ublk/Makefile
@@ -15,6 +15,8 @@ TEST_PROGS += test_generic_05.sh
TEST_PROGS += test_generic_06.sh
TEST_PROGS += test_generic_07.sh
+TEST_PROGS += test_generic_08.sh
+
TEST_PROGS += test_null_01.sh
TEST_PROGS += test_null_02.sh
TEST_PROGS += test_loop_01.sh
diff --git a/tools/testing/selftests/ublk/file_backed.c b/tools/testing/selftests/ublk/file_backed.c
index 6f34eabfae97..9dc00b217a66 100644
--- a/tools/testing/selftests/ublk/file_backed.c
+++ b/tools/testing/selftests/ublk/file_backed.c
@@ -29,19 +29,23 @@ static int loop_queue_flush_io(struct ublk_queue *q, const struct ublksrv_io_des
static int loop_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_desc *iod, int tag)
{
unsigned ublk_op = ublksrv_get_op(iod);
- int zc = ublk_queue_use_zc(q);
- enum io_uring_op op = ublk_to_uring_op(iod, zc);
+ unsigned zc = ublk_queue_use_zc(q);
+ unsigned auto_zc = ublk_queue_use_auto_zc(q);
+ enum io_uring_op op = ublk_to_uring_op(iod, zc | auto_zc);
struct io_uring_sqe *sqe[3];
+ void *addr = (zc | auto_zc) ? NULL : (void *)iod->addr;
- if (!zc) {
+ if (!zc || auto_zc) {
ublk_queue_alloc_sqes(q, sqe, 1);
if (!sqe[0])
return -ENOMEM;
io_uring_prep_rw(op, sqe[0], 1 /*fds[1]*/,
- (void *)iod->addr,
+ addr,
iod->nr_sectors << 9,
iod->start_sector << 9);
+ if (auto_zc)
+ sqe[0]->buf_index = tag;
io_uring_sqe_set_flags(sqe[0], IOSQE_FIXED_FILE);
/* bit63 marks us as tgt io */
sqe[0]->user_data = build_user_data(tag, ublk_op, 0, 1);
diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c
index 842b40736a9b..5c02e78c5fcd 100644
--- a/tools/testing/selftests/ublk/kublk.c
+++ b/tools/testing/selftests/ublk/kublk.c
@@ -420,9 +420,12 @@ static int ublk_queue_init(struct ublk_queue *q)
q->cmd_inflight = 0;
q->tid = gettid();
- if (dev->dev_info.flags & UBLK_F_SUPPORT_ZERO_COPY) {
+ if (dev->dev_info.flags & (UBLK_F_SUPPORT_ZERO_COPY | UBLK_F_AUTO_BUF_REG)) {
q->state |= UBLKSRV_NO_BUF;
- q->state |= UBLKSRV_ZC;
+ if (dev->dev_info.flags & UBLK_F_SUPPORT_ZERO_COPY)
+ q->state |= UBLKSRV_ZC;
+ if (dev->dev_info.flags & UBLK_F_AUTO_BUF_REG)
+ q->state |= UBLKSRV_AUTO_BUF_REG;
}
cmd_buf_size = ublk_queue_cmd_buf_sz(q);
@@ -461,7 +464,7 @@ static int ublk_queue_init(struct ublk_queue *q)
goto fail;
}
- if (dev->dev_info.flags & UBLK_F_SUPPORT_ZERO_COPY) {
+ if (dev->dev_info.flags & (UBLK_F_SUPPORT_ZERO_COPY | UBLK_F_AUTO_BUF_REG)) {
ret = io_uring_register_buffers_sparse(&q->ring, q->q_depth);
if (ret) {
ublk_err("ublk dev %d queue %d register spare buffers failed %d",
@@ -525,6 +528,18 @@ static void ublk_dev_unprep(struct ublk_dev *dev)
close(dev->fds[0]);
}
+static void ublk_set_auto_buf_reg(struct io_uring_sqe *sqe,
+ unsigned short buf_idx,
+ unsigned char flags)
+{
+ struct ublk_auto_buf_reg buf = {
+ .index = buf_idx,
+ .flags = flags,
+ };
+
+ sqe->addr = ublk_auto_buf_reg_to_sqe_addr(&buf);
+}
+
int ublk_queue_io_cmd(struct ublk_queue *q, struct ublk_io *io, unsigned tag)
{
struct ublksrv_io_cmd *cmd;
@@ -579,6 +594,9 @@ int ublk_queue_io_cmd(struct ublk_queue *q, struct ublk_io *io, unsigned tag)
else
cmd->addr = 0;
+ if (q->state & UBLKSRV_AUTO_BUF_REG)
+ ublk_set_auto_buf_reg(sqe[0], tag, 0);
+
user_data = build_user_data(tag, _IOC_NR(cmd_op), 0, 0);
io_uring_sqe_set_data64(sqe[0], user_data);
@@ -1206,6 +1224,7 @@ static int cmd_dev_get_features(void)
[const_ilog2(UBLK_F_USER_COPY)] = "USER_COPY",
[const_ilog2(UBLK_F_ZONED)] = "ZONED",
[const_ilog2(UBLK_F_USER_RECOVERY_FAIL_IO)] = "RECOVERY_FAIL_IO",
+ [const_ilog2(UBLK_F_AUTO_BUF_REG)] = "AUTO_BUF_REG",
};
struct ublk_dev *dev;
__u64 features = 0;
@@ -1245,7 +1264,7 @@ static void __cmd_create_help(char *exe, bool recovery)
printf("%s %s -t [null|loop|stripe|fault_inject] [-q nr_queues] [-d depth] [-n dev_id]\n",
exe, recovery ? "recover" : "add");
- printf("\t[--foreground] [--quiet] [-z] [--debug_mask mask] [-r 0|1 ] [-g]\n");
+ printf("\t[--foreground] [--quiet] [-z] [--auto_zc] [--debug_mask mask] [-r 0|1 ] [-g]\n");
printf("\t[-e 0|1 ] [-i 0|1]\n");
printf("\t[target options] [backfile1] [backfile2] ...\n");
printf("\tdefault: nr_queues=2(max 32), depth=128(max 1024), dev_id=-1(auto allocation)\n");
@@ -1300,6 +1319,7 @@ int main(int argc, char *argv[])
{ "recovery_fail_io", 1, NULL, 'e'},
{ "recovery_reissue", 1, NULL, 'i'},
{ "get_data", 1, NULL, 'g'},
+ { "auto_zc", 0, NULL, 0},
{ 0, 0, 0, 0 }
};
const struct ublk_tgt_ops *ops = NULL;
@@ -1368,6 +1388,8 @@ int main(int argc, char *argv[])
ublk_dbg_mask = 0;
if (!strcmp(longopts[option_idx].name, "foreground"))
ctx.fg = 1;
+ if (!strcmp(longopts[option_idx].name, "auto_zc"))
+ ctx.flags |= UBLK_F_AUTO_BUF_REG;
break;
case '?':
/*
diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h
index 81fb5864ab72..ebbfad9e70aa 100644
--- a/tools/testing/selftests/ublk/kublk.h
+++ b/tools/testing/selftests/ublk/kublk.h
@@ -115,6 +115,7 @@ struct ublk_io {
#define UBLKSRV_NEED_COMMIT_RQ_COMP (1UL << 1)
#define UBLKSRV_IO_FREE (1UL << 2)
#define UBLKSRV_NEED_GET_DATA (1UL << 3)
+#define UBLKSRV_NEED_REG_BUF (1UL << 4)
unsigned short flags;
unsigned short refs; /* used by target code only */
@@ -168,6 +169,7 @@ struct ublk_queue {
#define UBLKSRV_QUEUE_IDLE (1U << 1)
#define UBLKSRV_NO_BUF (1U << 2)
#define UBLKSRV_ZC (1U << 3)
+#define UBLKSRV_AUTO_BUF_REG (1U << 4)
unsigned state;
pid_t tid;
pthread_t thread;
@@ -387,6 +389,11 @@ static inline int ublk_queue_use_zc(const struct ublk_queue *q)
return q->state & UBLKSRV_ZC;
}
+static inline int ublk_queue_use_auto_zc(const struct ublk_queue *q)
+{
+ return q->state & UBLKSRV_AUTO_BUF_REG;
+}
+
extern const struct ublk_tgt_ops null_tgt_ops;
extern const struct ublk_tgt_ops loop_tgt_ops;
extern const struct ublk_tgt_ops stripe_tgt_ops;
diff --git a/tools/testing/selftests/ublk/null.c b/tools/testing/selftests/ublk/null.c
index 91fec3690d4b..1362dd422c6e 100644
--- a/tools/testing/selftests/ublk/null.c
+++ b/tools/testing/selftests/ublk/null.c
@@ -42,10 +42,22 @@ static int ublk_null_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev)
return 0;
}
+static void __setup_nop_io(int tag, const struct ublksrv_io_desc *iod,
+ struct io_uring_sqe *sqe)
+{
+ unsigned ublk_op = ublksrv_get_op(iod);
+
+ io_uring_prep_nop(sqe);
+ sqe->buf_index = tag;
+ sqe->flags |= IOSQE_FIXED_FILE;
+ sqe->rw_flags = IORING_NOP_FIXED_BUFFER | IORING_NOP_INJECT_RESULT;
+ sqe->len = iod->nr_sectors << 9; /* injected result */
+ sqe->user_data = build_user_data(tag, ublk_op, 0, 1);
+}
+
static int null_queue_zc_io(struct ublk_queue *q, int tag)
{
const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
- unsigned ublk_op = ublksrv_get_op(iod);
struct io_uring_sqe *sqe[3];
ublk_queue_alloc_sqes(q, sqe, 3);
@@ -55,12 +67,8 @@ static int null_queue_zc_io(struct ublk_queue *q, int tag)
ublk_cmd_op_nr(sqe[0]->cmd_op), 0, 1);
sqe[0]->flags |= IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_HARDLINK;
- io_uring_prep_nop(sqe[1]);
- sqe[1]->buf_index = tag;
- sqe[1]->flags |= IOSQE_FIXED_FILE | IOSQE_IO_HARDLINK;
- sqe[1]->rw_flags = IORING_NOP_FIXED_BUFFER | IORING_NOP_INJECT_RESULT;
- sqe[1]->len = iod->nr_sectors << 9; /* injected result */
- sqe[1]->user_data = build_user_data(tag, ublk_op, 0, 1);
+ __setup_nop_io(tag, iod, sqe[1]);
+ sqe[1]->flags |= IOSQE_IO_HARDLINK;
io_uring_prep_buf_unregister(sqe[2], 0, tag, q->q_id, tag);
sqe[2]->user_data = build_user_data(tag, ublk_cmd_op_nr(sqe[2]->cmd_op), 0, 1);
@@ -69,6 +77,16 @@ static int null_queue_zc_io(struct ublk_queue *q, int tag)
return 2;
}
+static int null_queue_auto_zc_io(struct ublk_queue *q, int tag)
+{
+ const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
+ struct io_uring_sqe *sqe[1];
+
+ ublk_queue_alloc_sqes(q, sqe, 1);
+ __setup_nop_io(tag, iod, sqe[0]);
+ return 1;
+}
+
static void ublk_null_io_done(struct ublk_queue *q, int tag,
const struct io_uring_cqe *cqe)
{
@@ -94,15 +112,18 @@ static void ublk_null_io_done(struct ublk_queue *q, int tag,
static int ublk_null_queue_io(struct ublk_queue *q, int tag)
{
const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
- int zc = ublk_queue_use_zc(q);
+ unsigned auto_zc = ublk_queue_use_auto_zc(q);
+ unsigned zc = ublk_queue_use_zc(q);
int queued;
- if (!zc) {
+ if (auto_zc)
+ queued = null_queue_auto_zc_io(q, tag);
+ else if (zc)
+ queued = null_queue_zc_io(q, tag);
+ else {
ublk_complete_io(q, tag, iod->nr_sectors << 9);
return 0;
}
-
- queued = null_queue_zc_io(q, tag);
ublk_queued_tgt_io(q, tag, queued);
return 0;
}
diff --git a/tools/testing/selftests/ublk/stripe.c b/tools/testing/selftests/ublk/stripe.c
index 5dbd6392d83d..8fd8faeb5e76 100644
--- a/tools/testing/selftests/ublk/stripe.c
+++ b/tools/testing/selftests/ublk/stripe.c
@@ -70,7 +70,7 @@ static void free_stripe_array(struct stripe_array *s)
}
static void calculate_stripe_array(const struct stripe_conf *conf,
- const struct ublksrv_io_desc *iod, struct stripe_array *s)
+ const struct ublksrv_io_desc *iod, struct stripe_array *s, void *base)
{
const unsigned shift = conf->shift - 9;
const unsigned chunk_sects = 1 << shift;
@@ -102,7 +102,7 @@ static void calculate_stripe_array(const struct stripe_conf *conf,
}
assert(this->nr_vec < this->cap);
- this->vec[this->nr_vec].iov_base = (void *)(iod->addr + done);
+ this->vec[this->nr_vec].iov_base = (void *)(base + done);
this->vec[this->nr_vec++].iov_len = nr_sects << 9;
start += nr_sects;
@@ -126,15 +126,17 @@ static inline enum io_uring_op stripe_to_uring_op(
static int stripe_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_desc *iod, int tag)
{
const struct stripe_conf *conf = get_chunk_shift(q);
- int zc = !!(ublk_queue_use_zc(q) != 0);
- enum io_uring_op op = stripe_to_uring_op(iod, zc);
+ unsigned auto_zc = (ublk_queue_use_auto_zc(q) != 0);
+ unsigned zc = (ublk_queue_use_zc(q) != 0);
+ enum io_uring_op op = stripe_to_uring_op(iod, zc | auto_zc);
struct io_uring_sqe *sqe[NR_STRIPE];
struct stripe_array *s = alloc_stripe_array(conf, iod);
struct ublk_io *io = ublk_get_io(q, tag);
int i, extra = zc ? 2 : 0;
+ void *base = (zc | auto_zc) ? NULL : (void *)iod->addr;
io->private_data = s;
- calculate_stripe_array(conf, iod, s);
+ calculate_stripe_array(conf, iod, s, base);
ublk_queue_alloc_sqes(q, sqe, s->nr + extra);
@@ -153,12 +155,11 @@ static int stripe_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_
(void *)t->vec,
t->nr_vec,
t->start << 9);
- if (zc) {
+ io_uring_sqe_set_flags(sqe[i], IOSQE_FIXED_FILE);
+ if (auto_zc || zc) {
sqe[i]->buf_index = tag;
- io_uring_sqe_set_flags(sqe[i],
- IOSQE_FIXED_FILE | IOSQE_IO_HARDLINK);
- } else {
- io_uring_sqe_set_flags(sqe[i], IOSQE_FIXED_FILE);
+ if (zc)
+ sqe[i]->flags |= IOSQE_IO_HARDLINK;
}
/* bit63 marks us as tgt io */
sqe[i]->user_data = build_user_data(tag, ublksrv_get_op(iod), i - zc, 1);
diff --git a/tools/testing/selftests/ublk/test_generic_08.sh b/tools/testing/selftests/ublk/test_generic_08.sh
new file mode 100755
index 000000000000..b222f3a77e12
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_generic_08.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+
+TID="generic_08"
+ERR_CODE=0
+
+if ! _have_feature "AUTO_BUF_REG"; then
+ exit "$UBLK_SKIP_CODE"
+fi
+
+_prep_test "generic" "test UBLK_F_AUTO_BUF_REG"
+
+_create_backfile 0 256M
+_create_backfile 1 256M
+
+dev_id=$(_add_ublk_dev -t loop -q 2 --auto_zc "${UBLK_BACKFILES[0]}")
+_check_add_dev $TID $?
+
+if ! _mkfs_mount_test /dev/ublkb"${dev_id}"; then
+ _cleanup_test "generic"
+ _show_result $TID 255
+fi
+
+dev_id=$(_add_ublk_dev -t stripe --auto_zc "${UBLK_BACKFILES[0]}" "${UBLK_BACKFILES[1]}")
+_check_add_dev $TID $?
+_mkfs_mount_test /dev/ublkb"${dev_id}"
+ERR_CODE=$?
+
+_cleanup_test "generic"
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_stress_03.sh b/tools/testing/selftests/ublk/test_stress_03.sh
index e0854f71d35b..b5a5520dcae6 100755
--- a/tools/testing/selftests/ublk/test_stress_03.sh
+++ b/tools/testing/selftests/ublk/test_stress_03.sh
@@ -32,6 +32,12 @@ _create_backfile 2 128M
ublk_io_and_remove 8G -t null -q 4 -z &
ublk_io_and_remove 256M -t loop -q 4 -z "${UBLK_BACKFILES[0]}" &
ublk_io_and_remove 256M -t stripe -q 4 -z "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+
+if _have_feature "AUTO_BUF_REG"; then
+ ublk_io_and_remove 8G -t null -q 4 --auto_zc &
+ ublk_io_and_remove 256M -t loop -q 4 --auto_zc "${UBLK_BACKFILES[0]}" &
+ ublk_io_and_remove 256M -t stripe -q 4 --auto_zc "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+fi
wait
_cleanup_test "stress"
diff --git a/tools/testing/selftests/ublk/test_stress_04.sh b/tools/testing/selftests/ublk/test_stress_04.sh
index 1798a98387e8..5b49a8025002 100755
--- a/tools/testing/selftests/ublk/test_stress_04.sh
+++ b/tools/testing/selftests/ublk/test_stress_04.sh
@@ -31,6 +31,12 @@ _create_backfile 2 128M
ublk_io_and_kill_daemon 8G -t null -q 4 -z &
ublk_io_and_kill_daemon 256M -t loop -q 4 -z "${UBLK_BACKFILES[0]}" &
ublk_io_and_kill_daemon 256M -t stripe -q 4 -z "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+
+if _have_feature "AUTO_BUF_REG"; then
+ ublk_io_and_kill_daemon 8G -t null -q 4 --auto_zc &
+ ublk_io_and_kill_daemon 256M -t loop -q 4 --auto_zc "${UBLK_BACKFILES[0]}" &
+ ublk_io_and_kill_daemon 256M -t stripe -q 4 --auto_zc "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+fi
wait
_cleanup_test "stress"
diff --git a/tools/testing/selftests/ublk/test_stress_05.sh b/tools/testing/selftests/ublk/test_stress_05.sh
index 88601b48f1cd..6f758f6070a5 100755
--- a/tools/testing/selftests/ublk/test_stress_05.sh
+++ b/tools/testing/selftests/ublk/test_stress_05.sh
@@ -60,5 +60,13 @@ if _have_feature "ZERO_COPY"; then
done
fi
+if _have_feature "AUTO_BUF_REG"; then
+ for reissue in $(seq 0 1); do
+ ublk_io_and_remove 8G -t null -q 4 -g --auto_zc -r 1 -i "$reissue" &
+ ublk_io_and_remove 256M -t loop -q 4 -g --auto_zc -r 1 -i "$reissue" "${UBLK_BACKFILES[1]}" &
+ wait
+ done
+fi
+
_cleanup_test "stress"
_show_result $TID $ERR_CODE
--
2.47.0
^ permalink raw reply related [flat|nested] 13+ messages in thread
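[To make the --auto_zc submission path above concrete, here is a condensed, self-contained sketch of what the loop target's read/write handling boils down to with UBLK_F_AUTO_BUF_REG. It reuses helper names from the quoted kublk code (ublk_to_uring_op, ublk_queue_alloc_sqes, build_user_data) and is illustrative rather than the complete file_backed.c logic.]
/*
 * Sketch only, assuming the kublk helpers quoted in this patch. With
 * UBLK_F_AUTO_BUF_REG the kernel has already registered the request's
 * bvec buffer in the sparse table at slot 'tag', so the target issues
 * a fixed-buffer read/write (addr == NULL, buf_index == tag) instead
 * of addressing iod->addr directly.
 */
static int loop_queue_auto_zc_rw(struct ublk_queue *q,
				 const struct ublksrv_io_desc *iod, int tag)
{
	enum io_uring_op op = ublk_to_uring_op(iod, 1 /* fixed buffer */);
	struct io_uring_sqe *sqe[1];

	ublk_queue_alloc_sqes(q, sqe, 1);
	if (!sqe[0])
		return -ENOMEM;

	io_uring_prep_rw(op, sqe[0], 1 /* fds[1] */, NULL,
			 iod->nr_sectors << 9, iod->start_sector << 9);
	sqe[0]->buf_index = tag;	/* slot auto-registered by the kernel */
	io_uring_sqe_set_flags(sqe[0], IOSQE_FIXED_FILE);
	/* bit63 marks us as tgt io */
	sqe[0]->user_data = build_user_data(tag, ublksrv_get_op(iod), 0, 1);
	return 1;
}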
* [PATCH V5 6/6] selftests: ublk: add test for covering UBLK_AUTO_BUF_REG_FALLBACK
2025-05-20 4:54 [PATCH V5 0/6] ublk: support to register bvec buffer automatically Ming Lei
` (4 preceding siblings ...)
2025-05-20 4:54 ` [PATCH V5 5/6] selftests: ublk: support UBLK_F_AUTO_BUF_REG Ming Lei
@ 2025-05-20 4:54 ` Ming Lei
2025-05-20 16:25 ` [PATCH V5 0/6] ublk: support to register bvec buffer automatically Jens Axboe
6 siblings, 0 replies; 13+ messages in thread
From: Ming Lei @ 2025-05-20 4:54 UTC (permalink / raw)
To: Jens Axboe, linux-block; +Cc: Uday Shankar, Caleb Sander Mateos, Ming Lei
Add a test covering UBLK_AUTO_BUF_REG_FALLBACK:
- pass '--auto_zc_fallback' to the null target, which requires both
F_AUTO_BUF_REG and F_SUPPORT_ZERO_COPY for handling UBLK_AUTO_BUF_REG_FALLBACK
- add a ->buf_index() method that returns an invalid buffer index to trigger
UBLK_AUTO_BUF_REG_FALLBACK
- add generic_09 to run the test
- add --auto_zc_fallback coverage in stress_03/stress_04/stress_05
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
tools/testing/selftests/ublk/Makefile | 1 +
tools/testing/selftests/ublk/fault_inject.c | 5 ++
tools/testing/selftests/ublk/file_backed.c | 5 ++
tools/testing/selftests/ublk/kublk.c | 49 ++++++++++++++-----
tools/testing/selftests/ublk/kublk.h | 11 +++++
tools/testing/selftests/ublk/null.c | 14 +++++-
tools/testing/selftests/ublk/stripe.c | 5 ++
.../testing/selftests/ublk/test_generic_09.sh | 28 +++++++++++
.../testing/selftests/ublk/test_stress_03.sh | 1 +
.../testing/selftests/ublk/test_stress_04.sh | 1 +
.../testing/selftests/ublk/test_stress_05.sh | 1 +
11 files changed, 108 insertions(+), 13 deletions(-)
create mode 100755 tools/testing/selftests/ublk/test_generic_09.sh
diff --git a/tools/testing/selftests/ublk/Makefile b/tools/testing/selftests/ublk/Makefile
index 14d574ac142c..2a2ef0cb54bc 100644
--- a/tools/testing/selftests/ublk/Makefile
+++ b/tools/testing/selftests/ublk/Makefile
@@ -16,6 +16,7 @@ TEST_PROGS += test_generic_06.sh
TEST_PROGS += test_generic_07.sh
TEST_PROGS += test_generic_08.sh
+TEST_PROGS += test_generic_09.sh
TEST_PROGS += test_null_01.sh
TEST_PROGS += test_null_02.sh
diff --git a/tools/testing/selftests/ublk/fault_inject.c b/tools/testing/selftests/ublk/fault_inject.c
index 94a8e729ba4c..5421774d7867 100644
--- a/tools/testing/selftests/ublk/fault_inject.c
+++ b/tools/testing/selftests/ublk/fault_inject.c
@@ -16,6 +16,11 @@ static int ublk_fault_inject_tgt_init(const struct dev_ctx *ctx,
const struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
unsigned long dev_size = 250UL << 30;
+ if (ctx->auto_zc_fallback) {
+ ublk_err("%s: not support auto_zc_fallback\n", __func__);
+ return -EINVAL;
+ }
+
dev->tgt.dev_size = dev_size;
dev->tgt.params = (struct ublk_params) {
.types = UBLK_PARAM_TYPE_BASIC,
diff --git a/tools/testing/selftests/ublk/file_backed.c b/tools/testing/selftests/ublk/file_backed.c
index 9dc00b217a66..509842df9bee 100644
--- a/tools/testing/selftests/ublk/file_backed.c
+++ b/tools/testing/selftests/ublk/file_backed.c
@@ -149,6 +149,11 @@ static int ublk_loop_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev)
},
};
+ if (ctx->auto_zc_fallback) {
+ ublk_err("%s: not support auto_zc_fallback\n", __func__);
+ return -EINVAL;
+ }
+
ret = backing_file_tgt_init(dev);
if (ret)
return ret;
diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c
index 5c02e78c5fcd..c429a20ab51e 100644
--- a/tools/testing/selftests/ublk/kublk.c
+++ b/tools/testing/selftests/ublk/kublk.c
@@ -405,7 +405,7 @@ static void ublk_queue_deinit(struct ublk_queue *q)
free(q->ios[i].buf_addr);
}
-static int ublk_queue_init(struct ublk_queue *q)
+static int ublk_queue_init(struct ublk_queue *q, unsigned extra_flags)
{
struct ublk_dev *dev = q->dev;
int depth = dev->dev_info.queue_depth;
@@ -427,6 +427,7 @@ static int ublk_queue_init(struct ublk_queue *q)
if (dev->dev_info.flags & UBLK_F_AUTO_BUF_REG)
q->state |= UBLKSRV_AUTO_BUF_REG;
}
+ q->state |= extra_flags;
cmd_buf_size = ublk_queue_cmd_buf_sz(q);
off = UBLKSRV_CMD_BUF_OFFSET + q->q_id * ublk_queue_max_cmd_buf_sz();
@@ -528,14 +529,19 @@ static void ublk_dev_unprep(struct ublk_dev *dev)
close(dev->fds[0]);
}
-static void ublk_set_auto_buf_reg(struct io_uring_sqe *sqe,
- unsigned short buf_idx,
- unsigned char flags)
+static void ublk_set_auto_buf_reg(const struct ublk_queue *q,
+ struct io_uring_sqe *sqe,
+ unsigned short tag)
{
- struct ublk_auto_buf_reg buf = {
- .index = buf_idx,
- .flags = flags,
- };
+ struct ublk_auto_buf_reg buf = {};
+
+ if (q->tgt_ops->buf_index)
+ buf.index = q->tgt_ops->buf_index(q, tag);
+ else
+ buf.index = tag;
+
+ if (q->state & UBLKSRV_AUTO_BUF_REG_FALLBACK)
+ buf.flags = UBLK_AUTO_BUF_REG_FALLBACK;
sqe->addr = ublk_auto_buf_reg_to_sqe_addr(&buf);
}
@@ -595,7 +601,7 @@ int ublk_queue_io_cmd(struct ublk_queue *q, struct ublk_io *io, unsigned tag)
cmd->addr = 0;
if (q->state & UBLKSRV_AUTO_BUF_REG)
- ublk_set_auto_buf_reg(sqe[0], tag, 0);
+ ublk_set_auto_buf_reg(q, sqe[0], tag);
user_data = build_user_data(tag, _IOC_NR(cmd_op), 0, 0);
io_uring_sqe_set_data64(sqe[0], user_data);
@@ -747,6 +753,7 @@ struct ublk_queue_info {
struct ublk_queue *q;
sem_t *queue_sem;
cpu_set_t *affinity;
+ unsigned char auto_zc_fallback;
};
static void *ublk_io_handler_fn(void *data)
@@ -754,9 +761,13 @@ static void *ublk_io_handler_fn(void *data)
struct ublk_queue_info *info = data;
struct ublk_queue *q = info->q;
int dev_id = q->dev->dev_info.dev_id;
+ unsigned extra_flags = 0;
int ret;
- ret = ublk_queue_init(q);
+ if (info->auto_zc_fallback)
+ extra_flags = UBLKSRV_AUTO_BUF_REG_FALLBACK;
+
+ ret = ublk_queue_init(q, extra_flags);
if (ret) {
ublk_err("ublk dev %d queue %d init queue failed\n",
dev_id, q->q_id);
@@ -849,6 +860,7 @@ static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev)
qinfo[i].q = &dev->q[i];
qinfo[i].queue_sem = &queue_sem;
qinfo[i].affinity = &affinity_buf[i];
+ qinfo[i].auto_zc_fallback = ctx->auto_zc_fallback;
pthread_create(&dev->q[i].thread, NULL,
ublk_io_handler_fn,
&qinfo[i]);
@@ -1264,7 +1276,7 @@ static void __cmd_create_help(char *exe, bool recovery)
printf("%s %s -t [null|loop|stripe|fault_inject] [-q nr_queues] [-d depth] [-n dev_id]\n",
exe, recovery ? "recover" : "add");
- printf("\t[--foreground] [--quiet] [-z] [--auto_zc] [--debug_mask mask] [-r 0|1 ] [-g]\n");
+ printf("\t[--foreground] [--quiet] [-z] [--auto_zc] [--auto_zc_fallback] [--debug_mask mask] [-r 0|1 ] [-g]\n");
printf("\t[-e 0|1 ] [-i 0|1]\n");
printf("\t[target options] [backfile1] [backfile2] ...\n");
printf("\tdefault: nr_queues=2(max 32), depth=128(max 1024), dev_id=-1(auto allocation)\n");
@@ -1319,7 +1331,8 @@ int main(int argc, char *argv[])
{ "recovery_fail_io", 1, NULL, 'e'},
{ "recovery_reissue", 1, NULL, 'i'},
{ "get_data", 1, NULL, 'g'},
- { "auto_zc", 0, NULL, 0},
+ { "auto_zc", 0, NULL, 0 },
+ { "auto_zc_fallback", 0, NULL, 0 },
{ 0, 0, 0, 0 }
};
const struct ublk_tgt_ops *ops = NULL;
@@ -1390,6 +1403,8 @@ int main(int argc, char *argv[])
ctx.fg = 1;
if (!strcmp(longopts[option_idx].name, "auto_zc"))
ctx.flags |= UBLK_F_AUTO_BUF_REG;
+ if (!strcmp(longopts[option_idx].name, "auto_zc_fallback"))
+ ctx.auto_zc_fallback = 1;
break;
case '?':
/*
@@ -1413,6 +1428,16 @@ int main(int argc, char *argv[])
}
}
+ /* auto_zc_fallback depends on F_AUTO_BUF_REG & F_SUPPORT_ZERO_COPY */
+ if (ctx.auto_zc_fallback &&
+ !((ctx.flags & UBLK_F_AUTO_BUF_REG) &&
+ (ctx.flags & UBLK_F_SUPPORT_ZERO_COPY))) {
+ ublk_err("%s: auto_zc_fallback is set but neither "
+ "F_AUTO_BUF_REG nor F_SUPPORT_ZERO_COPY is enabled\n",
+ __func__);
+ return -EINVAL;
+ }
+
i = optind;
while (i < argc && ctx.nr_files < MAX_BACK_FILES) {
ctx.files[ctx.nr_files++] = argv[i++];
diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h
index ebbfad9e70aa..9af930e951a3 100644
--- a/tools/testing/selftests/ublk/kublk.h
+++ b/tools/testing/selftests/ublk/kublk.h
@@ -84,6 +84,7 @@ struct dev_ctx {
unsigned int all:1;
unsigned int fg:1;
unsigned int recovery:1;
+ unsigned int auto_zc_fallback:1;
int _evtfd;
int _shmid;
@@ -141,6 +142,9 @@ struct ublk_tgt_ops {
*/
void (*parse_cmd_line)(struct dev_ctx *ctx, int argc, char *argv[]);
void (*usage)(const struct ublk_tgt_ops *ops);
+
+ /* return buffer index for UBLK_F_AUTO_BUF_REG */
+ unsigned short (*buf_index)(const struct ublk_queue *, int tag);
};
struct ublk_tgt {
@@ -170,6 +174,7 @@ struct ublk_queue {
#define UBLKSRV_NO_BUF (1U << 2)
#define UBLKSRV_ZC (1U << 3)
#define UBLKSRV_AUTO_BUF_REG (1U << 4)
+#define UBLKSRV_AUTO_BUF_REG_FALLBACK (1U << 5)
unsigned state;
pid_t tid;
pthread_t thread;
@@ -205,6 +210,12 @@ struct ublk_dev {
extern unsigned int ublk_dbg_mask;
extern int ublk_queue_io_cmd(struct ublk_queue *q, struct ublk_io *io, unsigned tag);
+
+static inline int ublk_io_auto_zc_fallback(const struct ublksrv_io_desc *iod)
+{
+ return !!(iod->op_flags & UBLK_IO_F_NEED_REG_BUF);
+}
+
static inline int is_target_io(__u64 user_data)
{
return (user_data & (1ULL << 63)) != 0;
diff --git a/tools/testing/selftests/ublk/null.c b/tools/testing/selftests/ublk/null.c
index 1362dd422c6e..44aca31cf2b0 100644
--- a/tools/testing/selftests/ublk/null.c
+++ b/tools/testing/selftests/ublk/null.c
@@ -116,7 +116,7 @@ static int ublk_null_queue_io(struct ublk_queue *q, int tag)
unsigned zc = ublk_queue_use_zc(q);
int queued;
- if (auto_zc)
+ if (auto_zc && !ublk_io_auto_zc_fallback(iod))
queued = null_queue_auto_zc_io(q, tag);
else if (zc)
queued = null_queue_zc_io(q, tag);
@@ -128,9 +128,21 @@ static int ublk_null_queue_io(struct ublk_queue *q, int tag)
return 0;
}
+/*
+ * Return an invalid buffer index to trigger auto buffer registration
+ * failure, so that the UBLK_IO_F_NEED_REG_BUF fallback handling is covered.
+ */
+static unsigned short ublk_null_buf_index(const struct ublk_queue *q, int tag)
+{
+ if (q->state & UBLKSRV_AUTO_BUF_REG_FALLBACK)
+ return (unsigned short)-1;
+ return tag;
+}
+
const struct ublk_tgt_ops null_tgt_ops = {
.name = "null",
.init_tgt = ublk_null_tgt_init,
.queue_io = ublk_null_queue_io,
.tgt_io_done = ublk_null_io_done,
+ .buf_index = ublk_null_buf_index,
};
diff --git a/tools/testing/selftests/ublk/stripe.c b/tools/testing/selftests/ublk/stripe.c
index 8fd8faeb5e76..404a143bf3d6 100644
--- a/tools/testing/selftests/ublk/stripe.c
+++ b/tools/testing/selftests/ublk/stripe.c
@@ -288,6 +288,11 @@ static int ublk_stripe_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev)
loff_t bytes = 0;
int ret, i, mul = 1;
+ if (ctx->auto_zc_fallback) {
+ ublk_err("%s: not support auto_zc_fallback\n", __func__);
+ return -EINVAL;
+ }
+
if ((chunk_size & (chunk_size - 1)) || !chunk_size) {
ublk_err("invalid chunk size %u\n", chunk_size);
return -EINVAL;
diff --git a/tools/testing/selftests/ublk/test_generic_09.sh b/tools/testing/selftests/ublk/test_generic_09.sh
new file mode 100755
index 000000000000..bb6f77ca5522
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_generic_09.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+
+TID="generic_09"
+ERR_CODE=0
+
+if ! _have_feature "AUTO_BUF_REG"; then
+ exit "$UBLK_SKIP_CODE"
+fi
+
+if ! _have_program fio; then
+ exit "$UBLK_SKIP_CODE"
+fi
+
+_prep_test "null" "basic IO test"
+
+dev_id=$(_add_ublk_dev -t null -z --auto_zc --auto_zc_fallback)
+_check_add_dev $TID $?
+
+# run fio over the created null device
+fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio --rw=readwrite --iodepth=32 --size=256M > /dev/null 2>&1
+ERR_CODE=$?
+
+_cleanup_test "null"
+
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_stress_03.sh b/tools/testing/selftests/ublk/test_stress_03.sh
index b5a5520dcae6..7d728ce50774 100755
--- a/tools/testing/selftests/ublk/test_stress_03.sh
+++ b/tools/testing/selftests/ublk/test_stress_03.sh
@@ -37,6 +37,7 @@ if _have_feature "AUTO_BUF_REG"; then
ublk_io_and_remove 8G -t null -q 4 --auto_zc &
ublk_io_and_remove 256M -t loop -q 4 --auto_zc "${UBLK_BACKFILES[0]}" &
ublk_io_and_remove 256M -t stripe -q 4 --auto_zc "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+ ublk_io_and_remove 8G -t null -q 4 -z --auto_zc --auto_zc_fallback &
fi
wait
diff --git a/tools/testing/selftests/ublk/test_stress_04.sh b/tools/testing/selftests/ublk/test_stress_04.sh
index 5b49a8025002..9bcfa64ea1f0 100755
--- a/tools/testing/selftests/ublk/test_stress_04.sh
+++ b/tools/testing/selftests/ublk/test_stress_04.sh
@@ -36,6 +36,7 @@ if _have_feature "AUTO_BUF_REG"; then
ublk_io_and_kill_daemon 8G -t null -q 4 --auto_zc &
ublk_io_and_kill_daemon 256M -t loop -q 4 --auto_zc "${UBLK_BACKFILES[0]}" &
ublk_io_and_kill_daemon 256M -t stripe -q 4 --auto_zc "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+ ublk_io_and_kill_daemon 8G -t null -q 4 -z --auto_zc --auto_zc_fallback &
fi
wait
diff --git a/tools/testing/selftests/ublk/test_stress_05.sh b/tools/testing/selftests/ublk/test_stress_05.sh
index 6f758f6070a5..bcfc904cefc6 100755
--- a/tools/testing/selftests/ublk/test_stress_05.sh
+++ b/tools/testing/selftests/ublk/test_stress_05.sh
@@ -64,6 +64,7 @@ if _have_feature "AUTO_BUF_REG"; then
for reissue in $(seq 0 1); do
ublk_io_and_remove 8G -t null -q 4 -g --auto_zc -r 1 -i "$reissue" &
ublk_io_and_remove 256M -t loop -q 4 -g --auto_zc -r 1 -i "$reissue" "${UBLK_BACKFILES[1]}" &
+ ublk_io_and_remove 8G -t null -q 4 -g -z --auto_zc --auto_zc_fallback -r 1 -i "$reissue" &
wait
done
fi
--
2.47.0
^ permalink raw reply related [flat|nested] 13+ messages in thread
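[From the server side, the fallback handling added in this patch reduces to a simple dispatch on the iod flag. A rough sketch based on the quoted kublk.h/null.c hunks follows; queue_auto_zc_io() and queue_manual_zc_io() are hypothetical names standing in for the target's real handlers (e.g. null_queue_auto_zc_io() and null_queue_zc_io() in the quoted null.c).]
/* hypothetical target hooks, standing in for the handlers in null.c */
static int queue_auto_zc_io(struct ublk_queue *q, int tag);
static int queue_manual_zc_io(struct ublk_queue *q, int tag);

static int queue_io_with_fallback(struct ublk_queue *q, int tag)
{
	const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);

	/*
	 * If auto registration failed and UBLK_AUTO_BUF_REG_FALLBACK was
	 * requested, the IO is still delivered, but with
	 * UBLK_IO_F_NEED_REG_BUF set in iod->op_flags: the server must
	 * register the buffer itself (the classic zero-copy path).
	 */
	if (ublk_io_auto_zc_fallback(iod))
		return queue_manual_zc_io(q, tag);

	/* otherwise the buffer is already registered at slot 'tag' */
	return queue_auto_zc_io(q, tag);
}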
* Re: [PATCH V5 0/6] ublk: support to register bvec buffer automatically
2025-05-20 4:54 [PATCH V5 0/6] ublk: support to register bvec buffer automatically Ming Lei
` (5 preceding siblings ...)
2025-05-20 4:54 ` [PATCH V5 6/6] selftests: ublk: add test for covering UBLK_AUTO_BUF_REG_FALLBACK Ming Lei
@ 2025-05-20 16:25 ` Jens Axboe
6 siblings, 0 replies; 13+ messages in thread
From: Jens Axboe @ 2025-05-20 16:25 UTC (permalink / raw)
To: linux-block, Ming Lei; +Cc: Uday Shankar, Caleb Sander Mateos
On Tue, 20 May 2025 12:54:30 +0800, Ming Lei wrote:
> This patch tries to address limitation from in-tree ublk zero copy:
>
> - one IO needs two extra uring_cmds to register/unregister the bvec buffer,
> which is not efficient
>
> - it introduces a dependency on the two buffer register/unregister uring_cmds,
> so the buffer consumer has to be linked with them, which is hard to use and
> less efficient
>
> [...]
Applied, thanks!
[1/6] ublk: convert to refcount_t
commit: b1c3b4695a4d5f7a3bf43f1f7eb774bfa79b86a7
[2/6] ublk: prepare for supporting to register request buffer automatically
commit: 9e6b4756b35426801cae84a5a1d7a3e0d480d197
[3/6] ublk: register buffer to local io_uring with provided buf index via UBLK_F_AUTO_BUF_REG
commit: 99c1e4eb6a3fbbec27c7c70e5fce15cdc1422893
[4/6] ublk: support UBLK_AUTO_BUF_REG_FALLBACK
commit: 53f427e7944b4f288866cc4a69835086e0958c6a
[5/6] selftests: ublk: support UBLK_F_AUTO_BUF_REG
commit: 8ccebc19ee3db03284504d340e5cd2de4141350b
[6/6] selftests: ublk: add test for covering UBLK_AUTO_BUF_REG_FALLBACK
commit: 6f1a182a8750a679698366b021969a57ec589313
Best regards,
--
Jens Axboe
^ permalink raw reply [flat|nested] 13+ messages in thread