From: Zhu Yanjun <yanjun.zhu@linux.dev>
To: Ming Lei <ming.lei@redhat.com>, Jens Axboe <axboe@kernel.dk>,
linux-block@vger.kernel.org
Cc: Uday Shankar <ushankar@purestorage.com>,
Caleb Sander Mateos <csander@purestorage.com>,
Keith Busch <kbusch@kernel.org>
Subject: Re: [PATCH V2 4/6] ublk: register buffer to local io_uring with provided buf index via UBLK_F_AUTO_BUF_REG
Date: Wed, 7 May 2025 14:28:12 +0200 [thread overview]
Message-ID: <ce2ef739-75e3-48b3-857f-dddcbcc3d9db@linux.dev> (raw)
In-Reply-To: <20250507043859.2978132-5-ming.lei@redhat.com>
在 2025/5/7 6:38, Ming Lei 写道:
> Add UBLK_F_AUTO_BUF_REG for supporting to register buffer automatically
> to local io_uring context with provided buffer index.
>
> Add UAPI structure `struct ublk_auto_buf_reg` for holding user parameter
> to register request buffer automatically, one 'flags' field is defined, and
> there is still 32bit available for future extension, such as, adding one
> io_ring FD field for registering buffer to external io_uring.
>
> Signed-off-by: Ming Lei <ming.lei@redhat.com>
> ---
> drivers/block/ublk_drv.c | 59 +++++++++++++++++++++++++-------
> include/uapi/linux/ublk_cmd.h | 64 +++++++++++++++++++++++++++++++++++
> 2 files changed, 111 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
> index 87de455bec46..f2a57e633823 100644
> --- a/drivers/block/ublk_drv.c
> +++ b/drivers/block/ublk_drv.c
> @@ -66,7 +66,8 @@
> | UBLK_F_USER_COPY \
> | UBLK_F_ZONED \
> | UBLK_F_USER_RECOVERY_FAIL_IO \
> - | UBLK_F_UPDATE_SIZE)
> + | UBLK_F_UPDATE_SIZE \
> + | UBLK_F_AUTO_BUF_REG)
>
> #define UBLK_F_ALL_RECOVERY_FLAGS (UBLK_F_USER_RECOVERY \
> | UBLK_F_USER_RECOVERY_REISSUE \
> @@ -146,7 +147,16 @@ struct ublk_uring_cmd_pdu {
>
> struct ublk_io {
> /* userspace buffer address from io cmd */
> - __u64 addr;
> + union {
> + __u64 addr;
> +
> + /*
> + * Shared space with `addr`, see comment on
> + * `struct ublksrv_io_cmd` for the reason why it is safe to
> + * reuse '->addr' space
> + */
> + struct ublk_auto_buf_reg buf;
> + };
> unsigned int flags;
> int res;
>
> @@ -631,7 +641,7 @@ static inline bool ublk_support_zero_copy(const struct ublk_queue *ubq)
>
> static inline bool ublk_support_auto_buf_reg(const struct ublk_queue *ubq)
> {
> - return false;
> + return ubq->flags & UBLK_F_AUTO_BUF_REG;
> }
>
> static inline bool ublk_support_user_copy(const struct ublk_queue *ubq)
> @@ -1176,16 +1186,31 @@ static inline void __ublk_abort_rq(struct ublk_queue *ubq,
> blk_mq_end_request(rq, BLK_STS_IOERR);
> }
>
> +static void ublk_auto_buf_reg_fallback(struct request *req, struct ublk_io *io,
> + unsigned int issue_flags)
> +{
> + const struct ublk_queue *ubq = req->mq_hctx->driver_data;
> + struct ublksrv_io_desc *iod = ublk_get_iod(ubq, req->tag);
> + struct ublk_rq_data *data = blk_mq_rq_to_pdu(req);
> +
> + iod->op_flags |= UBLK_IO_F_NEED_REG_BUF;
> + refcount_set(&data->ref, 1);
> + ublk_complete_io_cmd(io, req, UBLK_IO_RES_OK, issue_flags);
> +}
> +
> static bool ublk_auto_buf_reg(struct request *req, struct ublk_io *io,
> unsigned int issue_flags)
> {
> struct ublk_rq_data *data = blk_mq_rq_to_pdu(req);
> int ret;
>
> - ret = io_buffer_register_bvec(io->cmd, req, ublk_io_release, 0,
> - issue_flags);
> + ret = io_buffer_register_bvec(io->cmd, req, ublk_io_release,
> + io->buf.index, issue_flags);
> if (ret) {
> - blk_mq_end_request(req, BLK_STS_IOERR);
> + if (io->buf.flags & UBLK_AUTO_BUF_REG_FALLBACK)
> + ublk_auto_buf_reg_fallback(req, io, issue_flags);
> + else
> + blk_mq_end_request(req, BLK_STS_IOERR);
> return false;
> }
> /* one extra reference is dropped by ublk_io_release */
> @@ -2028,7 +2053,7 @@ static int ublk_fetch(struct io_uring_cmd *cmd, struct ublk_queue *ubq,
> */
> if (!buf_addr && !ublk_need_get_data(ubq))
> goto out;
> - } else if (buf_addr) {
> + } else if (buf_addr && !ublk_support_auto_buf_reg(ubq)) {
> /* User copy requires addr to be unset */
> ret = -EINVAL;
> goto out;
> @@ -2044,7 +2069,7 @@ static int ublk_fetch(struct io_uring_cmd *cmd, struct ublk_queue *ubq,
> static void ublk_auto_buf_unreg(struct ublk_io *io, struct io_uring_cmd *cmd,
> struct request *req, unsigned int issue_flags)
> {
> - WARN_ON_ONCE(io_buffer_unregister_bvec(cmd, 0, issue_flags));
> + WARN_ON_ONCE(io_buffer_unregister_bvec(cmd, io->buf.index, issue_flags));
> io->flags &= ~UBLK_IO_FLAG_AUTO_BUF_REG;
> }
>
> @@ -2063,7 +2088,8 @@ static int ublk_commit_and_fetch(const struct ublk_queue *ubq,
> if (!ub_cmd->addr && (!ublk_need_get_data(ubq) ||
> req_op(req) == REQ_OP_READ))
> return -EINVAL;
> - } else if (req_op(req) != REQ_OP_ZONE_APPEND && ub_cmd->addr) {
> + } else if ((req_op(req) != REQ_OP_ZONE_APPEND &&
> + !ublk_support_auto_buf_reg(ubq)) && ub_cmd->addr) {
> /*
> * User copy requires addr to be unset when command is
> * not zone append
> @@ -2807,8 +2833,11 @@ static int ublk_ctrl_add_dev(const struct ublksrv_ctrl_cmd *header)
> * For USER_COPY, we depends on userspace to fill request
> * buffer by pwrite() to ublk char device, which can't be
> * used for unprivileged device
> + *
> + * Same with zero copy or auto buffer register.
> */
> - if (info.flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY))
> + if (info.flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY |
> + UBLK_F_AUTO_BUF_REG))
> return -EINVAL;
> }
>
> @@ -2866,17 +2895,22 @@ static int ublk_ctrl_add_dev(const struct ublksrv_ctrl_cmd *header)
> UBLK_F_URING_CMD_COMP_IN_TASK;
>
> /* GET_DATA isn't needed any more with USER_COPY or ZERO COPY */
> - if (ub->dev_info.flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY))
> + if (ub->dev_info.flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY |
> + UBLK_F_AUTO_BUF_REG))
> ub->dev_info.flags &= ~UBLK_F_NEED_GET_DATA;
>
> /*
> * Zoned storage support requires reuse `ublksrv_io_cmd->addr` for
> * returning write_append_lba, which is only allowed in case of
> * user copy or zero copy
> + *
> + * UBLK_F_AUTO_BUF_REG can't be enabled for zoned because it need
> + * the space for getting ring_fd and buffer index.
> */
> if (ublk_dev_is_zoned(ub) &&
> (!IS_ENABLED(CONFIG_BLK_DEV_ZONED) || !(ub->dev_info.flags &
> - (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY)))) {
> + (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY)) ||
> + (ub->dev_info.flags & UBLK_F_AUTO_BUF_REG))) {
> ret = -EINVAL;
> goto out_free_dev_number;
> }
> @@ -3393,6 +3427,7 @@ static int __init ublk_init(void)
>
> BUILD_BUG_ON((u64)UBLKSRV_IO_BUF_OFFSET +
> UBLKSRV_IO_BUF_TOTAL_SIZE < UBLKSRV_IO_BUF_OFFSET);
> + BUILD_BUG_ON(sizeof(struct ublk_auto_buf_reg) > sizeof(__u64));
>
> init_waitqueue_head(&ublk_idr_wq);
>
> diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h
> index be5c6c6b16e0..a77b0cd7af48 100644
> --- a/include/uapi/linux/ublk_cmd.h
> +++ b/include/uapi/linux/ublk_cmd.h
> @@ -219,6 +219,27 @@
> */
> #define UBLK_F_UPDATE_SIZE (1ULL << 10)
>
> +/*
> + * request buffer is registered automatically to uring_cmd's io_uring
> + * context before delivering this io command to ublk server, meantime
> + * it is un-registered automatically when completing this io command.
> + *
> + * For using this feature:
> + *
> + * - ublk server has to create sparse buffer table
> + *
> + * - pass buffer index from `ublksrv_io_cmd.buf.index`
> + *
> + * - pass flags from `ublksrv_io_cmd.buf.flags` if needed
> + *
> + * This way avoids extra cost from two uring_cmd, but also simplifies backend
> + * implementation, such as, the dependency on IO_REGISTER_IO_BUF and
> + * IO_UNREGISTER_IO_BUF becomes not necessary.
> + *
> + * This feature isn't available for UBLK_F_ZONED
> + */
> +#define UBLK_F_AUTO_BUF_REG (1ULL << 11)
> +
> /* device state */
> #define UBLK_S_DEV_DEAD 0
> #define UBLK_S_DEV_LIVE 1
> @@ -305,6 +326,17 @@ struct ublksrv_ctrl_dev_info {
> #define UBLK_IO_F_FUA (1U << 13)
> #define UBLK_IO_F_NOUNMAP (1U << 15)
> #define UBLK_IO_F_SWAP (1U << 16)
> +/*
> + * For UBLK_F_AUTO_BUF_REG & UBLK_AUTO_BUF_REG_FALLBACK only.
> + *
> + * This flag is set if auto buffer register is failed & ublk server passes
> + * UBLK_AUTO_BUF_REG_FALLBACK, and ublk server need to register buffer
> + * manually for handling the delivered IO command if this flag is observed
> + *
> + * ublk server has to check this flag if UBLK_AUTO_BUF_REG_FALLBACK is
> + * passed in.
> + */
> +#define UBLK_IO_F_NEED_REG_BUF (1U << 17)
Replace (1U << 17) with BIT(17) ?
>
> /*
> * io cmd is described by this structure, and stored in share memory, indexed
> @@ -339,6 +371,30 @@ static inline __u32 ublksrv_get_flags(const struct ublksrv_io_desc *iod)
> return iod->op_flags >> 8;
> }
>
> +/*
> + * If this flag is set, fallback by completing the uring_cmd and setting
> + * `UBLK_IO_F_NEED_REG_BUF` in case of auto-buf-register failure;
> + * otherwise the client ublk request is failed silently
> + *
> + * If ublk server passes this flag, it has to check if UBLK_IO_F_NEED_REG_BUF
> + * is set in `ublksrv_io_desc.op_flags`. If UBLK_IO_F_NEED_REG_BUF is set,
> + * ublk server needs to register io buffer manually for handling IO command.
> + */
> +#define UBLK_AUTO_BUF_REG_FALLBACK (1 << 0)
Replace (1 << 0) with BIT(0) ?
Zhu Yanjun
> +
> +struct ublk_auto_buf_reg {
> + /* index for registering the delivered request buffer */
> + __u16 index;
> + __u8 flags;
> + __u8 _pad;
> +
> + /*
> + * io_ring FD can be passed via the reserve field in future for
> + * supporting to register io buffer to external io_uring
> + */
> + __u32 reserved;
> +};
> +
> /* issued to ublk driver via /dev/ublkcN */
> struct ublksrv_io_cmd {
> __u16 q_id;
> @@ -363,6 +419,14 @@ struct ublksrv_io_cmd {
> */
> __u64 addr;
> __u64 zone_append_lba;
> +
> + /*
> + * for AUTO_BUF_REG feature, F_ZONED can't be supported,
> + * meantime ->addr is available for zero copy because
> + * AUTO_BUF_REG is zero copy too & needn't user backed
> + * buffer.
> + */
> + struct ublk_auto_buf_reg auto_buf;
> };
> };
>
next prev parent reply other threads:[~2025-05-07 12:28 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-05-07 4:38 [PATCH V2 0/6] ublk: support to register bvec buffer automatically Ming Lei
2025-05-07 4:38 ` [PATCH V2 1/6] ublk: allow io buffer register/unregister command issued from other task contexts Ming Lei
2025-05-07 4:38 ` [PATCH V2 2/6] ublk: convert to refcount_t Ming Lei
2025-05-07 4:38 ` [PATCH V2 3/6] ublk: prepare for supporting to register request buffer automatically Ming Lei
2025-05-07 4:38 ` [PATCH V2 4/6] ublk: register buffer to local io_uring with provided buf index via UBLK_F_AUTO_BUF_REG Ming Lei
2025-05-07 12:28 ` Zhu Yanjun [this message]
2025-05-07 4:38 ` [PATCH V2 5/6] selftests: ublk: support UBLK_F_AUTO_BUF_REG Ming Lei
2025-05-07 4:38 ` [PATCH V2 6/6] selftests: ublk: add test for covering UBLK_AUTO_BUF_REG_FALLBACK Ming Lei
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=ce2ef739-75e3-48b3-857f-dddcbcc3d9db@linux.dev \
--to=yanjun.zhu@linux.dev \
--cc=axboe@kernel.dk \
--cc=csander@purestorage.com \
--cc=kbusch@kernel.org \
--cc=linux-block@vger.kernel.org \
--cc=ming.lei@redhat.com \
--cc=ushankar@purestorage.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.