From: Ming Lei <ming.lei@redhat.com>
To: Caleb Sander Mateos <csander@purestorage.com>
Cc: Jens Axboe <axboe@kernel.dk>,
io-uring@vger.kernel.org, Pavel Begunkov <asml.silence@gmail.com>,
linux-block@vger.kernel.org,
Uday Shankar <ushankar@purestorage.com>,
Keith Busch <kbusch@kernel.org>
Subject: Re: [RFC PATCH 6/7] ublk: register buffer to specified io_uring & buf index via UBLK_F_AUTO_BUF_REG
Date: Wed, 30 Apr 2025 23:45:23 +0800 [thread overview]
Message-ID: <aBJFk0FuWwt9GpC_@fedora> (raw)
In-Reply-To: <CADUfDZrFDbYmnm7LEt94UVhn-tqGM6Fnfqvc2fuq8OqQPdNu3Q@mail.gmail.com>
On Mon, Apr 28, 2025 at 05:52:28PM -0700, Caleb Sander Mateos wrote:
> On Mon, Apr 28, 2025 at 2:45 AM Ming Lei <ming.lei@redhat.com> wrote:
> >
> > Add UBLK_F_AUTO_BUF_REG for supporting to register buffer automatically
> > to specified io_uring context and buffer index.
> >
> > Signed-off-by: Ming Lei <ming.lei@redhat.com>
> > ---
> > drivers/block/ublk_drv.c | 56 ++++++++++++++++++++++++++++-------
> > include/uapi/linux/ublk_cmd.h | 38 ++++++++++++++++++++++++
> > 2 files changed, 84 insertions(+), 10 deletions(-)
> >
> > diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
> > index 1fd20e481a60..e82618442749 100644
> > --- a/drivers/block/ublk_drv.c
> > +++ b/drivers/block/ublk_drv.c
> > @@ -66,7 +66,8 @@
> > | UBLK_F_USER_COPY \
> > | UBLK_F_ZONED \
> > | UBLK_F_USER_RECOVERY_FAIL_IO \
> > - | UBLK_F_UPDATE_SIZE)
> > + | UBLK_F_UPDATE_SIZE \
> > + | UBLK_F_AUTO_BUF_REG)
> >
> > #define UBLK_F_ALL_RECOVERY_FLAGS (UBLK_F_USER_RECOVERY \
> > | UBLK_F_USER_RECOVERY_REISSUE \
> > @@ -146,7 +147,10 @@ struct ublk_uring_cmd_pdu {
> >
> > struct ublk_io {
> > /* userspace buffer address from io cmd */
> > - __u64 addr;
> > + union {
> > + __u64 addr;
> > + struct ublk_auto_buf_reg buf;
>
> Maybe add a comment justifying why these fields can overlap? From my
> understanding, buf is valid iff UBLK_F_AUTO_BUF_REG is set on the
> ublk_queue and addr is valid iff neither UBLK_F_USER_COPY,
> UBLK_F_SUPPORT_ZERO_COPY, nor UBLK_F_AUTO_BUF_REG is set.
->addr is for storing the userspace buffer, which is only used in
non-zc cases(zc, auto_buf_reg) or user copy case.
>
> > + };
> > unsigned int flags;
> > int res;
> >
> > @@ -626,7 +630,7 @@ static inline bool ublk_support_zero_copy(const struct ublk_queue *ubq)
> >
> > static inline bool ublk_support_auto_buf_reg(const struct ublk_queue *ubq)
> > {
> > - return false;
> > + return ubq->flags & UBLK_F_AUTO_BUF_REG;
> > }
> >
> > static inline bool ublk_support_user_copy(const struct ublk_queue *ubq)
> > @@ -1177,6 +1181,16 @@ static inline void __ublk_abort_rq(struct ublk_queue *ubq,
> > blk_mq_end_request(rq, BLK_STS_IOERR);
> > }
> >
> > +
> > +static inline void ublk_init_auto_buf_reg(const struct ublk_io *io,
> > + struct io_buf_data *data)
> > +{
> > + data->index = io->buf.index;
> > + data->ring_fd = io->buf.ring_fd;
> > + data->has_fd = true;
> > + data->registered_fd = io->buf.flags & UBLK_AUTO_BUF_REGISTERED_RING;
> > +}
> > +
> > static bool ublk_auto_buf_reg(struct ublk_queue *ubq, struct request *req,
> > struct ublk_io *io, unsigned int issue_flags)
> > {
> > @@ -1187,6 +1201,9 @@ static bool ublk_auto_buf_reg(struct ublk_queue *ubq, struct request *req,
> > };
> > int ret;
> >
> > + if (ublk_support_auto_buf_reg(ubq))
>
> This check seems redundant with the check in the caller? Same comment
> about ublk_auto_buf_unreg(). That would allow you to avoid adding the
> ubq argument to ublk_auto_buf_unreg().
Yeah, actually I removed one feature which just registers buffer to
the uring command context, then forget to update the check.
>
> > + ublk_init_auto_buf_reg(io, &data);
> > +
> > /* one extra reference is dropped by ublk_io_release */
> > ublk_init_req_ref(ubq, req, 2);
> > ret = io_buffer_register_bvec(io->cmd, &data, issue_flags);
> > @@ -2045,7 +2062,7 @@ static int ublk_fetch(struct io_uring_cmd *cmd, struct ublk_queue *ubq,
> > */
> > if (!buf_addr && !ublk_need_get_data(ubq))
> > goto out;
> > - } else if (buf_addr) {
> > + } else if (buf_addr && !ublk_support_auto_buf_reg(ubq)) {
> > /* User copy requires addr to be unset */
> > ret = -EINVAL;
> > goto out;
> > @@ -2058,13 +2075,17 @@ static int ublk_fetch(struct io_uring_cmd *cmd, struct ublk_queue *ubq,
> > return ret;
> > }
> >
> > -static void ublk_auto_buf_unreg(struct ublk_io *io, struct io_uring_cmd *cmd,
> > +static void ublk_auto_buf_unreg(const struct ublk_queue *ubq,
> > + struct ublk_io *io, struct io_uring_cmd *cmd,
> > struct request *req, unsigned int issue_flags)
> > {
> > struct io_buf_data data = {
> > .index = req->tag,
> > };
> >
> > + if (ublk_support_auto_buf_reg(ubq))
> > + ublk_init_auto_buf_reg(io, &data);
> > +
> > WARN_ON_ONCE(io_buffer_unregister_bvec(cmd, &data, issue_flags));
> > io->flags &= ~UBLK_IO_FLAG_AUTO_BUF_REG;
> > }
> > @@ -2088,7 +2109,8 @@ static int ublk_commit_and_fetch(const struct ublk_queue *ubq,
> > if (!ub_cmd->addr && (!ublk_need_get_data(ubq) ||
> > req_op(req) == REQ_OP_READ))
> > return -EINVAL;
> > - } else if (req_op(req) != REQ_OP_ZONE_APPEND && ub_cmd->addr) {
> > + } else if ((req_op(req) != REQ_OP_ZONE_APPEND &&
> > + !ublk_support_auto_buf_reg(ubq)) && ub_cmd->addr) {
> > /*
> > * User copy requires addr to be unset when command is
> > * not zone append
> > @@ -2097,7 +2119,7 @@ static int ublk_commit_and_fetch(const struct ublk_queue *ubq,
> > }
> >
> > if (io->flags & UBLK_IO_FLAG_AUTO_BUF_REG)
> > - ublk_auto_buf_unreg(io, cmd, req, issue_flags);
> > + ublk_auto_buf_unreg(ubq, io, cmd, req, issue_flags);
> >
> > ublk_fill_io_cmd(io, cmd, ub_cmd->addr);
> >
> > @@ -2788,6 +2810,11 @@ static int ublk_ctrl_add_dev(const struct ublksrv_ctrl_cmd *header)
> > else if (!(info.flags & UBLK_F_UNPRIVILEGED_DEV))
> > return -EPERM;
> >
> > + /* F_AUTO_BUF_REG and F_SUPPORT_ZERO_COPY can't co-exist */
> > + if ((info.flags & UBLK_F_AUTO_BUF_REG) &&
> > + (info.flags & UBLK_F_SUPPORT_ZERO_COPY))
> > + return -EINVAL;
> > +
> > /* forbid nonsense combinations of recovery flags */
> > switch (info.flags & UBLK_F_ALL_RECOVERY_FLAGS) {
> > case 0:
> > @@ -2817,8 +2844,11 @@ static int ublk_ctrl_add_dev(const struct ublksrv_ctrl_cmd *header)
> > * For USER_COPY, we depends on userspace to fill request
> > * buffer by pwrite() to ublk char device, which can't be
> > * used for unprivileged device
> > + *
> > + * Same with zero copy or auto buffer register.
> > */
> > - if (info.flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY))
> > + if (info.flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY |
> > + UBLK_F_AUTO_BUF_REG))
> > return -EINVAL;
> > }
> >
> > @@ -2876,17 +2906,22 @@ static int ublk_ctrl_add_dev(const struct ublksrv_ctrl_cmd *header)
> > UBLK_F_URING_CMD_COMP_IN_TASK;
> >
> > /* GET_DATA isn't needed any more with USER_COPY or ZERO COPY */
> > - if (ub->dev_info.flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY))
> > + if (ub->dev_info.flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY |
> > + UBLK_F_AUTO_BUF_REG))
> > ub->dev_info.flags &= ~UBLK_F_NEED_GET_DATA;
> >
> > /*
> > * Zoned storage support requires reuse `ublksrv_io_cmd->addr` for
> > * returning write_append_lba, which is only allowed in case of
> > * user copy or zero copy
> > + *
> > + * UBLK_F_AUTO_BUF_REG can't be enabled for zoned because it need
> > + * the space for getting ring_fd and buffer index.
> > */
> > if (ublk_dev_is_zoned(ub) &&
> > (!IS_ENABLED(CONFIG_BLK_DEV_ZONED) || !(ub->dev_info.flags &
> > - (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY)))) {
> > + (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY)) ||
> > + (ub->dev_info.flags & UBLK_F_AUTO_BUF_REG))) {
> > ret = -EINVAL;
> > goto out_free_dev_number;
> > }
> > @@ -3403,6 +3438,7 @@ static int __init ublk_init(void)
> >
> > BUILD_BUG_ON((u64)UBLKSRV_IO_BUF_OFFSET +
> > UBLKSRV_IO_BUF_TOTAL_SIZE < UBLKSRV_IO_BUF_OFFSET);
> > + BUILD_BUG_ON(sizeof(struct ublk_auto_buf_reg) != sizeof(__u64));
> >
> > init_waitqueue_head(&ublk_idr_wq);
> >
> > diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h
> > index be5c6c6b16e0..3d7c8c69cf06 100644
> > --- a/include/uapi/linux/ublk_cmd.h
> > +++ b/include/uapi/linux/ublk_cmd.h
> > @@ -219,6 +219,30 @@
> > */
> > #define UBLK_F_UPDATE_SIZE (1ULL << 10)
> >
> > +/*
> > + * request buffer is registered automatically to ublk server specified
> > + * io_uring context before delivering this io command to ublk server,
> > + * meantime it is un-registered automatically when completing this io
> > + * command.
> > + *
> > + * For using this feature:
> > + *
> > + * - ublk server has to create sparse buffer table
> > + *
> > + * - pass io_ring context FD from `ublksrv_io_cmd.buf.ring_fd`, and the FD
> > + * can be registered io_ring FD if `UBLK_AUTO_BUF_REGISTERED_RING` is set
> > + * in `ublksrv_io_cmd.flags`, or plain FD
> > + *
> > + * - pass buffer index from `ublksrv_io_cmd.buf.index`
> > + *
> > + * This way avoids extra cost from two uring_cmd, but also simplifies backend
> > + * implementation, such as, the dependency on IO_REGISTER_IO_BUF and
> > + * IO_UNREGISTER_IO_BUF becomes not necessary.
> > + *
> > + * This feature isn't available for UBLK_F_ZONED
> > + */
> > +#define UBLK_F_AUTO_BUF_REG (1ULL << 11)
> > +
> > /* device state */
> > #define UBLK_S_DEV_DEAD 0
> > #define UBLK_S_DEV_LIVE 1
> > @@ -339,6 +363,14 @@ static inline __u32 ublksrv_get_flags(const struct ublksrv_io_desc *iod)
> > return iod->op_flags >> 8;
> > }
> >
> > +struct ublk_auto_buf_reg {
> > + __s32 ring_fd;
> > + __u16 index;
> > +#define UBLK_AUTO_BUF_REGISTERED_RING (1 << 0)
> > + __u8 flags;
>
> The flag could potentially be stored in ublk_io's flags field instead
> to avoid taking up this byte.
`ublk_auto_buf_reg` takes the exact ->addr space in both 'struct ublk_io'
and 'struct ublksrv_io_cmd', this way won't take extra byte, but keep code simple
and reuse for dealing with auto_buf_reg from both 'struct ublk_io' and
'struct ublksrv_io_cmd'.
Thanks,
Ming
next prev parent reply other threads:[~2025-04-30 15:45 UTC|newest]
Thread overview: 29+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-04-28 9:44 [RFC PATCH 0/7] ublk: support to register bvec buffer automatically Ming Lei
2025-04-28 9:44 ` [RFC PATCH 1/7] io_uring: add 'struct io_buf_data' for register/unregister bvec buffer Ming Lei
2025-04-29 0:35 ` Caleb Sander Mateos
2025-04-28 9:44 ` [RFC PATCH 2/7] io_uring: add helper __io_buffer_[un]register_bvec Ming Lei
2025-04-29 0:36 ` Caleb Sander Mateos
2025-04-28 9:44 ` [RFC PATCH 3/7] io_uring: support to register bvec buffer to specified io_uring Ming Lei
2025-04-28 10:28 ` Pavel Begunkov
2025-04-29 0:46 ` Caleb Sander Mateos
2025-04-29 0:47 ` Ming Lei
2025-04-30 8:25 ` Pavel Begunkov
2025-04-30 14:44 ` Ming Lei
2025-04-29 0:43 ` Caleb Sander Mateos
2025-04-30 15:34 ` Ming Lei
2025-05-02 1:31 ` Caleb Sander Mateos
2025-05-02 15:59 ` Ming Lei
2025-05-02 21:21 ` Caleb Sander Mateos
2025-05-03 1:00 ` Ming Lei
2025-05-03 18:55 ` Caleb Sander Mateos
2025-05-06 2:45 ` Ming Lei
2025-04-28 9:44 ` [RFC PATCH 4/7] ublk: convert to refcount_t Ming Lei
2025-04-28 17:13 ` Caleb Sander Mateos
2025-04-28 9:44 ` [RFC PATCH 5/7] ublk: prepare for supporting to register request buffer automatically Ming Lei
2025-04-29 0:50 ` Caleb Sander Mateos
2025-04-28 9:44 ` [RFC PATCH 6/7] ublk: register buffer to specified io_uring & buf index via UBLK_F_AUTO_BUF_REG Ming Lei
2025-04-29 0:52 ` Caleb Sander Mateos
2025-04-30 15:45 ` Ming Lei [this message]
2025-04-30 16:30 ` Caleb Sander Mateos
2025-05-02 14:09 ` Ming Lei
2025-04-28 9:44 ` [RFC PATCH 7/7] selftests: ublk: support UBLK_F_AUTO_BUF_REG Ming Lei
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=aBJFk0FuWwt9GpC_@fedora \
--to=ming.lei@redhat.com \
--cc=asml.silence@gmail.com \
--cc=axboe@kernel.dk \
--cc=csander@purestorage.com \
--cc=io-uring@vger.kernel.org \
--cc=kbusch@kernel.org \
--cc=linux-block@vger.kernel.org \
--cc=ushankar@purestorage.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.