From: Ming Lei <ming.lei@redhat.com>
To: Caleb Sander Mateos <csander@purestorage.com>
Cc: Jens Axboe <axboe@kernel.dk>,
linux-block@vger.kernel.org,
Uday Shankar <ushankar@purestorage.com>
Subject: Re: [PATCH V5 2/6] ublk: prepare for supporting to register request buffer automatically
Date: Wed, 21 May 2025 09:42:01 +0800 [thread overview]
Message-ID: <aC0vabQ_jm2b8hfq@fedora> (raw)
In-Reply-To: <CADUfDZrLLGDf2yYSuuTnbK_WDNQCxUyCbC2bziUg7_BB3vWAtA@mail.gmail.com>
On Tue, May 20, 2025 at 10:40:01AM -0700, Caleb Sander Mateos wrote:
> On Mon, May 19, 2025 at 9:55 PM Ming Lei <ming.lei@redhat.com> wrote:
> >
> > UBLK_F_SUPPORT_ZERO_COPY requires ublk server to issue explicit buffer
> > register/unregister uring_cmd for each IO, this way is not only inefficient,
> > but also introduce dependency between buffer consumer and buffer register/
> > unregister uring_cmd, please see tools/testing/selftests/ublk/stripe.c
> > in which backing file IO has to be issued one by one by IOSQE_IO_LINK.
> >
> > Prepare for adding feature UBLK_F_AUTO_BUF_REG for addressing the existing
> > zero copy limitation:
> >
> > - register request buffer automatically to ublk uring_cmd's io_uring
> > context before delivering io command to ublk server
> >
> > - unregister request buffer automatically from the ublk uring_cmd's
> > io_uring context when completing the request
> >
> > - io_uring will unregister the buffer automatically when uring is
> > exiting, so we needn't worry about accident exit
> >
> > For using this feature, ublk server has to create one sparse buffer table
> >
> > Reviewed-by: Caleb Sander Mateos <csander@purestorage.com>
> > Signed-off-by: Ming Lei <ming.lei@redhat.com>
> > ---
> > drivers/block/ublk_drv.c | 70 ++++++++++++++++++++++++++++++++++++----
> > 1 file changed, 64 insertions(+), 6 deletions(-)
> >
> > diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
> > index ae2f47dc8224..3e56a9d267fb 100644
> > --- a/drivers/block/ublk_drv.c
> > +++ b/drivers/block/ublk_drv.c
> > @@ -133,6 +133,14 @@ struct ublk_uring_cmd_pdu {
> > */
> > #define UBLK_IO_FLAG_NEED_GET_DATA 0x08
> >
> > +/*
> > + * request buffer is registered automatically, so we have to unregister it
> > + * before completing this request.
> > + *
> > + * io_uring will unregister buffer automatically for us during exiting.
> > + */
> > +#define UBLK_IO_FLAG_AUTO_BUF_REG 0x10
> > +
> > /* atomic RW with ubq->cancel_lock */
> > #define UBLK_IO_FLAG_CANCELED 0x80000000
> >
> > @@ -205,6 +213,7 @@ struct ublk_params_header {
> > __u32 types;
> > };
> >
> > +static void ublk_io_release(void *priv);
> > static void ublk_stop_dev_unlocked(struct ublk_device *ub);
> > static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq);
> > static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub,
> > @@ -619,6 +628,11 @@ static inline bool ublk_support_zero_copy(const struct ublk_queue *ubq)
> > return ubq->flags & UBLK_F_SUPPORT_ZERO_COPY;
> > }
> >
> > +static inline bool ublk_support_auto_buf_reg(const struct ublk_queue *ubq)
> > +{
> > + return false;
> > +}
> > +
> > static inline bool ublk_support_user_copy(const struct ublk_queue *ubq)
> > {
> > return ubq->flags & UBLK_F_USER_COPY;
> > @@ -626,7 +640,8 @@ static inline bool ublk_support_user_copy(const struct ublk_queue *ubq)
> >
> > static inline bool ublk_need_map_io(const struct ublk_queue *ubq)
> > {
> > - return !ublk_support_user_copy(ubq) && !ublk_support_zero_copy(ubq);
> > + return !ublk_support_user_copy(ubq) && !ublk_support_zero_copy(ubq) &&
> > + !ublk_support_auto_buf_reg(ubq);
> > }
> >
> > static inline bool ublk_need_req_ref(const struct ublk_queue *ubq)
> > @@ -637,8 +652,13 @@ static inline bool ublk_need_req_ref(const struct ublk_queue *ubq)
> > *
> > * for zero copy, request buffer need to be registered to io_uring
> > * buffer table, so reference is needed
> > + *
> > + * For auto buffer register, ublk server still may issue
> > + * UBLK_IO_COMMIT_AND_FETCH_REQ before one registered buffer is used up,
> > + * so reference is required too.
> > */
> > - return ublk_support_user_copy(ubq) || ublk_support_zero_copy(ubq);
> > + return ublk_support_user_copy(ubq) || ublk_support_zero_copy(ubq) ||
> > + ublk_support_auto_buf_reg(ubq);
> > }
> >
> > static inline void ublk_init_req_ref(const struct ublk_queue *ubq,
> > @@ -1155,6 +1175,35 @@ static inline void __ublk_abort_rq(struct ublk_queue *ubq,
> > blk_mq_end_request(rq, BLK_STS_IOERR);
> > }
> >
> > +static bool ublk_auto_buf_reg(struct request *req, struct ublk_io *io,
> > + unsigned int issue_flags)
> > +{
> > + struct ublk_rq_data *data = blk_mq_rq_to_pdu(req);
> > + int ret;
> > +
> > + ret = io_buffer_register_bvec(io->cmd, req, ublk_io_release, 0,
> > + issue_flags);
> > + if (ret) {
> > + blk_mq_end_request(req, BLK_STS_IOERR);
> > + return false;
> > + }
> > + /* one extra reference is dropped by ublk_io_release */
> > + refcount_set(&data->ref, 2);
> > + io->flags |= UBLK_IO_FLAG_AUTO_BUF_REG;
> > + return true;
> > +}
> > +
> > +static bool ublk_prep_auto_buf_reg(struct ublk_queue *ubq,
> > + struct request *req, struct ublk_io *io,
> > + unsigned int issue_flags)
> > +{
> > + if (ublk_support_auto_buf_reg(ubq) && ublk_rq_has_data(req))
> > + return ublk_auto_buf_reg(req, io, issue_flags);
> > +
> > + ublk_init_req_ref(ubq, req);
> > + return true;
> > +}
> > +
> > static bool ublk_start_io(const struct ublk_queue *ubq, struct request *req,
> > struct ublk_io *io)
> > {
> > @@ -1180,7 +1229,6 @@ static bool ublk_start_io(const struct ublk_queue *ubq, struct request *req,
> > mapped_bytes >> 9;
> > }
> >
> > - ublk_init_req_ref(ubq, req);
> > return true;
> > }
> >
> > @@ -1226,7 +1274,8 @@ static void ublk_dispatch_req(struct ublk_queue *ubq,
> > if (!ublk_start_io(ubq, req, io))
> > return;
> >
> > - ublk_complete_io_cmd(io, req, UBLK_IO_RES_OK, issue_flags);
> > + if (ublk_prep_auto_buf_reg(ubq, req, io, issue_flags))
> > + ublk_complete_io_cmd(io, req, UBLK_IO_RES_OK, issue_flags);
> > }
> >
> > static void ublk_cmd_tw_cb(struct io_uring_cmd *cmd,
> > @@ -1994,7 +2043,8 @@ static int ublk_fetch(struct io_uring_cmd *cmd, struct ublk_queue *ubq,
> >
> > static int ublk_commit_and_fetch(const struct ublk_queue *ubq,
> > struct ublk_io *io, struct io_uring_cmd *cmd,
> > - const struct ublksrv_io_cmd *ub_cmd)
> > + const struct ublksrv_io_cmd *ub_cmd,
> > + unsigned int issue_flags)
> > {
> > struct request *req = io->req;
> >
> > @@ -2014,6 +2064,14 @@ static int ublk_commit_and_fetch(const struct ublk_queue *ubq,
> > return -EINVAL;
> > }
> >
> > + if (ublk_support_auto_buf_reg(ubq)) {
> > + if (io->flags & UBLK_IO_FLAG_AUTO_BUF_REG) {
> > + WARN_ON_ONCE(io_buffer_unregister_bvec(cmd, 0,
> > + issue_flags));
>
> Since the io_ring_ctx is determined from the io_uring_cmd, this only
> works if the UBLK_IO_COMMIT_AND_FETCH_REQ is submitted to the same
> io_uring as the previous UBLK_IO_(COMMIT_AND_)FETCH_REQ for the ublk
> I/O. It would be good to document that. And I would probably drop the
> WARN_ON_ONCE() here, since it can be triggered from userspace.
>
> Otherwise,
> Reviewed-by: Caleb Sander Mateos <csander@purestorage.com>
Thanks for the review!
Yeah, I thought of this thing yesterday when working on TASK_NEUTEAL or
IO_MIGRATION too, will send one fix later by tracking context id.
Thanks,
Ming
next prev parent reply other threads:[~2025-05-21 1:42 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-05-20 4:54 [PATCH V5 0/6] ublk: support to register bvec buffer automatically Ming Lei
2025-05-20 4:54 ` [PATCH V5 1/6] ublk: convert to refcount_t Ming Lei
2025-05-20 4:54 ` [PATCH V5 2/6] ublk: prepare for supporting to register request buffer automatically Ming Lei
2025-05-20 15:59 ` Jens Axboe
2025-05-20 17:40 ` Caleb Sander Mateos
2025-05-21 1:42 ` Ming Lei [this message]
2025-05-20 4:54 ` [PATCH V5 3/6] ublk: register buffer to local io_uring with provided buf index via UBLK_F_AUTO_BUF_REG Ming Lei
2025-05-20 17:40 ` Caleb Sander Mateos
2025-05-20 4:54 ` [PATCH V5 4/6] ublk: support UBLK_AUTO_BUF_REG_FALLBACK Ming Lei
2025-05-20 17:40 ` Caleb Sander Mateos
2025-05-20 4:54 ` [PATCH V5 5/6] selftests: ublk: support UBLK_F_AUTO_BUF_REG Ming Lei
2025-05-20 4:54 ` [PATCH V5 6/6] selftests: ublk: add test for covering UBLK_AUTO_BUF_REG_FALLBACK Ming Lei
2025-05-20 16:25 ` [PATCH V5 0/6] ublk: support to register bvec buffer automatically Jens Axboe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=aC0vabQ_jm2b8hfq@fedora \
--to=ming.lei@redhat.com \
--cc=axboe@kernel.dk \
--cc=csander@purestorage.com \
--cc=linux-block@vger.kernel.org \
--cc=ushankar@purestorage.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.