From: Ming Lei <ming.lei@redhat.com>
To: Caleb Sander Mateos <csander@purestorage.com>
Cc: Jens Axboe <axboe@kernel.dk>,
linux-block@vger.kernel.org,
Uday Shankar <ushankar@purestorage.com>
Subject: Re: [PATCH 07/23] ublk: add new batch command UBLK_U_IO_PREP_IO_CMDS & UBLK_U_IO_COMMIT_IO_CMDS
Date: Wed, 10 Sep 2025 11:05:45 +0800 [thread overview]
Message-ID: <aMDrCcIYpa8EzBMb@fedora> (raw)
In-Reply-To: <CADUfDZq8hWU1z7_-s3We7pMSxW6uAmX=yp7RXOYwkrWxcvPXDA@mail.gmail.com>
On Sat, Sep 06, 2025 at 11:50:55AM -0700, Caleb Sander Mateos wrote:
> On Mon, Sep 1, 2025 at 3:03 AM Ming Lei <ming.lei@redhat.com> wrote:
> >
> > Add new command UBLK_U_IO_PREP_IO_CMDS, which is the batch version of
> > UBLK_IO_FETCH_REQ.
> >
> > Add new command UBLK_U_IO_COMMIT_IO_CMDS, which is for committing io command
> > result only, still the batch version.
> >
> > The new command header type is `struct ublk_batch_io`, and fixed buffer is
> > required for these two uring_cmd.
>
> The commit message could be clearer that it doesn't actually implement
> these commands yet, just validates the SQE fields.
OK.
>
> >
> > Signed-off-by: Ming Lei <ming.lei@redhat.com>
> > ---
> > drivers/block/ublk_drv.c | 102 +++++++++++++++++++++++++++++++++-
> > include/uapi/linux/ublk_cmd.h | 49 ++++++++++++++++
> > 2 files changed, 149 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
> > index 46be5b656f22..4da0dbbd7e16 100644
> > --- a/drivers/block/ublk_drv.c
> > +++ b/drivers/block/ublk_drv.c
> > @@ -85,6 +85,11 @@
> > UBLK_PARAM_TYPE_DEVT | UBLK_PARAM_TYPE_ZONED | \
> > UBLK_PARAM_TYPE_DMA_ALIGN | UBLK_PARAM_TYPE_SEGMENT)
> >
> > +#define UBLK_BATCH_F_ALL \
> > + (UBLK_BATCH_F_HAS_ZONE_LBA | \
> > + UBLK_BATCH_F_HAS_BUF_ADDR | \
> > + UBLK_BATCH_F_AUTO_BUF_REG_FALLBACK)
> > +
> > struct ublk_uring_cmd_pdu {
> > /*
> > * Store requests in same batch temporarily for queuing them to
> > @@ -108,6 +113,11 @@ struct ublk_uring_cmd_pdu {
> > u16 tag;
> > };
> >
> > +struct ublk_batch_io_data {
> > + struct ublk_queue *ubq;
> > + struct io_uring_cmd *cmd;
> > +};
> > +
> > /*
> > * io command is active: sqe cmd is received, and its cqe isn't done
> > *
> > @@ -277,7 +287,7 @@ static inline bool ublk_dev_is_zoned(const struct ublk_device *ub)
> > return ub->dev_info.flags & UBLK_F_ZONED;
> > }
> >
> > -static inline bool ublk_queue_is_zoned(struct ublk_queue *ubq)
> > +static inline bool ublk_queue_is_zoned(const struct ublk_queue *ubq)
>
> This change could go in a separate commit.
Fair enough.
>
> > {
> > return ubq->flags & UBLK_F_ZONED;
> > }
> > @@ -2528,10 +2538,98 @@ static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
> > return ublk_ch_uring_cmd_local(cmd, issue_flags);
> > }
> >
> > +static int ublk_check_batch_cmd_flags(const struct ublk_batch_io *uc)
> > +{
> > + const unsigned short mask = UBLK_BATCH_F_HAS_BUF_ADDR |
> > + UBLK_BATCH_F_HAS_ZONE_LBA;
>
> Can we use a fixed-size integer type, i.e. u16?
Good point, since here the flag is defined in uapi.
>
> > +
> > + if (uc->flags & ~UBLK_BATCH_F_ALL)
> > + return -EINVAL;
> > +
> > + /* UBLK_BATCH_F_AUTO_BUF_REG_FALLBACK requires buffer index */
> > + if ((uc->flags & UBLK_BATCH_F_AUTO_BUF_REG_FALLBACK) &&
> > + (uc->flags & UBLK_BATCH_F_HAS_BUF_ADDR))
> > + return -EINVAL;
> > +
> > + switch (uc->flags & mask) {
> > + case 0:
> > + if (uc->elem_bytes != 8)
>
> sizeof(struct ublk_elem_header)?
Yes.
>
> > + return -EINVAL;
> > + break;
> > + case UBLK_BATCH_F_HAS_ZONE_LBA:
> > + case UBLK_BATCH_F_HAS_BUF_ADDR:
> > + if (uc->elem_bytes != 8 + 8)
>
> sizeof(u64)?
OK
>
> > + return -EINVAL;
> > + break;
> > + case UBLK_BATCH_F_HAS_ZONE_LBA | UBLK_BATCH_F_HAS_BUF_ADDR:
> > + if (uc->elem_bytes != 8 + 8 + 8)
> > + return -EINVAL;
>
> So elem_bytes is redundant with flags? Do we really need a separate a
> separate field then?
It is used for cross verification purpose, especially the command
header has enough space.
>
> > + break;
> > + default:
> > + return -EINVAL;
>
> default case is unreachable?
Right.
>
> > + }
> > +
> > + return 0;
> > +}
> > +
> > +static int ublk_check_batch_cmd(const struct ublk_batch_io_data *data,
> > + const struct ublk_batch_io *uc)
> > +{
> > + if (!(data->cmd->flags & IORING_URING_CMD_FIXED))
> > + return -EINVAL;
> > +
> > + if (uc->nr_elem * uc->elem_bytes > data->cmd->sqe->len)
>
> Cast nr_elem and/or elem_bytes to u32 to avoid overflow concerns?
`u16` * `u8` can't overflow.
>
> Should also use READ_ONCE() to read the userspace-mapped sqe->len.
Yes.
When I write the patch, sqe is always copied, but it isn't true now.
>
> > + return -E2BIG;
> > +
> > + if (uc->nr_elem > data->ubq->q_depth)
> > + return -E2BIG;
> > +
> > + if ((uc->flags & UBLK_BATCH_F_HAS_ZONE_LBA) &&
> > + !ublk_queue_is_zoned(data->ubq))
> > + return -EINVAL;
> > +
> > + if ((uc->flags & UBLK_BATCH_F_HAS_BUF_ADDR) &&
> > + !ublk_need_map_io(data->ubq))
> > + return -EINVAL;
> > +
> > + if ((uc->flags & UBLK_BATCH_F_AUTO_BUF_REG_FALLBACK) &&
> > + !ublk_support_auto_buf_reg(data->ubq))
> > + return -EINVAL;
> > +
> > + if (uc->reserved || uc->reserved2)
> > + return -EINVAL;
> > +
> > + return ublk_check_batch_cmd_flags(uc);
> > +}
> > +
> > static int ublk_ch_batch_io_uring_cmd(struct io_uring_cmd *cmd,
> > unsigned int issue_flags)
> > {
> > - return -EOPNOTSUPP;
> > + const struct ublk_batch_io *uc = io_uring_sqe_cmd(cmd->sqe);
> > + struct ublk_device *ub = cmd->file->private_data;
> > + struct ublk_batch_io_data data = {
> > + .cmd = cmd,
> > + };
> > + u32 cmd_op = cmd->cmd_op;
> > + int ret = -EINVAL;
> > +
> > + if (uc->q_id >= ub->dev_info.nr_hw_queues)
> > + goto out;
> > + data.ubq = ublk_get_queue(ub, uc->q_id);
>
> Should be using READ_ONCE() to read from userspace-mapped memory.
Indeed, it can be copied to `ublk_batch_io_data`, just 64bits.
>
>
>
> > +
> > + switch (cmd_op) {
> > + case UBLK_U_IO_PREP_IO_CMDS:
> > + case UBLK_U_IO_COMMIT_IO_CMDS:
> > + ret = ublk_check_batch_cmd(&data, uc);
> > + if (ret)
> > + goto out;
> > + ret = -EOPNOTSUPP;
> > + break;
> > + default:
> > + ret = -EOPNOTSUPP;
> > + }
> > +out:
> > + return ret;
> > }
> >
> > static inline bool ublk_check_ubuf_dir(const struct request *req,
> > diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h
> > index ec77dabba45b..01d3af52cfb4 100644
> > --- a/include/uapi/linux/ublk_cmd.h
> > +++ b/include/uapi/linux/ublk_cmd.h
> > @@ -102,6 +102,10 @@
> > _IOWR('u', 0x23, struct ublksrv_io_cmd)
> > #define UBLK_U_IO_UNREGISTER_IO_BUF \
> > _IOWR('u', 0x24, struct ublksrv_io_cmd)
> > +#define UBLK_U_IO_PREP_IO_CMDS \
> > + _IOWR('u', 0x25, struct ublk_batch_io)
> > +#define UBLK_U_IO_COMMIT_IO_CMDS \
> > + _IOWR('u', 0x26, struct ublk_batch_io)
> >
> > /* only ABORT means that no re-fetch */
> > #define UBLK_IO_RES_OK 0
> > @@ -525,6 +529,51 @@ struct ublksrv_io_cmd {
> > };
> > };
> >
> > +struct ublk_elem_header {
> > + __u16 tag; /* IO tag */
> > +
> > + /*
> > + * Buffer index for incoming io command, only valid iff
> > + * UBLK_F_AUTO_BUF_REG is set
> > + */
> > + __u16 buf_index;
> > + __u32 result; /* I/O completion result (commit only) */
>
> The result is unsigned? So there's no way to specify a request failure?
oops, definitely it should be __s32.
>
> > +};
> > +
> > +/*
> > + * uring_cmd buffer structure
>
> Add "for batch commands"?
Sure.
>
> > + *
> > + * buffer includes multiple elements, which number is specified by
> > + * `nr_elem`. Each element buffer is organized in the following order:
> > + *
> > + * struct ublk_elem_buffer {
> > + * // Mandatory fields (8 bytes)
> > + * struct ublk_elem_header header;
> > + *
> > + * // Optional fields (8 bytes each, included based on flags)
> > + *
> > + * // Buffer address (if UBLK_BATCH_F_HAS_BUF_ADDR) for copying data
> > + * // between ublk request and ublk server buffer
> > + * __u64 buf_addr;
> > + *
> > + * // returned Zone append LBA (if UBLK_BATCH_F_HAS_ZONE_LBA)
> > + * __u64 zone_lba;
> > + * }
> > + *
> > + * Used for `UBLK_U_IO_PREP_IO_CMDS` and `UBLK_U_IO_COMMIT_IO_CMDS`
> > + */
> > +struct ublk_batch_io {
> > + __u16 q_id;
>
> So this doesn't allow batching completions across ublk queues? That
Yes.
I tried device-wide batch, it may introduce a little complexity to
ublk_handle_batch_commit_cmd(), especially it becomes hard to apply
some future optimizations, such as batched request completion.
It isn't hard to add one variant for covering cross-queue commit,
just `qid` need to be added to 'ublk_elem_buffer', which becomes not
aligned any more.
> seems like it significantly limits the usefulness of this feature. A
> ublk server thread may be handling ublk requests from a number of
> client threads which are submitting to different ublk queues.
But ublk server still can support cross-queue completion.
The selftest code does support arbitrary ublk queues/thread combination,
just a one-shot per-queue commit buffer is needed.
It is sort of trade-off between driver and ublk server implementation.
Thanks,
Ming
next prev parent reply other threads:[~2025-09-10 3:05 UTC|newest]
Thread overview: 43+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-09-01 10:02 [PATCH 00/23] ublk: add UBLK_F_BATCH_IO Ming Lei
2025-09-01 10:02 ` [PATCH 01/23] ublk: add parameter `struct io_uring_cmd *` to ublk_prep_auto_buf_reg() Ming Lei
2025-09-03 3:47 ` Caleb Sander Mateos
2025-09-01 10:02 ` [PATCH 02/23] ublk: add `union ublk_io_buf` with improved naming Ming Lei
2025-09-03 4:01 ` Caleb Sander Mateos
2025-09-01 10:02 ` [PATCH 03/23] ublk: refactor auto buffer register in ublk_dispatch_req() Ming Lei
2025-09-03 4:41 ` Caleb Sander Mateos
2025-09-10 2:23 ` Ming Lei
2025-09-11 18:13 ` Caleb Sander Mateos
2025-09-01 10:02 ` [PATCH 04/23] ublk: add helper of __ublk_fetch() Ming Lei
2025-09-03 4:42 ` Caleb Sander Mateos
2025-09-10 2:30 ` Ming Lei
2025-09-01 10:02 ` [PATCH 05/23] ublk: define ublk_ch_batch_io_fops for the coming feature F_BATCH_IO Ming Lei
2025-09-06 18:47 ` Caleb Sander Mateos
2025-09-01 10:02 ` [PATCH 06/23] ublk: prepare for not tracking task context for command batch Ming Lei
2025-09-06 18:48 ` Caleb Sander Mateos
2025-09-10 2:35 ` Ming Lei
2025-09-01 10:02 ` [PATCH 07/23] ublk: add new batch command UBLK_U_IO_PREP_IO_CMDS & UBLK_U_IO_COMMIT_IO_CMDS Ming Lei
2025-09-06 18:50 ` Caleb Sander Mateos
2025-09-10 3:05 ` Ming Lei [this message]
2025-09-01 10:02 ` [PATCH 08/23] ublk: handle UBLK_U_IO_PREP_IO_CMDS Ming Lei
2025-09-06 19:48 ` Caleb Sander Mateos
2025-09-10 3:56 ` Ming Lei
2025-09-18 18:12 ` Caleb Sander Mateos
2025-10-16 10:08 ` Ming Lei
2025-10-22 8:00 ` Caleb Sander Mateos
2025-10-22 10:15 ` Ming Lei
2025-09-01 10:02 ` [PATCH 09/23] ublk: handle UBLK_U_IO_COMMIT_IO_CMDS Ming Lei
2025-09-02 6:19 ` kernel test robot
2025-09-01 10:02 ` [PATCH 10/23] ublk: add io events fifo structure Ming Lei
2025-09-01 10:02 ` [PATCH 11/23] ublk: add batch I/O dispatch infrastructure Ming Lei
2025-09-01 10:02 ` [PATCH 12/23] ublk: add UBLK_U_IO_FETCH_IO_CMDS for batch I/O processing Ming Lei
2025-09-01 10:02 ` [PATCH 13/23] ublk: abort requests filled in event kfifo Ming Lei
2025-09-01 10:02 ` [PATCH 14/23] ublk: add new feature UBLK_F_BATCH_IO Ming Lei
2025-09-01 10:02 ` [PATCH 15/23] ublk: document " Ming Lei
2025-09-01 10:02 ` [PATCH 16/23] selftests: ublk: replace assert() with ublk_assert() Ming Lei
2025-09-01 10:02 ` [PATCH 17/23] selftests: ublk: add ublk_io_buf_idx() for returning io buffer index Ming Lei
2025-09-01 10:02 ` [PATCH 18/23] selftests: ublk: add batch buffer management infrastructure Ming Lei
2025-09-01 10:02 ` [PATCH 19/23] selftests: ublk: handle UBLK_U_IO_PREP_IO_CMDS Ming Lei
2025-09-01 10:02 ` [PATCH 20/23] selftests: ublk: handle UBLK_U_IO_COMMIT_IO_CMDS Ming Lei
2025-09-01 10:02 ` [PATCH 21/23] selftests: ublk: handle UBLK_U_IO_FETCH_IO_CMDS Ming Lei
2025-09-01 10:02 ` [PATCH 22/23] selftests: ublk: add --batch/-b for enabling F_BATCH_IO Ming Lei
2025-09-01 10:02 ` [PATCH 23/23] selftests: ublk: support arbitrary threads/queues combination Ming Lei
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=aMDrCcIYpa8EzBMb@fedora \
--to=ming.lei@redhat.com \
--cc=axboe@kernel.dk \
--cc=csander@purestorage.com \
--cc=linux-block@vger.kernel.org \
--cc=ushankar@purestorage.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox