* [PATCH] ublk: eliminate unnecessary io_cmds queue
@ 2024-10-09 19:37 Uday Shankar
2024-10-10 3:45 ` Ming Lei
0 siblings, 1 reply; 4+ messages in thread
From: Uday Shankar @ 2024-10-09 19:37 UTC (permalink / raw)
To: Ming Lei, Jens Axboe; +Cc: Uday Shankar, linux-block
Currently, ublk_drv maintains a per-hctx queue of requests awaiting
dispatch to the ublk server, and pokes the ubq_daemon to come pick them
up via the task_work mechanism when needed. But task_work already
supports internal (lockless) queueing. Reuse this queueing mechanism
(i.e. have one task_work queue item per request awaiting dispatch)
instead of maintaining our own queue in ublk_drv.
Signed-off-by: Uday Shankar <ushankar@purestorage.com>
---
drivers/block/ublk_drv.c | 34 ++++++----------------------------
1 file changed, 6 insertions(+), 28 deletions(-)
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index 60f6d86ea1e6..2ea108347ec4 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -80,6 +80,7 @@ struct ublk_rq_data {
struct ublk_uring_cmd_pdu {
struct ublk_queue *ubq;
+ struct request *req;
u16 tag;
};
@@ -141,8 +142,6 @@ struct ublk_queue {
struct task_struct *ubq_daemon;
char *io_cmd_buf;
- struct llist_head io_cmds;
-
unsigned long io_addr; /* mapped vm address */
unsigned int max_io_sz;
bool force_abort;
@@ -1132,9 +1131,10 @@ static inline void __ublk_abort_rq(struct ublk_queue *ubq,
blk_mq_end_request(rq, BLK_STS_IOERR);
}
-static inline void __ublk_rq_task_work(struct request *req,
+static inline void __ublk_rq_task_work(struct io_uring_cmd *cmd,
unsigned issue_flags)
{
+ struct request *req = ublk_get_uring_cmd_pdu(cmd)->req;
struct ublk_queue *ubq = req->mq_hctx->driver_data;
int tag = req->tag;
struct ublk_io *io = &ubq->ios[tag];
@@ -1211,34 +1211,12 @@ static inline void __ublk_rq_task_work(struct request *req,
ubq_complete_io_cmd(io, UBLK_IO_RES_OK, issue_flags);
}
-static inline void ublk_forward_io_cmds(struct ublk_queue *ubq,
- unsigned issue_flags)
-{
- struct llist_node *io_cmds = llist_del_all(&ubq->io_cmds);
- struct ublk_rq_data *data, *tmp;
-
- io_cmds = llist_reverse_order(io_cmds);
- llist_for_each_entry_safe(data, tmp, io_cmds, node)
- __ublk_rq_task_work(blk_mq_rq_from_pdu(data), issue_flags);
-}
-
-static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd, unsigned issue_flags)
-{
- struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
- struct ublk_queue *ubq = pdu->ubq;
-
- ublk_forward_io_cmds(ubq, issue_flags);
-}
-
static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq)
{
- struct ublk_rq_data *data = blk_mq_rq_to_pdu(rq);
-
- if (llist_add(&data->node, &ubq->io_cmds)) {
- struct ublk_io *io = &ubq->ios[rq->tag];
+ struct ublk_io *io = &ubq->ios[rq->tag];
- io_uring_cmd_complete_in_task(io->cmd, ublk_rq_task_work_cb);
- }
+ ublk_get_uring_cmd_pdu(io->cmd)->req = rq;
+ io_uring_cmd_complete_in_task(io->cmd, __ublk_rq_task_work);
}
static enum blk_eh_timer_return ublk_timeout(struct request *rq)
base-commit: 7a84944a4bf7abda16291ff13984960d0df4e74a
--
2.34.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH] ublk: eliminate unnecessary io_cmds queue
2024-10-09 19:37 [PATCH] ublk: eliminate unnecessary io_cmds queue Uday Shankar
@ 2024-10-10 3:45 ` Ming Lei
2024-10-15 20:51 ` Uday Shankar
0 siblings, 1 reply; 4+ messages in thread
From: Ming Lei @ 2024-10-10 3:45 UTC (permalink / raw)
To: Uday Shankar; +Cc: Jens Axboe, linux-block
On Wed, Oct 09, 2024 at 01:37:00PM -0600, Uday Shankar wrote:
> Currently, ublk_drv maintains a per-hctx queue of requests awaiting
> dispatch to the ublk server, and pokes the ubq_daemon to come pick them
> up via the task_work mechanism when needed. But task_work already
> supports internal (lockless) queueing. Reuse this queueing mechanism
> (i.e. have one task_work queue item per request awaiting dispatch)
> instead of maintaining our own queue in ublk_drv.
>
> Signed-off-by: Uday Shankar <ushankar@purestorage.com>
> ---
> drivers/block/ublk_drv.c | 34 ++++++----------------------------
> 1 file changed, 6 insertions(+), 28 deletions(-)
>
> diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
> index 60f6d86ea1e6..2ea108347ec4 100644
> --- a/drivers/block/ublk_drv.c
> +++ b/drivers/block/ublk_drv.c
> @@ -80,6 +80,7 @@ struct ublk_rq_data {
>
> struct ublk_uring_cmd_pdu {
> struct ublk_queue *ubq;
> + struct request *req;
> u16 tag;
> };
I'd suggest to add the following build check in init function since there is
only 32bytes in uring_cmd pdu:
BUILD_BUG_ON(sizeof(ublk_uring_cmd_pdu) < sizeof_field(io_uring_cmd, pud))
>
> @@ -141,8 +142,6 @@ struct ublk_queue {
> struct task_struct *ubq_daemon;
> char *io_cmd_buf;
>
> - struct llist_head io_cmds;
> -
> unsigned long io_addr; /* mapped vm address */
> unsigned int max_io_sz;
> bool force_abort;
> @@ -1132,9 +1131,10 @@ static inline void __ublk_abort_rq(struct ublk_queue *ubq,
> blk_mq_end_request(rq, BLK_STS_IOERR);
> }
>
> -static inline void __ublk_rq_task_work(struct request *req,
> +static inline void __ublk_rq_task_work(struct io_uring_cmd *cmd,
> unsigned issue_flags)
`inline` can be removed and __ublk_rq_task_work() can be named as
ublk_rq_task_work() now.
> {
> + struct request *req = ublk_get_uring_cmd_pdu(cmd)->req;
> struct ublk_queue *ubq = req->mq_hctx->driver_data;
> int tag = req->tag;
> struct ublk_io *io = &ubq->ios[tag];
> @@ -1211,34 +1211,12 @@ static inline void __ublk_rq_task_work(struct request *req,
> ubq_complete_io_cmd(io, UBLK_IO_RES_OK, issue_flags);
> }
>
> -static inline void ublk_forward_io_cmds(struct ublk_queue *ubq,
> - unsigned issue_flags)
> -{
> - struct llist_node *io_cmds = llist_del_all(&ubq->io_cmds);
> - struct ublk_rq_data *data, *tmp;
> -
> - io_cmds = llist_reverse_order(io_cmds);
> - llist_for_each_entry_safe(data, tmp, io_cmds, node)
> - __ublk_rq_task_work(blk_mq_rq_from_pdu(data), issue_flags);
> -}
> -
> -static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd, unsigned issue_flags)
> -{
> - struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
> - struct ublk_queue *ubq = pdu->ubq;
> -
> - ublk_forward_io_cmds(ubq, issue_flags);
> -}
> -
> static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq)
> {
> - struct ublk_rq_data *data = blk_mq_rq_to_pdu(rq);
> -
> - if (llist_add(&data->node, &ubq->io_cmds)) {
> - struct ublk_io *io = &ubq->ios[rq->tag];
> + struct ublk_io *io = &ubq->ios[rq->tag];
>
> - io_uring_cmd_complete_in_task(io->cmd, ublk_rq_task_work_cb);
> - }
> + ublk_get_uring_cmd_pdu(io->cmd)->req = rq;
> + io_uring_cmd_complete_in_task(io->cmd, __ublk_rq_task_work);
> }
I'd suggest to comment that io_uring_cmd_complete_in_task() needs to
maintain io command order.
Otherwise this patch looks fine.
Thanks,
Ming
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] ublk: eliminate unnecessary io_cmds queue
2024-10-10 3:45 ` Ming Lei
@ 2024-10-15 20:51 ` Uday Shankar
2024-10-16 2:29 ` Ming Lei
0 siblings, 1 reply; 4+ messages in thread
From: Uday Shankar @ 2024-10-15 20:51 UTC (permalink / raw)
To: Ming Lei; +Cc: Jens Axboe, linux-block
On Thu, Oct 10, 2024 at 11:45:03AM +0800, Ming Lei wrote:
> > static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq)
> > {
> > - struct ublk_rq_data *data = blk_mq_rq_to_pdu(rq);
> > -
> > - if (llist_add(&data->node, &ubq->io_cmds)) {
> > - struct ublk_io *io = &ubq->ios[rq->tag];
> > + struct ublk_io *io = &ubq->ios[rq->tag];
> >
> > - io_uring_cmd_complete_in_task(io->cmd, ublk_rq_task_work_cb);
> > - }
> > + ublk_get_uring_cmd_pdu(io->cmd)->req = rq;
> > + io_uring_cmd_complete_in_task(io->cmd, __ublk_rq_task_work);
> > }
>
> I'd suggest to comment that io_uring_cmd_complete_in_task() needs to
> maintain io command order.
Sorry, can you explain why this is important? Generally speaking
out-of-order completion of I/Os is considered okay, so what's the issue
if the dispatch to the ublk server here is not in order?
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] ublk: eliminate unnecessary io_cmds queue
2024-10-15 20:51 ` Uday Shankar
@ 2024-10-16 2:29 ` Ming Lei
0 siblings, 0 replies; 4+ messages in thread
From: Ming Lei @ 2024-10-16 2:29 UTC (permalink / raw)
To: Uday Shankar; +Cc: Jens Axboe, linux-block
On Tue, Oct 15, 2024 at 02:51:14PM -0600, Uday Shankar wrote:
> On Thu, Oct 10, 2024 at 11:45:03AM +0800, Ming Lei wrote:
> > > static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq)
> > > {
> > > - struct ublk_rq_data *data = blk_mq_rq_to_pdu(rq);
> > > -
> > > - if (llist_add(&data->node, &ubq->io_cmds)) {
> > > - struct ublk_io *io = &ubq->ios[rq->tag];
> > > + struct ublk_io *io = &ubq->ios[rq->tag];
> > >
> > > - io_uring_cmd_complete_in_task(io->cmd, ublk_rq_task_work_cb);
> > > - }
> > > + ublk_get_uring_cmd_pdu(io->cmd)->req = rq;
> > > + io_uring_cmd_complete_in_task(io->cmd, __ublk_rq_task_work);
> > > }
> >
> > I'd suggest to comment that io_uring_cmd_complete_in_task() needs to
> > maintain io command order.
>
> Sorry, can you explain why this is important? Generally speaking
> out-of-order completion of I/Os is considered okay, so what's the issue
> if the dispatch to the ublk server here is not in order?
It is just okay, but proper implementation requires to keep IO order.
Please see:
1) 7d4a93176e01 ("ublk_drv: don't forward io commands in reserve order")
2) [Report] requests are submitted to hardware in reverse order from nvme/virtio-blk queue_rqs()
https://lore.kernel.org/linux-block/ZbD7ups50ryrlJ%2FG@fedora/
I am also working on ublk-bpf which needs this extra list for submitting
IO in batch, please hold on this patch now.
I plan to send out bpf patches in this cycle or next, and we can restart
the cleanup if the bpf thing turns out not doable.
Thanks,
Ming
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2024-10-16 2:30 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-10-09 19:37 [PATCH] ublk: eliminate unnecessary io_cmds queue Uday Shankar
2024-10-10 3:45 ` Ming Lei
2024-10-15 20:51 ` Uday Shankar
2024-10-16 2:29 ` Ming Lei
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).