On Fri, Mar 24, 2023 at 02:28:03PM -0700, Keith Busch wrote:
>From: Keith Busch
>
>The first advantage is that unshared and multipath namespaces can use
>the same polling callback.
>
>The other advantage is that we don't need a bio payload in order to
>poll, allowing commands like 'flush' and 'write zeroes' to be submitted
>on the same high priority queue as read and write commands.
>
>This can also allow for a future optimization for the driver since we no
>longer need to create special hidden block devices to back nvme-generic
>char dev's with unsupported command sets.
>
>Signed-off-by: Keith Busch
>---
> drivers/nvme/host/ioctl.c     | 79 ++++++++++++-----------------------
> drivers/nvme/host/multipath.c |  2 +-
> drivers/nvme/host/nvme.h      |  2 -
> 3 files changed, 28 insertions(+), 55 deletions(-)
>
>diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
>index 723e7d5b778f2..369e8519b87a2 100644
>--- a/drivers/nvme/host/ioctl.c
>+++ b/drivers/nvme/host/ioctl.c
>@@ -503,7 +503,6 @@ static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req,
> {
> 	struct io_uring_cmd *ioucmd = req->end_io_data;
> 	struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
>-	void *cookie = READ_ONCE(ioucmd->cookie);
> 
> 	req->bio = pdu->bio;
> 	if (nvme_req(req)->flags & NVME_REQ_CANCELLED)
>@@ -516,9 +515,10 @@ static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req,
> 	 * For iopoll, complete it directly.
> 	 * Otherwise, move the completion to task work.
> 	 */
>-	if (cookie != NULL && blk_rq_is_poll(req))
>+	if (blk_rq_is_poll(req)) {
>+		WRITE_ONCE(ioucmd->cookie, NULL);
> 		nvme_uring_task_cb(ioucmd);
>-	else
>+	} else
> 		io_uring_cmd_complete_in_task(ioucmd, nvme_uring_task_cb);
> 
> 	return RQ_END_IO_FREE;
>@@ -529,7 +529,6 @@ static enum rq_end_io_ret nvme_uring_cmd_end_io_meta(struct request *req,
> {
> 	struct io_uring_cmd *ioucmd = req->end_io_data;
> 	struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
>-	void *cookie = READ_ONCE(ioucmd->cookie);
> 
> 	req->bio = pdu->bio;
> 	pdu->req = req;
>@@ -538,9 +537,10 @@ static enum rq_end_io_ret nvme_uring_cmd_end_io_meta(struct request *req,
> 	 * For iopoll, complete it directly.
> 	 * Otherwise, move the completion to task work.
> 	 */
>-	if (cookie != NULL && blk_rq_is_poll(req))
>+	if (blk_rq_is_poll(req)) {
>+		WRITE_ONCE(ioucmd->cookie, NULL);
> 		nvme_uring_task_meta_cb(ioucmd);
>-	else
>+	} else
> 		io_uring_cmd_complete_in_task(ioucmd, nvme_uring_task_meta_cb);
> 
> 	return RQ_END_IO_NONE;
>@@ -597,7 +597,6 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
> 	if (issue_flags & IO_URING_F_IOPOLL)
> 		rq_flags |= REQ_POLLED;
> 
>-retry:
> 	req = nvme_alloc_user_request(q, &c, rq_flags, blk_flags);
> 	if (IS_ERR(req))
> 		return PTR_ERR(req);
>@@ -611,17 +610,9 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
> 		return ret;
> 	}
> 
>-	if (issue_flags & IO_URING_F_IOPOLL && rq_flags & REQ_POLLED) {
>-		if (unlikely(!req->bio)) {
>-			/* we can't poll this, so alloc regular req instead */
>-			blk_mq_free_request(req);
>-			rq_flags &= ~REQ_POLLED;
>-			goto retry;
>-		} else {
>-			WRITE_ONCE(ioucmd->cookie, req->bio);
>-			req->bio->bi_opf |= REQ_POLLED;
>-		}
>-	}
>+	if (blk_rq_is_poll(req))
>+		WRITE_ONCE(ioucmd->cookie, req);

blk_rq_is_poll(req) warns for a null "req->bio" and returns false in that case.
That defeats one of the purposes of the series, i.e. polling on no-payload
commands such as flush/write-zeroes.
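For reference, blk_rq_is_poll() in include/linux/blk-mq.h is roughly the
following (paraphrased from memory, so treat the exact body as approximate):

static inline bool blk_rq_is_poll(struct request *rq)
{
	/* only requests on a poll hctx are pollable */
	if (rq->mq_hctx->type != HCTX_TYPE_POLL)
		return false;
	/* a bio-less request trips this warning and is reported as not pollable */
	if (WARN_ON_ONCE(!rq->bio))
		return false;
	return true;
}

So with that check as-is, the submission side never sets the cookie for
bio-less commands such as flush/write-zeroes.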
> 	/* to free bio on completion, as req->bio will be null at that time */
> 	pdu->bio = req->bio;
> 	pdu->meta_len = d.metadata_len;
>@@ -780,18 +771,27 @@ int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd,
> 		struct io_comp_batch *iob,
> 		unsigned int poll_flags)
> {
>-	struct bio *bio;
>+	struct request *req;
> 	int ret = 0;
>-	struct nvme_ns *ns;
>-	struct request_queue *q;
> 
>+	/*
>+	 * The rcu lock ensures the 'req' in the command cookie will not be
>+	 * freed until after the unlock. The queue must be frozen to free the
>+	 * request, and the freeze requires an rcu grace period. The cookie is
>+	 * cleared before the request is completed, so we're fine even if a
>+	 * competing polling thread completes this thread's request.
>+	 */
> 	rcu_read_lock();
>-	bio = READ_ONCE(ioucmd->cookie);
>-	ns = container_of(file_inode(ioucmd->file)->i_cdev,
>-			struct nvme_ns, cdev);
>-	q = ns->queue;
>-	if (test_bit(QUEUE_FLAG_POLL, &q->queue_flags) && bio && bio->bi_bdev)
>-		ret = bio_poll(bio, iob, poll_flags);
>+	req = READ_ONCE(ioucmd->cookie);
>+	if (req) {

This is risky. We are not sure the cookie is actually a "req" at this point.
If the driver is loaded without poll queues, we will not be able to set the
req into ioucmd->cookie during submission (in nvme_uring_cmd_io). Therefore,
the original code checked for QUEUE_FLAG_POLL before treating ioucmd->cookie
as a bio here. This should handle it (on top of your patch):

diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index 369e8519b87a..89eef5da4b5c 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -773,6 +773,8 @@ int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd,
 {
 	struct request *req;
 	int ret = 0;
+	struct request_queue *q;
+	struct nvme_ns *ns;
 
 	/*
 	 * The rcu lock ensures the 'req' in the command cookie will not be
@@ -783,8 +785,10 @@ int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd,
 	 */
 	rcu_read_lock();
 	req = READ_ONCE(ioucmd->cookie);
-	if (req) {
-		struct request_queue *q = req->q;
+	ns = container_of(file_inode(ioucmd->file)->i_cdev, struct nvme_ns,
+			cdev);
+	q = ns->queue;
+	if (test_bit(QUEUE_FLAG_POLL, &q->queue_flags) && req) {
 		if (percpu_ref_tryget(&q->q_usage_counter)) {
 			ret = blk_mq_poll(q, blk_rq_to_qc(req), iob,