* [PATCH 1/3] nvme: Sync queues on controller resets
@ 2018-01-29 23:59 Keith Busch
From: Keith Busch @ 2018-01-29 23:59 UTC
This patch has the nvme pci driver synchronize request queues to ensure
that starting up the controller is not racing with a previously running
timeout handler.
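The intended ordering, sketched for illustration only (nvme_example_reset() is
a placeholder name; the call site this series actually adds is
nvme_reset_work() in the pci.c hunk below):

static void nvme_example_reset(struct nvme_ctrl *ctrl)
{
	nvme_stop_queues(ctrl);		/* quiesce I/O queues */
	/* ... disable the controller hardware ... */

	/*
	 * Flush every namespace's q->timeout_work so no timeout handler
	 * started against the previous controller instance is still
	 * running when the controller is brought back up.
	 */
	nvme_sync_queues(ctrl);

	/* Now it is safe to re-enable the controller and restart queues. */
	nvme_start_queues(ctrl);
}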
Reported-by: Jianchao Wang <jianchao.w.wang at oracle.com>
Signed-off-by: Keith Busch <keith.busch at intel.com>
---
drivers/nvme/host/core.c | 15 ++++++++++++++-
drivers/nvme/host/nvme.h | 1 +
drivers/nvme/host/pci.c | 1 +
3 files changed, 16 insertions(+), 1 deletion(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index e8104871cbbf..ceb5d72d8c97 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3540,12 +3540,25 @@ void nvme_start_queues(struct nvme_ctrl *ctrl)
struct nvme_ns *ns;
mutex_lock(&ctrl->namespaces_mutex);
- list_for_each_entry(ns, &ctrl->namespaces, list)
+ list_for_each_entry(ns, &ctrl->namespaces, list) {
blk_mq_unquiesce_queue(ns->queue);
+ blk_mq_kick_requeue_list(ns->queue);
+ }
mutex_unlock(&ctrl->namespaces_mutex);
}
EXPORT_SYMBOL_GPL(nvme_start_queues);
+void nvme_sync_queues(struct nvme_ctrl *ctrl)
+{
+ struct nvme_ns *ns;
+
+ mutex_lock(&ctrl->namespaces_mutex);
+ list_for_each_entry(ns, &ctrl->namespaces, list)
+ blk_sync_queue(ns->queue);
+ mutex_unlock(&ctrl->namespaces_mutex);
+}
+EXPORT_SYMBOL_GPL(nvme_sync_queues);
+
int nvme_reinit_tagset(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set)
{
if (!ctrl->ops->reinit_request)
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 8e4550fa08f8..e7786bc845fe 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -374,6 +374,7 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
void nvme_stop_queues(struct nvme_ctrl *ctrl);
void nvme_start_queues(struct nvme_ctrl *ctrl);
+void nvme_sync_queues(struct nvme_ctrl *ctrl);
void nvme_kill_queues(struct nvme_ctrl *ctrl);
void nvme_unfreeze(struct nvme_ctrl *ctrl);
void nvme_wait_freeze(struct nvme_ctrl *ctrl);
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 6fe7af00a1f4..9e3d7b293509 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -2286,6 +2286,7 @@ static void nvme_reset_work(struct work_struct *work)
*/
if (dev->ctrl.ctrl_config & NVME_CC_ENABLE)
nvme_dev_disable(dev, false);
+ nvme_sync_queues(&dev->ctrl);
/*
* Introduce RECONNECTING state from nvme-fc/rdma transports to mark the
--
2.14.3
* [PATCH 2/3] nvme: Asynchronous driver commands API
@ 2018-01-29 23:59 ` Keith Busch
From: Keith Busch @ 2018-01-29 23:59 UTC
The driver has a recurring pattern of sending internally generated
asynchronous commands. This patch provides a common API to reduce the
repetition.
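As a usage sketch (illustrative only, not part of the patch; my_async_ctx,
my_endio() and my_submit_example() are placeholder names), a caller submits a
command and gets its callback on completion. Note that nvme_alloc_request()
stores a pointer to the command, so the command memory has to stay valid until
the request completes:

struct my_async_ctx {
	struct nvme_command cmd;	/* must stay valid until my_endio() runs */
};

static void my_endio(struct request *req, blk_status_t status)
{
	struct my_async_ctx *ctx = req->end_io_data;

	blk_mq_free_request(req);
	kfree(ctx);
}

static int my_submit_example(struct nvme_ctrl *ctrl)
{
	struct my_async_ctx *ctx;
	int ret;

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;
	ctx->cmd.common.opcode = nvme_admin_keep_alive;	/* any admin opcode */

	/* A zero timeout makes the helper fall back to ADMIN_TIMEOUT. */
	ret = nvme_submit_async_cmd(ctrl->admin_q, &ctx->cmd, ctx, my_endio,
				    0, BLK_MQ_REQ_NOWAIT);
	if (ret)
		kfree(ctx);
	return ret;
}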
Signed-off-by: Keith Busch <keith.busch at intel.com>
---
drivers/nvme/host/core.c | 31 +++++++++++++++++++------------
drivers/nvme/host/nvme.h | 3 +++
drivers/nvme/host/pci.c | 27 +++++++--------------------
3 files changed, 29 insertions(+), 32 deletions(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index ceb5d72d8c97..4bfb4ba6cd14 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -686,6 +686,22 @@ int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
}
EXPORT_SYMBOL_GPL(nvme_submit_sync_cmd);
+int nvme_submit_async_cmd(struct request_queue *q, struct nvme_command *cmd,
+ void *end_io_data, rq_end_io_fn *done,
+ unsigned timeout, blk_mq_req_flags_t flags)
+{
+ struct request *req;
+
+ req = nvme_alloc_request(q, cmd, flags, NVME_QID_ANY);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+ req->timeout = timeout ? timeout : ADMIN_TIMEOUT;
+ req->end_io_data = end_io_data;
+ blk_execute_rq_nowait(q, NULL, req, false, done);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nvme_submit_async_cmd);
+
static void *nvme_add_user_metadata(struct bio *bio, void __user *ubuf,
unsigned len, u32 seed, bool write)
{
@@ -795,22 +811,13 @@ static void nvme_keep_alive_end_io(struct request *rq, blk_status_t status)
static int nvme_keep_alive(struct nvme_ctrl *ctrl)
{
struct nvme_command c;
- struct request *rq;
memset(&c, 0, sizeof(c));
c.common.opcode = nvme_admin_keep_alive;
- rq = nvme_alloc_request(ctrl->admin_q, &c, BLK_MQ_REQ_RESERVED,
- NVME_QID_ANY);
- if (IS_ERR(rq))
- return PTR_ERR(rq);
-
- rq->timeout = ctrl->kato * HZ;
- rq->end_io_data = ctrl;
-
- blk_execute_rq_nowait(rq->q, NULL, rq, 0, nvme_keep_alive_end_io);
-
- return 0;
+ return nvme_submit_async_cmd(ctrl->admin_q, &c, ctrl,
+ nvme_keep_alive_end_io, ctrl->kato * HZ,
+ BLK_MQ_REQ_RESERVED);
}
static void nvme_keep_alive_work(struct work_struct *work)
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index e7786bc845fe..d0889dcd79d9 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -389,6 +389,9 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
struct nvme_command *cmd);
int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
void *buf, unsigned bufflen);
+int nvme_submit_async_cmd(struct request_queue *q, struct nvme_command *cmd,
+ void *end_io_data, rq_end_io_fn *done,
+ unsigned timeout, blk_mq_req_flags_t flags);
int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
union nvme_result *result, void *buffer, unsigned bufflen,
unsigned timeout, int qid, int at_head,
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 9e3d7b293509..8fd0e87f0efe 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1185,7 +1185,6 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
struct nvme_queue *nvmeq = iod->nvmeq;
struct nvme_dev *dev = nvmeq->dev;
- struct request *abort_req;
struct nvme_command cmd;
u32 csts = readl(dev->bar + NVME_REG_CSTS);
@@ -1259,17 +1258,12 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
"I/O %d QID %d timeout, aborting\n",
req->tag, nvmeq->qid);
- abort_req = nvme_alloc_request(dev->ctrl.admin_q, &cmd,
- BLK_MQ_REQ_NOWAIT, NVME_QID_ANY);
- if (IS_ERR(abort_req)) {
+ if (nvme_submit_async_cmd(dev->ctrl.admin_q, &cmd, NULL, abort_endio,
+ ADMIN_TIMEOUT, BLK_MQ_REQ_NOWAIT)) {
atomic_inc(&dev->ctrl.abort_limit);
return BLK_EH_RESET_TIMER;
}
- abort_req->timeout = ADMIN_TIMEOUT;
- abort_req->end_io_data = NULL;
- blk_execute_rq_nowait(abort_req->q, NULL, abort_req, 0, abort_endio);
-
/*
* The aborted req will be completed on receiving the abort req.
* We enable the timer again. If hit twice, it'll cause a device reset,
@@ -1991,24 +1985,17 @@ static void nvme_del_cq_end(struct request *req, blk_status_t error)
static int nvme_delete_queue(struct nvme_queue *nvmeq, u8 opcode)
{
struct request_queue *q = nvmeq->dev->ctrl.admin_q;
- struct request *req;
struct nvme_command cmd;
+ rq_end_io_fn *done = (opcode == nvme_admin_delete_cq) ?
+ nvme_del_cq_end : nvme_del_queue_end;
+
memset(&cmd, 0, sizeof(cmd));
cmd.delete_queue.opcode = opcode;
cmd.delete_queue.qid = cpu_to_le16(nvmeq->qid);
- req = nvme_alloc_request(q, &cmd, BLK_MQ_REQ_NOWAIT, NVME_QID_ANY);
- if (IS_ERR(req))
- return PTR_ERR(req);
-
- req->timeout = ADMIN_TIMEOUT;
- req->end_io_data = nvmeq;
-
- blk_execute_rq_nowait(q, NULL, req, false,
- opcode == nvme_admin_delete_cq ?
- nvme_del_cq_end : nvme_del_queue_end);
- return 0;
+ return nvme_submit_async_cmd(q, &cmd, nvmeq, done, ADMIN_TIMEOUT,
+ BLK_MQ_REQ_NOWAIT);
}
static void nvme_disable_io_queues(struct nvme_dev *dev)
--
2.14.3
* [PATCH 3/3] nvme-pci: Delete HMB asynchronously
@ 2018-01-29 23:59 ` Keith Busch
From: Keith Busch @ 2018-01-29 23:59 UTC
Deleting the host memory buffer occurs in the controller disabling
path. The driver needs to be able to make forward progress even if
the controller can't produce a completion for that command. Issuing a
synchronous nvme command within the controller shutdown path could block
indefinitely if the controller is unable to provide a response for any
reason, so this patch sends the HMB teardown asynchronously.
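Condensed, the pattern looks like this (a sketch, not the diff itself;
example_disable_hmb() is a placeholder and the real code is interleaved with
the queue teardown in nvme_dev_disable() below):

static void example_disable_hmb(struct nvme_dev *dev)
{
	DECLARE_COMPLETION_ONSTACK(hmb_complete);
	bool hmb_wait = false;

	if (dev->host_mem_descs)
		/* async submit; nvme_hmb_endio() completes hmb_complete */
		hmb_wait = !nvme_set_host_mem(dev, 0, &hmb_complete);

	/* ... delete the I/O queues while the HMB command is in flight ... */

	if (hmb_wait)
		wait_for_completion_timeout(&hmb_complete, ADMIN_TIMEOUT);
}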
Reported-by: Jianchao Wang <jianchao.w.wang at oracle.com>
Signed-off-by: Keith Busch <keith.busch at intel.com>
---
drivers/nvme/host/pci.c | 29 ++++++++++++++++++++++-------
1 file changed, 22 insertions(+), 7 deletions(-)
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 8fd0e87f0efe..9977b66d98cd 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1714,7 +1714,15 @@ static inline void nvme_release_cmb(struct nvme_dev *dev)
}
}
-static int nvme_set_host_mem(struct nvme_dev *dev, u32 bits)
+static void nvme_hmb_endio(struct request *req, blk_status_t error)
+{
+ struct completion *c = req->end_io_data;
+
+ blk_mq_free_request(req);
+ complete(c);
+}
+
+static int nvme_set_host_mem(struct nvme_dev *dev, u32 bits, struct completion *complete)
{
u64 dma_addr = dev->host_mem_descs_dma;
struct nvme_command c;
@@ -1730,6 +1738,11 @@ static int nvme_set_host_mem(struct nvme_dev *dev, u32 bits)
c.features.dword14 = cpu_to_le32(upper_32_bits(dma_addr));
c.features.dword15 = cpu_to_le32(dev->nr_host_mem_descs);
+ if (complete)
+ return nvme_submit_async_cmd(dev->ctrl.admin_q, &c,
+ complete, nvme_hmb_endio,
+ ADMIN_TIMEOUT, BLK_MQ_REQ_NOWAIT);
+
ret = nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0);
if (ret) {
dev_warn(dev->ctrl.device,
@@ -1760,9 +1773,7 @@ static void nvme_free_host_mem(struct nvme_dev *dev)
dev->nr_host_mem_descs = 0;
}
-static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred,
- u32 chunk_size)
-{
+static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred, u32 chunk_size) {
struct nvme_host_mem_buf_desc *descs;
u32 max_entries, len;
dma_addr_t descs_dma;
@@ -1884,7 +1895,7 @@ static int nvme_setup_host_mem(struct nvme_dev *dev)
dev->host_mem_size >> ilog2(SZ_1M));
}
- ret = nvme_set_host_mem(dev, enable_bits);
+ ret = nvme_set_host_mem(dev, enable_bits, NULL);
if (ret)
nvme_free_host_mem(dev);
return ret;
@@ -2152,8 +2163,9 @@ static void nvme_pci_disable(struct nvme_dev *dev)
static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
{
int i;
- bool dead = true;
+ bool dead = true, hmb_wait = false;
struct pci_dev *pdev = to_pci_dev(dev->dev);
+ DECLARE_COMPLETION_ONSTACK(hmb_complete);
mutex_lock(&dev->shutdown_lock);
if (pci_is_enabled(pdev)) {
@@ -2181,13 +2193,16 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
* but I'd rather be safe than sorry..
*/
if (dev->host_mem_descs)
- nvme_set_host_mem(dev, 0);
+ hmb_wait = !nvme_set_host_mem(dev, 0, &hmb_complete);
}
nvme_stop_queues(&dev->ctrl);
if (!dead) {
nvme_disable_io_queues(dev);
+ if (hmb_wait)
+ wait_for_completion_timeout(&hmb_complete,
+ ADMIN_TIMEOUT);
nvme_disable_admin_queue(dev, shutdown);
}
for (i = dev->ctrl.queue_count - 1; i >= 0; i--)
--
2.14.3
* [PATCH 2/3] nvme: Asynchronous driver commands API
@ 2018-01-30 7:05 ` Christoph Hellwig
From: Christoph Hellwig @ 2018-01-30 7:05 UTC
As pointed out by Roland, we'll need to make sure req->cmd is allocated
dynamically for async commands. I suspect the best way to handle that
is to turn it into an actual embedded structure instead of a pointer.
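A minimal sketch of what that could look like, with the field set approximated
from the current struct nvme_request (illustrative, not a proposed patch):

struct nvme_request {
	struct nvme_command	cmd;	/* was: struct nvme_command *cmd; */
	union nvme_result	result;
	u8			retries;
	u8			flags;
	u16			status;
};

/*
 * nvme_alloc_request() would then copy the caller's command, i.e.
 * "nvme_req(req)->cmd = *cmd;" instead of storing the pointer, so an
 * async submitter's on-stack command can safely go out of scope.
 */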
* [PATCH 1/3] nvme: Sync queues on controller resets
@ 2018-01-30 9:28 ` jianchao.wang
From: jianchao.wang @ 2018-01-30 9:28 UTC
Hi Keith
Thanks for your patch.
That's really appreciated.
On 01/30/2018 07:59 AM, Keith Busch wrote:
> This patch has the nvme pci driver synchronize request queues to ensure
> that starting up the controller is not racing with a previously running
> timeout handler.
>
> Reported-by: Jianchao Wang <jianchao.w.wang at oracle.com>
> Signed-off-by: Keith Busch <keith.busch at intel.com>
> ---
> drivers/nvme/host/core.c | 15 ++++++++++++++-
> drivers/nvme/host/nvme.h | 1 +
> drivers/nvme/host/pci.c | 1 +
> 3 files changed, 16 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index e8104871cbbf..ceb5d72d8c97 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -3540,12 +3540,25 @@ void nvme_start_queues(struct nvme_ctrl *ctrl)
> struct nvme_ns *ns;
>
> mutex_lock(&ctrl->namespaces_mutex);
> - list_for_each_entry(ns, &ctrl->namespaces, list)
> + list_for_each_entry(ns, &ctrl->namespaces, list) {
> blk_mq_unquiesce_queue(ns->queue);
> + blk_mq_kick_requeue_list(ns->queue);
> + }
> mutex_unlock(&ctrl->namespaces_mutex);
> }
> EXPORT_SYMBOL_GPL(nvme_start_queues);
>
> +void nvme_sync_queues(struct nvme_ctrl *ctrl)
> +{
> + struct nvme_ns *ns;
> +
> + mutex_lock(&ctrl->namespaces_mutex);
> + list_for_each_entry(ns, &ctrl->namespaces, list)
> + blk_sync_queue(ns->queue);
> + mutex_unlock(&ctrl->namespaces_mutex);
> +}
> +EXPORT_SYMBOL_GPL(nvme_sync_queues);
> +
> int nvme_reinit_tagset(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set)
> {
> if (!ctrl->ops->reinit_request)
> diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
> index 8e4550fa08f8..e7786bc845fe 100644
> --- a/drivers/nvme/host/nvme.h
> +++ b/drivers/nvme/host/nvme.h
> @@ -374,6 +374,7 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
>
> void nvme_stop_queues(struct nvme_ctrl *ctrl);
> void nvme_start_queues(struct nvme_ctrl *ctrl);
> +void nvme_sync_queues(struct nvme_ctrl *ctrl);
> void nvme_kill_queues(struct nvme_ctrl *ctrl);
> void nvme_unfreeze(struct nvme_ctrl *ctrl);
> void nvme_wait_freeze(struct nvme_ctrl *ctrl);
> diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
> index 6fe7af00a1f4..9e3d7b293509 100644
> --- a/drivers/nvme/host/pci.c
> +++ b/drivers/nvme/host/pci.c
> @@ -2286,6 +2286,7 @@ static void nvme_reset_work(struct work_struct *work)
> */
> if (dev->ctrl.ctrl_config & NVME_CC_ENABLE)
> nvme_dev_disable(dev, false);
> + nvme_sync_queues(&dev->ctrl);
There could be a circular pattern here. Please consider the following scenario:

timeout_work context                  reset_work context
nvme_timeout                          nvme_reset_work
 -> nvme_dev_disable                   -> nvme_sync_queues  // holds namespaces_mutex
  -> nvme_stop_queues                   -> blk_sync_queue
   -> requires namespaces_mutex          -> cancel_work_sync(&q->timeout_work)

reset_work waits for the timeout work to finish while holding namespaces_mutex,
and the timeout work is blocked waiting for namespaces_mutex, so neither side
can make progress.
On the other hand, blk_mq_kick_requeue_list() should also be added in
nvme_kill_queues() to cover the queue_count < 2 case.
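That is, roughly the following (a sketch approximated from the 4.15-era
nvme_kill_queues(); only the blk_mq_kick_requeue_list() call is the suggested
addition):

void nvme_kill_queues(struct nvme_ctrl *ctrl)
{
	struct nvme_ns *ns;

	mutex_lock(&ctrl->namespaces_mutex);

	/* Forcibly unquiesce the admin queue to avoid blocking dispatch */
	if (ctrl->admin_q)
		blk_mq_unquiesce_queue(ctrl->admin_q);

	list_for_each_entry(ns, &ctrl->namespaces, list) {
		if (!ns->disk || test_and_set_bit(NVME_NS_DEAD, &ns->flags))
			continue;
		revalidate_disk(ns->disk);
		blk_set_queue_dying(ns->queue);
		blk_mq_unquiesce_queue(ns->queue);
		blk_mq_kick_requeue_list(ns->queue);	/* suggested addition */
	}
	mutex_unlock(&ctrl->namespaces_mutex);
}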
Thanks
Jianchao
>
> /*
> * Introduce RECONNECTING state from nvme-fc/rdma transports to mark the
>
Thread overview: 5+ messages
2018-01-29 23:59 [PATCH 1/3] nvme: Sync queues on controller resets Keith Busch
2018-01-29 23:59 ` [PATCH 2/3] nvme: Asynchronous driver commands API Keith Busch
2018-01-30 7:05 ` Christoph Hellwig
2018-01-29 23:59 ` [PATCH 3/3] nvme-pci: Delete HMB asynchronously Keith Busch
2018-01-30 9:28 ` [PATCH 1/3] nvme: Sync queues on controller resets jianchao.wang