linux-nvme.lists.infradead.org archive mirror
* [PATCH 1/3] nvme: Sync queues on controller resets
@ 2018-01-29 23:59 Keith Busch
  2018-01-29 23:59 ` [PATCH 2/3] nvme: Asynchronous driver commands API Keith Busch
                   ` (2 more replies)
  0 siblings, 3 replies; 5+ messages in thread
From: Keith Busch @ 2018-01-29 23:59 UTC (permalink / raw)


This patch has the nvme pci driver synchronize request queues to ensure
that starting up the controller does not race with a previously running
timeout handler.
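
For reference, the guarantee the reset path relies on comes from
blk_sync_queue(); paraphrased from the block layer of roughly this kernel
version (a sketch, not a verbatim copy), the relevant part is:

	void blk_sync_queue(struct request_queue *q)
	{
		del_timer_sync(&q->timeout);		/* stop the request timeout timer */
		cancel_work_sync(&q->timeout_work);	/* wait out a running timeout handler */
		/* ... also flushes blk-mq requeue/run work ... */
	}

Calling this after nvme_dev_disable() therefore ensures no timeout handler
is still executing when the controller is brought back up.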

Reported-by: Jianchao Wang <jianchao.w.wang at oracle.com>
Signed-off-by: Keith Busch <keith.busch at intel.com>
---
 drivers/nvme/host/core.c | 15 ++++++++++++++-
 drivers/nvme/host/nvme.h |  1 +
 drivers/nvme/host/pci.c  |  1 +
 3 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index e8104871cbbf..ceb5d72d8c97 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3540,12 +3540,25 @@ void nvme_start_queues(struct nvme_ctrl *ctrl)
 	struct nvme_ns *ns;
 
 	mutex_lock(&ctrl->namespaces_mutex);
-	list_for_each_entry(ns, &ctrl->namespaces, list)
+	list_for_each_entry(ns, &ctrl->namespaces, list) {
 		blk_mq_unquiesce_queue(ns->queue);
+		blk_mq_kick_requeue_list(ns->queue);
+	}
 	mutex_unlock(&ctrl->namespaces_mutex);
 }
 EXPORT_SYMBOL_GPL(nvme_start_queues);
 
+void nvme_sync_queues(struct nvme_ctrl *ctrl)
+{
+	struct nvme_ns *ns;
+
+	mutex_lock(&ctrl->namespaces_mutex);
+	list_for_each_entry(ns, &ctrl->namespaces, list)
+		blk_sync_queue(ns->queue);
+	mutex_unlock(&ctrl->namespaces_mutex);
+}
+EXPORT_SYMBOL_GPL(nvme_sync_queues);
+
 int nvme_reinit_tagset(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set)
 {
 	if (!ctrl->ops->reinit_request)
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 8e4550fa08f8..e7786bc845fe 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -374,6 +374,7 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
 
 void nvme_stop_queues(struct nvme_ctrl *ctrl);
 void nvme_start_queues(struct nvme_ctrl *ctrl);
+void nvme_sync_queues(struct nvme_ctrl *ctrl);
 void nvme_kill_queues(struct nvme_ctrl *ctrl);
 void nvme_unfreeze(struct nvme_ctrl *ctrl);
 void nvme_wait_freeze(struct nvme_ctrl *ctrl);
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 6fe7af00a1f4..9e3d7b293509 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -2286,6 +2286,7 @@ static void nvme_reset_work(struct work_struct *work)
 	 */
 	if (dev->ctrl.ctrl_config & NVME_CC_ENABLE)
 		nvme_dev_disable(dev, false);
+	nvme_sync_queues(&dev->ctrl);
 
 	/*
 	 * Introduce RECONNECTING state from nvme-fc/rdma transports to mark the
-- 
2.14.3

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 2/3] nvme: Asynchronous driver commands API
  2018-01-29 23:59 [PATCH 1/3] nvme: Sync queues on controller resets Keith Busch
@ 2018-01-29 23:59 ` Keith Busch
  2018-01-30  7:05   ` Christoph Hellwig
  2018-01-29 23:59 ` [PATCH 3/3] nvme-pci: Delete HMB asynchronously Keith Busch
  2018-01-30  9:28 ` [PATCH 1/3] nvme: Sync queues on controller resets jianchao.wang
  2 siblings, 1 reply; 5+ messages in thread
From: Keith Busch @ 2018-01-29 23:59 UTC (permalink / raw)


The driver has a recurring pattern of sending internally generated
asynchronous commands. This patch provides a common API to reduce the
repetition.
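
For illustration, the 'done' callback passed to the new helper follows the
usual blk-mq shape: read the context back out of end_io_data, free the
request, then act on the completion status. A hypothetical example
(example_endio is not part of the patch; nvme_keep_alive_end_io below and
the HMB callback added in patch 3 have this shape):

	static void example_endio(struct request *rq, blk_status_t status)
	{
		struct nvme_ctrl *ctrl = rq->end_io_data;	/* as passed to the helper */

		blk_mq_free_request(rq);	/* the callback owns the request */
		if (status)
			dev_warn(ctrl->device, "async command failed\n");
	}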

Signed-off-by: Keith Busch <keith.busch at intel.com>
---
 drivers/nvme/host/core.c | 31 +++++++++++++++++++------------
 drivers/nvme/host/nvme.h |  3 +++
 drivers/nvme/host/pci.c  | 27 +++++++--------------------
 3 files changed, 29 insertions(+), 32 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index ceb5d72d8c97..4bfb4ba6cd14 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -686,6 +686,22 @@ int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
 }
 EXPORT_SYMBOL_GPL(nvme_submit_sync_cmd);
 
+int nvme_submit_async_cmd(struct request_queue *q, struct nvme_command *cmd,
+			  void *end_io_data, rq_end_io_fn *done,
+			  unsigned timeout, blk_mq_req_flags_t flags)
+{
+	struct request *req;
+
+	req = nvme_alloc_request(q, cmd, flags, NVME_QID_ANY);
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+	req->timeout = timeout ? timeout : ADMIN_TIMEOUT;
+	req->end_io_data = end_io_data;
+	blk_execute_rq_nowait(q, NULL, req, false, done);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nvme_submit_async_cmd);
+
 static void *nvme_add_user_metadata(struct bio *bio, void __user *ubuf,
 		unsigned len, u32 seed, bool write)
 {
@@ -795,22 +811,13 @@ static void nvme_keep_alive_end_io(struct request *rq, blk_status_t status)
 static int nvme_keep_alive(struct nvme_ctrl *ctrl)
 {
 	struct nvme_command c;
-	struct request *rq;
 
 	memset(&c, 0, sizeof(c));
 	c.common.opcode = nvme_admin_keep_alive;
 
-	rq = nvme_alloc_request(ctrl->admin_q, &c, BLK_MQ_REQ_RESERVED,
-			NVME_QID_ANY);
-	if (IS_ERR(rq))
-		return PTR_ERR(rq);
-
-	rq->timeout = ctrl->kato * HZ;
-	rq->end_io_data = ctrl;
-
-	blk_execute_rq_nowait(rq->q, NULL, rq, 0, nvme_keep_alive_end_io);
-
-	return 0;
+	return nvme_submit_async_cmd(ctrl->admin_q, &c, ctrl,
+				     nvme_keep_alive_end_io, ctrl->kato * HZ,
+				     BLK_MQ_REQ_RESERVED);
 }
 
 static void nvme_keep_alive_work(struct work_struct *work)
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index e7786bc845fe..d0889dcd79d9 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -389,6 +389,9 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
 		struct nvme_command *cmd);
 int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
 		void *buf, unsigned bufflen);
+int nvme_submit_async_cmd(struct request_queue *q, struct nvme_command *cmd,
+			  void *end_io_data, rq_end_io_fn *done,
+			  unsigned timeout, blk_mq_req_flags_t flags);
 int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
 		union nvme_result *result, void *buffer, unsigned bufflen,
 		unsigned timeout, int qid, int at_head,
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 9e3d7b293509..8fd0e87f0efe 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1185,7 +1185,6 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
 	struct nvme_queue *nvmeq = iod->nvmeq;
 	struct nvme_dev *dev = nvmeq->dev;
-	struct request *abort_req;
 	struct nvme_command cmd;
 	u32 csts = readl(dev->bar + NVME_REG_CSTS);
 
@@ -1259,17 +1258,12 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
 		"I/O %d QID %d timeout, aborting\n",
 		 req->tag, nvmeq->qid);
 
-	abort_req = nvme_alloc_request(dev->ctrl.admin_q, &cmd,
-			BLK_MQ_REQ_NOWAIT, NVME_QID_ANY);
-	if (IS_ERR(abort_req)) {
+	if (nvme_submit_async_cmd(dev->ctrl.admin_q, &cmd, NULL, abort_endio,
+				  ADMIN_TIMEOUT, BLK_MQ_REQ_NOWAIT)) {
 		atomic_inc(&dev->ctrl.abort_limit);
 		return BLK_EH_RESET_TIMER;
 	}
 
-	abort_req->timeout = ADMIN_TIMEOUT;
-	abort_req->end_io_data = NULL;
-	blk_execute_rq_nowait(abort_req->q, NULL, abort_req, 0, abort_endio);
-
 	/*
 	 * The aborted req will be completed on receiving the abort req.
 	 * We enable the timer again. If hit twice, it'll cause a device reset,
@@ -1991,24 +1985,17 @@ static void nvme_del_cq_end(struct request *req, blk_status_t error)
 static int nvme_delete_queue(struct nvme_queue *nvmeq, u8 opcode)
 {
 	struct request_queue *q = nvmeq->dev->ctrl.admin_q;
-	struct request *req;
 	struct nvme_command cmd;
 
+	rq_end_io_fn *done = (opcode == nvme_admin_delete_cq) ?
+					nvme_del_cq_end : nvme_del_queue_end;
+
 	memset(&cmd, 0, sizeof(cmd));
 	cmd.delete_queue.opcode = opcode;
 	cmd.delete_queue.qid = cpu_to_le16(nvmeq->qid);
 
-	req = nvme_alloc_request(q, &cmd, BLK_MQ_REQ_NOWAIT, NVME_QID_ANY);
-	if (IS_ERR(req))
-		return PTR_ERR(req);
-
-	req->timeout = ADMIN_TIMEOUT;
-	req->end_io_data = nvmeq;
-
-	blk_execute_rq_nowait(q, NULL, req, false,
-			opcode == nvme_admin_delete_cq ?
-				nvme_del_cq_end : nvme_del_queue_end);
-	return 0;
+	return nvme_submit_async_cmd(q, &cmd, nvmeq, done, ADMIN_TIMEOUT,
+				     BLK_MQ_REQ_NOWAIT);
 }
 
 static void nvme_disable_io_queues(struct nvme_dev *dev)
-- 
2.14.3

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 3/3] nvme-pci: Delete HMB asynchronously
  2018-01-29 23:59 [PATCH 1/3] nvme: Sync queues on controller resets Keith Busch
  2018-01-29 23:59 ` [PATCH 2/3] nvme: Asynchronous driver commands API Keith Busch
@ 2018-01-29 23:59 ` Keith Busch
  2018-01-30  9:28 ` [PATCH 1/3] nvme: Sync queues on controller resets jianchao.wang
  2 siblings, 0 replies; 5+ messages in thread
From: Keith Busch @ 2018-01-29 23:59 UTC (permalink / raw)


Deleting the host memory buffer occurs in the controller disabling
path. The driver needs to be able to make forward progress even if
the controller can't produce a completion for that command. Issuing a
synchronous nvme command within the controller shutdown path could block
indefinitely if the controller is unable to provide a response for any
reason, so this patch sends the HMB teardown asynchronously.
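
Condensed, the shape of the change in nvme_dev_disable() (full context in
the diff below) is:

	DECLARE_COMPLETION_ONSTACK(hmb_complete);
	bool hmb_wait = false;
	...
	/* fire the host memory buffer disable without waiting on it */
	if (dev->host_mem_descs)
		hmb_wait = !nvme_set_host_mem(dev, 0, &hmb_complete);
	...
	/* bounded wait: give up after ADMIN_TIMEOUT rather than hanging */
	if (hmb_wait)
		wait_for_completion_timeout(&hmb_complete, ADMIN_TIMEOUT);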

Reported-by: Jianchao Wang <jianchao.w.wang at oracle.com>
Signed-off-by: Keith Busch <keith.busch at intel.com>
---
 drivers/nvme/host/pci.c | 29 ++++++++++++++++++++++-------
 1 file changed, 22 insertions(+), 7 deletions(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 8fd0e87f0efe..9977b66d98cd 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1714,7 +1714,15 @@ static inline void nvme_release_cmb(struct nvme_dev *dev)
 	}
 }
 
-static int nvme_set_host_mem(struct nvme_dev *dev, u32 bits)
+static void nvme_hmb_endio(struct request *req, blk_status_t error)
+{
+	struct completion *c = req->end_io_data;
+
+	blk_mq_free_request(req);
+	complete(c);
+}
+
+static int nvme_set_host_mem(struct nvme_dev *dev, u32 bits, struct completion *complete)
 {
 	u64 dma_addr = dev->host_mem_descs_dma;
 	struct nvme_command c;
@@ -1730,6 +1738,11 @@ static int nvme_set_host_mem(struct nvme_dev *dev, u32 bits)
 	c.features.dword14	= cpu_to_le32(upper_32_bits(dma_addr));
 	c.features.dword15	= cpu_to_le32(dev->nr_host_mem_descs);
 
+	if (complete)
+		return nvme_submit_async_cmd(dev->ctrl.admin_q, &c,
+					     complete, nvme_hmb_endio,
+					     ADMIN_TIMEOUT, BLK_MQ_REQ_NOWAIT);
+
 	ret = nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0);
 	if (ret) {
 		dev_warn(dev->ctrl.device,
@@ -1760,9 +1773,7 @@ static void nvme_free_host_mem(struct nvme_dev *dev)
 	dev->nr_host_mem_descs = 0;
 }
 
-static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred,
-		u32 chunk_size)
-{
+static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred, u32 chunk_size) {
 	struct nvme_host_mem_buf_desc *descs;
 	u32 max_entries, len;
 	dma_addr_t descs_dma;
@@ -1884,7 +1895,7 @@ static int nvme_setup_host_mem(struct nvme_dev *dev)
 			dev->host_mem_size >> ilog2(SZ_1M));
 	}
 
-	ret = nvme_set_host_mem(dev, enable_bits);
+	ret = nvme_set_host_mem(dev, enable_bits, NULL);
 	if (ret)
 		nvme_free_host_mem(dev);
 	return ret;
@@ -2152,8 +2163,9 @@ static void nvme_pci_disable(struct nvme_dev *dev)
 static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 {
 	int i;
-	bool dead = true;
+	bool dead = true, hmb_wait = false;
 	struct pci_dev *pdev = to_pci_dev(dev->dev);
+	DECLARE_COMPLETION_ONSTACK(hmb_complete);
 
 	mutex_lock(&dev->shutdown_lock);
 	if (pci_is_enabled(pdev)) {
@@ -2181,13 +2193,16 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 		 * but I'd rather be safe than sorry..
 		 */
 		if (dev->host_mem_descs)
-			nvme_set_host_mem(dev, 0);
+			hmb_wait = !nvme_set_host_mem(dev, 0, &hmb_complete);
 
 	}
 	nvme_stop_queues(&dev->ctrl);
 
 	if (!dead) {
 		nvme_disable_io_queues(dev);
+		if (hmb_wait)
+			wait_for_completion_timeout(&hmb_complete,
+						    ADMIN_TIMEOUT);
 		nvme_disable_admin_queue(dev, shutdown);
 	}
 	for (i = dev->ctrl.queue_count - 1; i >= 0; i--)
-- 
2.14.3

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 2/3] nvme: Asynchronous driver commands API
  2018-01-29 23:59 ` [PATCH 2/3] nvme: Asynchronous driver commands API Keith Busch
@ 2018-01-30  7:05   ` Christoph Hellwig
  0 siblings, 0 replies; 5+ messages in thread
From: Christoph Hellwig @ 2018-01-30  7:05 UTC (permalink / raw)


As pointed out by Roland, we'll need to make sure req->cmd is allocated
dynamically for async commands.  I suspect the best way to handle that
is to turn it into an actual embedded structure instead of a pointer.
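
For readers following along, the concern is roughly the pattern below
(some_caller and some_endio are made-up names; only the lifetime issue
matters): nvme_alloc_request() records a pointer to the caller's command
rather than a copy, so a request executed asynchronously can end up
reading a stack frame that no longer exists.

	static void some_endio(struct request *rq, blk_status_t status)
	{
		blk_mq_free_request(rq);
	}

	static int some_caller(struct nvme_ctrl *ctrl)
	{
		struct nvme_command c;		/* lives on this stack frame */

		memset(&c, 0, sizeof(c));
		c.common.opcode = nvme_admin_keep_alive;

		/*
		 * The helper only saves a pointer to 'c' in the request and
		 * returns immediately; if dispatch happens after this
		 * function returns, the command is read from a dead stack.
		 */
		return nvme_submit_async_cmd(ctrl->admin_q, &c, NULL,
					     some_endio, 0, 0);
	}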

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH 1/3] nvme: Sync queues on controller resets
  2018-01-29 23:59 [PATCH 1/3] nvme: Sync queues on controller resets Keith Busch
  2018-01-29 23:59 ` [PATCH 2/3] nvme: Asynchronous driver commands API Keith Busch
  2018-01-29 23:59 ` [PATCH 3/3] nvme-pci: Delete HMB asynchronously Keith Busch
@ 2018-01-30  9:28 ` jianchao.wang
  2 siblings, 0 replies; 5+ messages in thread
From: jianchao.wang @ 2018-01-30  9:28 UTC (permalink / raw)


Hi Keith

Thanks for your patch.
That's really appreciated.

On 01/30/2018 07:59 AM, Keith Busch wrote:
> This patch has the nvme pci driver synchronize request queues to ensure
> that starting up the controller does not race with a previously running
> timeout handler.
> 
> Reported-by: Jianchao Wang <jianchao.w.wang at oracle.com>
> Signed-off-by: Keith Busch <keith.busch at intel.com>
> ---
>  drivers/nvme/host/core.c | 15 ++++++++++++++-
>  drivers/nvme/host/nvme.h |  1 +
>  drivers/nvme/host/pci.c  |  1 +
>  3 files changed, 16 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index e8104871cbbf..ceb5d72d8c97 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -3540,12 +3540,25 @@ void nvme_start_queues(struct nvme_ctrl *ctrl)
>  	struct nvme_ns *ns;
>  
>  	mutex_lock(&ctrl->namespaces_mutex);
> -	list_for_each_entry(ns, &ctrl->namespaces, list)
> +	list_for_each_entry(ns, &ctrl->namespaces, list) {
>  		blk_mq_unquiesce_queue(ns->queue);
> +		blk_mq_kick_requeue_list(ns->queue);
> +	}
>  	mutex_unlock(&ctrl->namespaces_mutex);
>  }
>  EXPORT_SYMBOL_GPL(nvme_start_queues);
>  
> +void nvme_sync_queues(struct nvme_ctrl *ctrl)
> +{
> +	struct nvme_ns *ns;
> +
> +	mutex_lock(&ctrl->namespaces_mutex);
> +	list_for_each_entry(ns, &ctrl->namespaces, list)
> +		blk_sync_queue(ns->queue);
> +	mutex_unlock(&ctrl->namespaces_mutex);
> +}
> +EXPORT_SYMBOL_GPL(nvme_sync_queues);
> +
>  int nvme_reinit_tagset(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set)
>  {
>  	if (!ctrl->ops->reinit_request)
> diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
> index 8e4550fa08f8..e7786bc845fe 100644
> --- a/drivers/nvme/host/nvme.h
> +++ b/drivers/nvme/host/nvme.h
> @@ -374,6 +374,7 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
>  
>  void nvme_stop_queues(struct nvme_ctrl *ctrl);
>  void nvme_start_queues(struct nvme_ctrl *ctrl);
> +void nvme_sync_queues(struct nvme_ctrl *ctrl);
>  void nvme_kill_queues(struct nvme_ctrl *ctrl);
>  void nvme_unfreeze(struct nvme_ctrl *ctrl);
>  void nvme_wait_freeze(struct nvme_ctrl *ctrl);
> diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
> index 6fe7af00a1f4..9e3d7b293509 100644
> --- a/drivers/nvme/host/pci.c
> +++ b/drivers/nvme/host/pci.c
> @@ -2286,6 +2286,7 @@ static void nvme_reset_work(struct work_struct *work)
>  	 */
>  	if (dev->ctrl.ctrl_config & NVME_CC_ENABLE)
>  		nvme_dev_disable(dev, false);
> +	nvme_sync_queues(&dev->ctrl);

There could be a circular pattern here. Please consider the following scenario:

timeout_work context                    reset_work context
nvme_timeout                            nvme_reset_work
  -> nvme_dev_disable                     -> nvme_sync_queues // hold namespaces_mutex
    -> nvme_stop_queues                     -> blk_sync_queue
      -> require namespaces_mutex               -> cancel_work_sync(&q->timeout_work)

That is, reset_work waits in cancel_work_sync() for the timeout handler to
finish while holding namespaces_mutex, and the timeout handler is blocked
waiting for namespaces_mutex in nvme_stop_queues(), so neither can make
progress.

On the other hand, blk_mq_kick_requeue_list() should also be added to
nvme_kill_queues() to cover the queue_count < 2 case.
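
A minimal sketch of that suggestion (not part of this series; the existing
dead-namespace handling in nvme_kill_queues() is elided):

	list_for_each_entry(ns, &ctrl->namespaces, list) {
		/* ... existing dead-namespace handling ... */
		blk_mq_unquiesce_queue(ns->queue);
		blk_mq_kick_requeue_list(ns->queue);	/* proposed addition */
	}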

Thanks
Jianchao

>  
>  	/*
>  	 * Introduce RECONNECTING state from nvme-fc/rdma transports to mark the
> 

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2018-01-30  9:28 UTC | newest]

Thread overview: 5+ messages
2018-01-29 23:59 [PATCH 1/3] nvme: Sync queues on controller resets Keith Busch
2018-01-29 23:59 ` [PATCH 2/3] nvme: Asynchronous driver commands API Keith Busch
2018-01-30  7:05   ` Christoph Hellwig
2018-01-29 23:59 ` [PATCH 3/3] nvme-pci: Delete HMB asynchronously Keith Busch
2018-01-30  9:28 ` [PATCH 1/3] nvme: Sync queues on controller resets jianchao.wang
