linux-nvme.lists.infradead.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/2] NVMe updates
@ 2016-01-12 21:41 Keith Busch
  2016-01-12 21:41 ` [PATCH 1/2] NVMe: IO queue deletion re-write Keith Busch
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Keith Busch @ 2016-01-12 21:41 UTC (permalink / raw)


These are the last two with the folded in suggestions from reviewers.

The first 3 patches in the series are still here, and some of this based
on updates from Christoph and Sagi.

  http://lists.infradead.org/pipermail/linux-nvme/2016-January/003561.html

Keith Busch (2):
  NVMe: IO queue deletion re-write
  NVMe: Shutdown controller only for power-off

 drivers/nvme/host/pci.c | 291 +++++++++++++++++-------------------------------
 1 file changed, 100 insertions(+), 191 deletions(-)

-- 
2.6.2.307.g37023ba

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH 1/2] NVMe: IO queue deletion re-write
  2016-01-12 21:41 [PATCH 0/2] NVMe updates Keith Busch
@ 2016-01-12 21:41 ` Keith Busch
  2016-01-12 21:41 ` [PATCH 2/2] NVMe: Shutdown controller only for power-off Keith Busch
  2016-01-12 21:51 ` [PATCH 0/2] NVMe updates Jens Axboe
  2 siblings, 0 replies; 4+ messages in thread
From: Keith Busch @ 2016-01-12 21:41 UTC (permalink / raw)


The nvme driver deletes IO queues asynchronously since this operation
may potentially take an undesirable amount of time with a large number
of queues if done serially.

The driver used to manage coordinating asynchronous deletions. This
patch simplifies that by leveraging the block layer rather than using
kthread workers and chaining more complicated callbacks.

Signed-off-by: Keith Busch <keith.busch at intel.com>
---
 drivers/nvme/host/pci.c | 251 ++++++++++++++++--------------------------------
 1 file changed, 81 insertions(+), 170 deletions(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index a7e5499..0fdef0a 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -89,13 +89,6 @@ static void nvme_process_cq(struct nvme_queue *nvmeq);
 static void nvme_remove_dead_ctrl(struct nvme_dev *dev);
 static void nvme_dev_shutdown(struct nvme_dev *dev);
 
-struct async_cmd_info {
-	struct kthread_work work;
-	struct kthread_worker *worker;
-	int status;
-	void *ctx;
-};
-
 /*
  * Represents an NVM Express device.  Each nvme_dev is a PCI function.
  */
@@ -125,9 +118,11 @@ struct nvme_dev {
 	u64 cmb_size;
 	u32 cmbsz;
 	unsigned long flags;
+
 #define NVME_CTRL_RESETTING    0
 
 	struct nvme_ctrl ctrl;
+	struct completion ioq_wait;
 };
 
 static inline struct nvme_dev *to_nvme_dev(struct nvme_ctrl *ctrl)
@@ -159,7 +154,6 @@ struct nvme_queue {
 	u16 qid;
 	u8 cq_phase;
 	u8 cqe_seen;
-	struct async_cmd_info cmdinfo;
 };
 
 /*
@@ -844,15 +838,6 @@ static void nvme_submit_async_event(struct nvme_dev *dev)
 	__nvme_submit_cmd(dev->queues[0], &c);
 }
 
-static void async_cmd_info_endio(struct request *req, int error)
-{
-	struct async_cmd_info *cmdinfo = req->end_io_data;
-
-	cmdinfo->status = req->errors;
-	queue_kthread_work(cmdinfo->worker, &cmdinfo->work);
-	blk_mq_free_request(req);
-}
-
 static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
 {
 	struct nvme_command c;
@@ -1595,6 +1580,84 @@ static void nvme_dev_scan(struct work_struct *work)
 	nvme_set_irq_hints(dev);
 }
 
+static void nvme_del_queue_end(struct request *req, int error)
+{
+	struct nvme_queue *nvmeq = req->end_io_data;
+
+	blk_mq_free_request(req);
+	complete(&nvmeq->dev->ioq_wait);
+}
+
+static void nvme_del_cq_end(struct request *req, int error)
+{
+	struct nvme_queue *nvmeq = req->end_io_data;
+
+	if (!error) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&nvmeq->q_lock, flags);
+		nvme_process_cq(nvmeq);
+		spin_unlock_irqrestore(&nvmeq->q_lock, flags);
+	}
+
+	nvme_del_queue_end(req, error);
+}
+
+static int nvme_delete_queue(struct nvme_queue *nvmeq, u8 opcode)
+{
+	struct request_queue *q = nvmeq->dev->ctrl.admin_q;
+	struct request *req;
+	struct nvme_command cmd;
+
+	memset(&cmd, 0, sizeof(cmd));
+	cmd.delete_queue.opcode = opcode;
+	cmd.delete_queue.qid = cpu_to_le16(nvmeq->qid);
+
+	req = nvme_alloc_request(q, &cmd, BLK_MQ_REQ_NOWAIT);
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+
+	req->timeout = ADMIN_TIMEOUT;
+	req->end_io_data = nvmeq;
+
+	blk_execute_rq_nowait(q, NULL, req, false,
+			opcode == nvme_admin_delete_cq ?
+				nvme_del_cq_end : nvme_del_queue_end);
+	return 0;
+}
+
+static void nvme_disable_io_queues(struct nvme_dev *dev)
+{
+	int pass;
+	unsigned long timeout;
+	u8 opcode = nvme_admin_delete_sq;
+
+	for (pass = 0; pass < 2; pass++) {
+		int sent = 0, i = dev->queue_count - 1;
+
+		reinit_completion(&dev->ioq_wait);
+ retry:
+		timeout = ADMIN_TIMEOUT;
+		for (; i > 0; i--) {
+			struct nvme_queue *nvmeq = dev->queues[i];
+
+			if (!pass)
+				nvme_suspend_queue(nvmeq);
+			if (nvme_delete_queue(nvmeq, opcode))
+				break;
+			++sent;
+		}
+		while (sent--) {
+			timeout = wait_for_completion_io_timeout(&dev->ioq_wait, timeout);
+			if (timeout == 0)
+				return;
+			if (i)
+				goto retry;
+		}
+		opcode = nvme_admin_delete_cq;
+	}
+}
+
 /*
  * Return: error value if an error occurred setting up the queues or calling
  * Identify Device.  0 if these succeeded, even if adding some of the
@@ -1706,159 +1769,6 @@ static void nvme_dev_unmap(struct nvme_dev *dev)
 	}
 }
 
-struct nvme_delq_ctx {
-	struct task_struct *waiter;
-	struct kthread_worker *worker;
-	atomic_t refcount;
-};
-
-static void nvme_wait_dq(struct nvme_delq_ctx *dq, struct nvme_dev *dev)
-{
-	dq->waiter = current;
-	mb();
-
-	for (;;) {
-		set_current_state(TASK_KILLABLE);
-		if (!atomic_read(&dq->refcount))
-			break;
-		if (!schedule_timeout(ADMIN_TIMEOUT) ||
-					fatal_signal_pending(current)) {
-			/*
-			 * Disable the controller first since we can't trust it
-			 * at this point, but leave the admin queue enabled
-			 * until all queue deletion requests are flushed.
-			 * FIXME: This may take a while if there are more h/w
-			 * queues than admin tags.
-			 */
-			set_current_state(TASK_RUNNING);
-			nvme_disable_ctrl(&dev->ctrl,
-				lo_hi_readq(dev->bar + NVME_REG_CAP));
-			nvme_clear_queue(dev->queues[0]);
-			flush_kthread_worker(dq->worker);
-			nvme_disable_queue(dev, 0);
-			return;
-		}
-	}
-	set_current_state(TASK_RUNNING);
-}
-
-static void nvme_put_dq(struct nvme_delq_ctx *dq)
-{
-	atomic_dec(&dq->refcount);
-	if (dq->waiter)
-		wake_up_process(dq->waiter);
-}
-
-static struct nvme_delq_ctx *nvme_get_dq(struct nvme_delq_ctx *dq)
-{
-	atomic_inc(&dq->refcount);
-	return dq;
-}
-
-static void nvme_del_queue_end(struct nvme_queue *nvmeq)
-{
-	struct nvme_delq_ctx *dq = nvmeq->cmdinfo.ctx;
-	nvme_put_dq(dq);
-
-	spin_lock_irq(&nvmeq->q_lock);
-	nvme_process_cq(nvmeq);
-	spin_unlock_irq(&nvmeq->q_lock);
-}
-
-static int adapter_async_del_queue(struct nvme_queue *nvmeq, u8 opcode,
-						kthread_work_func_t fn)
-{
-	struct request *req;
-	struct nvme_command c;
-
-	memset(&c, 0, sizeof(c));
-	c.delete_queue.opcode = opcode;
-	c.delete_queue.qid = cpu_to_le16(nvmeq->qid);
-
-	init_kthread_work(&nvmeq->cmdinfo.work, fn);
-
-	req = nvme_alloc_request(nvmeq->dev->ctrl.admin_q, &c, 0);
-	if (IS_ERR(req))
-		return PTR_ERR(req);
-
-	req->timeout = ADMIN_TIMEOUT;
-	req->end_io_data = &nvmeq->cmdinfo;
-	blk_execute_rq_nowait(req->q, NULL, req, 0, async_cmd_info_endio);
-	return 0;
-}
-
-static void nvme_del_cq_work_handler(struct kthread_work *work)
-{
-	struct nvme_queue *nvmeq = container_of(work, struct nvme_queue,
-							cmdinfo.work);
-	nvme_del_queue_end(nvmeq);
-}
-
-static int nvme_delete_cq(struct nvme_queue *nvmeq)
-{
-	return adapter_async_del_queue(nvmeq, nvme_admin_delete_cq,
-						nvme_del_cq_work_handler);
-}
-
-static void nvme_del_sq_work_handler(struct kthread_work *work)
-{
-	struct nvme_queue *nvmeq = container_of(work, struct nvme_queue,
-							cmdinfo.work);
-	int status = nvmeq->cmdinfo.status;
-
-	if (!status)
-		status = nvme_delete_cq(nvmeq);
-	if (status)
-		nvme_del_queue_end(nvmeq);
-}
-
-static int nvme_delete_sq(struct nvme_queue *nvmeq)
-{
-	return adapter_async_del_queue(nvmeq, nvme_admin_delete_sq,
-						nvme_del_sq_work_handler);
-}
-
-static void nvme_del_queue_start(struct kthread_work *work)
-{
-	struct nvme_queue *nvmeq = container_of(work, struct nvme_queue,
-							cmdinfo.work);
-	if (nvme_delete_sq(nvmeq))
-		nvme_del_queue_end(nvmeq);
-}
-
-static void nvme_disable_io_queues(struct nvme_dev *dev)
-{
-	int i;
-	DEFINE_KTHREAD_WORKER_ONSTACK(worker);
-	struct nvme_delq_ctx dq;
-	struct task_struct *kworker_task = kthread_run(kthread_worker_fn,
-					&worker, "nvme%d", dev->ctrl.instance);
-
-	if (IS_ERR(kworker_task)) {
-		dev_err(dev->dev,
-			"Failed to create queue del task\n");
-		for (i = dev->queue_count - 1; i > 0; i--)
-			nvme_disable_queue(dev, i);
-		return;
-	}
-
-	dq.waiter = NULL;
-	atomic_set(&dq.refcount, 0);
-	dq.worker = &worker;
-	for (i = dev->queue_count - 1; i > 0; i--) {
-		struct nvme_queue *nvmeq = dev->queues[i];
-
-		if (nvme_suspend_queue(nvmeq))
-			continue;
-		nvmeq->cmdinfo.ctx = nvme_get_dq(&dq);
-		nvmeq->cmdinfo.worker = dq.worker;
-		init_kthread_work(&nvmeq->cmdinfo.work, nvme_del_queue_start);
-		queue_kthread_work(dq.worker, &nvmeq->cmdinfo.work);
-	}
-	nvme_wait_dq(&dq, dev);
-	kthread_stop(kworker_task);
-}
-
 static int nvme_dev_list_add(struct nvme_dev *dev)
 {
 	bool start_thread = false;
@@ -2141,6 +2051,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	INIT_WORK(&dev->reset_work, nvme_reset_work);
 	INIT_WORK(&dev->remove_work, nvme_remove_dead_ctrl_work);
 	mutex_init(&dev->shutdown_lock);
+	init_completion(&dev->ioq_wait);
 
 	result = nvme_setup_prp_pools(dev);
 	if (result)
-- 
2.6.2.307.g37023ba

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 2/2] NVMe: Shutdown controller only for power-off
  2016-01-12 21:41 [PATCH 0/2] NVMe updates Keith Busch
  2016-01-12 21:41 ` [PATCH 1/2] NVMe: IO queue deletion re-write Keith Busch
@ 2016-01-12 21:41 ` Keith Busch
  2016-01-12 21:51 ` [PATCH 0/2] NVMe updates Jens Axboe
  2 siblings, 0 replies; 4+ messages in thread
From: Keith Busch @ 2016-01-12 21:41 UTC (permalink / raw)


We don't need to shutdown a controller for a reset. A controller in a
shutdown state may take longer to become ready than one that was simply
disabled. This patch has the driver shut down a controller only if the
device is about to be powered off or being removed. When taking the
controller down for a reset reason, the controller will be disabled
instead.

Function names have been updated in this patch to reflect their changed
semantics.

Signed-off-by: Keith Busch <keith.busch at intel.com>
Reviewed-by: Sagi Grimberg <sagig at mellanox.com>
---
 drivers/nvme/host/pci.c | 40 +++++++++++++++++++---------------------
 1 file changed, 19 insertions(+), 21 deletions(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 0fdef0a..0b67e9e 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -87,7 +87,7 @@ struct nvme_queue;
 static int nvme_reset(struct nvme_dev *dev);
 static void nvme_process_cq(struct nvme_queue *nvmeq);
 static void nvme_remove_dead_ctrl(struct nvme_dev *dev);
-static void nvme_dev_shutdown(struct nvme_dev *dev);
+static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown);
 
 /*
  * Represents an NVM Express device.  Each nvme_dev is a PCI function.
@@ -932,7 +932,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
 		dev_warn(dev->dev,
 			 "I/O %d QID %d timeout, disable controller\n",
 			 req->tag, nvmeq->qid);
-		nvme_dev_shutdown(dev);
+		nvme_dev_disable(dev, false);
 		req->errors = NVME_SC_CANCELLED;
 		return BLK_EH_HANDLED;
 	}
@@ -946,7 +946,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
 		dev_warn(dev->dev,
 			 "I/O %d QID %d timeout, reset controller\n",
 			 req->tag, nvmeq->qid);
-		nvme_dev_shutdown(dev);
+		nvme_dev_disable(dev, false);
 		queue_work(nvme_workq, &dev->reset_work);
 
 		/*
@@ -1065,21 +1065,20 @@ static void nvme_clear_queue(struct nvme_queue *nvmeq)
 	spin_unlock_irq(&nvmeq->q_lock);
 }
 
-static void nvme_disable_queue(struct nvme_dev *dev, int qid)
+static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown)
 {
-	struct nvme_queue *nvmeq = dev->queues[qid];
+	struct nvme_queue *nvmeq = dev->queues[0];
 
 	if (!nvmeq)
 		return;
 	if (nvme_suspend_queue(nvmeq))
 		return;
 
-	/* Don't tell the adapter to delete the admin queue.
-	 * Don't tell a removed adapter to delete IO queues. */
-	if (qid && readl(dev->bar + NVME_REG_CSTS) != -1) {
-		adapter_delete_sq(dev, qid);
-		adapter_delete_cq(dev, qid);
-	}
+	if (shutdown)
+		nvme_shutdown_ctrl(&dev->ctrl);
+	else
+		nvme_disable_ctrl(&dev->ctrl, lo_hi_readq(
+						dev->bar + NVME_REG_CAP));
 
 	spin_lock_irq(&nvmeq->q_lock);
 	nvme_process_cq(nvmeq);
@@ -1813,7 +1812,7 @@ static void nvme_dev_list_remove(struct nvme_dev *dev)
 		kthread_stop(tmp);
 }
 
-static void nvme_dev_shutdown(struct nvme_dev *dev)
+static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 {
 	int i;
 	u32 csts = -1;
@@ -1832,8 +1831,7 @@ static void nvme_dev_shutdown(struct nvme_dev *dev)
 		}
 	} else {
 		nvme_disable_io_queues(dev);
-		nvme_shutdown_ctrl(&dev->ctrl);
-		nvme_disable_queue(dev, 0);
+		nvme_disable_admin_queue(dev, shutdown);
 	}
 	nvme_dev_unmap(dev);
 
@@ -1892,7 +1890,7 @@ static void nvme_reset_work(struct work_struct *work)
 	 * moving on.
 	 */
 	if (dev->bar)
-		nvme_dev_shutdown(dev);
+		nvme_dev_disable(dev, false);
 
 	set_bit(NVME_CTRL_RESETTING, &dev->flags);
 
@@ -1946,7 +1944,7 @@ static void nvme_reset_work(struct work_struct *work)
 	dev->ctrl.admin_q = NULL;
 	dev->queues[0]->tags = NULL;
  disable:
-	nvme_disable_queue(dev, 0);
+	nvme_disable_admin_queue(dev, false);
  unmap:
 	nvme_dev_unmap(dev);
  out:
@@ -2081,7 +2079,7 @@ static void nvme_reset_notify(struct pci_dev *pdev, bool prepare)
 	struct nvme_dev *dev = pci_get_drvdata(pdev);
 
 	if (prepare)
-		nvme_dev_shutdown(dev);
+		nvme_dev_disable(dev, false);
 	else
 		queue_work(nvme_workq, &dev->reset_work);
 }
@@ -2089,7 +2087,7 @@ static void nvme_reset_notify(struct pci_dev *pdev, bool prepare)
 static void nvme_shutdown(struct pci_dev *pdev)
 {
 	struct nvme_dev *dev = pci_get_drvdata(pdev);
-	nvme_dev_shutdown(dev);
+	nvme_dev_disable(dev, true);
 }
 
 static void nvme_remove(struct pci_dev *pdev)
@@ -2105,7 +2103,7 @@ static void nvme_remove(struct pci_dev *pdev)
 	flush_work(&dev->scan_work);
 	nvme_remove_namespaces(&dev->ctrl);
 	nvme_uninit_ctrl(&dev->ctrl);
-	nvme_dev_shutdown(dev);
+	nvme_dev_disable(dev, true);
 	nvme_dev_remove_admin(dev);
 	nvme_free_queues(dev, 0);
 	nvme_release_cmb(dev);
@@ -2119,7 +2117,7 @@ static int nvme_suspend(struct device *dev)
 	struct pci_dev *pdev = to_pci_dev(dev);
 	struct nvme_dev *ndev = pci_get_drvdata(pdev);
 
-	nvme_dev_shutdown(ndev);
+	nvme_dev_disable(ndev, true);
 	return 0;
 }
 
@@ -2150,7 +2148,7 @@ static pci_ers_result_t nvme_error_detected(struct pci_dev *pdev,
 	case pci_channel_io_normal:
 		return PCI_ERS_RESULT_CAN_RECOVER;
 	case pci_channel_io_frozen:
-		nvme_dev_shutdown(dev);
+		nvme_dev_disable(dev, false);
 		return PCI_ERS_RESULT_NEED_RESET;
 	case pci_channel_io_perm_failure:
 		return PCI_ERS_RESULT_DISCONNECT;
-- 
2.6.2.307.g37023ba

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 0/2] NVMe updates
  2016-01-12 21:41 [PATCH 0/2] NVMe updates Keith Busch
  2016-01-12 21:41 ` [PATCH 1/2] NVMe: IO queue deletion re-write Keith Busch
  2016-01-12 21:41 ` [PATCH 2/2] NVMe: Shutdown controller only for power-off Keith Busch
@ 2016-01-12 21:51 ` Jens Axboe
  2 siblings, 0 replies; 4+ messages in thread
From: Jens Axboe @ 2016-01-12 21:51 UTC (permalink / raw)


On 01/12/2016 02:41 PM, Keith Busch wrote:
> These are the last two with the folded in suggestions from reviewers.
>
> The first 3 patches in the series are still here, and some of this based
> on updates from Christoph and Sagi.
>
>    https://urldefense.proofpoint.com/v2/url?u=http-3A__lists.infradead.org_pipermail_linux-2Dnvme_2016-2DJanuary_003561.html&d=CwIBAg&c=5VD0RTtNlTh3ycd41b3MUw&r=cK1a7KivzZRh1fKQMjSm2A&m=_Xz21o_ofD4kgXGfyjlGhGvB8shFPbW2kl9wRETts8U&s=cTT2bQ0JfsS7YZRb36wi33_EBDacCaq4qeNT2bkm1CQ&e=
>
> Keith Busch (2):
>    NVMe: IO queue deletion re-write
>    NVMe: Shutdown controller only for power-off

Added, thanks for respinning/collapsing Keith.


-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2016-01-12 21:51 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2016-01-12 21:41 [PATCH 0/2] NVMe updates Keith Busch
2016-01-12 21:41 ` [PATCH 1/2] NVMe: IO queue deletion re-write Keith Busch
2016-01-12 21:41 ` [PATCH 2/2] NVMe: Shutdown controller only for power-off Keith Busch
2016-01-12 21:51 ` [PATCH 0/2] NVMe updates Jens Axboe

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).