rollup of my pending NVMe patches for 4.6

All of lore.kernel.org
 help / color / mirror / Atom feed

* rollup of my pending NVMe patches for 4.6
@ 2016-02-29 14:59 Christoph Hellwig
  2016-02-29 14:59 ` [PATCH 1/4] nvme: use a work item to submit async event requests Christoph Hellwig
                   ` (4 more replies)
  0 siblings, 5 replies; 9+ messages in thread
From: Christoph Hellwig @ 2016-02-29 14:59 UTC (permalink / raw)


All of them had a fair amount of review, but for the last one an ACK
from Keith would be nice.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH 1/4] nvme: use a work item to submit async event requests
  2016-02-29 14:59 rollup of my pending NVMe patches for 4.6 Christoph Hellwig
@ 2016-02-29 14:59 ` Christoph Hellwig
  2016-02-29 14:59 ` [PATCH 2/4] nvme: don't poll the CQ from the kthread Christoph Hellwig
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 9+ messages in thread
From: Christoph Hellwig @ 2016-02-29 14:59 UTC (permalink / raw)


Use a dedicated work item to submit async event requests instead of the
global kthread.  This simplifies the code and reduces the latency to
resubmit a request once an event notification has happened.

Signed-off-by: Christoph Hellwig <hch at lst.de>
Reviewed-by: Keith Busch <keith.busch at intel.com>
Reviewed-by: Sagi Grimberg <sagig at mellanox.com>
Reviewed-by: Johannes Thumshirn <jthumshirn at suse.de>
---
 drivers/nvme/host/pci.c | 25 ++++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index fec7479..21b0be4 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -100,6 +100,7 @@ struct nvme_dev {
 	struct work_struct reset_work;
 	struct work_struct scan_work;
 	struct work_struct remove_work;
+	struct work_struct async_work;
 	struct mutex shutdown_lock;
 	bool subsystem;
 	void __iomem *cmb;
@@ -281,8 +282,11 @@ static void nvme_complete_async_event(struct nvme_dev *dev,
 	u16 status = le16_to_cpu(cqe->status) >> 1;
 	u32 result = le32_to_cpu(cqe->result);
 
-	if (status == NVME_SC_SUCCESS || status == NVME_SC_ABORT_REQ)
+	if (status == NVME_SC_SUCCESS || status == NVME_SC_ABORT_REQ) {
 		++dev->ctrl.event_limit;
+		queue_work(nvme_workq, &dev->async_work);
+	}
+
 	if (status != NVME_SC_SUCCESS)
 		return;
 
@@ -816,15 +820,22 @@ static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
 	return 0;
 }
 
-static void nvme_submit_async_event(struct nvme_dev *dev)
+static void nvme_async_event_work(struct work_struct *work)
 {
+	struct nvme_dev *dev = container_of(work, struct nvme_dev, async_work);
+	struct nvme_queue *nvmeq = dev->queues[0];
 	struct nvme_command c;
 
 	memset(&c, 0, sizeof(c));
 	c.common.opcode = nvme_admin_async_event;
-	c.common.command_id = NVME_AQ_BLKMQ_DEPTH + --dev->ctrl.event_limit;
 
-	__nvme_submit_cmd(dev->queues[0], &c);
+	spin_lock_irq(&nvmeq->q_lock);
+	while (dev->ctrl.event_limit > 0) {
+		c.common.command_id = NVME_AQ_BLKMQ_DEPTH +
+			--dev->ctrl.event_limit;
+		__nvme_submit_cmd(nvmeq, &c);
+	}
+	spin_unlock_irq(&nvmeq->q_lock);
 }
 
 static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
@@ -1358,9 +1369,6 @@ static int nvme_kthread(void *data)
 					continue;
 				spin_lock_irq(&nvmeq->q_lock);
 				nvme_process_cq(nvmeq);
-
-				while (i == 0 && dev->ctrl.event_limit > 0)
-					nvme_submit_async_event(dev);
 				spin_unlock_irq(&nvmeq->q_lock);
 			}
 		}
@@ -1929,6 +1937,7 @@ static void nvme_reset_work(struct work_struct *work)
 		goto free_tags;
 
 	dev->ctrl.event_limit = NVME_NR_AEN_COMMANDS;
+	queue_work(nvme_workq, &dev->async_work);
 
 	result = nvme_dev_list_add(dev);
 	if (result)
@@ -2062,6 +2071,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	INIT_WORK(&dev->scan_work, nvme_dev_scan);
 	INIT_WORK(&dev->reset_work, nvme_reset_work);
 	INIT_WORK(&dev->remove_work, nvme_remove_dead_ctrl_work);
+	INIT_WORK(&dev->async_work, nvme_async_event_work);
 	mutex_init(&dev->shutdown_lock);
 	init_completion(&dev->ioq_wait);
 
@@ -2115,6 +2125,7 @@ static void nvme_remove(struct pci_dev *pdev)
 	spin_unlock(&dev_list_lock);
 
 	pci_set_drvdata(pdev, NULL);
+	flush_work(&dev->async_work);
 	flush_work(&dev->reset_work);
 	flush_work(&dev->scan_work);
 	nvme_remove_namespaces(&dev->ctrl);
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 2/4] nvme: don't poll the CQ from the kthread
  2016-02-29 14:59 rollup of my pending NVMe patches for 4.6 Christoph Hellwig
  2016-02-29 14:59 ` [PATCH 1/4] nvme: use a work item to submit async event requests Christoph Hellwig
@ 2016-02-29 14:59 ` Christoph Hellwig
  2016-02-29 20:56   ` Matthew Wilcox
  2016-02-29 14:59 ` [PATCH 3/4] nvme: replace the kthread with a per-device watchdog timer Christoph Hellwig
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 9+ messages in thread
From: Christoph Hellwig @ 2016-02-29 14:59 UTC (permalink / raw)


There is no reason to do unconditional polling of CQs per the NVMe
spec.

Signed-off-by: Christoph Hellwig <hch at lst.de>
Reviewed-by: Keith Busch <keith.busch at intel.com>
Reviewed-by: Sagi Grimberg <sagig at mellanox.com>
Reviewed-by: Johannes Thumshirn <jthumshirn at suse.de>
---
 drivers/nvme/host/pci.c | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 21b0be4..10839f7 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1156,9 +1156,6 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
 	nvmeq->qid = qid;
 	nvmeq->cq_vector = -1;
 	dev->queues[qid] = nvmeq;
-
-	/* make sure queue descriptor is set before queue count, for kthread */
-	mb();
 	dev->queue_count++;
 
 	return nvmeq;
@@ -1345,7 +1342,6 @@ static int nvme_kthread(void *data)
 		set_current_state(TASK_INTERRUPTIBLE);
 		spin_lock(&dev_list_lock);
 		list_for_each_entry_safe(dev, next, &dev_list, node) {
-			int i;
 			u32 csts = readl(dev->bar + NVME_REG_CSTS);
 
 			/*
@@ -1363,14 +1359,6 @@ static int nvme_kthread(void *data)
 				}
 				continue;
 			}
-			for (i = 0; i < dev->queue_count; i++) {
-				struct nvme_queue *nvmeq = dev->queues[i];
-				if (!nvmeq)
-					continue;
-				spin_lock_irq(&nvmeq->q_lock);
-				nvme_process_cq(nvmeq);
-				spin_unlock_irq(&nvmeq->q_lock);
-			}
 		}
 		spin_unlock(&dev_list_lock);
 		schedule_timeout(round_jiffies_relative(HZ));
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 3/4] nvme: replace the kthread with a per-device watchdog timer
  2016-02-29 14:59 rollup of my pending NVMe patches for 4.6 Christoph Hellwig
  2016-02-29 14:59 ` [PATCH 1/4] nvme: use a work item to submit async event requests Christoph Hellwig
  2016-02-29 14:59 ` [PATCH 2/4] nvme: don't poll the CQ from the kthread Christoph Hellwig
@ 2016-02-29 14:59 ` Christoph Hellwig
  2016-02-29 14:59 ` [PATCH 4/4] nvme: return the whole CQE through the request passthrough interface Christoph Hellwig
  2016-02-29 15:47 ` rollup of my pending NVMe patches for 4.6 Jens Axboe
  4 siblings, 0 replies; 9+ messages in thread
From: Christoph Hellwig @ 2016-02-29 14:59 UTC (permalink / raw)


The only work left in the kthread is the periodic health check for each
controller.  There is no need to run this from process context or keep
a thread context around for it, so replace it with a simpler timer.

Signed-off-by: Christoph Hellwig <hch at lst.de>
Reviewed-by: Keith Busch <keith.busch at intel.com>
Reviewed-by: Sagi Grimberg <sagig at mellanox.com>
Reviewed-by: Johannes Thumshirn <jthumshirn at suse.de>
---
 drivers/nvme/host/pci.c | 112 ++++++++++--------------------------------------
 1 file changed, 23 insertions(+), 89 deletions(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 10839f7..a623360 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -27,7 +27,6 @@
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/kdev_t.h>
-#include <linux/kthread.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/module.h>
@@ -39,6 +38,7 @@
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/t10-pi.h>
+#include <linux/timer.h>
 #include <linux/types.h>
 #include <linux/io-64-nonatomic-lo-hi.h>
 #include <asm/unaligned.h>
@@ -64,11 +64,7 @@ static bool use_cmb_sqes = true;
 module_param(use_cmb_sqes, bool, 0644);
 MODULE_PARM_DESC(use_cmb_sqes, "use controller's memory buffer for I/O SQes");
 
-static LIST_HEAD(dev_list);
-static DEFINE_SPINLOCK(dev_list_lock);
-static struct task_struct *nvme_thread;
 static struct workqueue_struct *nvme_workq;
-static wait_queue_head_t nvme_kthread_wait;
 
 struct nvme_dev;
 struct nvme_queue;
@@ -82,7 +78,6 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown);
  * Represents an NVM Express device.  Each nvme_dev is a PCI function.
  */
 struct nvme_dev {
-	struct list_head node;
 	struct nvme_queue **queues;
 	struct blk_mq_tag_set tagset;
 	struct blk_mq_tag_set admin_tagset;
@@ -101,6 +96,7 @@ struct nvme_dev {
 	struct work_struct scan_work;
 	struct work_struct remove_work;
 	struct work_struct async_work;
+	struct timer_list watchdog_timer;
 	struct mutex shutdown_lock;
 	bool subsystem;
 	void __iomem *cmb;
@@ -1334,36 +1330,26 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
 	return result;
 }
 
-static int nvme_kthread(void *data)
+static void nvme_watchdog_timer(unsigned long data)
 {
-	struct nvme_dev *dev, *next;
+	struct nvme_dev *dev = (struct nvme_dev *)data;
+	u32 csts = readl(dev->bar + NVME_REG_CSTS);
 
-	while (!kthread_should_stop()) {
-		set_current_state(TASK_INTERRUPTIBLE);
-		spin_lock(&dev_list_lock);
-		list_for_each_entry_safe(dev, next, &dev_list, node) {
-			u32 csts = readl(dev->bar + NVME_REG_CSTS);
-
-			/*
-			 * Skip controllers currently under reset.
-			 */
-			if (work_pending(&dev->reset_work) || work_busy(&dev->reset_work))
-				continue;
-
-			if ((dev->subsystem && (csts & NVME_CSTS_NSSRO)) ||
-							csts & NVME_CSTS_CFS) {
-				if (queue_work(nvme_workq, &dev->reset_work)) {
-					dev_warn(dev->ctrl.device,
-						"Failed status: %x, reset controller\n",
-						readl(dev->bar + NVME_REG_CSTS));
-				}
-				continue;
-			}
+	/*
+	 * Skip controllers currently under reset.
+	 */
+	if (!work_pending(&dev->reset_work) && !work_busy(&dev->reset_work) &&
+	    ((csts & NVME_CSTS_CFS) ||
+	     (dev->subsystem && (csts & NVME_CSTS_NSSRO)))) {
+		if (queue_work(nvme_workq, &dev->reset_work)) {
+			dev_warn(dev->dev,
+				"Failed status: 0x%x, reset controller.\n",
+				csts);
 		}
-		spin_unlock(&dev_list_lock);
-		schedule_timeout(round_jiffies_relative(HZ));
+		return;
 	}
-	return 0;
+
+	mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + HZ));
 }
 
 static int nvme_create_io_queues(struct nvme_dev *dev)
@@ -1777,56 +1763,12 @@ static void nvme_dev_unmap(struct nvme_dev *dev)
 	}
 }
 
-static int nvme_dev_list_add(struct nvme_dev *dev)
-{
-	bool start_thread = false;
-
-	spin_lock(&dev_list_lock);
-	if (list_empty(&dev_list) && IS_ERR_OR_NULL(nvme_thread)) {
-		start_thread = true;
-		nvme_thread = NULL;
-	}
-	list_add(&dev->node, &dev_list);
-	spin_unlock(&dev_list_lock);
-
-	if (start_thread) {
-		nvme_thread = kthread_run(nvme_kthread, NULL, "nvme");
-		wake_up_all(&nvme_kthread_wait);
-	} else
-		wait_event_killable(nvme_kthread_wait, nvme_thread);
-
-	if (IS_ERR_OR_NULL(nvme_thread))
-		return nvme_thread ? PTR_ERR(nvme_thread) : -EINTR;
-
-	return 0;
-}
-
-/*
-* Remove the node from the device list and check
-* for whether or not we need to stop the nvme_thread.
-*/
-static void nvme_dev_list_remove(struct nvme_dev *dev)
-{
-	struct task_struct *tmp = NULL;
-
-	spin_lock(&dev_list_lock);
-	list_del_init(&dev->node);
-	if (list_empty(&dev_list) && !IS_ERR_OR_NULL(nvme_thread)) {
-		tmp = nvme_thread;
-		nvme_thread = NULL;
-	}
-	spin_unlock(&dev_list_lock);
-
-	if (tmp)
-		kthread_stop(tmp);
-}
-
 static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 {
 	int i;
 	u32 csts = -1;
 
-	nvme_dev_list_remove(dev);
+	del_timer_sync(&dev->watchdog_timer);
 
 	mutex_lock(&dev->shutdown_lock);
 	if (dev->bar) {
@@ -1927,9 +1869,7 @@ static void nvme_reset_work(struct work_struct *work)
 	dev->ctrl.event_limit = NVME_NR_AEN_COMMANDS;
 	queue_work(nvme_workq, &dev->async_work);
 
-	result = nvme_dev_list_add(dev);
-	if (result)
-		goto remove;
+	mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + HZ));
 
 	/*
 	 * Keep the controller around but remove all namespaces if we don't have
@@ -1946,8 +1886,6 @@ static void nvme_reset_work(struct work_struct *work)
 	clear_bit(NVME_CTRL_RESETTING, &dev->flags);
 	return;
 
- remove:
-	nvme_dev_list_remove(dev);
  free_tags:
 	nvme_dev_remove_admin(dev);
 	blk_put_queue(dev->ctrl.admin_q);
@@ -2055,11 +1993,12 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	dev->dev = get_device(&pdev->dev);
 	pci_set_drvdata(pdev, dev);
 
-	INIT_LIST_HEAD(&dev->node);
 	INIT_WORK(&dev->scan_work, nvme_dev_scan);
 	INIT_WORK(&dev->reset_work, nvme_reset_work);
 	INIT_WORK(&dev->remove_work, nvme_remove_dead_ctrl_work);
 	INIT_WORK(&dev->async_work, nvme_async_event_work);
+	setup_timer(&dev->watchdog_timer, nvme_watchdog_timer,
+		(unsigned long)dev);
 	mutex_init(&dev->shutdown_lock);
 	init_completion(&dev->ioq_wait);
 
@@ -2108,9 +2047,7 @@ static void nvme_remove(struct pci_dev *pdev)
 {
 	struct nvme_dev *dev = pci_get_drvdata(pdev);
 
-	spin_lock(&dev_list_lock);
-	list_del_init(&dev->node);
-	spin_unlock(&dev_list_lock);
+	del_timer_sync(&dev->watchdog_timer);
 
 	pci_set_drvdata(pdev, NULL);
 	flush_work(&dev->async_work);
@@ -2223,8 +2160,6 @@ static int __init nvme_init(void)
 {
 	int result;
 
-	init_waitqueue_head(&nvme_kthread_wait);
-
 	nvme_workq = alloc_workqueue("nvme", WQ_UNBOUND | WQ_MEM_RECLAIM, 0);
 	if (!nvme_workq)
 		return -ENOMEM;
@@ -2239,7 +2174,6 @@ static void __exit nvme_exit(void)
 {
 	pci_unregister_driver(&nvme_driver);
 	destroy_workqueue(nvme_workq);
-	BUG_ON(nvme_thread && !IS_ERR(nvme_thread));
 	_nvme_check_size();
 }
 
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 4/4] nvme: return the whole CQE through the request passthrough interface
  2016-02-29 14:59 rollup of my pending NVMe patches for 4.6 Christoph Hellwig
                   ` (2 preceding siblings ...)
  2016-02-29 14:59 ` [PATCH 3/4] nvme: replace the kthread with a per-device watchdog timer Christoph Hellwig
@ 2016-02-29 14:59 ` Christoph Hellwig
  2016-02-29 15:42   ` Keith Busch
  2016-02-29 15:47 ` rollup of my pending NVMe patches for 4.6 Jens Axboe
  4 siblings, 1 reply; 9+ messages in thread
From: Christoph Hellwig @ 2016-02-29 14:59 UTC (permalink / raw)


Both LightNVM and NVMe over Fabrics need to look at more than just the
status and result fields.

Signed-off-by: Christoph Hellwig <hch at lst.de>
Reviewed-by: Matias Bjørling <m at bjorling.me>
Reviewed-by: Jay Freyensee <james.p.freyensee at intel.com>
Reviewed-by: Sagi Grimberg <sagig at mellanox.com>
Signed-off-by: Sagi Grimberg <sagig at mellanox.com>
---
 drivers/nvme/host/core.c | 27 +++++++++++++++++++--------
 drivers/nvme/host/nvme.h |  3 ++-
 drivers/nvme/host/pci.c  | 11 +++--------
 3 files changed, 24 insertions(+), 17 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 07b7ec69..66fd3d9 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -132,7 +132,6 @@ struct request *nvme_alloc_request(struct request_queue *q,
 
 	req->cmd = (unsigned char *)cmd;
 	req->cmd_len = sizeof(struct nvme_command);
-	req->special = (void *)0;
 
 	return req;
 }
@@ -143,7 +142,8 @@ EXPORT_SYMBOL_GPL(nvme_alloc_request);
  * if the result is positive, it's an NVM Express status code
  */
 int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
-		void *buffer, unsigned bufflen, u32 *result, unsigned timeout)
+		struct nvme_completion *cqe, void *buffer, unsigned bufflen,
+		unsigned timeout)
 {
 	struct request *req;
 	int ret;
@@ -153,6 +153,7 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
 		return PTR_ERR(req);
 
 	req->timeout = timeout ? timeout : ADMIN_TIMEOUT;
+	req->special = cqe;
 
 	if (buffer && bufflen) {
 		ret = blk_rq_map_kern(q, req, buffer, bufflen, GFP_KERNEL);
@@ -161,8 +162,6 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
 	}
 
 	blk_execute_rq(req->q, NULL, req, 0);
-	if (result)
-		*result = (u32)(uintptr_t)req->special;
 	ret = req->errors;
  out:
 	blk_mq_free_request(req);
@@ -172,7 +171,7 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
 int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
 		void *buffer, unsigned bufflen)
 {
-	return __nvme_submit_sync_cmd(q, cmd, buffer, bufflen, NULL, 0);
+	return __nvme_submit_sync_cmd(q, cmd, NULL, buffer, bufflen, 0);
 }
 EXPORT_SYMBOL_GPL(nvme_submit_sync_cmd);
 
@@ -182,6 +181,7 @@ int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
 		u32 *result, unsigned timeout)
 {
 	bool write = cmd->common.opcode & 1;
+	struct nvme_completion cqe;
 	struct nvme_ns *ns = q->queuedata;
 	struct gendisk *disk = ns ? ns->disk : NULL;
 	struct request *req;
@@ -194,6 +194,7 @@ int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
 		return PTR_ERR(req);
 
 	req->timeout = timeout ? timeout : ADMIN_TIMEOUT;
+	req->special = &cqe;
 
 	if (ubuffer && bufflen) {
 		ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen,
@@ -248,7 +249,7 @@ int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
 	blk_execute_rq(req->q, disk, req, 0);
 	ret = req->errors;
 	if (result)
-		*result = (u32)(uintptr_t)req->special;
+		*result = le32_to_cpu(cqe.result);
 	if (meta && !ret && !write) {
 		if (copy_to_user(meta_buffer, meta, meta_len))
 			ret = -EFAULT;
@@ -329,6 +330,8 @@ int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid,
 					dma_addr_t dma_addr, u32 *result)
 {
 	struct nvme_command c;
+	struct nvme_completion cqe;
+	int ret;
 
 	memset(&c, 0, sizeof(c));
 	c.features.opcode = nvme_admin_get_features;
@@ -336,13 +339,18 @@ int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid,
 	c.features.prp1 = cpu_to_le64(dma_addr);
 	c.features.fid = cpu_to_le32(fid);
 
-	return __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0, result, 0);
+	ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &cqe, NULL, 0, 0);
+	if (ret >= 0)
+		*result = le32_to_cpu(cqe.result);
+	return ret;
 }
 
 int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
 					dma_addr_t dma_addr, u32 *result)
 {
 	struct nvme_command c;
+	struct nvme_completion cqe;
+	int ret;
 
 	memset(&c, 0, sizeof(c));
 	c.features.opcode = nvme_admin_set_features;
@@ -350,7 +358,10 @@ int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
 	c.features.fid = cpu_to_le32(fid);
 	c.features.dword11 = cpu_to_le32(dword11);
 
-	return __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0, result, 0);
+	ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &cqe, NULL, 0, 0);
+	if (ret >= 0)
+		*result = le32_to_cpu(cqe.result);
+	return ret;
 }
 
 int nvme_get_log_page(struct nvme_ctrl *dev, struct nvme_smart_log **log)
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 63ba8a5..2ac7539 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -248,7 +248,8 @@ void nvme_requeue_req(struct request *req);
 int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
 		void *buf, unsigned bufflen);
 int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
-		void *buffer, unsigned bufflen,  u32 *result, unsigned timeout);
+		struct nvme_completion *cqe, void *buffer, unsigned bufflen,
+		unsigned timeout);
 int nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
 		void __user *ubuffer, unsigned bufflen, u32 *result,
 		unsigned timeout);
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index a623360..d47b087 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -748,10 +748,8 @@ static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag)
 		}
 
 		req = blk_mq_tag_to_rq(*nvmeq->tags, cqe.command_id);
-		if (req->cmd_type == REQ_TYPE_DRV_PRIV) {
-			u32 result = le32_to_cpu(cqe.result);
-			req->special = (void *)(uintptr_t)result;
-		}
+		if (req->cmd_type == REQ_TYPE_DRV_PRIV && req->special)
+			memcpy(req->special, &cqe, sizeof(cqe));
 		blk_mq_complete_request(req, status >> 1);
 
 	}
@@ -901,13 +899,10 @@ static void abort_endio(struct request *req, int error)
 {
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
 	struct nvme_queue *nvmeq = iod->nvmeq;
-	u32 result = (u32)(uintptr_t)req->special;
 	u16 status = req->errors;
 
-	dev_warn(nvmeq->dev->ctrl.device,
-		"Abort status:%x result:%x", status, result);
+	dev_warn(nvmeq->dev->ctrl.device, "Abort status: 0x%x", status);
 	atomic_inc(&nvmeq->dev->ctrl.abort_limit);
-
 	blk_mq_free_request(req);
 }
 
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 4/4] nvme: return the whole CQE through the request passthrough interface
  2016-02-29 14:59 ` [PATCH 4/4] nvme: return the whole CQE through the request passthrough interface Christoph Hellwig
@ 2016-02-29 15:42   ` Keith Busch
  0 siblings, 0 replies; 9+ messages in thread
From: Keith Busch @ 2016-02-29 15:42 UTC (permalink / raw)


On Mon, Feb 29, 2016 at 03:59:47PM +0100, Christoph Hellwig wrote:
> Both LightNVM and NVMe over Fabrics need to look at more than just the
> status and result fields.
> 
> Signed-off-by: Christoph Hellwig <hch at lst.de>
> Reviewed-by: Matias Bjørling <m at bjorling.me>
> Reviewed-by: Jay Freyensee <james.p.freyensee at intel.com>
> Reviewed-by: Sagi Grimberg <sagig at mellanox.com>
> Signed-off-by: Sagi Grimberg <sagig at mellanox.com>

Looks good,

Reviewed-by: Keith Busch <keith.busch at intel.com>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* rollup of my pending NVMe patches for 4.6
  2016-02-29 14:59 rollup of my pending NVMe patches for 4.6 Christoph Hellwig
                   ` (3 preceding siblings ...)
  2016-02-29 14:59 ` [PATCH 4/4] nvme: return the whole CQE through the request passthrough interface Christoph Hellwig
@ 2016-02-29 15:47 ` Jens Axboe
  4 siblings, 0 replies; 9+ messages in thread
From: Jens Axboe @ 2016-02-29 15:47 UTC (permalink / raw)


On 02/29/2016 07:59 AM, Christoph Hellwig wrote:
> All of them had a fair amount of review, but for the last one an ACK
> from Keith would be nice.

Queued them up for 4.6, with Keith's ack. Thanks!

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH 2/4] nvme: don't poll the CQ from the kthread
  2016-02-29 14:59 ` [PATCH 2/4] nvme: don't poll the CQ from the kthread Christoph Hellwig
@ 2016-02-29 20:56   ` Matthew Wilcox
  2016-03-01  8:49     ` Christoph Hellwig
  0 siblings, 1 reply; 9+ messages in thread
From: Matthew Wilcox @ 2016-02-29 20:56 UTC (permalink / raw)

On Mon, Feb 29, 2016 at 03:59:45PM +0100, Christoph Hellwig wrote:
> There is no reason to do unconditional polling of CQs per the NVMe
> spec.

Sure, per the spec there isn't.  But in reality, devices ship with
broken interrupt support.  What's your plan for dealing with the bug
reports from users who have misconfigured MSI support or IRQ lines that
are routed to the wrong place?  From my time maintaining SCSI drivers,
this wasn't an uncommon bug report.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH 2/4] nvme: don't poll the CQ from the kthread
  2016-02-29 20:56   ` Matthew Wilcox
@ 2016-03-01  8:49     ` Christoph Hellwig
  0 siblings, 0 replies; 9+ messages in thread
From: Christoph Hellwig @ 2016-03-01  8:49 UTC (permalink / raw)


On Mon, Feb 29, 2016 at 03:56:07PM -0500, Matthew Wilcox wrote:
> On Mon, Feb 29, 2016 at 03:59:45PM +0100, Christoph Hellwig wrote:
> > There is no reason to do unconditional polling of CQs per the NVMe
> > spec.
> 
> Sure, per the spec there isn't.  But in reality, devices ship with
> broken interrupt support.  What's your plan for dealing with the bug
> reports from users who have misconfigured MSI support or IRQ lines that
> are routed to the wrong place?  From my time maintaining SCSI drivers,
> this wasn't an uncommon bug report.

The same as for all other devices that have drivers without a dedicated
once per second polling loop?  That is: fix the irq routing if possible
and find a specific workaround if really needed and required by
a blacklist instead of papering over it just in case.

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2016-03-01  8:49 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-02-29 14:59 rollup of my pending NVMe patches for 4.6 Christoph Hellwig
2016-02-29 14:59 ` [PATCH 1/4] nvme: use a work item to submit async event requests Christoph Hellwig
2016-02-29 14:59 ` [PATCH 2/4] nvme: don't poll the CQ from the kthread Christoph Hellwig
2016-02-29 20:56   ` Matthew Wilcox
2016-03-01  8:49     ` Christoph Hellwig
2016-02-29 14:59 ` [PATCH 3/4] nvme: replace the kthread with a per-device watchdog timer Christoph Hellwig
2016-02-29 14:59 ` [PATCH 4/4] nvme: return the whole CQE through the request passthrough interface Christoph Hellwig
2016-02-29 15:42   ` Keith Busch
2016-02-29 15:47 ` rollup of my pending NVMe patches for 4.6 Jens Axboe

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.