* [PATCHv3 0/5] NVMe: Power management patch set
@ 2013-06-24 20:30 Keith Busch
2013-06-24 20:30 ` [PATCHv3 1/5] NVMe: Group pci related actions in functions Keith Busch
` (4 more replies)
0 siblings, 5 replies; 6+ messages in thread
From: Keith Busch @ 2013-06-24 20:30 UTC (permalink / raw)
This adds power management to the NVMe devices.
I ran this on some real devices and found some errors from the previous
patch that could occur if the resume fails, but not many changes from
the previous set.
v2->v3:
Hand merge to latest head.
Additional checks when releasing the pci resources in case it happens
twice. This could occur if the module is unloaded after the device
failed to initialize on resume.
If the resume fails, free all the queues and check for a null queue
before issuing IO so that IO cannot be sent to the block device if the
controller failed to re-initialize. I tried deleting the gendisks too,
but resume never completed when an error was forced with that code. Not
entirely sure why that happened.
Additional error checking on ioremap.
Use SIMPLE_DEV_PM_OPS instead of UNIVERSAL_DEV_PM_OPS. I don't think we
want to use these for runtime suspend/resume on an NVMe device.
Keith Busch (5):
NVMe: Group pci related actions in functions
NVMe: Separate queue alloc/free from create/delete
NVMe: Separate controller init from disk discovery
NVMe: Use normal shutdown
NVMe: Add pci suspend/resume driver callbacks
drivers/block/nvme-core.c | 408 +++++++++++++++++++++++++++++++--------------
include/linux/nvme.h | 2 +
2 files changed, 287 insertions(+), 123 deletions(-)
^ permalink raw reply [flat|nested] 6+ messages in thread
* [PATCHv3 1/5] NVMe: Group pci related actions in functions
2013-06-24 20:30 [PATCHv3 0/5] NVMe: Power management patch set Keith Busch
@ 2013-06-24 20:30 ` Keith Busch
2013-06-24 20:30 ` [PATCHv3 2/5] NVMe: Separate queue alloc/free from create/delete Keith Busch
` (3 subsequent siblings)
4 siblings, 0 replies; 6+ messages in thread
From: Keith Busch @ 2013-06-24 20:30 UTC (permalink / raw)
This will make it easier to reuse these outside probe/remove.
Signed-off-by: Keith Busch <keith.busch at intel.com>
---
drivers/block/nvme-core.c | 116 +++++++++++++++++++++++++++------------------
1 files changed, 70 insertions(+), 46 deletions(-)
diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index 07d527c..3751888 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -1192,9 +1192,6 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
u64 cap = readq(&dev->bar->cap);
struct nvme_queue *nvmeq;
- dev->dbs = ((void __iomem *)dev->bar) + 4096;
- dev->db_stride = NVME_CAP_STRIDE(cap);
-
result = nvme_disable_ctrl(dev, cap);
if (result < 0)
return result;
@@ -1686,6 +1683,8 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
if (db_bar_size > 8192) {
iounmap(dev->bar);
dev->bar = ioremap(pci_resource_start(pdev, 0), db_bar_size);
+ if (!dev->bar)
+ return -ENOMEM;
dev->dbs = ((void __iomem *)dev->bar) + 4096;
dev->queues[0]->q_db = dev->dbs;
}
@@ -1830,6 +1829,63 @@ static int nvme_dev_add(struct nvme_dev *dev)
return res;
}
+static int nvme_dev_map(struct nvme_dev *dev)
+{
+ int bars, result = -ENOMEM;
+ struct pci_dev *pdev = dev->pci_dev;
+
+ if (pci_enable_device_mem(pdev))
+ return result;
+
+ dev->entry[0].vector = pdev->irq;
+ pci_set_master(pdev);
+ bars = pci_select_bars(pdev, IORESOURCE_MEM);
+ if (pci_request_selected_regions(pdev, bars, "nvme"))
+ goto disable_pci;
+
+ if (!dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)))
+ dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
+ else if (!dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)))
+ dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
+ else
+ goto disable_pci;
+
+ dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
+ pci_set_drvdata(pdev, dev);
+
+ dev->bar = ioremap(pci_resource_start(pdev, 0), 8192);
+ if (!dev->bar)
+ goto disable;
+
+ dev->db_stride = NVME_CAP_STRIDE(readq(&dev->bar->cap));
+ dev->dbs = ((void __iomem *)dev->bar) + 4096;
+
+ return 0;
+
+ disable:
+ pci_release_regions(pdev);
+ disable_pci:
+ pci_disable_device(pdev);
+ return result;
+}
+
+static void nvme_dev_unmap(struct nvme_dev *dev)
+{
+ if (dev->pci_dev->msi_enabled)
+ pci_disable_msi(dev->pci_dev);
+ else if (dev->pci_dev->msix_enabled)
+ pci_disable_msix(dev->pci_dev);
+
+ if (dev->bar) {
+ iounmap(dev->bar);
+ dev->bar = 0;
+ }
+
+ pci_release_regions(dev->pci_dev);
+ if (pci_is_enabled(dev->pci_dev))
+ pci_disable_device(dev->pci_dev);
+}
+
static int nvme_dev_remove(struct nvme_dev *dev)
{
struct nvme_ns *ns, *next;
@@ -1906,15 +1962,9 @@ static void nvme_free_dev(struct kref *kref)
{
struct nvme_dev *dev = container_of(kref, struct nvme_dev, kref);
nvme_dev_remove(dev);
- if (dev->pci_dev->msi_enabled)
- pci_disable_msi(dev->pci_dev);
- else if (dev->pci_dev->msix_enabled)
- pci_disable_msix(dev->pci_dev);
- iounmap(dev->bar);
+ nvme_dev_unmap(dev);
nvme_release_instance(dev);
nvme_release_prp_pools(dev);
- pci_disable_device(dev->pci_dev);
- pci_release_regions(dev->pci_dev);
kfree(dev->queues);
kfree(dev->entry);
kfree(dev);
@@ -1957,7 +2007,7 @@ static const struct file_operations nvme_dev_fops = {
static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
- int bars, result = -ENOMEM;
+ int result = -ENOMEM;
struct nvme_dev *dev;
dev = kzalloc(sizeof(*dev), GFP_KERNEL);
@@ -1972,39 +2022,19 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (!dev->queues)
goto free;
- if (pci_enable_device_mem(pdev))
- goto free;
- pci_set_master(pdev);
- bars = pci_select_bars(pdev, IORESOURCE_MEM);
- if (pci_request_selected_regions(pdev, bars, "nvme"))
- goto disable;
-
INIT_LIST_HEAD(&dev->namespaces);
dev->pci_dev = pdev;
- pci_set_drvdata(pdev, dev);
-
- if (!dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)))
- dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
- else if (!dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)))
- dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
- else
- goto disable;
-
result = nvme_set_instance(dev);
if (result)
- goto disable;
-
- dev->entry[0].vector = pdev->irq;
+ goto free;
result = nvme_setup_prp_pools(dev);
if (result)
- goto disable_msix;
+ goto release;
- dev->bar = ioremap(pci_resource_start(pdev, 0), 8192);
- if (!dev->bar) {
- result = -ENOMEM;
- goto disable_msix;
- }
+ result = nvme_dev_map(dev);
+ if (result)
+ goto release_pools;
result = nvme_configure_admin_queue(dev);
if (result)
@@ -2040,17 +2070,11 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
nvme_free_queues(dev);
unmap:
- iounmap(dev->bar);
- disable_msix:
- if (dev->pci_dev->msi_enabled)
- pci_disable_msi(dev->pci_dev);
- else if (dev->pci_dev->msix_enabled)
- pci_disable_msix(dev->pci_dev);
- nvme_release_instance(dev);
+ nvme_dev_unmap(dev);
+ release_pools:
nvme_release_prp_pools(dev);
- disable:
- pci_disable_device(pdev);
- pci_release_regions(pdev);
+ release:
+ nvme_release_instance(dev);
free:
kfree(dev->queues);
kfree(dev->entry);
--
1.7.0.4
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCHv3 2/5] NVMe: Separate queue alloc/free from create/delete
2013-06-24 20:30 [PATCHv3 0/5] NVMe: Power management patch set Keith Busch
2013-06-24 20:30 ` [PATCHv3 1/5] NVMe: Group pci related actions in functions Keith Busch
@ 2013-06-24 20:30 ` Keith Busch
2013-06-24 20:30 ` [PATCHv3 3/5] NVMe: Separate controller init from disk discovery Keith Busch
` (2 subsequent siblings)
4 siblings, 0 replies; 6+ messages in thread
From: Keith Busch @ 2013-06-24 20:30 UTC (permalink / raw)
This separates nvme queue allocation from creation, and queue deletion
from freeing. This is so that we may in the future temporarily disable
queues and reuse the same memory when bringing them back online, like
coming back from suspend state.
Signed-off-by: Keith Busch <keith.busch at intel.com>
---
drivers/block/nvme-core.c | 165 ++++++++++++++++++++++++++++++++-------------
1 files changed, 119 insertions(+), 46 deletions(-)
diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index 3751888..e16d962 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -81,6 +81,7 @@ struct nvme_queue {
u16 cq_head;
u8 cq_phase;
u8 cqe_seen;
+ bool q_suspended;
unsigned long cmdid_data[];
};
@@ -784,8 +785,14 @@ static void nvme_make_request(struct request_queue *q, struct bio *bio)
struct nvme_queue *nvmeq = get_nvmeq(ns->dev);
int result = -EBUSY;
+ if (!nvmeq) {
+ put_nvmeq(NULL);
+ bio_endio(bio, -EIO);
+ return;
+ }
+
spin_lock_irq(&nvmeq->q_lock);
- if (bio_list_empty(&nvmeq->sq_cong))
+ if (!nvmeq->q_suspended && bio_list_empty(&nvmeq->sq_cong))
result = nvme_submit_bio_queue(nvmeq, ns, bio);
if (unlikely(result)) {
if (bio_list_empty(&nvmeq->sq_cong))
@@ -1021,6 +1028,13 @@ static void nvme_cancel_ios(struct nvme_queue *nvmeq, bool timeout)
static void nvme_free_queue_mem(struct nvme_queue *nvmeq)
{
+ spin_lock_irq(&nvmeq->q_lock);
+ while (bio_list_peek(&nvmeq->sq_cong)) {
+ struct bio *bio = bio_list_pop(&nvmeq->sq_cong);
+ bio_endio(bio, -EIO);
+ }
+ spin_unlock_irq(&nvmeq->q_lock);
+
dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth),
(void *)nvmeq->cqes, nvmeq->cq_dma_addr);
dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth),
@@ -1028,17 +1042,28 @@ static void nvme_free_queue_mem(struct nvme_queue *nvmeq)
kfree(nvmeq);
}
-static void nvme_free_queue(struct nvme_dev *dev, int qid)
+static void nvme_free_queues(struct nvme_dev *dev)
+{
+ int i;
+
+ for (i = dev->queue_count - 1; i >= 0; i--) {
+ nvme_free_queue_mem(dev->queues[i]);
+ dev->queue_count--;
+ dev->queues[i] = NULL;
+ }
+}
+
+static void nvme_disable_queue(struct nvme_dev *dev, int qid)
{
struct nvme_queue *nvmeq = dev->queues[qid];
int vector = dev->entry[nvmeq->cq_vector].vector;
spin_lock_irq(&nvmeq->q_lock);
- nvme_cancel_ios(nvmeq, false);
- while (bio_list_peek(&nvmeq->sq_cong)) {
- struct bio *bio = bio_list_pop(&nvmeq->sq_cong);
- bio_endio(bio, -EIO);
+ if (nvmeq->q_suspended) {
+ spin_unlock_irq(&nvmeq->q_lock);
+ return;
}
+ nvmeq->q_suspended = true;
spin_unlock_irq(&nvmeq->q_lock);
irq_set_affinity_hint(vector, NULL);
@@ -1050,7 +1075,10 @@ static void nvme_free_queue(struct nvme_dev *dev, int qid)
adapter_delete_cq(dev, qid);
}
- nvme_free_queue_mem(nvmeq);
+ spin_lock_irq(&nvmeq->q_lock);
+ nvme_process_cq(nvmeq);
+ nvme_cancel_ios(nvmeq, false);
+ spin_unlock_irq(&nvmeq->q_lock);
}
static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
@@ -1085,6 +1113,8 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
nvmeq->q_db = &dev->dbs[qid << (dev->db_stride + 1)];
nvmeq->q_depth = depth;
nvmeq->cq_vector = vector;
+ nvmeq->q_suspended = true;
+ dev->queue_count++;
return nvmeq;
@@ -1108,40 +1138,50 @@ static int queue_request_irq(struct nvme_dev *dev, struct nvme_queue *nvmeq,
IRQF_DISABLED | IRQF_SHARED, name, nvmeq);
}
-static struct nvme_queue *nvme_create_queue(struct nvme_dev *dev, int qid,
- int cq_size, int vector)
+static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid)
{
- int result;
- struct nvme_queue *nvmeq = nvme_alloc_queue(dev, qid, cq_size, vector);
+ struct nvme_dev *dev = nvmeq->dev;
+ unsigned extra = DIV_ROUND_UP(nvmeq->q_depth, 8) + (nvmeq->q_depth *
+ sizeof(struct nvme_cmd_info));
+ nvmeq->sq_tail = 0;
+ nvmeq->cq_head = 0;
+ nvmeq->cq_phase = 1;
+ nvmeq->q_db = &dev->dbs[qid << (dev->db_stride + 1)];
+ memset (nvmeq->cmdid_data, 0, extra);
+ memset((void *)nvmeq->cqes, 0, CQ_SIZE(nvmeq->q_depth));
+ nvme_cancel_ios(nvmeq, false);
+ nvmeq->q_suspended = false;
+}
- if (!nvmeq)
- return ERR_PTR(-ENOMEM);
+
+static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
+{
+ struct nvme_dev *dev = nvmeq->dev;
+ int result;
result = adapter_alloc_cq(dev, qid, nvmeq);
if (result < 0)
- goto free_nvmeq;
+ return result;
result = adapter_alloc_sq(dev, qid, nvmeq);
if (result < 0)
goto release_cq;
result = queue_request_irq(dev, nvmeq, "nvme");
- if (result < 0)
+ if (result)
goto release_sq;
- return nvmeq;
+ spin_lock(&nvmeq->q_lock);
+ nvme_init_queue(nvmeq, qid);
+ spin_unlock(&nvmeq->q_lock);
+
+ return result;
release_sq:
adapter_delete_sq(dev, qid);
release_cq:
adapter_delete_cq(dev, qid);
- free_nvmeq:
- dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth),
- (void *)nvmeq->cqes, nvmeq->cq_dma_addr);
- dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth),
- nvmeq->sq_cmds, nvmeq->sq_dma_addr);
- kfree(nvmeq);
- return ERR_PTR(result);
+ return result;
}
static int nvme_wait_ready(struct nvme_dev *dev, u64 cap, bool enabled)
@@ -1196,9 +1236,13 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
if (result < 0)
return result;
- nvmeq = nvme_alloc_queue(dev, 0, 64, 0);
- if (!nvmeq)
- return -ENOMEM;
+ nvmeq = dev->queues[0];
+ if (!nvmeq) {
+ nvmeq = nvme_alloc_queue(dev, 0, 64, 0);
+ if (!nvmeq)
+ return -ENOMEM;
+ dev->queues[0] = nvmeq;
+ }
aqa = nvmeq->q_depth - 1;
aqa |= aqa << 16;
@@ -1215,17 +1259,16 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
result = nvme_enable_ctrl(dev, cap);
if (result)
- goto free_q;
+ return result;
result = queue_request_irq(dev, nvmeq, "nvme admin");
if (result)
- goto free_q;
+ return result;
- dev->queues[0] = nvmeq;
- return result;
+ spin_lock(&nvmeq->q_lock);
+ nvme_init_queue(nvmeq, 0);
+ spin_unlock(&nvmeq->q_lock);
- free_q:
- nvme_free_queue_mem(nvmeq);
return result;
}
@@ -1386,6 +1429,8 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
put_nvmeq(nvmeq);
if (length != (io.nblocks + 1) << ns->lba_shift)
status = -ENOMEM;
+ else if (!nvmeq || nvmeq->q_suspended)
+ status = -EBUSY;
else
status = nvme_submit_sync_cmd(nvmeq, &c, NULL, NVME_IO_TIMEOUT);
@@ -1536,9 +1581,12 @@ static int nvme_kthread(void *data)
if (!nvmeq)
continue;
spin_lock_irq(&nvmeq->q_lock);
+ if (nvmeq->q_suspended)
+ goto unlock;
nvme_process_cq(nvmeq);
nvme_cancel_ios(nvmeq, true);
nvme_resubmit_bios(nvmeq);
+ unlock:
spin_unlock_irq(&nvmeq->q_lock);
}
}
@@ -1726,7 +1774,23 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
nr_io_queues = vecs;
result = queue_request_irq(dev, dev->queues[0], "nvme admin");
- /* XXX: handle failure here */
+ if (result)
+ goto free_queues;
+
+ /* Free previously allocated queues that are no longer usable */
+ spin_lock(&dev_list_lock);
+ for (i = dev->queue_count - 1; i > nr_io_queues; i--) {
+ struct nvme_queue *nvmeq = dev->queues[i];
+
+ spin_lock(&nvmeq->q_lock);
+ nvme_cancel_ios(nvmeq, false);
+ spin_unlock(&nvmeq->q_lock);
+
+ nvme_free_queue_mem(nvmeq);
+ dev->queue_count--;
+ dev->queues[i] = NULL;
+ }
+ spin_unlock(&dev_list_lock);
cpu = cpumask_first(cpu_online_mask);
for (i = 0; i < nr_io_queues; i++) {
@@ -1736,11 +1800,12 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
q_depth = min_t(int, NVME_CAP_MQES(readq(&dev->bar->cap)) + 1,
NVME_Q_DEPTH);
- for (i = 0; i < nr_io_queues; i++) {
- dev->queues[i + 1] = nvme_create_queue(dev, i + 1, q_depth, i);
- if (IS_ERR(dev->queues[i + 1]))
- return PTR_ERR(dev->queues[i + 1]);
- dev->queue_count++;
+ for (i = dev->queue_count - 1; i < nr_io_queues; i++) {
+ dev->queues[i + 1] = nvme_alloc_queue(dev, i + 1, q_depth, i);
+ if (!dev->queues[i + 1]) {
+ result = -ENOMEM;
+ goto free_queues;
+ }
}
for (; i < num_possible_cpus(); i++) {
@@ -1748,15 +1813,20 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
dev->queues[i + 1] = dev->queues[target + 1];
}
- return 0;
-}
+ for (i = 1; i < dev->queue_count; i++) {
+ result = nvme_create_queue(dev->queues[i], i);
+ if (result) {
+ for (--i; i > 0; i--)
+ nvme_disable_queue(dev, i);
+ goto free_queues;
+ }
+ }
-static void nvme_free_queues(struct nvme_dev *dev)
-{
- int i;
+ return 0;
- for (i = dev->queue_count - 1; i >= 0; i--)
- nvme_free_queue(dev, i);
+ free_queues:
+ nvme_free_queues(dev);
+ return result;
}
/*
@@ -1889,6 +1959,10 @@ static void nvme_dev_unmap(struct nvme_dev *dev)
static int nvme_dev_remove(struct nvme_dev *dev)
{
struct nvme_ns *ns, *next;
+ int i;
+
+ for (i = dev->queue_count - 1; i >= 0; i--)
+ nvme_disable_queue(dev, i);
spin_lock(&dev_list_lock);
list_del(&dev->node);
@@ -2039,7 +2113,6 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
result = nvme_configure_admin_queue(dev);
if (result)
goto unmap;
- dev->queue_count++;
spin_lock(&dev_list_lock);
list_add(&dev->node, &dev_list);
--
1.7.0.4
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCHv3 3/5] NVMe: Separate controller init from disk discovery
2013-06-24 20:30 [PATCHv3 0/5] NVMe: Power management patch set Keith Busch
2013-06-24 20:30 ` [PATCHv3 1/5] NVMe: Group pci related actions in functions Keith Busch
2013-06-24 20:30 ` [PATCHv3 2/5] NVMe: Separate queue alloc/free from create/delete Keith Busch
@ 2013-06-24 20:30 ` Keith Busch
2013-06-24 20:30 ` [PATCHv3 4/5] NVMe: Use normal shutdown Keith Busch
2013-06-24 20:30 ` [PATCHv3 5/5] NVMe: Add pci suspend/resume driver callbacks Keith Busch
4 siblings, 0 replies; 6+ messages in thread
From: Keith Busch @ 2013-06-24 20:30 UTC (permalink / raw)
This combines the controller initialization into one function, removing
IO queue setup from namespace discovery, and creates symmetric functions
for device removal. The controller start and shutdown functions can now
be called from resume/suspend context as well as probe/remove.
Signed-off-by: Keith Busch <keith.busch at intel.com>
---
drivers/block/nvme-core.c | 78 +++++++++++++++++++++++++++-----------------
1 files changed, 48 insertions(+), 30 deletions(-)
diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index e16d962..21e7e72 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -1845,10 +1845,6 @@ static int nvme_dev_add(struct nvme_dev *dev)
dma_addr_t dma_addr;
int shift = NVME_CAP_MPSMIN(readq(&dev->bar->cap)) + 12;
- res = nvme_setup_io_queues(dev);
- if (res)
- return res;
-
mem = dma_alloc_coherent(&dev->pci_dev->dev, 8192, &dma_addr,
GFP_KERNEL);
if (!mem)
@@ -1956,27 +1952,30 @@ static void nvme_dev_unmap(struct nvme_dev *dev)
pci_disable_device(dev->pci_dev);
}
-static int nvme_dev_remove(struct nvme_dev *dev)
+static void nvme_dev_shutdown(struct nvme_dev *dev)
{
- struct nvme_ns *ns, *next;
int i;
for (i = dev->queue_count - 1; i >= 0; i--)
nvme_disable_queue(dev, i);
spin_lock(&dev_list_lock);
- list_del(&dev->node);
+ if (dev->node.next != LIST_POISON1)
+ list_del(&dev->node);
spin_unlock(&dev_list_lock);
+ nvme_dev_unmap(dev);
+}
+
+static void nvme_dev_remove(struct nvme_dev *dev)
+{
+ struct nvme_ns *ns, *next;
+
list_for_each_entry_safe(ns, next, &dev->namespaces, list) {
list_del(&ns->list);
del_gendisk(ns->disk);
nvme_ns_free(ns);
}
-
- nvme_free_queues(dev);
-
- return 0;
}
static int nvme_setup_prp_pools(struct nvme_dev *dev)
@@ -2036,7 +2035,8 @@ static void nvme_free_dev(struct kref *kref)
{
struct nvme_dev *dev = container_of(kref, struct nvme_dev, kref);
nvme_dev_remove(dev);
- nvme_dev_unmap(dev);
+ nvme_dev_shutdown(dev);
+ nvme_free_queues(dev);
nvme_release_instance(dev);
nvme_release_prp_pools(dev);
kfree(dev->queues);
@@ -2079,6 +2079,37 @@ static const struct file_operations nvme_dev_fops = {
.compat_ioctl = nvme_dev_ioctl,
};
+static int nvme_dev_start(struct nvme_dev *dev)
+{
+ int result;
+
+ result = nvme_dev_map(dev);
+ if (result)
+ return result;
+
+ result = nvme_configure_admin_queue(dev);
+ if (result)
+ goto unmap;
+
+ spin_lock(&dev_list_lock);
+ list_add(&dev->node, &dev_list);
+ spin_unlock(&dev_list_lock);
+
+ result = nvme_setup_io_queues(dev);
+ if (result)
+ goto disable;
+
+ return 0;
+
+ disable:
+ spin_lock(&dev_list_lock);
+ list_del(&dev->node);
+ spin_unlock(&dev_list_lock);
+ unmap:
+ nvme_dev_unmap(dev);
+ return result;
+}
+
static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
int result = -ENOMEM;
@@ -2106,21 +2137,13 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (result)
goto release;
- result = nvme_dev_map(dev);
+ result = nvme_dev_start(dev);
if (result)
goto release_pools;
- result = nvme_configure_admin_queue(dev);
- if (result)
- goto unmap;
-
- spin_lock(&dev_list_lock);
- list_add(&dev->node, &dev_list);
- spin_unlock(&dev_list_lock);
-
result = nvme_dev_add(dev);
if (result)
- goto delete;
+ goto shutdown;
scnprintf(dev->name, sizeof(dev->name), "nvme%d", dev->instance);
dev->miscdev.minor = MISC_DYNAMIC_MINOR;
@@ -2136,15 +2159,10 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
remove:
nvme_dev_remove(dev);
- delete:
- spin_lock(&dev_list_lock);
- list_del(&dev->node);
- spin_unlock(&dev_list_lock);
-
- nvme_free_queues(dev);
- unmap:
- nvme_dev_unmap(dev);
+ shutdown:
+ nvme_dev_shutdown(dev);
release_pools:
+ nvme_free_queues(dev);
nvme_release_prp_pools(dev);
release:
nvme_release_instance(dev);
--
1.7.0.4
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCHv3 4/5] NVMe: Use normal shutdown
2013-06-24 20:30 [PATCHv3 0/5] NVMe: Power management patch set Keith Busch
` (2 preceding siblings ...)
2013-06-24 20:30 ` [PATCHv3 3/5] NVMe: Separate controller init from disk discovery Keith Busch
@ 2013-06-24 20:30 ` Keith Busch
2013-06-24 20:30 ` [PATCHv3 5/5] NVMe: Add pci suspend/resume driver callbacks Keith Busch
4 siblings, 0 replies; 6+ messages in thread
From: Keith Busch @ 2013-06-24 20:30 UTC (permalink / raw)
The NVMe spec recommends using the normal shutdown sequence when safely
taking the controller offline instead of hitting CC.EN on the next
start-up to reset the controller. The spec recommends a minimum of 1
second for the shutdown to complete. This patch waits 2 seconds to be on
the safe side.
Signed-off-by: Keith Busch <keith.busch at intel.com>
---
drivers/block/nvme-core.c | 26 ++++++++++++++++++++++++++
include/linux/nvme.h | 2 ++
2 files changed, 28 insertions(+), 0 deletions(-)
diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index 21e7e72..f943d49 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -1225,6 +1225,30 @@ static int nvme_enable_ctrl(struct nvme_dev *dev, u64 cap)
return nvme_wait_ready(dev, cap, true);
}
+static int nvme_shutdown_ctrl(struct nvme_dev *dev)
+{
+ unsigned long timeout;
+ u32 cc;
+
+ cc = (readl(&dev->bar->cc) & ~NVME_CC_SHN_MASK) | NVME_CC_SHN_NORMAL;
+ writel(cc, &dev->bar->cc);
+
+ timeout = 2 * HZ + jiffies;
+ while ((readl(&dev->bar->csts) & NVME_CSTS_SHST_MASK) !=
+ NVME_CSTS_SHST_CMPLT) {
+ msleep(100);
+ if (fatal_signal_pending(current))
+ return -EINTR;
+ if (time_after(jiffies, timeout)) {
+ dev_err(&dev->pci_dev->dev,
+ "Device shutdown incomplete; abort shutdown\n");
+ return -ENODEV;
+ }
+ }
+
+ return 0;
+}
+
static int nvme_configure_admin_queue(struct nvme_dev *dev)
{
int result;
@@ -1964,6 +1988,8 @@ static void nvme_dev_shutdown(struct nvme_dev *dev)
list_del(&dev->node);
spin_unlock(&dev_list_lock);
+ if (dev->bar)
+ nvme_shutdown_ctrl(dev);
nvme_dev_unmap(dev);
}
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 5d7c079..474b49b 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -50,6 +50,7 @@ enum {
NVME_CC_SHN_NONE = 0 << 14,
NVME_CC_SHN_NORMAL = 1 << 14,
NVME_CC_SHN_ABRUPT = 2 << 14,
+ NVME_CC_SHN_MASK = 3 << 14,
NVME_CC_IOSQES = 6 << 16,
NVME_CC_IOCQES = 4 << 20,
NVME_CSTS_RDY = 1 << 0,
@@ -57,6 +58,7 @@ enum {
NVME_CSTS_SHST_NORMAL = 0 << 2,
NVME_CSTS_SHST_OCCUR = 1 << 2,
NVME_CSTS_SHST_CMPLT = 2 << 2,
+ NVME_CSTS_SHST_MASK = 3 << 2,
};
struct nvme_id_power_state {
--
1.7.0.4
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCHv3 5/5] NVMe: Add pci suspend/resume driver callbacks
2013-06-24 20:30 [PATCHv3 0/5] NVMe: Power management patch set Keith Busch
` (3 preceding siblings ...)
2013-06-24 20:30 ` [PATCHv3 4/5] NVMe: Use normal shutdown Keith Busch
@ 2013-06-24 20:30 ` Keith Busch
4 siblings, 0 replies; 6+ messages in thread
From: Keith Busch @ 2013-06-24 20:30 UTC (permalink / raw)
Used for going in and out of low power states.
Signed-off-by: Keith Busch <keith.busch at intel.com>
---
drivers/block/nvme-core.c | 30 ++++++++++++++++++++++++++----
1 files changed, 26 insertions(+), 4 deletions(-)
diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index f943d49..600cbbe 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -2212,8 +2212,29 @@ static void nvme_remove(struct pci_dev *pdev)
#define nvme_link_reset NULL
#define nvme_slot_reset NULL
#define nvme_error_resume NULL
-#define nvme_suspend NULL
-#define nvme_resume NULL
+
+static int nvme_suspend(struct device *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct nvme_dev *ndev = pci_get_drvdata(pdev);
+
+ nvme_dev_shutdown(ndev);
+ return 0;
+}
+
+static int nvme_resume(struct device *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct nvme_dev *ndev = pci_get_drvdata(pdev);
+ int ret;
+
+ ret = nvme_dev_start(ndev);
+ if (ret)
+ nvme_free_queues(ndev);
+ return ret;
+}
+
+SIMPLE_DEV_PM_OPS(nvme_dev_pm_ops, nvme_suspend, nvme_resume);
static const struct pci_error_handlers nvme_err_handler = {
.error_detected = nvme_error_detected,
@@ -2237,8 +2258,9 @@ static struct pci_driver nvme_driver = {
.id_table = nvme_id_table,
.probe = nvme_probe,
.remove = nvme_remove,
- .suspend = nvme_suspend,
- .resume = nvme_resume,
+ .driver = {
+ .pm = &nvme_dev_pm_ops,
+ },
.err_handler = &nvme_err_handler,
};
--
1.7.0.4
^ permalink raw reply related [flat|nested] 6+ messages in thread
end of thread, other threads:[~2013-06-24 20:30 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-06-24 20:30 [PATCHv3 0/5] NVMe: Power management patch set Keith Busch
2013-06-24 20:30 ` [PATCHv3 1/5] NVMe: Group pci related actions in functions Keith Busch
2013-06-24 20:30 ` [PATCHv3 2/5] NVMe: Separate queue alloc/free from create/delete Keith Busch
2013-06-24 20:30 ` [PATCHv3 3/5] NVMe: Separate controller init from disk discovery Keith Busch
2013-06-24 20:30 ` [PATCHv3 4/5] NVMe: Use normal shutdown Keith Busch
2013-06-24 20:30 ` [PATCHv3 5/5] NVMe: Add pci suspend/resume driver callbacks Keith Busch
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).