All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] NVMe: Automatic namespace rescan
@ 2015-05-14 20:01 Keith Busch
  2015-05-15 14:37 ` Matthew Wilcox
  0 siblings, 1 reply; 5+ messages in thread
From: Keith Busch @ 2015-05-14 20:01 UTC (permalink / raw)


This rescans device namespaces everytime the controller is reset,
or if the controller posts an asynchronous event indicating namespace
attributes have changed. The namespace scanning work will revalidate
existing namespaces for any attribute changes, such as block format or
capacity, as well as look for new namespaces that may be allocated or
attached to this controller.

If namespaces are deleted, this does not go so far as to delete gendisks;
instead, its capacity will be set to 0.

Signed-off-by: Keith Busch <keith.busch at intel.com>
---
 drivers/block/nvme-core.c |   66 +++++++++++++++++++++++++++++++++++++++------
 include/linux/nvme.h      |    1 +
 include/uapi/linux/nvme.h |    5 ++++
 3 files changed, 64 insertions(+), 8 deletions(-)

diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index 77aa061..f17f9af 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -307,9 +307,16 @@ static void async_req_completion(struct nvme_queue *nvmeq, void *ctx,
 
 	if (status == NVME_SC_SUCCESS || status == NVME_SC_ABORT_REQ)
 		++nvmeq->dev->event_limit;
-	if (status == NVME_SC_SUCCESS)
-		dev_warn(nvmeq->q_dmadev,
-			"async event result %08x\n", result);
+	if (status == NVME_SC_SUCCESS) {
+		dev_warn(nvmeq->q_dmadev, "async event result %08x\n", result);
+		if ((result & 0x3) == NVME_AER_TYPE_NOTICE &&
+		    ((result >> 8) & 0xff) == NVME_AER_INFO_NS_CHANGE) {
+			struct nvme_dev *dev = nvmeq->dev;
+			dev_info(nvmeq->q_dmadev,
+				"namespace change event, rescan\n");
+			schedule_work(&dev->scan_work);
+		}
+	}
 }
 
 static void abort_completion(struct nvme_queue *nvmeq, void *ctx,
@@ -2162,6 +2169,50 @@ static void nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid)
 	kfree(ns);
 }
 
+static struct nvme_ns *nvme_find_ns(struct nvme_dev *dev, unsigned nsid)
+{
+	struct nvme_ns *ns;
+
+	list_for_each_entry(ns, &dev->namespaces, list)
+		if (ns->ns_id == nsid)
+			return ns;
+	return NULL;
+}
+
+static void nvme_scan_namespaces(struct nvme_dev *dev, unsigned nn)
+{
+	struct nvme_ns *ns;
+	unsigned i;
+
+	for (i = 1; i <= nn; i++) {
+		ns = nvme_find_ns(dev, i);
+		if (ns)
+			revalidate_disk(ns->disk);
+		else
+			nvme_alloc_ns(dev, i);
+	}
+}
+
+static void nvme_dev_scan(struct work_struct *work)
+{
+	struct nvme_dev *dev = container_of(work, struct nvme_dev, scan_work);
+	unsigned long nn;
+	void *mem;
+	struct nvme_id_ctrl *ctrl;
+	dma_addr_t dma_addr;
+
+	mem = dma_alloc_coherent(&dev->pci_dev->dev, 4096, &dma_addr, GFP_KERNEL);
+	if (!mem)
+		return;
+	if (nvme_identify(dev, 0, 1, dma_addr))
+		goto free;
+	ctrl = mem;
+	nn = le32_to_cpup(&ctrl->nn);
+	nvme_scan_namespaces(dev, nn);
+ free:
+	dma_free_coherent(&dev->pci_dev->dev, 4096, mem, dma_addr);
+}
+
 static void nvme_create_io_queues(struct nvme_dev *dev)
 {
 	unsigned i;
@@ -2283,7 +2334,6 @@ static int nvme_dev_add(struct nvme_dev *dev)
 {
 	struct pci_dev *pdev = dev->pci_dev;
 	int res;
-	unsigned nn, i;
 	struct nvme_id_ctrl *ctrl;
 	void *mem;
 	dma_addr_t dma_addr;
@@ -2301,7 +2351,6 @@ static int nvme_dev_add(struct nvme_dev *dev)
 	}
 
 	ctrl = mem;
-	nn = le32_to_cpup(&ctrl->nn);
 	dev->oncs = le16_to_cpup(&ctrl->oncs);
 	dev->abort_limit = ctrl->acl + 1;
 	dev->vwc = ctrl->vwc;
@@ -2337,9 +2386,7 @@ static int nvme_dev_add(struct nvme_dev *dev)
 	if (blk_mq_alloc_tag_set(&dev->tagset))
 		return 0;
 
-	for (i = 1; i <= nn; i++)
-		nvme_alloc_ns(dev, i);
-
+	schedule_work(&dev->scan_work);
 	return 0;
 }
 
@@ -2911,6 +2958,7 @@ static int nvme_dev_resume(struct nvme_dev *dev)
 		spin_unlock(&dev_list_lock);
 	} else {
 		nvme_unfreeze_queues(dev);
+		schedule_work(&dev->scan_work);
 		nvme_set_irq_hints(dev);
 	}
 	return 0;
@@ -2989,6 +3037,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	get_device(dev->device);
 
 	INIT_LIST_HEAD(&dev->node);
+	INIT_WORK(&dev->scan_work, nvme_dev_scan);
 	INIT_WORK(&dev->probe_work, nvme_async_probe);
 	schedule_work(&dev->probe_work);
 	return 0;
@@ -3054,6 +3103,7 @@ static void nvme_remove(struct pci_dev *pdev)
 	spin_unlock(&dev_list_lock);
 
 	pci_set_drvdata(pdev, NULL);
+	flush_work(&dev->scan_work);
 	flush_work(&dev->probe_work);
 	flush_work(&dev->reset_work);
 	nvme_dev_remove(dev);
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 8dbd05e..539e5e5 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -92,6 +92,7 @@ struct nvme_dev {
 	work_func_t reset_workfn;
 	struct work_struct reset_work;
 	struct work_struct probe_work;
+	struct work_struct scan_work;
 	char name[12];
 	char serial[20];
 	char model[40];
diff --git a/include/uapi/linux/nvme.h b/include/uapi/linux/nvme.h
index aef9a81..b05d481 100644
--- a/include/uapi/linux/nvme.h
+++ b/include/uapi/linux/nvme.h
@@ -179,6 +179,11 @@ enum {
 	NVME_SMART_CRIT_VOLATILE_MEMORY	= 1 << 4,
 };
 
+enum {
+	NVME_AER_TYPE_NOTICE	= 2,
+	NVME_AER_INFO_NS_CHANGE	= 0,
+};
+
 struct nvme_lba_range_type {
 	__u8			type;
 	__u8			attributes;
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH] NVMe: Automatic namespace rescan
  2015-05-14 20:01 [PATCH] NVMe: Automatic namespace rescan Keith Busch
@ 2015-05-15 14:37 ` Matthew Wilcox
  2015-05-15 15:06   ` Keith Busch
  0 siblings, 1 reply; 5+ messages in thread
From: Matthew Wilcox @ 2015-05-15 14:37 UTC (permalink / raw)


On Thu, May 14, 2015@02:01:47PM -0600, Keith Busch wrote:
> @@ -307,9 +307,16 @@ static void async_req_completion(struct nvme_queue *nvmeq, void *ctx,
>  
>  	if (status == NVME_SC_SUCCESS || status == NVME_SC_ABORT_REQ)
>  		++nvmeq->dev->event_limit;
> -	if (status == NVME_SC_SUCCESS)
> -		dev_warn(nvmeq->q_dmadev,
> -			"async event result %08x\n", result);
> +	if (status == NVME_SC_SUCCESS) {
> +		dev_warn(nvmeq->q_dmadev, "async event result %08x\n", result);
> +		if ((result & 0x3) == NVME_AER_TYPE_NOTICE &&
> +		    ((result >> 8) & 0xff) == NVME_AER_INFO_NS_CHANGE) {
> +			struct nvme_dev *dev = nvmeq->dev;
> +			dev_info(nvmeq->q_dmadev,
> +				"namespace change event, rescan\n");
> +			schedule_work(&dev->scan_work);
> +		}
> +	}
>  }
>  
>  static void abort_completion(struct nvme_queue *nvmeq, void *ctx,

I don't think we want the dev_warn() and the dev_info() for the same event.
How about ...

	if (status != NVME_SC_SUCCESS)
		return;

	switch (result & 0xff07) {
	case NVME_AER_NOTICE_NS_CHANGED:
		dev_info(nvmeq->q_dmadev, "rescanning\n");
		schedule_work(&nvmeq->dev->scan_work);
	default:
		dev_warn(nvmeq->q_dmadev, "async event result %08x\n", result);
	}

with NVME_AER_NOTICE_NS_CHANGED being an enum with value 0x0002

> +static struct nvme_ns *nvme_find_ns(struct nvme_dev *dev, unsigned nsid)
> +{
> +	struct nvme_ns *ns;
> +
> +	list_for_each_entry(ns, &dev->namespaces, list)
> +		if (ns->ns_id == nsid)
> +			return ns;
> +	return NULL;
> +}

Pondering if it's worth keeping the list sorted so we can break out early
if the namespace isn't in the list?

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH] NVMe: Automatic namespace rescan
  2015-05-15 14:37 ` Matthew Wilcox
@ 2015-05-15 15:06   ` Keith Busch
  2015-05-15 21:59     ` Keith Busch
  0 siblings, 1 reply; 5+ messages in thread
From: Keith Busch @ 2015-05-15 15:06 UTC (permalink / raw)


On Fri, 15 May 2015, Matthew Wilcox wrote:
> On Thu, May 14, 2015@02:01:47PM -0600, Keith Busch wrote:
>> @@ -307,9 +307,16 @@ static void async_req_completion(struct nvme_queue *nvmeq, void *ctx,
>
> I don't think we want the dev_warn() and the dev_info() for the same event.
> How about ...
>
> 	if (status != NVME_SC_SUCCESS)
> 		return;
>
> 	switch (result & 0xff07) {
> 	case NVME_AER_NOTICE_NS_CHANGED:
> 		dev_info(nvmeq->q_dmadev, "rescanning\n");
> 		schedule_work(&nvmeq->dev->scan_work);
> 	default:
> 		dev_warn(nvmeq->q_dmadev, "async event result %08x\n", result);
> 	}
>
> with NVME_AER_NOTICE_NS_CHANGED being an enum with value 0x0002

Brilliant!

>> +static struct nvme_ns *nvme_find_ns(struct nvme_dev *dev, unsigned nsid)
>> +{
>> +	struct nvme_ns *ns;
>> +
>> +	list_for_each_entry(ns, &dev->namespaces, list)
>> +		if (ns->ns_id == nsid)
>> +			return ns;
>> +	return NULL;
>> +}
>
> Pondering if it's worth keeping the list sorted so we can break out early
> if the namespace isn't in the list?

The list is actually already sorted since this doesn't allow gaps in
NSIDs. If a namespace is not attached, the driver creates a 0 capacity
block device for it and appends it to the list, so it's always in
ascending order.

Is this a bad idea? Let's say a controller supports 128 namespaces,
but only NSID 128 is attached, we'd see 127 zero capacity /dev/nvme#n#
block devs.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH] NVMe: Automatic namespace rescan
  2015-05-15 15:06   ` Keith Busch
@ 2015-05-15 21:59     ` Keith Busch
  2015-05-17 15:04       ` Brandon Schulz
  0 siblings, 1 reply; 5+ messages in thread
From: Keith Busch @ 2015-05-15 21:59 UTC (permalink / raw)


On Fri, 15 May 2015, Keith Busch wrote:
> On Fri, 15 May 2015, Matthew Wilcox wrote:
>> Pondering if it's worth keeping the list sorted so we can break out early
>> if the namespace isn't in the list?
>
> The list is actually already sorted since this doesn't allow gaps in
> NSIDs. If a namespace is not attached, the driver creates a 0 capacity
> block device for it and appends it to the list, so it's always in
> ascending order.
>
> Is this a bad idea? Let's say a controller supports 128 namespaces,
> but only NSID 128 is attached, we'd see 127 zero capacity /dev/nvme#n#
> block devs.

Decided we don't want a bunch of zero capacity drives if there are gaps
in attached NSIDs. There could be millions after all. But it's not so
simple since the nvmeq's hctx points to hctx from a single namespace's
request_queue, and that's not right. I'll take a stab at decoupling that
next week. In the mean time, this patch is dead.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH] NVMe: Automatic namespace rescan
  2015-05-15 21:59     ` Keith Busch
@ 2015-05-17 15:04       ` Brandon Schulz
  0 siblings, 0 replies; 5+ messages in thread
From: Brandon Schulz @ 2015-05-17 15:04 UTC (permalink / raw)


Agree with your decision that we probably don?t want a bunch of zero
capacity drives if there are gaps in the namespace IDs.  I?d like to see a
version of your original patch get in - we have been carrying a patch
against nvme-legacy internally that does a similar rescan-after-reset but
haven?t had the time to prepare a patch against the current tree yet.

If someone deletes a namespace (vs. detaching it) why wouldn?t you want to
remove the gendisk as well?  This question is  based on your comment in
the original patch: "If namespaces are deleted, this does not go so far as
to delete gendisks; instead, its capacity will be set to 0."  I believe
users would expect to see the device representing the namespace go away if
they deleted the namespace.

Brandon

On 5/15/15, 4:59 PM, "Keith Busch" <keith.busch@intel.com> wrote:

>On Fri, 15 May 2015, Keith Busch wrote:
>> On Fri, 15 May 2015, Matthew Wilcox wrote:
>>> Pondering if it's worth keeping the list sorted so we can break out
>>>early
>>> if the namespace isn't in the list?
>>
>> The list is actually already sorted since this doesn't allow gaps in
>> NSIDs. If a namespace is not attached, the driver creates a 0 capacity
>> block device for it and appends it to the list, so it's always in
>> ascending order.
>>
>> Is this a bad idea? Let's say a controller supports 128 namespaces,
>> but only NSID 128 is attached, we'd see 127 zero capacity /dev/nvme#n#
>> block devs.
>
>Decided we don't want a bunch of zero capacity drives if there are gaps
>in attached NSIDs. There could be millions after all. But it's not so
>simple since the nvmeq's hctx points to hctx from a single namespace's
>request_queue, and that's not right. I'll take a stab at decoupling that
>next week. In the mean time, this patch is dead.
>
>_______________________________________________
>Linux-nvme mailing list
>Linux-nvme at lists.infradead.org
>http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2015-05-17 15:04 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-05-14 20:01 [PATCH] NVMe: Automatic namespace rescan Keith Busch
2015-05-15 14:37 ` Matthew Wilcox
2015-05-15 15:06   ` Keith Busch
2015-05-15 21:59     ` Keith Busch
2015-05-17 15:04       ` Brandon Schulz

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.