linux-acpi.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/3] misc updates for Address Range Scrub
@ 2016-09-29  0:10 Vishal Verma
       [not found] ` <1475107811-8880-1-git-send-email-vishal.l.verma-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
  0 siblings, 1 reply; 7+ messages in thread
From: Vishal Verma @ 2016-09-29  0:10 UTC (permalink / raw)
  To: linux-nvdimm-hn68Rpc1hR1g9hUCZPvPmw
  Cc: Rafael J. Wysocki, linux-acpi-u79uwXL29TY76Z2rM5mHXA

Patch 1 changes the default behaviour on machine check exceptions to
just adding the error address to badblocks accounting instead of starting
a full ARS. The old behaviour can be enabled via sysfs.

Patch 2 and 3 fix a problem where stale badblocks could show up after an
on-demand ARS or an MCE triggered scrub because when clearing poison, we
didn't clear the internal nvdimm_bus->poison_list.


Vishal Verma (3):
  nfit: don't start a full scrub by default for an MCE
  pmem: reduce kmap_atomic sections to the memcpys only
  libnvdimm: clear the internal poison_list when clearing badblocks

 drivers/acpi/nfit/core.c  | 23 +++++++++++++--
 drivers/acpi/nfit/mce.c   | 24 ++++++++++++----
 drivers/acpi/nfit/nfit.h  |  6 ++++
 drivers/nvdimm/bus.c      |  2 ++
 drivers/nvdimm/core.c     | 73 ++++++++++++++++++++++++++++++++++++++++++++---
 drivers/nvdimm/pmem.c     | 28 ++++++++++++++----
 include/linux/libnvdimm.h |  2 ++
 7 files changed, 141 insertions(+), 17 deletions(-)

-- 
2.7.4

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 1/3] nfit: don't start a full scrub by default for an MCE
       [not found] ` <1475107811-8880-1-git-send-email-vishal.l.verma-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
@ 2016-09-29  0:10   ` Vishal Verma
  2016-09-29  0:53     ` Dan Williams
  2016-09-29  0:10   ` [PATCH 2/3] pmem: reduce kmap_atomic sections to the memcpys only Vishal Verma
  2016-09-29  0:10   ` [PATCH 3/3] libnvdimm: clear the internal poison_list when clearing badblocks Vishal Verma
  2 siblings, 1 reply; 7+ messages in thread
From: Vishal Verma @ 2016-09-29  0:10 UTC (permalink / raw)
  To: linux-nvdimm-hn68Rpc1hR1g9hUCZPvPmw
  Cc: Rafael J. Wysocki, linux-acpi-u79uwXL29TY76Z2rM5mHXA

Starting a full Address Range Scrub (ARS) on hitting a memory error
machine check exception may not always be desirable. Provide a way
through sysfs to toggle the behavior between just adding the address
(cache line) where the MCE happened to the poison list and doing a full
scrub. The former (selective insertion of the address) is done
unconditionally.

Cc: linux-acpi-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
Cc: Dan Williams <dan.j.williams-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Cc: Linda Knippers <linda.knippers-ZPxbGqLxI0U@public.gmane.org>
Cc: Rafael J. Wysocki <rafael.j.wysocki-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Signed-off-by: Vishal Verma <vishal.l.verma-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
 drivers/acpi/nfit/core.c | 23 ++++++++++++++++++++---
 drivers/acpi/nfit/mce.c  | 24 +++++++++++++++++++-----
 drivers/acpi/nfit/nfit.h |  6 ++++++
 3 files changed, 45 insertions(+), 8 deletions(-)

diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index 80cc7c0..ec1069e 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -901,6 +901,14 @@ static ssize_t scrub_show(struct device *dev,
 	return rc;
 }
 
+/*
+ * The 'scrub' attribute can only have following values written to it:
+ * '1': Start an on-demand scrub, and enable a full scrub to happen if a
+ *      machine check exception for a memory error is received.
+ * '2': Switch to the default mode where a machine check will only insert
+ *      the address on which the memory error was received into the poison
+ *      and badblocks lists.
+ */
 static ssize_t scrub_store(struct device *dev,
 		struct device_attribute *attr, const char *buf, size_t size)
 {
@@ -911,15 +919,24 @@ static ssize_t scrub_store(struct device *dev,
 	rc = kstrtol(buf, 0, &val);
 	if (rc)
 		return rc;
-	if (val != 1)
-		return -EINVAL;
 
 	device_lock(dev);
 	nd_desc = dev_get_drvdata(dev);
 	if (nd_desc) {
 		struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
 
-		rc = acpi_nfit_ars_rescan(acpi_desc);
+		switch (val) {
+		case MCE_SCRUB_ON:
+			rc = acpi_nfit_ars_rescan(acpi_desc);
+			acpi_desc->scrub_mode = MCE_SCRUB_ON;
+			break;
+		case MCE_SCRUB_OFF:
+			acpi_desc->scrub_mode = MCE_SCRUB_OFF;
+			break;
+		default:
+			rc = -EINVAL;
+			break;
+		}
 	}
 	device_unlock(dev);
 	if (rc)
diff --git a/drivers/acpi/nfit/mce.c b/drivers/acpi/nfit/mce.c
index 161f915..05005e2 100644
--- a/drivers/acpi/nfit/mce.c
+++ b/drivers/acpi/nfit/mce.c
@@ -14,6 +14,7 @@
  */
 #include <linux/notifier.h>
 #include <linux/acpi.h>
+#include <linux/nd.h>
 #include <asm/mce.h>
 #include "nfit.h"
 
@@ -62,12 +63,25 @@ static int nfit_handle_mce(struct notifier_block *nb, unsigned long val,
 		}
 		mutex_unlock(&acpi_desc->init_mutex);
 
-		/*
-		 * We can ignore an -EBUSY here because if an ARS is already
-		 * in progress, just let that be the last authoritative one
-		 */
-		if (found_match)
+		if (!found_match)
+			continue;
+
+		/* If this fails due to an -ENOMEM, there is little we can do */
+		nvdimm_bus_add_poison(acpi_desc->nvdimm_bus,
+				ALIGN(mce->addr, L1_CACHE_BYTES),
+				L1_CACHE_BYTES);
+		nvdimm_region_notify(nfit_spa->nd_region,
+				NVDIMM_REVALIDATE_POISON);
+
+		if (acpi_desc->scrub_mode == MCE_SCRUB_ON) {
+			/*
+			 * We can ignore an -EBUSY here because if an ARS is
+			 * already in progress, just let that be the last
+			 * authoritative one
+			 */
 			acpi_nfit_ars_rescan(acpi_desc);
+		}
+		break;
 	}
 
 	mutex_unlock(&acpi_desc_lock);
diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h
index e894ded..487dc0a 100644
--- a/drivers/acpi/nfit/nfit.h
+++ b/drivers/acpi/nfit/nfit.h
@@ -152,6 +152,7 @@ struct acpi_nfit_desc {
 	struct list_head list;
 	struct kernfs_node *scrub_count_state;
 	unsigned int scrub_count;
+	unsigned int scrub_mode;
 	unsigned int cancel:1;
 	unsigned long dimm_cmd_force_en;
 	unsigned long bus_cmd_force_en;
@@ -159,6 +160,11 @@ struct acpi_nfit_desc {
 			void *iobuf, u64 len, int rw);
 };
 
+enum scrub_mode {
+	MCE_SCRUB_ON = 1,
+	MCE_SCRUB_OFF = 2,
+};
+
 enum nd_blk_mmio_selector {
 	BDW,
 	DCR,
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 2/3] pmem: reduce kmap_atomic sections to the memcpys only
       [not found] ` <1475107811-8880-1-git-send-email-vishal.l.verma-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
  2016-09-29  0:10   ` [PATCH 1/3] nfit: don't start a full scrub by default for an MCE Vishal Verma
@ 2016-09-29  0:10   ` Vishal Verma
  2016-09-29  0:10   ` [PATCH 3/3] libnvdimm: clear the internal poison_list when clearing badblocks Vishal Verma
  2 siblings, 0 replies; 7+ messages in thread
From: Vishal Verma @ 2016-09-29  0:10 UTC (permalink / raw)
  To: linux-nvdimm-hn68Rpc1hR1g9hUCZPvPmw
  Cc: Rafael J. Wysocki, linux-acpi-u79uwXL29TY76Z2rM5mHXA

pmem_do_bvec used to kmap_atomic at the begin, and only unmap at the
end. Things like nvdimm_clear_poison may want to do nvdimm subsystem
bookkeeping operations that may involve taking locks or doing memory
allocations, and we can't do that from the atomic context. Reduce the
atomic context to just what needs it - the memcpy to/from pmem.

Cc: Ross Zwisler <ross.zwisler-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
Signed-off-by: Vishal Verma <vishal.l.verma-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
 drivers/nvdimm/pmem.c | 28 +++++++++++++++++++++++-----
 1 file changed, 23 insertions(+), 5 deletions(-)

diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 571a6c7..ca038d8 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -66,13 +66,32 @@ static void pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset,
 	invalidate_pmem(pmem->virt_addr + offset, len);
 }
 
+static void map_and_copy_to_pmem(void *pmem_addr, struct page *page,
+		unsigned int off, unsigned int len)
+{
+	void *mem = kmap_atomic(page);
+
+	memcpy_to_pmem(pmem_addr, mem + off, len);
+	kunmap_atomic(mem);
+}
+
+static int map_and_copy_from_pmem(struct page *page, unsigned int off,
+		void *pmem_addr, unsigned int len)
+{
+	int rc;
+	void *mem = kmap_atomic(page);
+
+	rc = memcpy_from_pmem(mem + off, pmem_addr, len);
+	kunmap_atomic(mem);
+	return rc;
+}
+
 static int pmem_do_bvec(struct pmem_device *pmem, struct page *page,
 			unsigned int len, unsigned int off, bool is_write,
 			sector_t sector)
 {
 	int rc = 0;
 	bool bad_pmem = false;
-	void *mem = kmap_atomic(page);
 	phys_addr_t pmem_off = sector * 512 + pmem->data_offset;
 	void *pmem_addr = pmem->virt_addr + pmem_off;
 
@@ -83,7 +102,7 @@ static int pmem_do_bvec(struct pmem_device *pmem, struct page *page,
 		if (unlikely(bad_pmem))
 			rc = -EIO;
 		else {
-			rc = memcpy_from_pmem(mem + off, pmem_addr, len);
+			rc = map_and_copy_from_pmem(page, off, pmem_addr, len);
 			flush_dcache_page(page);
 		}
 	} else {
@@ -102,14 +121,13 @@ static int pmem_do_bvec(struct pmem_device *pmem, struct page *page,
 		 * after clear poison.
 		 */
 		flush_dcache_page(page);
-		memcpy_to_pmem(pmem_addr, mem + off, len);
+		map_and_copy_to_pmem(pmem_addr, page, off, len);
 		if (unlikely(bad_pmem)) {
 			pmem_clear_poison(pmem, pmem_off, len);
-			memcpy_to_pmem(pmem_addr, mem + off, len);
+			map_and_copy_to_pmem(pmem_addr, page, off, len);
 		}
 	}
 
-	kunmap_atomic(mem);
 	return rc;
 }
 
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 3/3] libnvdimm: clear the internal poison_list when clearing badblocks
       [not found] ` <1475107811-8880-1-git-send-email-vishal.l.verma-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
  2016-09-29  0:10   ` [PATCH 1/3] nfit: don't start a full scrub by default for an MCE Vishal Verma
  2016-09-29  0:10   ` [PATCH 2/3] pmem: reduce kmap_atomic sections to the memcpys only Vishal Verma
@ 2016-09-29  0:10   ` Vishal Verma
  2 siblings, 0 replies; 7+ messages in thread
From: Vishal Verma @ 2016-09-29  0:10 UTC (permalink / raw)
  To: linux-nvdimm-hn68Rpc1hR1g9hUCZPvPmw
  Cc: Rafael J. Wysocki, linux-acpi-u79uwXL29TY76Z2rM5mHXA

nvdimm_clear_poison cleared the user-visible badblocks, and sent
commands to the NVDIMM to clear the areas marked as 'poison', but it
neglected to clear the same areas from the internal poison_list which is
used to marshal ARS results before sorting them by namespace. As a
result, once on-demand ARS functionality was added:

37b137f nfit, libnvdimm: allow an ARS scrub to be triggered on demand

A scrub triggered from either sysfs or an MCE was found to be adding
stale entries that had been cleared from gendisk->badblocks, but were
still present in nvdimm_bus->poison_list.

This adds the missing step of clearing poison_list entries when clearing
poison, so that it is in sync with badblocks.

Fixes: 37b137f nfit, libnvdimm: allow an ARS scrub to be triggered on demand
Cc: Dan Williams <dan.j.williams-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Signed-off-by: Vishal Verma <vishal.l.verma-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
 drivers/nvdimm/bus.c      |  2 ++
 drivers/nvdimm/core.c     | 73 ++++++++++++++++++++++++++++++++++++++++++++---
 include/linux/libnvdimm.h |  2 ++
 3 files changed, 73 insertions(+), 4 deletions(-)

diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index 935866f..a8b6949 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -217,6 +217,8 @@ long nvdimm_clear_poison(struct device *dev, phys_addr_t phys,
 		return rc;
 	if (cmd_rc < 0)
 		return cmd_rc;
+
+	nvdimm_clear_from_poison_list(nvdimm_bus, phys, len);
 	return clear_err.cleared;
 }
 EXPORT_SYMBOL_GPL(nvdimm_clear_poison);
diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c
index 715583f..42e40db 100644
--- a/drivers/nvdimm/core.c
+++ b/drivers/nvdimm/core.c
@@ -541,11 +541,12 @@ void nvdimm_badblocks_populate(struct nd_region *nd_region,
 }
 EXPORT_SYMBOL_GPL(nvdimm_badblocks_populate);
 
-static int add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
+static int add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length,
+			gfp_t flags)
 {
 	struct nd_poison *pl;
 
-	pl = kzalloc(sizeof(*pl), GFP_KERNEL);
+	pl = kzalloc(sizeof(*pl), flags);
 	if (!pl)
 		return -ENOMEM;
 
@@ -561,7 +562,7 @@ static int bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
 	struct nd_poison *pl;
 
 	if (list_empty(&nvdimm_bus->poison_list))
-		return add_poison(nvdimm_bus, addr, length);
+		return add_poison(nvdimm_bus, addr, length, GFP_KERNEL);
 
 	/*
 	 * There is a chance this is a duplicate, check for those first.
@@ -581,7 +582,7 @@ static int bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
 	 * as any overlapping ranges will get resolved when the list is consumed
 	 * and converted to badblocks
 	 */
-	return add_poison(nvdimm_bus, addr, length);
+	return add_poison(nvdimm_bus, addr, length, GFP_KERNEL);
 }
 
 int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
@@ -596,6 +597,70 @@ int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
 }
 EXPORT_SYMBOL_GPL(nvdimm_bus_add_poison);
 
+void nvdimm_clear_from_poison_list(struct nvdimm_bus *nvdimm_bus,
+		phys_addr_t start, unsigned int len)
+{
+	struct list_head *poison_list = &nvdimm_bus->poison_list;
+	u64 clr_end = start + len - 1;
+	struct nd_poison *pl, *next;
+
+	nvdimm_bus_lock(&nvdimm_bus->dev);
+	WARN_ON_ONCE(list_empty(poison_list));
+
+	/*
+	 * [start, clr_end] is the poison interval being cleared.
+	 * [pl->start, pl_end] is the poison_list entry we're comparing
+	 * the above interval against. The poison list entry may need
+	 * to be modified (update either start or length), deleted, or
+	 * split into two based on the overlap characteristics
+	 */
+
+	list_for_each_entry_safe(pl, next, poison_list, list) {
+		u64 pl_end = pl->start + pl->length - 1;
+
+		/* Skip intervals with no intersection */
+		if (pl_end < start)
+			continue;
+		if (pl->start >  clr_end)
+			continue;
+		/* Delete completely overlapped poison entries */
+		if ((pl->start >= start) && (pl_end <= clr_end)) {
+			list_del(&pl->list);
+			kfree(pl);
+			continue;
+		}
+		/* Adjust start point of partially cleared entries */
+		if ((start <= pl->start) && (clr_end > pl->start)) {
+			pl->length -= clr_end - pl->start + 1;
+			pl->start = clr_end + 1;
+			continue;
+		}
+		/* Adjust pl->length for partial clearing at the tail end */
+		if ((pl->start < start) && (pl_end <= clr_end)) {
+			/* pl->start remains the same */
+			pl->length = start - pl->start;
+			continue;
+		}
+		/*
+		 * If clearing in the middle of an entry, we split it into
+		 * two by modifying the current entry to represent one half of
+		 * the split, and adding a new entry for the second half.
+		 */
+		if ((pl->start < start) && (pl_end > clr_end)) {
+			u64 new_start = clr_end + 1;
+			u64 new_len = pl_end - new_start + 1;
+
+			/* Add new entry covering the right half */
+			add_poison(nvdimm_bus, new_start, new_len, GFP_NOIO);
+			/* Adjust this entry to cover the left half */
+			pl->length = start - pl->start;
+			continue;
+		}
+	}
+	nvdimm_bus_unlock(&nvdimm_bus->dev);
+}
+EXPORT_SYMBOL_GPL(nvdimm_clear_from_poison_list);
+
 #ifdef CONFIG_BLK_DEV_INTEGRITY
 int nd_integrity_init(struct gendisk *disk, unsigned long meta_size)
 {
diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index b519e13..bbfce62 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -129,6 +129,8 @@ static inline struct nd_blk_region_desc *to_blk_region_desc(
 }
 
 int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length);
+void nvdimm_clear_from_poison_list(struct nvdimm_bus *nvdimm_bus,
+		phys_addr_t start, unsigned int len);
 struct nvdimm_bus *nvdimm_bus_register(struct device *parent,
 		struct nvdimm_bus_descriptor *nfit_desc);
 void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus);
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH 1/3] nfit: don't start a full scrub by default for an MCE
  2016-09-29  0:10   ` [PATCH 1/3] nfit: don't start a full scrub by default for an MCE Vishal Verma
@ 2016-09-29  0:53     ` Dan Williams
       [not found]       ` <CAPcyv4hsZQYsw=d5tn=yY-tBzMnTT2YP+ZJHd4EtLpxxf-S_ow-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 7+ messages in thread
From: Dan Williams @ 2016-09-29  0:53 UTC (permalink / raw)
  To: Vishal Verma
  Cc: linux-nvdimm@lists.01.org, Ross Zwisler, Linda Knippers,
	Linux ACPI, Rafael J. Wysocki

On Wed, Sep 28, 2016 at 5:10 PM, Vishal Verma <vishal.l.verma@intel.com> wrote:
> Starting a full Address Range Scrub (ARS) on hitting a memory error
> machine check exception may not always be desirable. Provide a way
> through sysfs to toggle the behavior between just adding the address
> (cache line) where the MCE happened to the poison list and doing a full
> scrub. The former (selective insertion of the address) is done
> unconditionally.


>
> Cc: linux-acpi@vger.kernel.org
> Cc: Dan Williams <dan.j.williams@intel.com>
> Cc: Linda Knippers <linda.knippers@hpe.com>
> Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
> ---
>  drivers/acpi/nfit/core.c | 23 ++++++++++++++++++++---
>  drivers/acpi/nfit/mce.c  | 24 +++++++++++++++++++-----
>  drivers/acpi/nfit/nfit.h |  6 ++++++
>  3 files changed, 45 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
> index 80cc7c0..ec1069e 100644
> --- a/drivers/acpi/nfit/core.c
> +++ b/drivers/acpi/nfit/core.c
> @@ -901,6 +901,14 @@ static ssize_t scrub_show(struct device *dev,

I think scrub_show() should display the current auto-scrub mode, right?

>         return rc;
>  }
>
> +/*
> + * The 'scrub' attribute can only have following values written to it:
> + * '1': Start an on-demand scrub, and enable a full scrub to happen if a
> + *      machine check exception for a memory error is received.
> + * '2': Switch to the default mode where a machine check will only insert
> + *      the address on which the memory error was received into the poison
> + *      and badblocks lists.
> + */
>  static ssize_t scrub_store(struct device *dev,
>                 struct device_attribute *attr, const char *buf, size_t size)
>  {
> @@ -911,15 +919,24 @@ static ssize_t scrub_store(struct device *dev,
>         rc = kstrtol(buf, 0, &val);
>         if (rc)
>                 return rc;
> -       if (val != 1)
> -               return -EINVAL;
>
>         device_lock(dev);
>         nd_desc = dev_get_drvdata(dev);
>         if (nd_desc) {
>                 struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
>
> -               rc = acpi_nfit_ars_rescan(acpi_desc);
> +               switch (val) {
> +               case MCE_SCRUB_ON:
> +                       rc = acpi_nfit_ars_rescan(acpi_desc);
> +                       acpi_desc->scrub_mode = MCE_SCRUB_ON;
> +                       break;
> +               case MCE_SCRUB_OFF:
> +                       acpi_desc->scrub_mode = MCE_SCRUB_OFF;
> +                       break;

Shouldn't this still kick off a scrub?  It's awkward that the only way
to run a scrub is to put the driver into automatic re-scrub mode.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 1/3] nfit: don't start a full scrub by default for an MCE
       [not found]       ` <CAPcyv4hsZQYsw=d5tn=yY-tBzMnTT2YP+ZJHd4EtLpxxf-S_ow-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2016-09-29  2:26         ` Dan Williams
  0 siblings, 0 replies; 7+ messages in thread
From: Dan Williams @ 2016-09-29  2:26 UTC (permalink / raw)
  To: Vishal Verma
  Cc: Linux ACPI, Rafael J. Wysocki,
	linux-nvdimm-hn68Rpc1hR1g9hUCZPvPmw@public.gmane.org

On Wed, Sep 28, 2016 at 5:53 PM, Dan Williams <dan.j.williams-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org> wrote:
> On Wed, Sep 28, 2016 at 5:10 PM, Vishal Verma <vishal.l.verma-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org> wrote:
>> Starting a full Address Range Scrub (ARS) on hitting a memory error
>> machine check exception may not always be desirable. Provide a way
>> through sysfs to toggle the behavior between just adding the address
>> (cache line) where the MCE happened to the poison list and doing a full
>> scrub. The former (selective insertion of the address) is done
>> unconditionally.
>
>
>>
>> Cc: linux-acpi-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
>> Cc: Dan Williams <dan.j.williams-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
>> Cc: Linda Knippers <linda.knippers-ZPxbGqLxI0U@public.gmane.org>
>> Cc: Rafael J. Wysocki <rafael.j.wysocki-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
>> Signed-off-by: Vishal Verma <vishal.l.verma-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
>> ---
>>  drivers/acpi/nfit/core.c | 23 ++++++++++++++++++++---
>>  drivers/acpi/nfit/mce.c  | 24 +++++++++++++++++++-----
>>  drivers/acpi/nfit/nfit.h |  6 ++++++
>>  3 files changed, 45 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
>> index 80cc7c0..ec1069e 100644
>> --- a/drivers/acpi/nfit/core.c
>> +++ b/drivers/acpi/nfit/core.c
>> @@ -901,6 +901,14 @@ static ssize_t scrub_show(struct device *dev,
>
> I think scrub_show() should display the current auto-scrub mode, right?
>
>>         return rc;
>>  }
>>
>> +/*
>> + * The 'scrub' attribute can only have following values written to it:
>> + * '1': Start an on-demand scrub, and enable a full scrub to happen if a
>> + *      machine check exception for a memory error is received.
>> + * '2': Switch to the default mode where a machine check will only insert
>> + *      the address on which the memory error was received into the poison
>> + *      and badblocks lists.
>> + */
>>  static ssize_t scrub_store(struct device *dev,
>>                 struct device_attribute *attr, const char *buf, size_t size)
>>  {
>> @@ -911,15 +919,24 @@ static ssize_t scrub_store(struct device *dev,
>>         rc = kstrtol(buf, 0, &val);
>>         if (rc)
>>                 return rc;
>> -       if (val != 1)
>> -               return -EINVAL;
>>
>>         device_lock(dev);
>>         nd_desc = dev_get_drvdata(dev);
>>         if (nd_desc) {
>>                 struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
>>
>> -               rc = acpi_nfit_ars_rescan(acpi_desc);
>> +               switch (val) {
>> +               case MCE_SCRUB_ON:
>> +                       rc = acpi_nfit_ars_rescan(acpi_desc);
>> +                       acpi_desc->scrub_mode = MCE_SCRUB_ON;
>> +                       break;
>> +               case MCE_SCRUB_OFF:
>> +                       acpi_desc->scrub_mode = MCE_SCRUB_OFF;
>> +                       break;
>
> Shouldn't this still kick off a scrub?  It's awkward that the only way
> to run a scrub is to put the driver into automatic re-scrub mode.

Hmm, although it will probably still be awkward if we make it do a
scrub in this case.  Might be better to have a separate "auto_scrub"
property that sets what to do on latent error detection.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 3/3] libnvdimm: clear the internal poison_list when clearing badblocks
       [not found] ` <1475277571-10152-1-git-send-email-vishal.l.verma-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
@ 2016-09-30 23:19   ` Vishal Verma
  0 siblings, 0 replies; 7+ messages in thread
From: Vishal Verma @ 2016-09-30 23:19 UTC (permalink / raw)
  To: linux-nvdimm-hn68Rpc1hR1g9hUCZPvPmw
  Cc: Rafael J. Wysocki, linux-acpi-u79uwXL29TY76Z2rM5mHXA

nvdimm_clear_poison cleared the user-visible badblocks, and sent
commands to the NVDIMM to clear the areas marked as 'poison', but it
neglected to clear the same areas from the internal poison_list which is
used to marshal ARS results before sorting them by namespace. As a
result, once on-demand ARS functionality was added:

37b137f nfit, libnvdimm: allow an ARS scrub to be triggered on demand

A scrub triggered from either sysfs or an MCE was found to be adding
stale entries that had been cleared from gendisk->badblocks, but were
still present in nvdimm_bus->poison_list. Additionally, the stale entries
could be triggered into producing stale disk->badblocks by simply disabling
and re-enabling the namespace or region.

This adds the missing step of clearing poison_list entries when clearing
poison, so that it is always in sync with badblocks.

Fixes: 37b137f nfit, libnvdimm: allow an ARS scrub to be triggered on demand
Cc: Dan Williams <dan.j.williams-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Signed-off-by: Vishal Verma <vishal.l.verma-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
 drivers/nvdimm/bus.c      |  2 ++
 drivers/nvdimm/core.c     | 73 ++++++++++++++++++++++++++++++++++++++++++++---
 include/linux/libnvdimm.h |  2 ++
 3 files changed, 73 insertions(+), 4 deletions(-)

diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index 935866f..a8b6949 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -217,6 +217,8 @@ long nvdimm_clear_poison(struct device *dev, phys_addr_t phys,
 		return rc;
 	if (cmd_rc < 0)
 		return cmd_rc;
+
+	nvdimm_clear_from_poison_list(nvdimm_bus, phys, len);
 	return clear_err.cleared;
 }
 EXPORT_SYMBOL_GPL(nvdimm_clear_poison);
diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c
index 715583f..42e40db 100644
--- a/drivers/nvdimm/core.c
+++ b/drivers/nvdimm/core.c
@@ -541,11 +541,12 @@ void nvdimm_badblocks_populate(struct nd_region *nd_region,
 }
 EXPORT_SYMBOL_GPL(nvdimm_badblocks_populate);
 
-static int add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
+static int add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length,
+			gfp_t flags)
 {
 	struct nd_poison *pl;
 
-	pl = kzalloc(sizeof(*pl), GFP_KERNEL);
+	pl = kzalloc(sizeof(*pl), flags);
 	if (!pl)
 		return -ENOMEM;
 
@@ -561,7 +562,7 @@ static int bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
 	struct nd_poison *pl;
 
 	if (list_empty(&nvdimm_bus->poison_list))
-		return add_poison(nvdimm_bus, addr, length);
+		return add_poison(nvdimm_bus, addr, length, GFP_KERNEL);
 
 	/*
 	 * There is a chance this is a duplicate, check for those first.
@@ -581,7 +582,7 @@ static int bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
 	 * as any overlapping ranges will get resolved when the list is consumed
 	 * and converted to badblocks
 	 */
-	return add_poison(nvdimm_bus, addr, length);
+	return add_poison(nvdimm_bus, addr, length, GFP_KERNEL);
 }
 
 int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
@@ -596,6 +597,70 @@ int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
 }
 EXPORT_SYMBOL_GPL(nvdimm_bus_add_poison);
 
+void nvdimm_clear_from_poison_list(struct nvdimm_bus *nvdimm_bus,
+		phys_addr_t start, unsigned int len)
+{
+	struct list_head *poison_list = &nvdimm_bus->poison_list;
+	u64 clr_end = start + len - 1;
+	struct nd_poison *pl, *next;
+
+	nvdimm_bus_lock(&nvdimm_bus->dev);
+	WARN_ON_ONCE(list_empty(poison_list));
+
+	/*
+	 * [start, clr_end] is the poison interval being cleared.
+	 * [pl->start, pl_end] is the poison_list entry we're comparing
+	 * the above interval against. The poison list entry may need
+	 * to be modified (update either start or length), deleted, or
+	 * split into two based on the overlap characteristics
+	 */
+
+	list_for_each_entry_safe(pl, next, poison_list, list) {
+		u64 pl_end = pl->start + pl->length - 1;
+
+		/* Skip intervals with no intersection */
+		if (pl_end < start)
+			continue;
+		if (pl->start >  clr_end)
+			continue;
+		/* Delete completely overlapped poison entries */
+		if ((pl->start >= start) && (pl_end <= clr_end)) {
+			list_del(&pl->list);
+			kfree(pl);
+			continue;
+		}
+		/* Adjust start point of partially cleared entries */
+		if ((start <= pl->start) && (clr_end > pl->start)) {
+			pl->length -= clr_end - pl->start + 1;
+			pl->start = clr_end + 1;
+			continue;
+		}
+		/* Adjust pl->length for partial clearing at the tail end */
+		if ((pl->start < start) && (pl_end <= clr_end)) {
+			/* pl->start remains the same */
+			pl->length = start - pl->start;
+			continue;
+		}
+		/*
+		 * If clearing in the middle of an entry, we split it into
+		 * two by modifying the current entry to represent one half of
+		 * the split, and adding a new entry for the second half.
+		 */
+		if ((pl->start < start) && (pl_end > clr_end)) {
+			u64 new_start = clr_end + 1;
+			u64 new_len = pl_end - new_start + 1;
+
+			/* Add new entry covering the right half */
+			add_poison(nvdimm_bus, new_start, new_len, GFP_NOIO);
+			/* Adjust this entry to cover the left half */
+			pl->length = start - pl->start;
+			continue;
+		}
+	}
+	nvdimm_bus_unlock(&nvdimm_bus->dev);
+}
+EXPORT_SYMBOL_GPL(nvdimm_clear_from_poison_list);
+
 #ifdef CONFIG_BLK_DEV_INTEGRITY
 int nd_integrity_init(struct gendisk *disk, unsigned long meta_size)
 {
diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index b519e13..bbfce62 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -129,6 +129,8 @@ static inline struct nd_blk_region_desc *to_blk_region_desc(
 }
 
 int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length);
+void nvdimm_clear_from_poison_list(struct nvdimm_bus *nvdimm_bus,
+		phys_addr_t start, unsigned int len);
 struct nvdimm_bus *nvdimm_bus_register(struct device *parent,
 		struct nvdimm_bus_descriptor *nfit_desc);
 void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus);
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2016-09-30 23:19 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2016-09-29  0:10 [PATCH 0/3] misc updates for Address Range Scrub Vishal Verma
     [not found] ` <1475107811-8880-1-git-send-email-vishal.l.verma-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
2016-09-29  0:10   ` [PATCH 1/3] nfit: don't start a full scrub by default for an MCE Vishal Verma
2016-09-29  0:53     ` Dan Williams
     [not found]       ` <CAPcyv4hsZQYsw=d5tn=yY-tBzMnTT2YP+ZJHd4EtLpxxf-S_ow-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2016-09-29  2:26         ` Dan Williams
2016-09-29  0:10   ` [PATCH 2/3] pmem: reduce kmap_atomic sections to the memcpys only Vishal Verma
2016-09-29  0:10   ` [PATCH 3/3] libnvdimm: clear the internal poison_list when clearing badblocks Vishal Verma
  -- strict thread matches above, loose matches on Subject: below --
2016-09-30 23:19 [PATCH 0/3] misc updates for Address Range Scrub Vishal Verma
     [not found] ` <1475277571-10152-1-git-send-email-vishal.l.verma-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
2016-09-30 23:19   ` [PATCH 3/3] libnvdimm: clear the internal poison_list when clearing badblocks Vishal Verma

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).