* [PATCH 1/4] acpi: add find error record in BERT function
[not found] ` <152236282506.35558.2067249639136170490.stgit-Cxk7aZI4ujnJARH06PadV2t3HXsI98Cx0E9HWUfgJXw@public.gmane.org>
@ 2018-03-29 22:37 ` Dave Jiang
[not found] ` <152236302712.35558.17322719540329044966.stgit-Cxk7aZI4ujnJARH06PadV2t3HXsI98Cx0E9HWUfgJXw@public.gmane.org>
2018-03-29 22:37 ` [PATCH 2/4] acpi/libnvdimm: search through BERT records and add to nvdimm badblocks Dave Jiang
` (3 subsequent siblings)
4 siblings, 1 reply; 11+ messages in thread
From: Dave Jiang @ 2018-03-29 22:37 UTC (permalink / raw)
To: dan.j.williams-ral2JQCrhuEAvxtiuMwx3w
Cc: tony.luck-ral2JQCrhuEAvxtiuMwx3w,
linux-nvdimm-hn68Rpc1hR1g9hUCZPvPmw, rjw-LthD3rsA81gm4RdzfppkhA,
linux-acpi-u79uwXL29TY76Z2rM5mHXA, Ying Huang,
lenb-DgEjT+Ai2ygdnm+yROfE0A
Add a helper function that searches through the BERT records and matches
memory errors that fall within the given resource range. A callback
function is passed in by the caller to process the matched memory
records. This is in preparation for adding bad memory ranges for nvdimm
from the BERT.
Signed-off-by: Dave Jiang <dave.jiang-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Cc: Ying Huang <ying.huang-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
drivers/acpi/apei/bert.c | 137 ++++++++++++++++++++++++++++++++++++++++++----
include/linux/acpi.h | 10 +++
2 files changed, 134 insertions(+), 13 deletions(-)
diff --git a/drivers/acpi/apei/bert.c b/drivers/acpi/apei/bert.c
index 12771fcf0417..9569c15bd616 100644
--- a/drivers/acpi/apei/bert.c
+++ b/drivers/acpi/apei/bert.c
@@ -26,6 +26,7 @@
#include <linux/init.h>
#include <linux/acpi.h>
#include <linux/io.h>
+#include <acpi/ghes.h>
#include "apei-internal.h"
@@ -34,33 +35,36 @@
static int bert_disable;
-static void __init bert_print_all(struct acpi_bert_region *region,
- unsigned int region_len)
+static int bert_process_region(struct acpi_bert_region *region,
+ unsigned int region_len,
+ int (*process)(struct acpi_hest_generic_status *estatus,
+ void *data), void *data)
{
struct acpi_hest_generic_status *estatus =
(struct acpi_hest_generic_status *)region;
int remain = region_len;
u32 estatus_len;
+ int rc;
if (!estatus->block_status)
- return;
+ return -ENXIO;
while (remain > sizeof(struct acpi_bert_region)) {
if (cper_estatus_check(estatus)) {
pr_err(FW_BUG "Invalid error record.\n");
- return;
+ return -ENXIO;
}
estatus_len = cper_estatus_len(estatus);
if (remain < estatus_len) {
- pr_err(FW_BUG "Truncated status block (length: %u).\n",
- estatus_len);
- return;
+ pr_err(FW_BUG "Truncated status block (len: %u).\n",
+ estatus_len);
+ return -ENXIO;
}
- pr_info_once("Error records from previous boot:\n");
-
- cper_estatus_print(KERN_INFO HW_ERR, estatus);
+ rc = process(estatus, data);
+ if (rc < 0)
+ return rc;
/*
* Because the boot error source is "one-time polled" type,
@@ -72,10 +76,22 @@ static void __init bert_print_all(struct acpi_bert_region *region,
estatus = (void *)estatus + estatus_len;
/* No more error records. */
if (!estatus->block_status)
- return;
+ return -ENXIO;
remain -= estatus_len;
}
+
+ return 0;
+}
+
+static int __init bert_print(struct acpi_hest_generic_status *estatus,
+ void *data)
+{
+ pr_info_once("Error records from previous boot:\n");
+
+ cper_estatus_print(KERN_INFO HW_ERR, estatus);
+
+ return 0;
}
static int __init setup_bert_disable(char *str)
@@ -86,7 +102,7 @@ static int __init setup_bert_disable(char *str)
}
__setup("bert_disable", setup_bert_disable);
-static int __init bert_check_table(struct acpi_table_bert *bert_tab)
+static int bert_check_table(struct acpi_table_bert *bert_tab)
{
if (bert_tab->header.length < sizeof(struct acpi_table_bert) ||
bert_tab->region_length < sizeof(struct acpi_bert_region))
@@ -138,7 +154,8 @@ static int __init bert_init(void)
goto out_fini;
boot_error_region = ioremap_cache(bert_tab->address, region_len);
if (boot_error_region) {
- bert_print_all(boot_error_region, region_len);
+ bert_process_region(boot_error_region, region_len,
+ bert_print, NULL);
iounmap(boot_error_region);
} else {
rc = -ENOMEM;
@@ -152,3 +169,97 @@ static int __init bert_init(void)
}
late_initcall(bert_init);
+
+struct mem_err_cb_ctx
+{
+ void (*cb)(void *data, u64 addr, u64 len);
+ void *data;
+ u64 addr;
+ u64 len;
+};
+
+static int bert_process_mem_err(struct acpi_hest_generic_status *estatus,
+ void *data)
+{
+ struct mem_err_cb_ctx *ctx = data;
+ u16 severity;
+ u64 end = ctx->addr + ctx->len - 1;
+ struct acpi_hest_generic_data *gdata;
+ int found = 0;
+
+ severity = estatus->error_severity;
+ if (severity != CPER_SEV_CORRECTED) {
+ apei_estatus_for_each_section(estatus, gdata) {
+ guid_t *sec_type =
+ (guid_t *)gdata->section_type;
+ struct cper_sec_mem_err *mem_err =
+ acpi_hest_get_payload(gdata);
+
+ if (!guid_equal(sec_type,
+ &CPER_SEC_PLATFORM_MEM))
+ continue;
+
+ if (!(mem_err->validation_bits &
+ CPER_MEM_VALID_PA))
+ continue;
+
+ if (ctx->addr > mem_err->physical_addr ||
+ end < mem_err->physical_addr)
+ continue;
+
+ ctx->cb(ctx->data, mem_err->physical_addr,
+ L1_CACHE_BYTES);
+ found++;
+ }
+ }
+
+ return found;
+}
+
+int bert_find_mem_error_record(void (*cb)(void *data, u64 addr, u64 len),
+ void *data, u64 addr, u64 len)
+{
+ acpi_status status;
+ int rc;
+ unsigned int region_len;
+ struct acpi_bert_region *bert_region;
+ struct acpi_table_bert *bert_tab;
+ struct mem_err_cb_ctx ctx = {
+ .cb = cb,
+ .data = data,
+ .addr = addr,
+ .len = len,
+ };
+
+ if (acpi_disabled)
+ return 0;
+
+ status = acpi_get_table(ACPI_SIG_BERT, 0,
+ (struct acpi_table_header **)&bert_tab);
+ if (status == AE_NOT_FOUND)
+ return 0;
+
+ if (ACPI_FAILURE(status))
+ return -EINVAL;
+
+ rc = bert_check_table(bert_tab);
+ if (rc)
+ return rc;
+
+ region_len = bert_tab->region_length;
+ bert_region = acpi_os_map_memory(bert_tab->address, region_len);
+ if (!bert_region) {
+ rc = -ENOMEM;
+ goto put_table;
+ }
+
+ rc = bert_process_region(bert_region, region_len,
+ bert_process_mem_err, &ctx);
+
+ acpi_os_unmap_memory(bert_region, region_len);
+put_table:
+ acpi_put_table((struct acpi_table_header *)bert_tab);
+
+ return rc;
+}
+EXPORT_SYMBOL_GPL(bert_find_mem_error_record);
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 968173ec2726..57ed7b39f386 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -1146,6 +1146,10 @@ int __acpi_probe_device_table(struct acpi_probe_entry *start, int nr);
(&ACPI_PROBE_TABLE_END(t) - \
&ACPI_PROBE_TABLE(t))); \
})
+
+int bert_find_mem_error_record(
+ void (*cb)(void *data, u64 addr, u64 len),
+ void *data, u64 addr, u64 len);
#else
static inline int acpi_dev_get_property(struct acpi_device *adev,
const char *name, acpi_object_type type,
@@ -1247,6 +1251,12 @@ acpi_graph_get_remote_endpoint(const struct fwnode_handle *fwnode,
(void *) data }
#define acpi_probe_device_table(t) ({ int __r = 0; __r;})
+int bert_find_mem_error_record(
+ void (*cb)(void *data, u64 addr, u64 len),
+ void *data, u64 addr, u64 len)
+{
+ return -EOPNOTSUPP;
+}
#endif
#ifdef CONFIG_ACPI_TABLE_UPGRADE
^ permalink raw reply related [flat|nested] 11+ messages in thread* [PATCH 2/4] acpi/libnvdimm: search through BERT records and add to nvdimm badblocks
[not found] ` <152236282506.35558.2067249639136170490.stgit-Cxk7aZI4ujnJARH06PadV2t3HXsI98Cx0E9HWUfgJXw@public.gmane.org>
2018-03-29 22:37 ` [PATCH 1/4] acpi: add find error record in BERT function Dave Jiang
@ 2018-03-29 22:37 ` Dave Jiang
2018-03-29 22:37 ` [PATCH 3/4] acpi/nfit: removing ARS timeout and change scrubbing to delayed work Dave Jiang
` (2 subsequent siblings)
4 siblings, 0 replies; 11+ messages in thread
From: Dave Jiang @ 2018-03-29 22:37 UTC (permalink / raw)
To: dan.j.williams-ral2JQCrhuEAvxtiuMwx3w
Cc: linux-acpi-u79uwXL29TY76Z2rM5mHXA,
tony.luck-ral2JQCrhuEAvxtiuMwx3w, rjw-LthD3rsA81gm4RdzfppkhA,
lenb-DgEjT+Ai2ygdnm+yROfE0A, linux-nvdimm-hn68Rpc1hR1g9hUCZPvPmw
To avoid hitting bad addresses in nvdimm left over from the previous boot,
search through the BERT records to find the matching addresses and add them
to the nvdimm_bus badblocks.
Signed-off-by: Dave Jiang <dave.jiang-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
drivers/acpi/nfit/core.c | 22 ++++++++++++++++++++++
drivers/nvdimm/core.c | 6 ++++++
include/linux/libnvdimm.h | 1 +
3 files changed, 29 insertions(+)
diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index bbe48ad20886..3e3b95298a21 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -2639,6 +2639,15 @@ static bool nfit_spa_is_volatile(struct acpi_nfit_system_address *spa)
nfit_spa_type(spa) == NFIT_SPA_VOLATILE);
}
+static void acpi_nfit_bert_callback(void *data, u64 addr, u64 len)
+{
+ struct nd_region *nd_region = (struct nd_region *)data;
+ struct nvdimm_bus *nvdimm_bus = nvdimm_region_to_bus(nd_region);
+
+ nvdimm_bus_add_badrange(nvdimm_bus, ALIGN(addr, L1_CACHE_BYTES), len);
+ nvdimm_region_notify(nd_region, NVDIMM_REVALIDATE_POISON);
+}
+
static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
struct nfit_spa *nfit_spa)
{
@@ -2735,6 +2744,19 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
if (rc)
dev_err(acpi_desc->dev, "failed to register spa range %d\n",
nfit_spa->spa->range_index);
+
+ /* If we have a region, we can check the BERT */
+ if (nfit_spa->nd_region) {
+ int count = bert_find_mem_error_record(
+ acpi_nfit_bert_callback,
+ (void *)nfit_spa->nd_region, spa->address,
+ spa->length);
+
+ if (count > 0)
+ dev_dbg(acpi_desc->dev, "%d BERT records added\n",
+ count);
+ }
+
return rc;
}
diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c
index 1dc527660637..7dbdb80e72be 100644
--- a/drivers/nvdimm/core.c
+++ b/drivers/nvdimm/core.c
@@ -27,6 +27,12 @@
LIST_HEAD(nvdimm_bus_list);
DEFINE_MUTEX(nvdimm_bus_list_mutex);
+struct nvdimm_bus *nvdimm_region_to_bus(struct nd_region *nd_region)
+{
+ return walk_to_nvdimm_bus(&nd_region->dev);
+}
+EXPORT_SYMBOL(nvdimm_region_to_bus);
+
void nvdimm_bus_lock(struct device *dev)
{
struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index ff855ed965fb..0c3cb02e5706 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -200,6 +200,7 @@ u64 nd_fletcher64(void *addr, size_t len, bool le);
void nvdimm_flush(struct nd_region *nd_region);
int nvdimm_has_flush(struct nd_region *nd_region);
int nvdimm_has_cache(struct nd_region *nd_region);
+struct nvdimm_bus *nvdimm_region_to_bus(struct nd_region *nd_region);
#ifdef CONFIG_ARCH_HAS_PMEM_API
#define ARCH_MEMREMAP_PMEM MEMREMAP_WB
^ permalink raw reply related [flat|nested] 11+ messages in thread* [PATCH 3/4] acpi/nfit: removing ARS timeout and change scrubbing to delayed work
[not found] ` <152236282506.35558.2067249639136170490.stgit-Cxk7aZI4ujnJARH06PadV2t3HXsI98Cx0E9HWUfgJXw@public.gmane.org>
2018-03-29 22:37 ` [PATCH 1/4] acpi: add find error record in BERT function Dave Jiang
2018-03-29 22:37 ` [PATCH 2/4] acpi/libnvdimm: search through BERT records and add to nvdimm badblocks Dave Jiang
@ 2018-03-29 22:37 ` Dave Jiang
2018-03-29 22:37 ` [PATCH 4/4] acpi/nfit: allow knob to disable ARS being issued at kernel boot Dave Jiang
2018-03-30 15:04 ` [PATCH 0/4] Adding support to parse BERT for libnvdimm Kani, Toshi
4 siblings, 0 replies; 11+ messages in thread
From: Dave Jiang @ 2018-03-29 22:37 UTC (permalink / raw)
To: dan.j.williams-ral2JQCrhuEAvxtiuMwx3w
Cc: linux-acpi-u79uwXL29TY76Z2rM5mHXA,
tony.luck-ral2JQCrhuEAvxtiuMwx3w, rjw-LthD3rsA81gm4RdzfppkhA,
lenb-DgEjT+Ai2ygdnm+yROfE0A, linux-nvdimm-hn68Rpc1hR1g9hUCZPvPmw
With the introduction of BERT parsing, the poison regions have already been
added to badblocks, so we no longer need to wait for the ARS scrub of the
bad areas to complete before we surface the regions. Change
acpi_nfit_scrub() to delayed work: instead of polling with a timeout, we
just reschedule the work to try again at a later time. The delay is doubled
every time we hit busy.
Signed-off-by: Dave Jiang <dave.jiang-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
drivers/acpi/nfit/core.c | 233 ++++++++++++++++++----------------------------
drivers/acpi/nfit/nfit.h | 13 ++-
2 files changed, 101 insertions(+), 145 deletions(-)
diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index 3e3b95298a21..668d040bf108 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -35,10 +35,6 @@ static bool force_enable_dimms;
module_param(force_enable_dimms, bool, S_IRUGO|S_IWUSR);
MODULE_PARM_DESC(force_enable_dimms, "Ignore _STA (ACPI DIMM device) status");
-static unsigned int scrub_timeout = NFIT_ARS_TIMEOUT;
-module_param(scrub_timeout, uint, S_IRUGO|S_IWUSR);
-MODULE_PARM_DESC(scrub_timeout, "Initial scrub timeout in seconds");
-
/* after three payloads of overflow, it's dead jim */
static unsigned int scrub_overflow_abort = 3;
module_param(scrub_overflow_abort, uint, S_IRUGO|S_IWUSR);
@@ -1251,7 +1247,8 @@ static ssize_t scrub_show(struct device *dev,
struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
rc = sprintf(buf, "%d%s", acpi_desc->scrub_count,
- (work_busy(&acpi_desc->work)) ? "+\n" : "\n");
+ (work_busy(&acpi_desc->dwork.work)) ?
+ "+\n" : "\n");
}
device_unlock(dev);
return rc;
@@ -2819,86 +2816,6 @@ static int acpi_nfit_query_poison(struct acpi_nfit_desc *acpi_desc,
return 0;
}
-static void acpi_nfit_async_scrub(struct acpi_nfit_desc *acpi_desc,
- struct nfit_spa *nfit_spa)
-{
- struct acpi_nfit_system_address *spa = nfit_spa->spa;
- unsigned int overflow_retry = scrub_overflow_abort;
- u64 init_ars_start = 0, init_ars_len = 0;
- struct device *dev = acpi_desc->dev;
- unsigned int tmo = scrub_timeout;
- int rc;
-
- if (!nfit_spa->ars_required || !nfit_spa->nd_region)
- return;
-
- rc = ars_start(acpi_desc, nfit_spa);
- /*
- * If we timed out the initial scan we'll still be busy here,
- * and will wait another timeout before giving up permanently.
- */
- if (rc < 0 && rc != -EBUSY)
- return;
-
- do {
- u64 ars_start, ars_len;
-
- if (acpi_desc->cancel)
- break;
- rc = acpi_nfit_query_poison(acpi_desc, nfit_spa);
- if (rc == -ENOTTY)
- break;
- if (rc == -EBUSY && !tmo) {
- dev_warn(dev, "range %d ars timeout, aborting\n",
- spa->range_index);
- break;
- }
-
- if (rc == -EBUSY) {
- /*
- * Note, entries may be appended to the list
- * while the lock is dropped, but the workqueue
- * being active prevents entries being deleted /
- * freed.
- */
- mutex_unlock(&acpi_desc->init_mutex);
- ssleep(1);
- tmo--;
- mutex_lock(&acpi_desc->init_mutex);
- continue;
- }
-
- /* we got some results, but there are more pending... */
- if (rc == -ENOSPC && overflow_retry--) {
- if (!init_ars_len) {
- init_ars_len = acpi_desc->ars_status->length;
- init_ars_start = acpi_desc->ars_status->address;
- }
- rc = ars_continue(acpi_desc);
- }
-
- if (rc < 0) {
- dev_warn(dev, "range %d ars continuation failed\n",
- spa->range_index);
- break;
- }
-
- if (init_ars_len) {
- ars_start = init_ars_start;
- ars_len = init_ars_len;
- } else {
- ars_start = acpi_desc->ars_status->address;
- ars_len = acpi_desc->ars_status->length;
- }
- dev_dbg(dev, "spa range: %d ars from %#llx + %#llx complete\n",
- spa->range_index, ars_start, ars_len);
- /* notify the region about new poison entries */
- nvdimm_region_notify(nfit_spa->nd_region,
- NVDIMM_REVALIDATE_POISON);
- break;
- } while (1);
-}
-
static void acpi_nfit_scrub(struct work_struct *work)
{
struct device *dev;
@@ -2907,37 +2824,60 @@ static void acpi_nfit_scrub(struct work_struct *work)
u64 init_scrub_address = 0;
bool init_ars_done = false;
struct acpi_nfit_desc *acpi_desc;
- unsigned int tmo = scrub_timeout;
unsigned int overflow_retry = scrub_overflow_abort;
+ int ars_needed = 0;
- acpi_desc = container_of(work, typeof(*acpi_desc), work);
+ acpi_desc = container_of(work, typeof(*acpi_desc), dwork.work);
dev = acpi_desc->dev;
/*
- * We scrub in 2 phases. The first phase waits for any platform
- * firmware initiated scrubs to complete and then we go search for the
- * affected spa regions to mark them scanned. In the second phase we
- * initiate a directed scrub for every range that was not scrubbed in
- * phase 1. If we're called for a 'rescan', we harmlessly pass through
- * the first phase, but really only care about running phase 2, where
- * regions can be notified of new poison.
+ * We can register all regions right away at init since BERT will
+ * prevent us from hitting the problem areas.
*/
+ list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
+ if (!nfit_spa->nd_region)
+ acpi_nfit_register_region(acpi_desc, nfit_spa);
+ }
- /* process platform firmware initiated scrubs */
retry:
mutex_lock(&acpi_desc->init_mutex);
list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
struct nd_cmd_ars_status *ars_status;
- struct acpi_nfit_system_address *spa;
- u64 ars_start, ars_len;
+ struct acpi_nfit_system_address *spa = nfit_spa->spa;
+ u64 astart, ars_len;
int rc;
if (acpi_desc->cancel)
- break;
+ goto out;
- if (nfit_spa->nd_region)
+ if (nfit_spa->ars_state == NFIT_ARS_STATE_COMPLETE)
+ continue;
+
+ if (nfit_spa->ars_state == NFIT_ARS_STATE_UNSUPPORTED)
continue;
+ if (nfit_spa->ars_state == NFIT_ARS_STATE_REQUESTED) {
+ dev_dbg(dev, "range %d starting ARS\n",
+ spa->range_index);
+ rc = ars_start(acpi_desc, nfit_spa);
+ if (rc == -EBUSY) {
+ queue_delayed_work(nfit_wq, &acpi_desc->dwork,
+ acpi_desc->scrub_timeout * HZ);
+ /*
+ * Increase timeout for next time around.
+ * We'll max it at 30mins.
+ */
+ acpi_desc->scrub_timeout =
+ min_t(unsigned int,
+ acpi_desc->scrub_timeout * 2,
+ 1800);
+ goto out;
+ }
+ if (rc < 0)
+ goto out;
+ nfit_spa->ars_state = NFIT_ARS_STATE_IN_PROGRESS;
+ }
+
if (init_ars_done) {
/*
* No need to re-query, we're now just
@@ -2951,22 +2891,26 @@ static void acpi_nfit_scrub(struct work_struct *work)
if (rc == -ENOTTY) {
/* no ars capability, just register spa and move on */
acpi_nfit_register_region(acpi_desc, nfit_spa);
+ nfit_spa->ars_state = NFIT_ARS_STATE_UNSUPPORTED;
continue;
}
- if (rc == -EBUSY && !tmo) {
- /* fallthrough to directed scrub in phase 2 */
- dev_warn(dev, "timeout awaiting ars results, continuing...\n");
- break;
- } else if (rc == -EBUSY) {
- mutex_unlock(&acpi_desc->init_mutex);
- ssleep(1);
- tmo--;
- goto retry;
+ if (rc == -EBUSY) {
+ nfit_spa->ars_state = NFIT_ARS_STATE_IN_PROGRESS;
+ queue_delayed_work(nfit_wq, &acpi_desc->dwork,
+ acpi_desc->scrub_timeout * HZ);
+ /*
+ * Increase timeout for next time around. We'll max
+ * it at 30mins
+ */
+ acpi_desc->scrub_timeout = min_t(unsigned int,
+ acpi_desc->scrub_timeout * 2, 1800);
+ goto out;
}
/* we got some results, but there are more pending... */
if (rc == -ENOSPC && overflow_retry--) {
+ nfit_spa->ars_state = NFIT_ARS_STATE_IN_PROGRESS;
ars_status = acpi_desc->ars_status;
/*
* Record the original scrub range, so that we
@@ -2985,57 +2929,55 @@ static void acpi_nfit_scrub(struct work_struct *work)
}
if (rc < 0) {
- /*
- * Initial scrub failed, we'll give it one more
- * try below...
- */
- break;
+ nfit_spa->ars_state = NFIT_ARS_STATE_IDLE;
+ dev_warn(dev, "range %d ars continuation failed\n",
+ spa->range_index);
+ goto out;
}
/* We got some final results, record completed ranges */
ars_status = acpi_desc->ars_status;
if (init_scrub_length) {
- ars_start = init_scrub_address;
- ars_len = ars_start + init_scrub_length;
+ astart = init_scrub_address;
+ ars_len = astart + init_scrub_length;
} else {
- ars_start = ars_status->address;
+ astart = ars_status->address;
ars_len = ars_status->length;
}
- spa = nfit_spa->spa;
if (!init_ars_done) {
init_ars_done = true;
- dev_dbg(dev, "init scrub %#llx + %#llx complete\n",
- ars_start, ars_len);
+ dev_dbg(dev, "Scrub %#llx + %#llx complete\n",
+ astart, ars_len);
}
- if (ars_start <= spa->address && ars_start + ars_len
- >= spa->address + spa->length)
- acpi_nfit_register_region(acpi_desc, nfit_spa);
+ nfit_spa->ars_state = NFIT_ARS_STATE_COMPLETE;
+ acpi_desc->scrub_timeout = 1;
+ if (nfit_spa->nd_region)
+ nvdimm_region_notify(nfit_spa->nd_region,
+ NVDIMM_REVALIDATE_POISON);
}
- /*
- * For all the ranges not covered by an initial scrub we still
- * want to see if there are errors, but it's ok to discover them
- * asynchronously.
- */
list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
- /*
- * Flag all the ranges that still need scrubbing, but
- * register them now to make data available.
- */
- if (!nfit_spa->nd_region) {
- nfit_spa->ars_required = 1;
- acpi_nfit_register_region(acpi_desc, nfit_spa);
+ if (nfit_spa->ars_state == NFIT_ARS_STATE_IDLE) {
+ dev_dbg(dev, "range %d set for ARS\n",
+ nfit_spa->spa->range_index);
+ nfit_spa->ars_state = NFIT_ARS_STATE_REQUESTED;
+ ars_needed++;
}
}
- acpi_desc->init_complete = 1;
- list_for_each_entry(nfit_spa, &acpi_desc->spas, list)
- acpi_nfit_async_scrub(acpi_desc, nfit_spa);
+ if (ars_needed) {
+ queue_delayed_work(nfit_wq, &acpi_desc->dwork,
+ acpi_desc->scrub_timeout * HZ);
+ goto out;
+ }
+
+ acpi_desc->init_complete = 1;
acpi_desc->scrub_count++;
acpi_desc->ars_start_flags = 0;
if (acpi_desc->scrub_count_state)
sysfs_notify_dirent(acpi_desc->scrub_count_state);
+out:
mutex_unlock(&acpi_desc->init_mutex);
}
@@ -3054,7 +2996,7 @@ static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc)
acpi_desc->ars_start_flags = 0;
if (!acpi_desc->cancel)
- queue_work(nfit_wq, &acpi_desc->work);
+ queue_delayed_work(nfit_wq, &acpi_desc->dwork, 0);
return 0;
}
@@ -3251,7 +3193,7 @@ static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc,
* just needs guarantees that any ars it initiates are not
* interrupted by any intervening start reqeusts from userspace.
*/
- if (work_busy(&acpi_desc->work))
+ if (work_busy(&acpi_desc->dwork.work))
return -EBUSY;
return 0;
@@ -3262,7 +3204,7 @@ int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, u8 flags)
struct device *dev = acpi_desc->dev;
struct nfit_spa *nfit_spa;
- if (work_busy(&acpi_desc->work))
+ if (work_busy(&acpi_desc->dwork.work))
return -EBUSY;
mutex_lock(&acpi_desc->init_mutex);
@@ -3277,10 +3219,13 @@ int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, u8 flags)
if (nfit_spa_type(spa) != NFIT_SPA_PM)
continue;
- nfit_spa->ars_required = 1;
+ if (nfit_spa->ars_state == NFIT_ARS_STATE_UNSUPPORTED)
+ continue;
+
+ nfit_spa->ars_state = NFIT_ARS_STATE_REQUESTED;
}
acpi_desc->ars_start_flags = flags;
- queue_work(nfit_wq, &acpi_desc->work);
+ queue_delayed_work(nfit_wq, &acpi_desc->dwork, 0);
dev_dbg(dev, "%s: ars_scan triggered\n", __func__);
mutex_unlock(&acpi_desc->init_mutex);
@@ -3311,7 +3256,8 @@ void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev)
INIT_LIST_HEAD(&acpi_desc->dimms);
INIT_LIST_HEAD(&acpi_desc->list);
mutex_init(&acpi_desc->init_mutex);
- INIT_WORK(&acpi_desc->work, acpi_nfit_scrub);
+ acpi_desc->scrub_timeout = 1;
+ INIT_DELAYED_WORK(&acpi_desc->dwork, acpi_nfit_scrub);
}
EXPORT_SYMBOL_GPL(acpi_nfit_desc_init);
@@ -3335,6 +3281,7 @@ void acpi_nfit_shutdown(void *data)
mutex_lock(&acpi_desc->init_mutex);
acpi_desc->cancel = 1;
+ cancel_delayed_work_sync(&acpi_desc->dwork);
mutex_unlock(&acpi_desc->init_mutex);
/*
diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h
index 50d36e166d70..e1dcbbdc5adb 100644
--- a/drivers/acpi/nfit/nfit.h
+++ b/drivers/acpi/nfit/nfit.h
@@ -117,10 +117,18 @@ enum nfit_dimm_notifiers {
NFIT_NOTIFY_DIMM_HEALTH = 0x81,
};
+enum nfit_ars_state {
+ NFIT_ARS_STATE_IDLE = 0,
+ NFIT_ARS_STATE_REQUESTED,
+ NFIT_ARS_STATE_IN_PROGRESS,
+ NFIT_ARS_STATE_COMPLETE,
+ NFIT_ARS_STATE_UNSUPPORTED,
+};
+
struct nfit_spa {
struct list_head list;
struct nd_region *nd_region;
- unsigned int ars_required:1;
+ enum nfit_ars_state ars_state;
u32 clear_err_unit;
u32 max_ars;
struct acpi_nfit_system_address spa[0];
@@ -192,7 +200,7 @@ struct acpi_nfit_desc {
u8 ars_start_flags;
struct nd_cmd_ars_status *ars_status;
size_t ars_status_size;
- struct work_struct work;
+ struct delayed_work dwork;
struct list_head list;
struct kernfs_node *scrub_count_state;
unsigned int scrub_count;
@@ -203,6 +211,7 @@ struct acpi_nfit_desc {
unsigned long bus_cmd_force_en;
unsigned long bus_nfit_cmd_force_en;
unsigned int platform_cap;
+ unsigned int scrub_timeout;
int (*blk_do_io)(struct nd_blk_region *ndbr, resource_size_t dpa,
void *iobuf, u64 len, int rw);
};
^ permalink raw reply related [flat|nested] 11+ messages in thread