[PATCH 15/17] ACPI, APEI, GHES, Error records content based throttle

public inbox for linux-acpi@vger.kernel.org
 help / color / mirror / Atom feed

From: Huang Ying <ying.huang@intel.com>
To: Len Brown <lenb@kernel.org>
Cc: linux-kernel@vger.kernel.org, Andi Kleen <andi@firstfloor.org>,
	Tony Luck <tony.luck@intel.com>,
	ying.huang@intel.com, linux-acpi@vger.kernel.org
Subject: [PATCH 15/17] ACPI, APEI, GHES, Error records content based throttle
Date: Wed, 13 Jul 2011 13:14:26 +0800	[thread overview]
Message-ID: <1310534068-30547-16-git-send-email-ying.huang@intel.com> (raw)
In-Reply-To: <1310534068-30547-1-git-send-email-ying.huang@intel.com>

printk is used by GHES to report hardware errors.  Ratelimit is
enforced on the printk to avoid too many hardware error reports in
kernel log.  Because there may be thousands or even millions of
corrected hardware errors during system running.

Currently, a simple scheme is used.  That is, the total number of
hardware error reporting is ratelimited.  This may cause some issues
in practice.

For example, there are two kinds of hardware errors occurred in
system.  One is corrected memory error, because the fault memory
address is accessed frequently, there may be hundreds error report
per-second.  The other is corrected PCIe AER error, it will be
reported once per-second.  Because they share one ratelimit control
structure, it is highly possible that only memory error is reported.

To avoid the above issue, an error record content based throttle
algorithm is implemented in the patch.  Where after the first
successful reporting, all error records that are same are throttled for
some time, to let other kinds of error records have the opportunity to
be reported.

In above example, the memory errors will be throttled for some time,
after being printked.  Then the PCIe AER error will be printked
successfully.

Signed-off-by: Huang Ying <ying.huang@intel.com>
---
 drivers/acpi/apei/ghes.c |  184 +++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 177 insertions(+), 7 deletions(-)

--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -60,6 +60,21 @@
 
 #define GHES_ESTATUS_POOL_MIN_ALLOC_ORDER 3
 
+/* This is just an estimation for memory pool allocation */
+#define GHES_ESTATUS_CACHE_AVG_SIZE	512
+
+#define GHES_ESTATUS_CACHES_SIZE	4
+
+#define GHES_ESTATUS_IN_CACHE_MAX_NSEC	(10 * NSEC_PER_SEC)
+/* Prevent too many caches are allocated because of RCU */
+#define GHES_ESTATUS_CACHE_ALLOCED_MAX	(GHES_ESTATUS_CACHES_SIZE * 3 / 2)
+
+#define GHES_ESTATUS_CACHE_LEN(estatus_len)			\
+	(sizeof(struct ghes_estatus_cache) + (estatus_len))
+#define GHES_ESTATUS_FROM_CACHE(estatus_cache)			\
+	((struct acpi_hest_generic_status *)			\
+	 ((struct ghes_estatus_cache *)(estatus_cache) + 1))
+
 #define GHES_ESTATUS_NODE_LEN(estatus_len)			\
 	(sizeof(struct ghes_estatus_node) + (estatus_len))
 #define GHES_ESTATUS_FROM_NODE(estatus_node)				\
@@ -94,6 +109,14 @@ struct ghes_estatus_node {
 	struct acpi_hest_generic *generic;
 };
 
+struct ghes_estatus_cache {
+	u32 estatus_len;
+	atomic_t count;
+	struct acpi_hest_generic *generic;
+	unsigned long long time_in;
+	struct rcu_head rcu;
+};
+
 int ghes_disable;
 module_param_named(disable, ghes_disable, bool, 0);
 
@@ -154,6 +177,9 @@ static unsigned long ghes_estatus_pool_s
 static struct llist_head ghes_estatus_llist;
 static struct irq_work ghes_proc_irq_work;
 
+struct ghes_estatus_cache *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE];
+static atomic_t ghes_estatus_cache_alloced;
+
 static int ghes_ioremap_init(void)
 {
 	ghes_ioremap_area = __get_vm_area(PAGE_SIZE * GHES_IOREMAP_PAGES,
@@ -458,9 +484,9 @@ static void __ghes_print_estatus(const c
 	apei_estatus_print(pfx, estatus);
 }
 
-static void ghes_print_estatus(const char *pfx,
-			       const struct acpi_hest_generic *generic,
-			       const struct acpi_hest_generic_status *estatus)
+static int ghes_print_estatus(const char *pfx,
+			      const struct acpi_hest_generic *generic,
+			      const struct acpi_hest_generic_status *estatus)
 {
 	/* Not more than 2 messages every 5 seconds */
 	static DEFINE_RATELIMIT_STATE(ratelimit_corrected, 5*HZ, 2);
@@ -471,8 +497,137 @@ static void ghes_print_estatus(const cha
 		ratelimit = &ratelimit_corrected;
 	else
 		ratelimit = &ratelimit_uncorrected;
-	if (__ratelimit(ratelimit))
+	if (__ratelimit(ratelimit)) {
 		__ghes_print_estatus(pfx, generic, estatus);
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * GHES error status reporting throttle, to report more kinds of
+ * errors, instead of just most frequently occurred errors.
+ */
+static int ghes_estatus_cached(struct acpi_hest_generic_status *estatus)
+{
+	u32 len;
+	int i, cached = 0;
+	unsigned long long now;
+	struct ghes_estatus_cache *cache;
+	struct acpi_hest_generic_status *cache_estatus;
+
+	len = apei_estatus_len(estatus);
+	rcu_read_lock();
+	for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) {
+		cache = rcu_dereference(ghes_estatus_caches[i]);
+		if (cache == NULL)
+			continue;
+		if (len != cache->estatus_len)
+			continue;
+		cache_estatus = GHES_ESTATUS_FROM_CACHE(cache);
+		if (memcmp(estatus, cache_estatus, len))
+			continue;
+		atomic_inc(&cache->count);
+		now = sched_clock();
+		if (now - cache->time_in < GHES_ESTATUS_IN_CACHE_MAX_NSEC)
+			cached = 1;
+		break;
+	}
+	rcu_read_unlock();
+	return cached;
+}
+
+static struct ghes_estatus_cache *ghes_estatus_cache_alloc(
+	struct acpi_hest_generic *generic,
+	struct acpi_hest_generic_status *estatus)
+{
+	int alloced;
+	u32 len, cache_len;
+	struct ghes_estatus_cache *cache;
+	struct acpi_hest_generic_status *cache_estatus;
+
+	alloced = atomic_add_return(1, &ghes_estatus_cache_alloced);
+	if (alloced > GHES_ESTATUS_CACHE_ALLOCED_MAX) {
+		atomic_dec(&ghes_estatus_cache_alloced);
+		return NULL;
+	}
+	len = apei_estatus_len(estatus);
+	cache_len = GHES_ESTATUS_CACHE_LEN(len);
+	cache = (void *)gen_pool_alloc(ghes_estatus_pool, cache_len);
+	if (!cache) {
+		atomic_dec(&ghes_estatus_cache_alloced);
+		return NULL;
+	}
+	cache_estatus = GHES_ESTATUS_FROM_CACHE(cache);
+	memcpy(cache_estatus, estatus, len);
+	cache->estatus_len = len;
+	atomic_set(&cache->count, 0);
+	cache->generic = generic;
+	cache->time_in = sched_clock();
+	return cache;
+}
+
+static void ghes_estatus_cache_free(struct ghes_estatus_cache *cache)
+{
+	u32 len;
+
+	len = apei_estatus_len(GHES_ESTATUS_FROM_CACHE(cache));
+	len = GHES_ESTATUS_CACHE_LEN(len);
+	gen_pool_free(ghes_estatus_pool, (unsigned long)cache, len);
+	atomic_dec(&ghes_estatus_cache_alloced);
+}
+
+static void ghes_estatus_cache_rcu_free(struct rcu_head *head)
+{
+	struct ghes_estatus_cache *cache;
+
+	cache = container_of(head, struct ghes_estatus_cache, rcu);
+	ghes_estatus_cache_free(cache);
+}
+
+static void ghes_estatus_cache_add(
+	struct acpi_hest_generic *generic,
+	struct acpi_hest_generic_status *estatus)
+{
+	int i, slot = -1, count;
+	unsigned long long now, duration, period, max_period = 0;
+	struct ghes_estatus_cache *cache, *slot_cache = NULL, *new_cache;
+
+	new_cache = ghes_estatus_cache_alloc(generic, estatus);
+	if (new_cache == NULL)
+		return;
+	rcu_read_lock();
+	now = sched_clock();
+	for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) {
+		cache = rcu_dereference(ghes_estatus_caches[i]);
+		if (cache == NULL) {
+			slot = i;
+			slot_cache = NULL;
+			break;
+		}
+		duration = now - cache->time_in;
+		if (duration >= GHES_ESTATUS_IN_CACHE_MAX_NSEC) {
+			slot = i;
+			slot_cache = cache;
+			break;
+		}
+		count = atomic_read(&cache->count);
+		period = duration / (count + 1);
+		if (period > max_period) {
+			max_period = period;
+			slot = i;
+			slot_cache = cache;
+		}
+	}
+	/* new_cache must be put into array after its contents are written */
+	smp_wmb();
+	if (slot != -1 && cmpxchg(ghes_estatus_caches + slot,
+				  slot_cache, new_cache) == slot_cache) {
+		if (slot_cache)
+			call_rcu(&slot_cache->rcu, ghes_estatus_cache_rcu_free);
+	} else
+		ghes_estatus_cache_free(new_cache);
+	rcu_read_unlock();
 }
 
 static int ghes_proc(struct ghes *ghes)
@@ -482,9 +637,11 @@ static int ghes_proc(struct ghes *ghes)
 	rc = ghes_read_estatus(ghes, 0);
 	if (rc)
 		goto out;
-	ghes_print_estatus(NULL, ghes->generic, ghes->estatus);
+	if (!ghes_estatus_cached(ghes->estatus)) {
+		if (ghes_print_estatus(NULL, ghes->generic, ghes->estatus))
+			ghes_estatus_cache_add(ghes->generic, ghes->estatus);
+	}
 	ghes_do_proc(ghes->estatus);
-
 out:
 	ghes_clear_estatus(ghes);
 	return 0;
@@ -546,6 +703,7 @@ static void ghes_proc_in_irq(struct irq_
 {
 	struct llist_node *llnode, *next, *tail = NULL;
 	struct ghes_estatus_node *estatus_node;
+	struct acpi_hest_generic *generic;
 	struct acpi_hest_generic_status *estatus;
 	u32 len, node_len;
 
@@ -569,7 +727,11 @@ static void ghes_proc_in_irq(struct irq_
 		len = apei_estatus_len(estatus);
 		node_len = GHES_ESTATUS_NODE_LEN(len);
 		ghes_do_proc(estatus);
-		ghes_print_estatus(NULL, estatus_node->generic, estatus);
+		if (!ghes_estatus_cached(estatus)) {
+			generic = estatus_node->generic;
+			if (ghes_print_estatus(NULL, generic, estatus))
+				ghes_estatus_cache_add(generic, estatus);
+		}
 		gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node,
 			      node_len);
 		llnode = next;
@@ -622,6 +784,8 @@ static int ghes_notify_nmi(struct notifi
 		if (!(ghes->flags & GHES_TO_CLEAR))
 			continue;
 #ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
+		if (ghes_estatus_cached(ghes->estatus))
+			goto next;
 		/* Save estatus for further processing in IRQ context */
 		len = apei_estatus_len(ghes->estatus);
 		node_len = GHES_ESTATUS_NODE_LEN(len);
@@ -633,6 +797,7 @@ static int ghes_notify_nmi(struct notifi
 			memcpy(estatus, ghes->estatus, len);
 			llist_add(&estatus_node->llnode, &ghes_estatus_llist);
 		}
+next:
 #endif
 		ghes_clear_estatus(ghes);
 	}
@@ -847,6 +1012,11 @@ static int __init ghes_init(void)
 	if (rc)
 		goto err_ioremap_exit;
 
+	rc = ghes_estatus_pool_expand(GHES_ESTATUS_CACHE_AVG_SIZE *
+				      GHES_ESTATUS_CACHE_ALLOCED_MAX);
+	if (rc)
+		goto err_pool_exit;
+
 	rc = platform_driver_register(&ghes_platform_driver);
 	if (rc)
 		goto err_pool_exit;

next prev parent reply	other threads:[~2011-07-13  5:14 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-07-13  5:14 [PATCH 00/17] ACPI, APEI, Patches for 3.1 Huang Ying
2011-07-13  5:14 ` [PATCH 01/17] ACPI, APEI, HEST, Detect duplicated hardware error source ID Huang Ying
2011-07-14  3:28   ` Len Brown
2011-07-13  5:14 ` [PATCH 02/17] ACPI, APEI, ERST, Prevent erst_dbg from loading if ERST is disabled Huang Ying
2011-07-14  3:30   ` Len Brown
2011-07-13  5:14 ` [PATCH 03/17] ACPI, APEI, ERST, Fix erst-dbg long record reading issue Huang Ying
2011-07-14  3:32   ` Len Brown
2011-07-13  5:14 ` [PATCH 04/17] ACPI, APEI, GHES, Do not ratelimit fatal error printk before panic Huang Ying
2011-07-14  3:33   ` Len Brown
2011-07-13  5:14 ` [PATCH 05/17] ACPI, APEI, Add apei_exec_run_optional Huang Ying
2011-07-13  5:14 ` [PATCH 06/17] ACPI, APEI, Use apei_exec_run_optional in APEI EINJ and ERST Huang Ying
2011-07-13  5:14 ` [PATCH 07/17] ACPI, APEI, GHES, Prevent GHES to be built as module Huang Ying
2011-07-14  3:36   ` Len Brown
2011-07-13  5:14 ` [PATCH 08/17] ACPI, APEI, GHES, Support disable GHES at boot time Huang Ying
2011-07-14 14:47   ` Don Zickus
2011-07-15  0:31     ` Huang Ying
2011-07-15 13:33       ` Don Zickus
2011-07-13  5:14 ` [PATCH 09/17] ACPI, APEI, Add APEI bit support in generic _OSC call Huang Ying
2011-07-14 14:48   ` Don Zickus
2011-07-13  5:14 ` [PATCH 10/17] ACPI, APEI, Add WHEA _OSC support Huang Ying
2011-07-14 14:50   ` Don Zickus
2011-07-13  5:14 ` [PATCH 11/17] Add Kconfig option ARCH_HAVE_NMI_SAFE_CMPXCHG Huang Ying
2011-07-13  5:14 ` [PATCH 12/17] lib, Add lock-less NULL terminated single list Huang Ying
2011-07-13  5:14 ` [PATCH 13/17] lib, Make gen_pool memory allocator lockless Huang Ying
2011-07-13  5:14 ` [PATCH 14/17] ACPI, APEI, GHES, printk support for recoverable error via NMI Huang Ying
2011-07-13  5:14 ` Huang Ying [this message]
2011-07-13  5:14 ` [PATCH 16/17] HWPoison: add memory_failure_queue() Huang Ying
2011-07-13  5:14 ` [PATCH 17/17] ACPI, APEI, GHES: Add hardware memory error recovery support Huang Ying

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1310534068-30547-16-git-send-email-ying.huang@intel.com \
    --to=ying.huang@intel.com \
    --cc=andi@firstfloor.org \
    --cc=lenb@kernel.org \
    --cc=linux-acpi@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=tony.luck@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox