* [Linux-ia64] Re: [PATCH] CMC polling
@ 2003-03-07 21:36 Alex Williamson
0 siblings, 0 replies; only message in thread
From: Alex Williamson @ 2003-03-07 21:36 UTC (permalink / raw)
To: linux-ia64
[-- Attachment #1: Type: text/plain, Size: 543 bytes --]
Here's a bugfix update to my previous patch. I was mistakenly
calling smp_call_function with interrupts disabled, which carries a
definite danger of deadlock. I've attached a new version of the
last patch as well as an interdiff between the two. Let me know
if there are any other issues. Thanks,
Alex
--
Alex Williamson Linux Development Lab
alex_williamson@hp.com Hewlett Packard
970-898-9173 Fort Collins, CO
[-- Attachment #2: cmc_polling_mca_updates_v2.diff --]
[-- Type: text/plain, Size: 8955 bytes --]
--- arch/ia64/kernel/mca.c~ 2003-03-03 11:41:09.000000000 -0700
+++ arch/ia64/kernel/mca.c 2003-03-07 12:07:53.000000000 -0700
@@ -45,6 +45,7 @@
#include <linux/timer.h>
#include <linux/module.h>
#include <linux/kernel.h>
+#include <linux/smp.h>
#include <asm/machvec.h>
#include <asm/page.h>
@@ -53,6 +54,7 @@
#include <asm/sal.h>
#include <asm/mca.h>
+#include <asm/processor.h>
#include <asm/irq.h>
#include <asm/hw_irq.h>
@@ -110,8 +112,16 @@
#define MAX_CPE_POLL_INTERVAL (15*60*HZ) /* 15 minutes */
#define MIN_CPE_POLL_INTERVAL (2*60*HZ) /* 2 minutes */
+#define CMC_POLL_INTERVAL (1*60*HZ) /* 1 minute */
+#define CMC_HISTORY_LENGTH 5
static struct timer_list cpe_poll_timer;
+static struct timer_list cmc_poll_timer;
+/*
+ * Start with this in the wrong state so we won't play w/ timers
+ * before the system is ready.
+ */
+static int cmc_polling_enabled = 1;
/*
* ia64_mca_log_sal_error_record
@@ -160,7 +170,7 @@
void
ia64_mca_cpe_int_handler (int cpe_irq, void *arg, struct pt_regs *ptregs)
{
- IA64_MCA_DEBUG("ia64_mca_cpe_int_handler: received interrupt. vector = %#x\n", cpe_irq);
+ IA64_MCA_DEBUG("ia64_mca_cpe_int_handler: received interrupt. CPU:%d vector = %#x\n", smp_processor_id(), cpe_irq);
/* Get the CMC error record and log it */
ia64_mca_log_sal_error_record(SAL_INFO_TYPE_CPE, 0);
@@ -331,6 +341,60 @@
smp_processor_id(), ia64_get_cmcv());
}
+/*
+ * ia64_mca_cmc_vector_disable
+ *
+ * Mask the corrected machine check vector register in the processor.
+ * This function is invoked on a per-processor basis.
+ *
+ * Inputs
+ * dummy(unused)
+ *
+ * Outputs
+ * None
+ */
+void
+ia64_mca_cmc_vector_disable (void *dummy)
+{
+ cmcv_reg_t cmcv;
+
+ cmcv = (cmcv_reg_t)ia64_get_cmcv();
+
+ cmcv.cmcv_mask = 1; /* Mask/disable interrupt */
+ ia64_set_cmcv(cmcv.cmcv_regval);
+
+ IA64_MCA_DEBUG("ia64_mca_cmc_vector_disable: CPU %d corrected "
+ "machine check vector %#x disabled.\n",
+ smp_processor_id(), cmcv.cmcv_vector);
+}
+
+/*
+ * ia64_mca_cmc_vector_enable
+ *
+ * Unmask the corrected machine check vector register in the processor.
+ * This function is invoked on a per-processor basis.
+ *
+ * Inputs
+ * dummy(unused)
+ *
+ * Outputs
+ * None
+ */
+void
+ia64_mca_cmc_vector_enable (void *dummy)
+{
+ cmcv_reg_t cmcv;
+
+ cmcv = (cmcv_reg_t)ia64_get_cmcv();
+
+ cmcv.cmcv_mask = 0; /* Unmask/enable interrupt */
+ ia64_set_cmcv(cmcv.cmcv_regval);
+
+ IA64_MCA_DEBUG("ia64_mca_cmc_vector_enable: CPU %d corrected "
+ "machine check vector %#x enabled.\n",
+ smp_processor_id(), cmcv.cmcv_vector);
+}
+
#if defined(MCA_TEST)
@@ -780,11 +844,68 @@
void
ia64_mca_cmc_int_handler(int cmc_irq, void *arg, struct pt_regs *ptregs)
{
+ static unsigned long cmc_history[CMC_HISTORY_LENGTH];
+ static int index;
+ static spinlock_t cmc_history_lock = SPIN_LOCK_UNLOCKED;
+
IA64_MCA_DEBUG("ia64_mca_cmc_int_handler: received interrupt vector = %#x on CPU %d\n",
cmc_irq, smp_processor_id());
/* Get the CMC error record and log it */
ia64_mca_log_sal_error_record(SAL_INFO_TYPE_CMC, 0);
+
+ spin_lock(&cmc_history_lock);
+ if (!cmc_polling_enabled) {
+ int i, count = 1; /* we know 1 happened now */
+ unsigned long now = jiffies;
+
+ for (i = 0; i < CMC_HISTORY_LENGTH; i++) {
+ if (now - cmc_history[i] <= HZ)
+ count++;
+ }
+
+ IA64_MCA_DEBUG(KERN_INFO "CMC threshold %d/%d\n", count, CMC_HISTORY_LENGTH);
+ if (count >= CMC_HISTORY_LENGTH) {
+ /*
+ * CMC threshold exceeded, clear the history
+ * so we have a fresh start when we return
+ */
+ for (index = 0 ; index < CMC_HISTORY_LENGTH; index++)
+ cmc_history[index] = 0;
+ index = 0;
+
+ /* Switch to polling mode */
+ cmc_polling_enabled = 1;
+
+ /*
+ * Unlock & enable interrupts before
+ * smp_call_function or risk deadlock
+ */
+ spin_unlock(&cmc_history_lock);
+ ia64_mca_cmc_vector_disable(NULL);
+
+ local_irq_enable();
+ smp_call_function(ia64_mca_cmc_vector_disable, NULL, 1, 1);
+
+ /*
+ * Corrected errors will still be corrected, but
+ * make sure there's a log somewhere that indicates
+ * something is generating more than we can handle.
+ */
+ printk(KERN_WARNING "ia64_mca_cmc_int_handler: WARNING: Switching to polling CMC handler, error records may be lost\n");
+
+
+ mod_timer(&cmc_poll_timer, jiffies + CMC_POLL_INTERVAL);
+
+ /* lock already released, get out now */
+ return;
+ } else {
+ cmc_history[index++] = now;
+ if (index == CMC_HISTORY_LENGTH)
+ index = 0;
+ }
+ }
+ spin_unlock(&cmc_history_lock);
}
/*
@@ -797,6 +918,7 @@
{
spinlock_t isl_lock;
int isl_index;
+ unsigned long isl_count;
ia64_err_rec_t *isl_log[IA64_MAX_LOGS]; /* need space to store header + error log */
} ia64_state_log_t;
@@ -813,11 +935,78 @@
#define IA64_LOG_NEXT_INDEX(it) ia64_state_log[it].isl_index
#define IA64_LOG_CURR_INDEX(it) 1 - ia64_state_log[it].isl_index
#define IA64_LOG_INDEX_INC(it) \
- ia64_state_log[it].isl_index = 1 - ia64_state_log[it].isl_index
+ {ia64_state_log[it].isl_index = 1 - ia64_state_log[it].isl_index; \
+ ia64_state_log[it].isl_count++;}
#define IA64_LOG_INDEX_DEC(it) \
ia64_state_log[it].isl_index = 1 - ia64_state_log[it].isl_index
#define IA64_LOG_NEXT_BUFFER(it) (void *)((ia64_state_log[it].isl_log[IA64_LOG_NEXT_INDEX(it)]))
#define IA64_LOG_CURR_BUFFER(it) (void *)((ia64_state_log[it].isl_log[IA64_LOG_CURR_INDEX(it)]))
+#define IA64_LOG_COUNT(it) ia64_state_log[it].isl_count
+
+/*
+ * ia64_mca_cmc_int_caller
+ *
+ * Call CMC interrupt handler, only purpose is to have a
+ * smp_call_function callable entry.
+ *
+ * Inputs : dummy(unused)
+ * Outputs : None
+ * */
+static void
+ia64_mca_cmc_int_caller(void *dummy)
+{
+ ia64_mca_cmc_int_handler(0, NULL, NULL);
+}
+
+/*
+ * ia64_mca_cmc_poll
+ *
+ * Poll for Corrected Machine Checks (CMCs)
+ *
+ * Inputs : dummy(unused)
+ * Outputs : None
+ *
+ */
+static void
+ia64_mca_cmc_poll (unsigned long dummy)
+{
+ int start_count;
+
+ start_count = IA64_LOG_COUNT(SAL_INFO_TYPE_CMC);
+
+ /* Call the interrupt handler */
+ smp_call_function(ia64_mca_cmc_int_caller, NULL, 1, 1);
+ local_irq_disable();
+ ia64_mca_cmc_int_caller(NULL);
+ local_irq_enable();
+
+ /*
+ * If no log recorded, switch out of polling mode.
+ */
+ if (start_count == IA64_LOG_COUNT(SAL_INFO_TYPE_CMC)) {
+ printk(KERN_WARNING "ia64_mca_cmc_poll: Returning to interrupt driven CMC handler\n");
+ cmc_polling_enabled = 0;
+ smp_call_function(ia64_mca_cmc_vector_enable, NULL, 1, 1);
+ ia64_mca_cmc_vector_enable(NULL);
+ } else {
+ mod_timer(&cmc_poll_timer, jiffies + CMC_POLL_INTERVAL);
+ }
+}
+
+/*
+ * ia64_mca_cpe_int_caller
+ *
+ * Call CPE interrupt handler, only purpose is to have a
+ * smp_call_function callable entry.
+ *
+ * Inputs : dummy(unused)
+ * Outputs : None
+ * */
+static void
+ia64_mca_cpe_int_caller(void *dummy)
+{
+ ia64_mca_cpe_int_handler(0, NULL, NULL);
+}
/*
* ia64_mca_cpe_poll
@@ -832,19 +1021,22 @@
static void
ia64_mca_cpe_poll (unsigned long dummy)
{
- int start_index;
+ int start_count;
static int poll_time = MAX_CPE_POLL_INTERVAL;
- start_index = IA64_LOG_CURR_INDEX(SAL_INFO_TYPE_CPE);
+ start_count = IA64_LOG_COUNT(SAL_INFO_TYPE_CPE);
/* Call the interrupt handler */
- ia64_mca_cpe_int_handler(0, NULL, NULL);
+ smp_call_function(ia64_mca_cpe_int_caller, NULL, 1, 1);
+ local_irq_disable();
+ ia64_mca_cpe_int_caller(NULL);
+ local_irq_enable();
/*
* If a log was recorded, increase our polling frequency,
* otherwise, backoff.
*/
- if (start_index != IA64_LOG_CURR_INDEX(SAL_INFO_TYPE_CPE)) {
+ if (start_count != IA64_LOG_COUNT(SAL_INFO_TYPE_CPE)) {
poll_time = max(MIN_CPE_POLL_INTERVAL, poll_time/2);
} else {
poll_time = min(MAX_CPE_POLL_INTERVAL, poll_time * 2);
@@ -865,11 +1057,19 @@
static int __init
ia64_mca_late_init(void)
{
- if (acpi_request_vector(ACPI_INTERRUPT_CPEI) < 0) {
- init_timer(&cpe_poll_timer);
- cpe_poll_timer.function = ia64_mca_cpe_poll;
- ia64_mca_cpe_poll(0);
- }
+ init_timer(&cmc_poll_timer);
+ cmc_poll_timer.function = ia64_mca_cmc_poll;
+
+ /* Reset to the correct state */
+ cmc_polling_enabled = 0;
+
+ init_timer(&cpe_poll_timer);
+ cpe_poll_timer.function = ia64_mca_cpe_poll;
+
+ /* If platform doesn't support CPEI, get the timer going. */
+ if (acpi_request_vector(ACPI_INTERRUPT_CPEI) < 0)
+ ia64_mca_cpe_poll(0UL);
+
return 0;
}
@@ -1077,7 +1277,7 @@
{
prfunc("+Err Record ID: %d SAL Rev: %2x.%02x\n", lh->id,
lh->revision.major, lh->revision.minor);
- prfunc("+Time: %02x/%02x/%02x%02x %02d:%02d:%02d Severity %d\n",
+ prfunc("+Time: %02x/%02x/%02x%02x %02x:%02x:%02x Severity %d\n",
lh->timestamp.slh_month, lh->timestamp.slh_day,
lh->timestamp.slh_century, lh->timestamp.slh_year,
lh->timestamp.slh_hour, lh->timestamp.slh_minute,
[-- Attachment #3: cmc_polling_mca_updates_v1-v2.diff --]
[-- Type: text/plain, Size: 1312 bytes --]
diff -u arch/ia64/kernel/mca.c arch/ia64/kernel/mca.c
--- arch/ia64/kernel/mca.c 2003-03-03 11:41:23.000000000 -0700
+++ arch/ia64/kernel/mca.c 2003-03-07 12:07:53.000000000 -0700
@@ -877,11 +877,16 @@
/* Switch to polling mode */
cmc_polling_enabled = 1;
- /* Unlock before smp_call_function or risk deadlock */
+ /*
+ * Unlock & enable interrupts before
+ * smp_call_function or risk deadlock
+ */
spin_unlock(&cmc_history_lock);
- smp_call_function(ia64_mca_cmc_vector_disable, NULL, 1, 1);
ia64_mca_cmc_vector_disable(NULL);
+ local_irq_enable();
+ smp_call_function(ia64_mca_cmc_vector_disable, NULL, 1, 1);
+
/*
* Corrected errors will still be corrected, but
* make sure there's a log somewhere that indicates
@@ -970,8 +975,8 @@
start_count = IA64_LOG_COUNT(SAL_INFO_TYPE_CMC);
/* Call the interrupt handler */
- local_irq_disable();
smp_call_function(ia64_mca_cmc_int_caller, NULL, 1, 1);
+ local_irq_disable();
ia64_mca_cmc_int_caller(NULL);
local_irq_enable();
@@ -1022,8 +1027,8 @@
start_count = IA64_LOG_COUNT(SAL_INFO_TYPE_CPE);
/* Call the interrupt handler */
- local_irq_disable();
smp_call_function(ia64_mca_cpe_int_caller, NULL, 1, 1);
+ local_irq_disable();
ia64_mca_cpe_int_caller(NULL);
local_irq_enable();
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2003-03-07 21:36 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2003-03-07 21:36 [Linux-ia64] Re: [PATCH] CMC polling Alex Williamson
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.