* [Linux-ia64] Re: [PATCH] CMC polling
@ 2003-03-07 21:36 Alex Williamson
0 siblings, 0 replies; only message in thread
From: Alex Williamson @ 2003-03-07 21:36 UTC (permalink / raw)
To: linux-ia64
[-- Attachment #1: Type: text/plain, Size: 543 bytes --]
Here's a bugfix update to my previous patch. I was mistakenly
using smp_call_function w/ interrupts disabled. There's a definite
danger of deadlock under those circumstances. I've attached a new
version of the last patch as well as an interdiff between the two.
Let me know if there are any other issues. Thanks,
Alex
--
Alex Williamson Linux Development Lab
alex_williamson@hp.com Hewlett Packard
970-898-9173 Fort Collins, CO
[-- Attachment #2: cmc_polling_mca_updates_v2.diff --]
[-- Type: text/plain, Size: 8955 bytes --]
--- arch/ia64/kernel/mca.c~ 2003-03-03 11:41:09.000000000 -0700
+++ arch/ia64/kernel/mca.c 2003-03-07 12:07:53.000000000 -0700
@@ -45,6 +45,7 @@
#include <linux/timer.h>
#include <linux/module.h>
#include <linux/kernel.h>
+#include <linux/smp.h>
#include <asm/machvec.h>
#include <asm/page.h>
@@ -53,6 +54,7 @@
#include <asm/sal.h>
#include <asm/mca.h>
+#include <asm/processor.h>
#include <asm/irq.h>
#include <asm/hw_irq.h>
@@ -110,8 +112,16 @@
#define MAX_CPE_POLL_INTERVAL (15*60*HZ) /* 15 minutes */
#define MIN_CPE_POLL_INTERVAL (2*60*HZ) /* 2 minutes */
+#define CMC_POLL_INTERVAL (1*60*HZ) /* 1 minute */
+#define CMC_HISTORY_LENGTH 5
static struct timer_list cpe_poll_timer;
+static struct timer_list cmc_poll_timer;
+/*
+ * Start with this in the wrong state so we won't play w/ timers
+ * before the system is ready.
+ */
+static int cmc_polling_enabled = 1;
/*
* ia64_mca_log_sal_error_record
@@ -160,7 +170,7 @@
void
ia64_mca_cpe_int_handler (int cpe_irq, void *arg, struct pt_regs *ptregs)
{
- IA64_MCA_DEBUG("ia64_mca_cpe_int_handler: received interrupt. vector = %#x\n", cpe_irq);
+ IA64_MCA_DEBUG("ia64_mca_cpe_int_handler: received interrupt. CPU:%d vector = %#x\n", smp_processor_id(), cpe_irq);
/* Get the CMC error record and log it */
ia64_mca_log_sal_error_record(SAL_INFO_TYPE_CPE, 0);
@@ -331,6 +341,60 @@
smp_processor_id(), ia64_get_cmcv());
}
+/*
+ * ia64_mca_cmc_vector_disable
+ *
+ * Mask the corrected machine check vector register in the processor.
+ * This function is invoked on a per-processor basis.
+ *
+ * Inputs
+ * dummy(unused)
+ *
+ * Outputs
+ * None
+ */
+void
+ia64_mca_cmc_vector_disable (void *dummy)
+{
+ cmcv_reg_t cmcv;
+
+ cmcv = (cmcv_reg_t)ia64_get_cmcv();
+
+ cmcv.cmcv_mask = 1; /* Mask/disable interrupt */
+ ia64_set_cmcv(cmcv.cmcv_regval);
+
+ IA64_MCA_DEBUG("ia64_mca_cmc_vector_disable: CPU %d corrected "
+ "machine check vector %#x disabled.\n",
+ smp_processor_id(), cmcv.cmcv_vector);
+}
+
+/*
+ * ia64_mca_cmc_vector_enable
+ *
+ * Unmask the corrected machine check vector register in the processor.
+ * This function is invoked on a per-processor basis.
+ *
+ * Inputs
+ * dummy(unused)
+ *
+ * Outputs
+ * None
+ */
+void
+ia64_mca_cmc_vector_enable (void *dummy)
+{
+ cmcv_reg_t cmcv;
+
+ cmcv = (cmcv_reg_t)ia64_get_cmcv();
+
+ cmcv.cmcv_mask = 0; /* Unmask/enable interrupt */
+ ia64_set_cmcv(cmcv.cmcv_regval);
+
+ IA64_MCA_DEBUG("ia64_mca_cmc_vector_enable: CPU %d corrected "
+ "machine check vector %#x enabled.\n",
+ smp_processor_id(), cmcv.cmcv_vector);
+}
+
#if defined(MCA_TEST)
@@ -780,11 +844,68 @@
void
ia64_mca_cmc_int_handler(int cmc_irq, void *arg, struct pt_regs *ptregs)
{
+ static unsigned long cmc_history[CMC_HISTORY_LENGTH];
+ static int index;
+ static spinlock_t cmc_history_lock = SPIN_LOCK_UNLOCKED;
+
IA64_MCA_DEBUG("ia64_mca_cmc_int_handler: received interrupt vector = %#x on CPU %d\n",
cmc_irq, smp_processor_id());
/* Get the CMC error record and log it */
ia64_mca_log_sal_error_record(SAL_INFO_TYPE_CMC, 0);
+
+ spin_lock(&cmc_history_lock);
+ if (!cmc_polling_enabled) {
+ int i, count = 1; /* we know 1 happened now */
+ unsigned long now = jiffies;
+
+ for (i = 0; i < CMC_HISTORY_LENGTH; i++) {
+ if (now - cmc_history[i] <= HZ)
+ count++;
+ }
+
+ IA64_MCA_DEBUG(KERN_INFO "CMC threshold %d/%d\n", count, CMC_HISTORY_LENGTH);
+ if (count >= CMC_HISTORY_LENGTH) {
+ /*
+ * CMC threshold exceeded, clear the history
+ * so we have a fresh start when we return
+ */
+ for (index = 0 ; index < CMC_HISTORY_LENGTH; index++)
+ cmc_history[index] = 0;
+ index = 0;
+
+ /* Switch to polling mode */
+ cmc_polling_enabled = 1;
+
+ /*
+ * Unlock & enable interrupts before
+ * smp_call_function or risk deadlock
+ */
+ spin_unlock(&cmc_history_lock);
+ ia64_mca_cmc_vector_disable(NULL);
+
+ local_irq_enable();
+ smp_call_function(ia64_mca_cmc_vector_disable, NULL, 1, 1);
+
+ /*
+ * Corrected errors will still be corrected, but
+ * make sure there's a log somewhere that indicates
+ * something is generating more than we can handle.
+ */
+ printk(KERN_WARNING "ia64_mca_cmc_int_handler: WARNING: Switching to polling CMC handler, error records may be lost\n");
+
+
+ mod_timer(&cmc_poll_timer, jiffies + CMC_POLL_INTERVAL);
+
+ /* lock already released, get out now */
+ return;
+ } else {
+ cmc_history[index++] = now;
+ if (index == CMC_HISTORY_LENGTH)
+ index = 0;
+ }
+ }
+ spin_unlock(&cmc_history_lock);
}
/*
@@ -797,6 +918,7 @@
{
spinlock_t isl_lock;
int isl_index;
+ unsigned long isl_count;
ia64_err_rec_t *isl_log[IA64_MAX_LOGS]; /* need space to store header + error log */
} ia64_state_log_t;
@@ -813,11 +935,78 @@
#define IA64_LOG_NEXT_INDEX(it) ia64_state_log[it].isl_index
#define IA64_LOG_CURR_INDEX(it) 1 - ia64_state_log[it].isl_index
#define IA64_LOG_INDEX_INC(it) \
- ia64_state_log[it].isl_index = 1 - ia64_state_log[it].isl_index
+ {ia64_state_log[it].isl_index = 1 - ia64_state_log[it].isl_index; \
+ ia64_state_log[it].isl_count++;}
#define IA64_LOG_INDEX_DEC(it) \
ia64_state_log[it].isl_index = 1 - ia64_state_log[it].isl_index
#define IA64_LOG_NEXT_BUFFER(it) (void *)((ia64_state_log[it].isl_log[IA64_LOG_NEXT_INDEX(it)]))
#define IA64_LOG_CURR_BUFFER(it) (void *)((ia64_state_log[it].isl_log[IA64_LOG_CURR_INDEX(it)]))
+#define IA64_LOG_COUNT(it) ia64_state_log[it].isl_count
+
+/*
+ * ia64_mca_cmc_int_caller
+ *
+ * Call CMC interrupt handler, only purpose is to have a
+ * smp_call_function callable entry.
+ *
+ * Inputs : dummy(unused)
+ * Outputs : None
+ * */
+static void
+ia64_mca_cmc_int_caller(void *dummy)
+{
+ ia64_mca_cmc_int_handler(0, NULL, NULL);
+}
+
+/*
+ * ia64_mca_cmc_poll
+ *
+ * Poll for Corrected Machine Checks (CMCs)
+ *
+ * Inputs : dummy(unused)
+ * Outputs : None
+ *
+ */
+static void
+ia64_mca_cmc_poll (unsigned long dummy)
+{
+ int start_count;
+
+ start_count = IA64_LOG_COUNT(SAL_INFO_TYPE_CMC);
+
+ /* Call the interrupt handler */
+ smp_call_function(ia64_mca_cmc_int_caller, NULL, 1, 1);
+ local_irq_disable();
+ ia64_mca_cmc_int_caller(NULL);
+ local_irq_enable();
+
+ /*
+	 * If no log recorded, switch out of polling mode.
+ */
+ if (start_count == IA64_LOG_COUNT(SAL_INFO_TYPE_CMC)) {
+ printk(KERN_WARNING "ia64_mca_cmc_poll: Returning to interrupt driven CMC handler\n");
+ cmc_polling_enabled = 0;
+ smp_call_function(ia64_mca_cmc_vector_enable, NULL, 1, 1);
+ ia64_mca_cmc_vector_enable(NULL);
+ } else {
+ mod_timer(&cmc_poll_timer, jiffies + CMC_POLL_INTERVAL);
+ }
+}
+
+/*
+ * ia64_mca_cpe_int_caller
+ *
+ * Call CPE interrupt handler, only purpose is to have a
+ * smp_call_function callable entry.
+ *
+ * Inputs : dummy(unused)
+ * Outputs : None
+ * */
+static void
+ia64_mca_cpe_int_caller(void *dummy)
+{
+ ia64_mca_cpe_int_handler(0, NULL, NULL);
+}
/*
* ia64_mca_cpe_poll
@@ -832,19 +1021,22 @@
static void
ia64_mca_cpe_poll (unsigned long dummy)
{
- int start_index;
+ int start_count;
static int poll_time = MAX_CPE_POLL_INTERVAL;
- start_index = IA64_LOG_CURR_INDEX(SAL_INFO_TYPE_CPE);
+ start_count = IA64_LOG_COUNT(SAL_INFO_TYPE_CPE);
/* Call the interrupt handler */
- ia64_mca_cpe_int_handler(0, NULL, NULL);
+ smp_call_function(ia64_mca_cpe_int_caller, NULL, 1, 1);
+ local_irq_disable();
+ ia64_mca_cpe_int_caller(NULL);
+ local_irq_enable();
/*
* If a log was recorded, increase our polling frequency,
* otherwise, backoff.
*/
- if (start_index != IA64_LOG_CURR_INDEX(SAL_INFO_TYPE_CPE)) {
+ if (start_count != IA64_LOG_COUNT(SAL_INFO_TYPE_CPE)) {
poll_time = max(MIN_CPE_POLL_INTERVAL, poll_time/2);
} else {
poll_time = min(MAX_CPE_POLL_INTERVAL, poll_time * 2);
@@ -865,11 +1057,19 @@
static int __init
ia64_mca_late_init(void)
{
- if (acpi_request_vector(ACPI_INTERRUPT_CPEI) < 0) {
- init_timer(&cpe_poll_timer);
- cpe_poll_timer.function = ia64_mca_cpe_poll;
- ia64_mca_cpe_poll(0);
- }
+ init_timer(&cmc_poll_timer);
+ cmc_poll_timer.function = ia64_mca_cmc_poll;
+
+ /* Reset to the correct state */
+ cmc_polling_enabled = 0;
+
+ init_timer(&cpe_poll_timer);
+ cpe_poll_timer.function = ia64_mca_cpe_poll;
+
+ /* If platform doesn't support CPEI, get the timer going. */
+ if (acpi_request_vector(ACPI_INTERRUPT_CPEI) < 0)
+ ia64_mca_cpe_poll(0UL);
+
return 0;
}
@@ -1077,7 +1277,7 @@
{
prfunc("+Err Record ID: %d SAL Rev: %2x.%02x\n", lh->id,
lh->revision.major, lh->revision.minor);
- prfunc("+Time: %02x/%02x/%02x%02x %02d:%02d:%02d Severity %d\n",
+ prfunc("+Time: %02x/%02x/%02x%02x %02x:%02x:%02x Severity %d\n",
lh->timestamp.slh_month, lh->timestamp.slh_day,
lh->timestamp.slh_century, lh->timestamp.slh_year,
lh->timestamp.slh_hour, lh->timestamp.slh_minute,
[-- Attachment #3: cmc_polling_mca_updates_v1-v2.diff --]
[-- Type: text/plain, Size: 1312 bytes --]
diff -u arch/ia64/kernel/mca.c arch/ia64/kernel/mca.c
--- arch/ia64/kernel/mca.c 2003-03-03 11:41:23.000000000 -0700
+++ arch/ia64/kernel/mca.c 2003-03-07 12:07:53.000000000 -0700
@@ -877,11 +877,16 @@
/* Switch to polling mode */
cmc_polling_enabled = 1;
- /* Unlock before smp_call_function or risk deadlock */
+ /*
+ * Unlock & enable interrupts before
+ * smp_call_function or risk deadlock
+ */
spin_unlock(&cmc_history_lock);
- smp_call_function(ia64_mca_cmc_vector_disable, NULL, 1, 1);
ia64_mca_cmc_vector_disable(NULL);
+ local_irq_enable();
+ smp_call_function(ia64_mca_cmc_vector_disable, NULL, 1, 1);
+
/*
* Corrected errors will still be corrected, but
* make sure there's a log somewhere that indicates
@@ -970,8 +975,8 @@
start_count = IA64_LOG_COUNT(SAL_INFO_TYPE_CMC);
/* Call the interrupt handler */
- local_irq_disable();
smp_call_function(ia64_mca_cmc_int_caller, NULL, 1, 1);
+ local_irq_disable();
ia64_mca_cmc_int_caller(NULL);
local_irq_enable();
@@ -1022,8 +1027,8 @@
start_count = IA64_LOG_COUNT(SAL_INFO_TYPE_CPE);
/* Call the interrupt handler */
- local_irq_disable();
smp_call_function(ia64_mca_cpe_int_caller, NULL, 1, 1);
+ local_irq_disable();
ia64_mca_cpe_int_caller(NULL);
local_irq_enable();
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2003-03-07 21:36 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2003-03-07 21:36 [Linux-ia64] Re: [PATCH] CMC polling Alex Williamson
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox