* [Linux-ia64] [PATCH] CPE & CMC polling for 2.5
@ 2003-03-10 22:50 Alex Williamson
0 siblings, 0 replies; only message in thread
From: Alex Williamson @ 2003-03-10 22:50 UTC (permalink / raw)
To: linux-ia64
[-- Attachment #1: Type: text/plain, Size: 291 bytes --]
Here are versions of the CPE and CMC polling patches against 2.5.64. Thanks,
Alex
--
Alex Williamson Linux Development Lab
alex_williamson@hp.com Hewlett Packard
970-898-9173 Fort Collins, CO
[-- Attachment #2: linux-2.5.64-mca-updates.diff --]
[-- Type: text/plain, Size: 9215 bytes --]
--- linux-2.5.64/arch/ia64/kernel/mca.c~ 2003-03-10 15:10:48.000000000 -0700
+++ linux-2.5.64/arch/ia64/kernel/mca.c 2003-03-10 15:40:44.000000000 -0700
@@ -42,6 +42,10 @@
#include <linux/smp_lock.h>
#include <linux/bootmem.h>
#include <linux/acpi.h>
+#include <linux/timer.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/smp.h>
#include <asm/machvec.h>
#include <asm/page.h>
@@ -105,6 +109,19 @@
.name = "cpe_hndlr"
};
+#define MAX_CPE_POLL_INTERVAL (15*60*HZ) /* 15 minutes */
+#define MIN_CPE_POLL_INTERVAL (2*60*HZ) /* 2 minutes */
+#define CMC_POLL_INTERVAL (1*60*HZ) /* 1 minute */
+#define CMC_HISTORY_LENGTH 5
+
+static struct timer_list cpe_poll_timer;
+static struct timer_list cmc_poll_timer;
+/*
+ * Start with this in the wrong state so we won't play w/ timers
+ * before the system is ready.
+ */
+static int cmc_polling_enabled = 1;
+
/*
* ia64_mca_log_sal_error_record
*
@@ -152,7 +169,7 @@
void
ia64_mca_cpe_int_handler (int cpe_irq, void *arg, struct pt_regs *ptregs)
{
- IA64_MCA_DEBUG("ia64_mca_cpe_int_handler: received interrupt. vector = %#x\n", cpe_irq);
+ IA64_MCA_DEBUG("ia64_mca_cpe_int_handler: received interrupt. CPU:%d vector = %#x\n", smp_processor_id(), cpe_irq);
/* Get the CMC error record and log it */
ia64_mca_log_sal_error_record(SAL_INFO_TYPE_CPE, 0);
@@ -295,6 +312,60 @@
smp_processor_id(), ia64_get_cmcv());
}
+/*
+ * ia64_mca_cmc_vector_disable
+ *
+ * Mask the corrected machine check vector register in the processor.
+ * This function is invoked on a per-processor basis.
+ *
+ * Inputs
+ * dummy(unused)
+ *
+ * Outputs
+ * None
+ */
+void
+ia64_mca_cmc_vector_disable (void *dummy)
+{
+ cmcv_reg_t cmcv;
+
+ cmcv = (cmcv_reg_t)ia64_get_cmcv();
+
+ cmcv.cmcv_mask = 1; /* Mask/disable interrupt */
+ ia64_set_cmcv(cmcv.cmcv_regval);
+
+ IA64_MCA_DEBUG("ia64_mca_cmc_vector_disable: CPU %d corrected "
+ "machine check vector %#x disabled.\n",
+ smp_processor_id(), cmcv.cmcv_vector);
+}
+
+/*
+ * ia64_mca_cmc_vector_enable
+ *
+ * Unmask the corrected machine check vector register in the processor.
+ * This function is invoked on a per-processor basis.
+ *
+ * Inputs
+ * dummy(unused)
+ *
+ * Outputs
+ * None
+ */
+void
+ia64_mca_cmc_vector_enable (void *dummy)
+{
+ cmcv_reg_t cmcv;
+
+ cmcv = (cmcv_reg_t)ia64_get_cmcv();
+
+ cmcv.cmcv_mask = 0; /* Unmask/enable interrupt */
+ ia64_set_cmcv(cmcv.cmcv_regval);
+
+ IA64_MCA_DEBUG("ia64_mca_cmc_vector_enable: CPU %d corrected "
+ "machine check vector %#x enabled.\n",
+ smp_processor_id(), cmcv.cmcv_vector);
+}
+
#if defined(MCA_TEST)
@@ -494,9 +565,7 @@
setup_irq(irq, &mca_cpe_irqaction);
}
ia64_mca_register_cpev(cpev);
- } else
- printk(KERN_ERR
- "ia64_mca_init: Failed to get routed CPEI vector from ACPI.\n");
+ }
}
/* Initialize the areas set aside by the OS to buffer the
@@ -751,11 +820,68 @@
void
ia64_mca_cmc_int_handler(int cmc_irq, void *arg, struct pt_regs *ptregs)
{
+ static unsigned long cmc_history[CMC_HISTORY_LENGTH];
+ static int index;
+ static spinlock_t cmc_history_lock = SPIN_LOCK_UNLOCKED;
+
IA64_MCA_DEBUG("ia64_mca_cmc_int_handler: received interrupt vector = %#x on CPU %d\n",
cmc_irq, smp_processor_id());
/* Get the CMC error record and log it */
ia64_mca_log_sal_error_record(SAL_INFO_TYPE_CMC, 0);
+
+ spin_lock(&cmc_history_lock);
+ if (!cmc_polling_enabled) {
+ int i, count = 1; /* we know 1 happened now */
+ unsigned long now = jiffies;
+
+ for (i = 0; i < CMC_HISTORY_LENGTH; i++) {
+ if (now - cmc_history[i] <= HZ)
+ count++;
+ }
+
+ IA64_MCA_DEBUG(KERN_INFO "CMC threshold %d/%d\n", count, CMC_HISTORY_LENGTH);
+ if (count >= CMC_HISTORY_LENGTH) {
+ /*
+ * CMC threshold exceeded, clear the history
+ * so we have a fresh start when we return
+ */
+ for (index = 0 ; index < CMC_HISTORY_LENGTH; index++)
+ cmc_history[index] = 0;
+ index = 0;
+
+ /* Switch to polling mode */
+ cmc_polling_enabled = 1;
+
+ /*
+ * Unlock & enable interrupts before
+ * smp_call_function or risk deadlock
+ */
+ spin_unlock(&cmc_history_lock);
+ ia64_mca_cmc_vector_disable(NULL);
+
+ local_irq_enable();
+ smp_call_function(ia64_mca_cmc_vector_disable, NULL, 1, 1);
+
+ /*
+ * Corrected errors will still be corrected, but
+ * make sure there's a log somewhere that indicates
+ * something is generating more than we can handle.
+ */
+ printk(KERN_WARNING "ia64_mca_cmc_int_handler: WARNING: Switching to polling CMC handler, error records may be lost\n");
+
+
+ mod_timer(&cmc_poll_timer, jiffies + CMC_POLL_INTERVAL);
+
+ /* lock already released, get out now */
+ return;
+ } else {
+ cmc_history[index++] = now;
+ if (index == CMC_HISTORY_LENGTH)
+ index = 0;
+ }
+ }
+ spin_unlock(&cmc_history_lock);
}
/*
@@ -768,6 +894,7 @@
{
spinlock_t isl_lock;
int isl_index;
+ unsigned long isl_count;
ia64_err_rec_t *isl_log[IA64_MAX_LOGS]; /* need space to store header + error log */
} ia64_state_log_t;
@@ -784,11 +911,145 @@
#define IA64_LOG_NEXT_INDEX(it) ia64_state_log[it].isl_index
#define IA64_LOG_CURR_INDEX(it) 1 - ia64_state_log[it].isl_index
#define IA64_LOG_INDEX_INC(it) \
- ia64_state_log[it].isl_index = 1 - ia64_state_log[it].isl_index
+ {ia64_state_log[it].isl_index = 1 - ia64_state_log[it].isl_index; \
+ ia64_state_log[it].isl_count++;}
#define IA64_LOG_INDEX_DEC(it) \
ia64_state_log[it].isl_index = 1 - ia64_state_log[it].isl_index
#define IA64_LOG_NEXT_BUFFER(it) (void *)((ia64_state_log[it].isl_log[IA64_LOG_NEXT_INDEX(it)]))
#define IA64_LOG_CURR_BUFFER(it) (void *)((ia64_state_log[it].isl_log[IA64_LOG_CURR_INDEX(it)]))
+#define IA64_LOG_COUNT(it) ia64_state_log[it].isl_count
+
+/*
+ * ia64_mca_cmc_int_caller
+ *
+ * Call CMC interrupt handler, only purpose is to have a
+ * smp_call_function callable entry.
+ *
+ * Inputs : dummy(unused)
+ * Outputs : None
+ * */
+static void
+ia64_mca_cmc_int_caller(void *dummy)
+{
+ ia64_mca_cmc_int_handler(0, NULL, NULL);
+}
+
+/*
+ * ia64_mca_cmc_poll
+ *
+ * Poll for Corrected Machine Checks (CMCs)
+ *
+ * Inputs : dummy(unused)
+ * Outputs : None
+ *
+ */
+static void
+ia64_mca_cmc_poll (unsigned long dummy)
+{
+ int start_count;
+
+ start_count = IA64_LOG_COUNT(SAL_INFO_TYPE_CMC);
+
+ /* Call the interrupt handler */
+ smp_call_function(ia64_mca_cmc_int_caller, NULL, 1, 1);
+ local_irq_disable();
+ ia64_mca_cmc_int_caller(NULL);
+ local_irq_enable();
+
+ /*
+ * If no log recored, switch out of polling mode.
+ */
+ if (start_count == IA64_LOG_COUNT(SAL_INFO_TYPE_CMC)) {
+ printk(KERN_WARNING "ia64_mca_cmc_poll: Returning to interrupt driven CMC handler\n");
+ cmc_polling_enabled = 0;
+ smp_call_function(ia64_mca_cmc_vector_enable, NULL, 1, 1);
+ ia64_mca_cmc_vector_enable(NULL);
+ } else {
+ mod_timer(&cmc_poll_timer, jiffies + CMC_POLL_INTERVAL);
+ }
+}
+
+/*
+ * ia64_mca_cpe_int_caller
+ *
+ * Call CPE interrupt handler, only purpose is to have a
+ * smp_call_function callable entry.
+ *
+ * Inputs : dummy(unused)
+ * Outputs : None
+ * */
+static void
+ia64_mca_cpe_int_caller(void *dummy)
+{
+ ia64_mca_cpe_int_handler(0, NULL, NULL);
+}
+
+/*
+ * ia64_mca_cpe_poll
+ *
+ * Poll for Corrected Platform Errors (CPEs), dynamically adjust
+ * polling interval based on occurance of an event.
+ *
+ * Inputs : dummy(unused)
+ * Outputs : None
+ *
+ */
+static void
+ia64_mca_cpe_poll (unsigned long dummy)
+{
+ int start_count;
+ static int poll_time = MAX_CPE_POLL_INTERVAL;
+
+ start_count = IA64_LOG_COUNT(SAL_INFO_TYPE_CPE);
+
+ /* Call the interrupt handler */
+ smp_call_function(ia64_mca_cpe_int_caller, NULL, 1, 1);
+ local_irq_disable();
+ ia64_mca_cpe_int_caller(NULL);
+ local_irq_enable();
+
+ /*
+ * If a log was recorded, increase our polling frequency,
+ * otherwise, backoff.
+ */
+ if (start_count != IA64_LOG_COUNT(SAL_INFO_TYPE_CPE)) {
+ poll_time = max(MIN_CPE_POLL_INTERVAL, poll_time/2);
+ } else {
+ poll_time = min(MAX_CPE_POLL_INTERVAL, poll_time * 2);
+ }
+ mod_timer(&cpe_poll_timer, jiffies + poll_time);
+}
+
+/*
+ * ia64_mca_late_init
+ *
+ * Opportunity to setup things that require initialization later
+ * than ia64_mca_init. Setup a timer to poll for CPEs if the
+ * platform doesn't support an interrupt driven mechanism.
+ *
+ * Inputs : None
+ * Outputs : Status
+ */
+static int __init
+ia64_mca_late_init(void)
+{
+ init_timer(&cmc_poll_timer);
+ cmc_poll_timer.function = ia64_mca_cmc_poll;
+
+ /* Reset to the correct state */
+ cmc_polling_enabled = 0;
+
+ init_timer(&cpe_poll_timer);
+ cpe_poll_timer.function = ia64_mca_cpe_poll;
+
+ /* If platform doesn't support CPEI, get the timer going. */
+ if (acpi_request_vector(ACPI_INTERRUPT_CPEI) < 0)
+ ia64_mca_cpe_poll(0UL);
+
+ return 0;
+}
+
+device_initcall(ia64_mca_late_init);
/*
* C portion of the OS INIT handler
@@ -949,7 +1210,6 @@
return total_len;
} else {
IA64_LOG_UNLOCK(sal_info_type);
- prfunc("ia64_log_get: No SAL error record available for type %d\n", sal_info_type);
return 0;
}
}
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2003-03-10 22:50 UTC | newest]
Thread overview: (only message) (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2003-03-10 22:50 [Linux-ia64] [PATCH] CPE & CMC polling for 2.5 Alex Williamson
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox