From: Chen Gong <gong.chen@linux.intel.com>
To: Chen Gong <gong.chen@linux.intel.com>
Cc: tony.luck@intel.com, bp@amd64.org, linux-next@vger.kernel.org
Subject: Re: [PATCH] x86: auto poll/interrupt mode switch for CMC to stop CMC storm
Date: Wed, 23 May 2012 10:30:46 +0800 [thread overview]
Message-ID: <4FBC4BD6.3080103@linux.intel.com> (raw)
In-Reply-To: <1337740225-26673-1-git-send-email-gong.chen@linux.intel.com>
于 2012/5/23 10:30, Chen Gong 写道:
> This idea is inspired by the IA64 implementation. It is like NAPI for
> the network stack. When there are too many CMCIs to handle, the
> interrupt can be disabled and poll mode takes over event handling.
> When no more events happen in the system, the CMC interrupt is
> re-enabled automatically.
>
> Signed-off-by: Chen Gong <gong.chen@linux.intel.com> ---
> arch/x86/kernel/cpu/mcheck/mce.c | 83
> +++++++++++++++++++++++++++++++++++++- 1 file changed, 81
> insertions(+), 2 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/mcheck/mce.c
> b/arch/x86/kernel/cpu/mcheck/mce.c index d086a09..6334f0d 100644
> --- a/arch/x86/kernel/cpu/mcheck/mce.c +++
> b/arch/x86/kernel/cpu/mcheck/mce.c @@ -92,6 +92,7 @@ static char
> *mce_helper_argv[2] = { mce_helper, NULL };
>
> static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait);
>
> +static DEFINE_PER_CPU(struct timer_list, mce_timer); static
> DEFINE_PER_CPU(struct mce, mces_seen); static int cpu_missing;
>
> @@ -100,8 +101,28 @@ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) =
> { [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL };
>
> +#define CMC_POLL_INTERVAL (1 * 30) +#define CMC_STORM 5 +static
> DEFINE_PER_CPU(int, cmci_storm_warning); +static
> DEFINE_PER_CPU(unsigned long, first_cmci_jiffie); +static
> DEFINE_SPINLOCK(cmc_poll_lock); + +/* + * This variable tells
> whether we are in cmci-storm-happened mode. + * Start with this in
> the wrong state so we won't play w/ timers + * before the system is
> ready. + */ +static int cmci_storm_detected = 1; + static
> DEFINE_PER_CPU(struct work_struct, mce_work);
>
> +static void mce_disable_cmci(void *data); +static void
> mce_enable_ce(void *all); +static void cmc_disable_keventd(struct
> work_struct *dummy); +static void cmc_enable_keventd(struct
> work_struct *dummy); + +static DECLARE_WORK(cmc_disable_work,
> cmc_disable_keventd); +static DECLARE_WORK(cmc_enable_work,
> cmc_enable_keventd); /* * CPU/chipset specific EDAC code can
> register a notifier call here to print * MCE errors in a
> human-readable form. @@ -582,6 +603,37 @@ void
> machine_check_poll(enum mcp_flags flags, mce_banks_t *b) { struct
> mce m; int i; + unsigned long flag; + +
> spin_lock_irqsave(&cmc_poll_lock, flag); + if (cmci_storm_detected
> == 0) { + unsigned long now = jiffies; + int *count =
> &__get_cpu_var(cmci_storm_warning); + unsigned long *history =
> &__get_cpu_var(first_cmci_jiffie); + + if (time_before_eq(now,
> *history + HZ)) + (*count)++; + else { + *count = 0; +
> *history = now; + } + + if (*count >= CMC_STORM) { +
> cmci_storm_detected = 1; + /* If we're being hit with CMC
> interrupts, we won't + * ever execute the schedule_work() below.
> Need to + * disable CMC interrupts on this processor now. +
> */ + mce_disable_cmci(NULL); + if
> (!work_pending(&cmc_disable_work)) +
> schedule_work(&cmc_disable_work); +
> spin_unlock_irqrestore(&cmc_poll_lock, flag); +
> printk(KERN_WARNING "WARNING: Switching to polling "\ + "CMC
> handler; error records may be lost\n"); + goto out; + } + } +
> spin_unlock_irqrestore(&cmc_poll_lock, flag);
>
> percpu_inc(mce_poll_count);
>
> @@ -628,6 +680,7 @@ void machine_check_poll(enum mcp_flags flags,
> mce_banks_t *b) mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0); }
>
> +out: /* * Don't clear MCG_STATUS here because it's only defined
> for * exceptions. @@ -1199,6 +1252,20 @@ static void
> mce_process_work(struct work_struct *dummy) memory_failure(pfn,
> MCE_VECTOR, 0); }
>
> +static void cmc_disable_keventd(struct work_struct *dummy) +{ +
> struct timer_list *t = __this_cpu_ptr(&mce_timer); + +
> on_each_cpu(mce_disable_cmci, NULL, 0); + mod_timer(t, jiffies +
> CMC_POLL_INTERVAL * HZ); +} + +static void
> cmc_enable_keventd(struct work_struct *dummy) +{ + /* don't
> re-initiate timer */ + on_each_cpu(mce_enable_ce, NULL, 0); +} +
> #ifdef CONFIG_X86_MCE_INTEL /*** * mce_log_therm_throt_event - Logs
> the thermal throttling event to mcelog @@ -1232,12 +1299,12 @@ void
> mce_log_therm_throt_event(__u64 status) static int check_interval =
> 5 * 60; /* 5 minutes */
>
> static DEFINE_PER_CPU(int, mce_next_interval); /* in jiffies */
> -static DEFINE_PER_CPU(struct timer_list, mce_timer);
>
> static void mce_start_timer(unsigned long data) { struct timer_list
> *t = &per_cpu(mce_timer, data); int *n; + unsigned long flags;
>
> WARN_ON(smp_processor_id() != data);
>
> @@ -1253,8 +1320,19 @@ static void mce_start_timer(unsigned long
> data) n = &__get_cpu_var(mce_next_interval); if (mce_notify_irq())
> *n = max(*n/2, HZ/100); - else + else { *n = min(*n*2,
> (int)round_jiffies_relative(check_interval*HZ)); + /* if no CMC
> event, switch out of polling mode */ +
> spin_lock_irqsave(&cmc_poll_lock, flags); + if
> (cmci_storm_detected == 1) { + printk(KERN_WARNING "Returning to
> interrupt driven "\ + "CMC handler\n"); + if
> (!work_pending(&cmc_enable_work)) +
> schedule_work(&cmc_enable_work); + cmci_storm_detected = 0; + }
> + spin_unlock_irqrestore(&cmc_poll_lock, flags); + }
>
> t->expires = jiffies + *n; add_timer_on(t, smp_processor_id()); @@
> -1547,6 +1625,7 @@ void __cpuinit mcheck_cpu_init(struct
> cpuinfo_x86 *c) __mcheck_cpu_init_generic();
> __mcheck_cpu_init_vendor(c); __mcheck_cpu_init_timer(); +
> cmci_storm_detected = 0; INIT_WORK(&__get_cpu_var(mce_work),
> mce_process_work); init_irq_work(&__get_cpu_var(mce_irq_work),
> &mce_irq_work_cb); }
Oops, I sent it to the wrong LKML address. I will send this patch again.
prev parent reply other threads:[~2012-05-23 2:30 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-05-23 2:30 [PATCH] x86: auto poll/interrupt mode switch for CMC to stop CMC storm Chen Gong
2012-05-23 2:30 ` Chen Gong [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4FBC4BD6.3080103@linux.intel.com \
--to=gong.chen@linux.intel.com \
--cc=bp@amd64.org \
--cc=linux-next@vger.kernel.org \
--cc=tony.luck@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).