From: Chen Gong <gong.chen@linux.intel.com>
To: Chen Gong <gong.chen@linux.intel.com>
Cc: tony.luck@intel.com, bp@amd64.org, linux-next@vger.kernel.org
Subject: Re: [PATCH] x86: auto poll/interrupt mode switch for CMC to stop CMC storm
Date: Wed, 23 May 2012 10:30:46 +0800 [thread overview]
Message-ID: <4FBC4BD6.3080103@linux.intel.com> (raw)
In-Reply-To: <1337740225-26673-1-git-send-email-gong.chen@linux.intel.com>
于 2012/5/23 10:30, Chen Gong 写道:
> This idea is inspired by the IA64 implementation. It is like NAPI for
> the network stack. When there are too many CMCIs to handle, the
> interrupt can be disabled and poll mode will take over event handling.
> When no more events happen in the system, the CMC interrupt can be
> re-enabled automatically.
>
> Signed-off-by: Chen Gong <gong.chen@linux.intel.com> ---
> arch/x86/kernel/cpu/mcheck/mce.c | 83
> +++++++++++++++++++++++++++++++++++++- 1 file changed, 81
> insertions(+), 2 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/mcheck/mce.c
> b/arch/x86/kernel/cpu/mcheck/mce.c index d086a09..6334f0d 100644
> --- a/arch/x86/kernel/cpu/mcheck/mce.c +++
> b/arch/x86/kernel/cpu/mcheck/mce.c @@ -92,6 +92,7 @@ static char
> *mce_helper_argv[2] = { mce_helper, NULL };
>
> static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait);
>
> +static DEFINE_PER_CPU(struct timer_list, mce_timer); static
> DEFINE_PER_CPU(struct mce, mces_seen); static int cpu_missing;
>
> @@ -100,8 +101,28 @@ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) =
> { [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL };
>
> +#define CMC_POLL_INTERVAL (1 * 30) +#define CMC_STORM 5 +static
> DEFINE_PER_CPU(int, cmci_storm_warning); +static
> DEFINE_PER_CPU(unsigned long, first_cmci_jiffie); +static
> DEFINE_SPINLOCK(cmc_poll_lock); + +/* + * This variable tells
> whether we are in cmci-storm-happened mode. + * Start with this in
> the wrong state so we won't play w/ timers + * before the system is
> ready. + */ +static int cmci_storm_detected = 1; + static
> DEFINE_PER_CPU(struct work_struct, mce_work);
>
> +static void mce_disable_cmci(void *data); +static void
> mce_enable_ce(void *all); +static void cmc_disable_keventd(struct
> work_struct *dummy); +static void cmc_enable_keventd(struct
> work_struct *dummy); + +static DECLARE_WORK(cmc_disable_work,
> cmc_disable_keventd); +static DECLARE_WORK(cmc_enable_work,
> cmc_enable_keventd); /* * CPU/chipset specific EDAC code can
> register a notifier call here to print * MCE errors in a
> human-readable form. @@ -582,6 +603,37 @@ void
> machine_check_poll(enum mcp_flags flags, mce_banks_t *b) { struct
> mce m; int i; + unsigned long flag; + +
> spin_lock_irqsave(&cmc_poll_lock, flag); + if (cmci_storm_detected
> == 0) { + unsigned long now = jiffies; + int *count =
> &__get_cpu_var(cmci_storm_warning); + unsigned long *history =
> &__get_cpu_var(first_cmci_jiffie); + + if (time_before_eq(now,
> *history + HZ)) + (*count)++; + else { + *count = 0; +
> *history = now; + } + + if (*count >= CMC_STORM) { +
> cmci_storm_detected = 1; + /* If we're being hit with CMC
> interrupts, we won't + * ever execute the schedule_work() below.
> Need to + * disable CMC interrupts on this processor now. +
> */ + mce_disable_cmci(NULL); + if
> (!work_pending(&cmc_disable_work)) +
> schedule_work(&cmc_disable_work); +
> spin_unlock_irqrestore(&cmc_poll_lock, flag); +
> printk(KERN_WARNING "WARNING: Switching to polling "\ + "CMC
> handler; error records may be lost\n"); + goto out; + } + } +
> spin_unlock_irqrestore(&cmc_poll_lock, flag);
>
> percpu_inc(mce_poll_count);
>
> @@ -628,6 +680,7 @@ void machine_check_poll(enum mcp_flags flags,
> mce_banks_t *b) mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0); }
>
> +out: /* * Don't clear MCG_STATUS here because it's only defined
> for * exceptions. @@ -1199,6 +1252,20 @@ static void
> mce_process_work(struct work_struct *dummy) memory_failure(pfn,
> MCE_VECTOR, 0); }
>
> +static void cmc_disable_keventd(struct work_struct *dummy) +{ +
> struct timer_list *t = __this_cpu_ptr(&mce_timer); + +
> on_each_cpu(mce_disable_cmci, NULL, 0); + mod_timer(t, jiffies +
> CMC_POLL_INTERVAL * HZ); +} + +static void
> cmc_enable_keventd(struct work_struct *dummy) +{ + /* don't
> re-initiate timer */ + on_each_cpu(mce_enable_ce, NULL, 0); +} +
> #ifdef CONFIG_X86_MCE_INTEL /*** * mce_log_therm_throt_event - Logs
> the thermal throttling event to mcelog @@ -1232,12 +1299,12 @@ void
> mce_log_therm_throt_event(__u64 status) static int check_interval =
> 5 * 60; /* 5 minutes */
>
> static DEFINE_PER_CPU(int, mce_next_interval); /* in jiffies */
> -static DEFINE_PER_CPU(struct timer_list, mce_timer);
>
> static void mce_start_timer(unsigned long data) { struct timer_list
> *t = &per_cpu(mce_timer, data); int *n; + unsigned long flags;
>
> WARN_ON(smp_processor_id() != data);
>
> @@ -1253,8 +1320,19 @@ static void mce_start_timer(unsigned long
> data) n = &__get_cpu_var(mce_next_interval); if (mce_notify_irq())
> *n = max(*n/2, HZ/100); - else + else { *n = min(*n*2,
> (int)round_jiffies_relative(check_interval*HZ)); + /* if no CMC
> event, switch out of polling mode */ +
> spin_lock_irqsave(&cmc_poll_lock, flags); + if
> (cmci_storm_detected == 1) { + printk(KERN_WARNING "Returning to
> interrupt driven "\ + "CMC handler\n"); + if
> (!work_pending(&cmc_enable_work)) +
> schedule_work(&cmc_enable_work); + cmci_storm_detected = 0; + }
> + spin_unlock_irqrestore(&cmc_poll_lock, flags); + }
>
> t->expires = jiffies + *n; add_timer_on(t, smp_processor_id()); @@
> -1547,6 +1625,7 @@ void __cpuinit mcheck_cpu_init(struct
> cpuinfo_x86 *c) __mcheck_cpu_init_generic();
> __mcheck_cpu_init_vendor(c); __mcheck_cpu_init_timer(); +
> cmci_storm_detected = 0; INIT_WORK(&__get_cpu_var(mce_work),
> mce_process_work); init_irq_work(&__get_cpu_var(mce_irq_work),
> &mce_irq_work_cb); }
Oops, I sent it to the wrong LKML address. I will send this patch again.
next prev parent reply other threads:[~2012-05-23 2:30 UTC|newest]
Thread overview: 18+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-05-23 2:30 [PATCH] x86: auto poll/interrupt mode switch for CMC to stop CMC storm Chen Gong
2012-05-23 2:30 ` Chen Gong [this message]
-- strict thread matches above, loose matches on Subject: below --
2012-05-23 2:32 Chen Gong
2012-05-23 10:09 ` Thomas Gleixner
2012-05-23 17:01 ` Luck, Tony
2012-05-23 18:58 ` Thomas Gleixner
2012-05-23 20:53 ` Luck, Tony
2012-05-24 2:23 ` Chen Gong
2012-05-24 6:00 ` Borislav Petkov
2012-05-24 9:54 ` Chen Gong
2012-05-24 10:02 ` Thomas Gleixner
2012-05-24 10:01 ` Thomas Gleixner
2012-05-24 10:48 ` Borislav Petkov
2012-05-24 17:34 ` Borislav Petkov
2012-05-24 10:12 ` Thomas Gleixner
2012-05-24 16:27 ` Luck, Tony
2012-05-24 18:18 ` Thomas Gleixner
2012-05-23 10:11 ` Borislav Petkov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4FBC4BD6.3080103@linux.intel.com \
--to=gong.chen@linux.intel.com \
--cc=bp@amd64.org \
--cc=linux-next@vger.kernel.org \
--cc=tony.luck@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.