From: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
To: Huang Ying <ying.huang@intel.com>
Cc: Ingo Molnar <mingo@elte.hu>, "H. Peter Anvin" <hpa@zytor.com>,
Andi Kleen <ak@linux.intel.com>,
"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>
Subject: Re: [BUGFIX -v7] x86, MCE: Fix bugs and issues of MCE log ring buffer
Date: Mon, 05 Oct 2009 16:07:15 +0900 [thread overview]
Message-ID: <4AC99B23.9090701@jp.fujitsu.com> (raw)
In-Reply-To: <4AC990E1.7030708@jp.fujitsu.com>
This is the diff between Huang's original change and the result of changes
by my patch set.
I'll going to explain what I changed from the original.
> diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
> index 2d5c42a..4b5ef3c 100644
> --- a/arch/x86/include/asm/mce.h
> +++ b/arch/x86/include/asm/mce.h
> @@ -52,7 +52,7 @@
> #define MCE_INJ_NMI_BROADCAST (1 << 2) /* do NMI broadcasting */
> #define MCE_INJ_EXCEPTION (1 << 3) /* raise as exception */
>
> -/* Fields are zero when not available */
> +/* MCE log entry. Fields are zero when not available. */
> struct mce {
> __u64 status;
> __u64 misc;
> @@ -63,12 +63,12 @@ struct mce {
> __u64 time; /* wall time_t when error was detected */
> __u8 cpuvendor; /* cpu vendor as encoded in system.h */
> __u8 inject_flags; /* software inject flags */
> - __u16 pad;
> + __u16 pad;
> __u32 cpuid; /* CPUID 1 EAX */
> - __u8 cs; /* code segment */
> + __u8 cs; /* code segment */
> __u8 bank; /* machine check bank */
> __u8 cpu; /* cpu number; obsolete; use extcpu now */
> - __u8 finished; /* entry is valid */
> + __u8 finished; /* 1 if write to entry is finished & entry is valid */
> __u32 extcpu; /* linux cpu number that detected the error */
> __u32 socketid; /* CPU socket ID */
> __u32 apicid; /* CPU initial apic ID */
> @@ -76,10 +76,9 @@ struct mce {
> };
>
> /*
> - * This structure contains all data related to the MCE log. Also
> - * carries a signature to make it easier to find from external
> - * debugging tools. Each entry is only valid when its finished flag
> - * is set.
> + * This structure contains all data related to the MCE log. Also carries
> + * a signature to make it easier to find from external debugging tools.
> + * Each entry is only valid when its finished flag is set.
> */
Small clean up.
>
> #define MCE_LOG_LEN 32
> @@ -93,12 +92,18 @@ static inline int mce_log_index(int n)
> struct mce_log_cpu;
>
> struct mce_log {
> - char signature[12]; /* "MACHINECHEC2" */
> - unsigned len; /* = MCE_LOG_LEN */
> - unsigned flags;
> - unsigned recordlen; /* length of struct mce */
> - unsigned nr_mcelog_cpus;
> + char signature[12]; /* "MACHINECHEC2" */
> +
> + /* points the table of per-CPU buffers */
> struct mce_log_cpu **mcelog_cpus;
> + unsigned int nr_mcelog_cpus; /* = num_possible_cpus() */
> +
> + /* spec of per-CPU buffer */
> + unsigned int header_len; /* offset of array "entry" */
> + unsigned int nr_record; /* array size (= MCE_LOG_LEN) */
> + unsigned int record_len; /* length of struct mce */
> +
> + unsigned flags;
> };
Add a member header_len, and renamed "len" to "nr_record" which is easier
to understand. Please refer the description of patch 6/10.
>
> #define MCE_OVERFLOW 0 /* bit 0 in flags means overflow */
> diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
> index 5db3f5b..fad3daa 100644
> --- a/arch/x86/kernel/cpu/mcheck/mce.c
> +++ b/arch/x86/kernel/cpu/mcheck/mce.c
> @@ -122,54 +122,53 @@ EXPORT_PER_CPU_SYMBOL_GPL(mce_fake_banks);
> * separate MCEs from kernel messages to avoid bogus bug reports.
> */
>
> -static struct mce_log mcelog = {
> - .signature = MCE_LOG_SIGNATURE,
> - .len = MCE_LOG_LEN,
> - .recordlen = sizeof(struct mce),
> -};
> -
> struct mce_log_cpu {
> int head;
> int tail;
> - unsigned long flags;
> struct mce entry[MCE_LOG_LEN];
> };
Removed "flags" from per-CPU structure since mce_ioctl only cares global flags
in struct mce_log.
>
> DEFINE_PER_CPU(struct mce_log_cpu, mce_log_cpus);
>
> +static struct mce_log mcelog = {
> + .signature = MCE_LOG_SIGNATURE,
> + .header_len = offsetof(struct mce_log_cpu, entry),
> + .nr_record = MCE_LOG_LEN,
> + .record_len = sizeof(struct mce),
> +};
> +
> void mce_log(struct mce *mce)
> {
> struct mce_log_cpu *mcelog_cpu = &__get_cpu_var(mce_log_cpus);
> int head, ihead, tail, next;
>
> + /* mce->finished must be set to 0 before written to buffer */
> mce->finished = 0;
> - /*
> - * mce->finished must be set to 0 before written to ring
> - * buffer
> - */
> smp_wmb();
> +
> do {
> head = mcelog_cpu->head;
> tail = mcelog_cpu->tail;
> ihead = mce_log_index(head);
> +
> /*
> * When the buffer fills up discard new entries.
> * Assume that the earlier errors are the more
> * interesting.
> */
> if (ihead == mce_log_index(tail) && head != tail) {
> - set_bit(MCE_OVERFLOW, &mcelog_cpu->flags);
> + set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog.flags);
Use global flags which is cared by mce_ioctl.
> return;
> }
> next = head == MCE_LOG_LIMIT ? 0 : head + 1;
> } while (cmpxchg_local(&mcelog_cpu->head, head, next) != head);
> +
> memcpy(mcelog_cpu->entry + ihead, mce, sizeof(struct mce));
> - /*
> - * ".finished" of MCE record in ring buffer must be set after
> - * copy
> - */
> +
> + /* ".finished" of MCE record in buffer must be set after copy */
> smp_wmb();
> mcelog_cpu->entry[ihead].finished = 1;
> +
> /* bit 0 of notify_user should be set after finished be set */
> smp_wmb();
> mce->finished = 1;
Changes from here to ....
> @@ -1195,19 +1194,40 @@ int mce_notify_irq(void)
> }
> EXPORT_SYMBOL_GPL(mce_notify_irq);
>
> -static int mce_banks_init(void)
> +static __cpuinit int mce_banks_init(void)
> {
> int i;
>
> mce_banks = kzalloc(banks * sizeof(struct mce_bank), GFP_KERNEL);
> if (!mce_banks)
> return -ENOMEM;
> +
> for (i = 0; i < banks; i++) {
> struct mce_bank *b = &mce_banks[i];
>
> b->ctl = -1ULL;
> b->init = 1;
> }
> +
> + return 0;
> +}
> +
> +/*
> + * Initialize MCE per-CPU log buffer
> + */
> +static __cpuinit int mce_log_init(void)
> +{
> + int cpu;
> +
> + mcelog.nr_mcelog_cpus = num_possible_cpus();
> + mcelog.mcelog_cpus = kzalloc(sizeof(void *) * num_possible_cpus(),
> + GFP_KERNEL);
> + if (!mcelog.mcelog_cpus)
> + return -ENOMEM;
> +
> + for_each_possible_cpu(cpu)
> + mcelog.mcelog_cpus[cpu] = &per_cpu(mce_log_cpus, cpu);
> +
> return 0;
> }
>
> @@ -1234,6 +1254,7 @@ static int __cpuinit mce_cap_init(void)
> /* Don't support asymmetric configurations today */
> WARN_ON(banks != 0 && b != banks);
> banks = b;
> +
> if (!mce_banks) {
> int err = mce_banks_init();
>
> @@ -1241,6 +1262,13 @@ static int __cpuinit mce_cap_init(void)
> return err;
> }
>
> + if (!mcelog.mcelog_cpus) {
> + int err = mce_log_init();
> +
> + if (err)
> + return err;
> + }
> +
> /* Use accurate RIP reporting if available. */
> if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9)
> rip_msr = MSR_IA32_MCG_EIP;
> @@ -1251,22 +1279,6 @@ static int __cpuinit mce_cap_init(void)
> return 0;
> }
>
> -/*
> - * Initialize MCE per-CPU log buffer
> - */
> -static __cpuinit void mce_log_init(void)
> -{
> - int cpu;
> -
> - if (mcelog.mcelog_cpus)
> - return;
> - mcelog.nr_mcelog_cpus = num_possible_cpus();
> - mcelog.mcelog_cpus = kzalloc(sizeof(void *) * num_possible_cpus(),
> - GFP_KERNEL);
> - for_each_possible_cpu(cpu)
> - mcelog.mcelog_cpus[cpu] = &per_cpu(mce_log_cpus, cpu);
> -}
> -
> static void mce_init(void)
> {
> mce_banks_t all_banks;
> @@ -1437,7 +1449,6 @@ void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
> mce_disabled = 1;
> return;
> }
> - mce_log_init();
>
> machine_check_vector = do_machine_check;
>
... here are what done in patch 10/10.
> @@ -1486,15 +1497,14 @@ static int mce_release(struct inode *inode, struct file *file)
> return 0;
> }
>
> -static ssize_t mce_read_cpu(struct mce_log_cpu *mcelog_cpu,
> - char __user *inubuf, size_t usize)
> +static ssize_t mce_read_cpu(int cpu, char __user *inubuf, size_t usize)
> {
> + struct mce_log_cpu *mcelog_cpu = &per_cpu(mce_log_cpus, cpu);
Changed 1st arg to cpu number.
> char __user *ubuf = inubuf;
> int head, tail, pos, i, err = 0;
>
> head = mcelog_cpu->head;
> tail = mcelog_cpu->tail;
> -
> if (head == tail)
> return 0;
>
> @@ -1503,13 +1513,14 @@ static ssize_t mce_read_cpu(struct mce_log_cpu *mcelog_cpu,
> i = mce_log_index(pos);
> if (!mcelog_cpu->entry[i].finished) {
> int timeout = WRITER_TIMEOUT_NS;
> +
> while (!mcelog_cpu->entry[i].finished) {
> if (timeout-- <= 0) {
> memset(mcelog_cpu->entry + i, 0,
> sizeof(struct mce));
> head = mcelog_cpu->head;
> printk(KERN_WARNING "mcelog: timeout "
> - "waiting for writer to finish!\n");
> + "waiting for writer to finish!\n");
> goto timeout;
> }
> ndelay(1);
> @@ -1538,11 +1549,6 @@ timeout:
> return err ? -EFAULT : ubuf - inubuf;
> }
>
> -static int mce_empty_cpu(struct mce_log_cpu *mcelog_cpu)
> -{
> - return mcelog_cpu->head == mcelog_cpu->tail;
> -}
> -
> static int mce_empty(void)
> {
> int cpu;
> @@ -1550,33 +1556,34 @@ static int mce_empty(void)
>
> for_each_possible_cpu(cpu) {
> mcelog_cpu = &per_cpu(mce_log_cpus, cpu);
> - if (!mce_empty_cpu(mcelog_cpu))
> + if (mcelog_cpu->head != mcelog_cpu->tail)
Inlined. Or it would be better to have static inlines in header file.
> return 0;
> }
> return 1;
> }
>
> +static DEFINE_MUTEX(mce_read_mutex);
> +
> static ssize_t mce_read(struct file *filp, char __user *inubuf, size_t usize,
> loff_t *off)
> {
> char __user *ubuf = inubuf;
> - struct mce_log_cpu *mcelog_cpu;
> - int cpu, new_mce, err = 0;
> - static DEFINE_MUTEX(mce_read_mutex);
> + int cpu, err = 0;
> +
> + /* Only supports full reads right now */
> + if (*off != 0 || usize < sizeof(struct mce))
> + return -EINVAL;
Picked up what implicitly dropped.
>
> mutex_lock(&mce_read_mutex);
> - do {
> - new_mce = 0;
> +
> + while (!mce_empty()) {
> for_each_possible_cpu(cpu) {
> if (usize < sizeof(struct mce))
> goto out;
> - mcelog_cpu = &per_cpu(mce_log_cpus, cpu);
> - err = mce_read_cpu(mcelog_cpu, ubuf,
> - sizeof(struct mce));
> + err = mce_read_cpu(cpu, ubuf, sizeof(struct mce));
> if (err > 0) {
> ubuf += sizeof(struct mce);
> usize -= sizeof(struct mce);
> - new_mce = 1;
> err = 0;
> } else if (err < 0)
> goto out;
> @@ -1586,9 +1593,10 @@ static ssize_t mce_read(struct file *filp, char __user *inubuf, size_t usize,
> cond_resched();
> mutex_lock(&mce_read_mutex);
> }
> - } while (new_mce || !mce_empty());
> + }
I could not catch the intent of "new_mce," so replaced do-while by
simple while statement.
> out:
> mutex_unlock(&mce_read_mutex);
> +
> return err ? err : ubuf - inubuf;
> }
>
That's all.
Thanks,
H.Seto
next prev parent reply other threads:[~2009-10-05 7:08 UTC|newest]
Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-09-18 10:20 [BUGFIX -v7] x86, MCE: Fix bugs and issues of MCE log ring buffer Huang Ying
2009-09-18 11:09 ` Ingo Molnar
2009-09-21 5:37 ` Huang Ying
2009-09-22 13:39 ` [PATCH] x86: mce: New MCE logging design Ingo Molnar
2009-10-05 6:23 ` [BUGFIX -v7] x86, MCE: Fix bugs and issues of MCE log ring buffer Hidetoshi Seto
2009-10-05 6:33 ` [PATCH 01/10] x86, mce: remove tsc handling from mce_read Hidetoshi Seto
2009-10-05 6:34 ` [PATCH 02/10] x86, mce: mce_read can check args without mutex Hidetoshi Seto
2009-10-05 6:35 ` [PATCH 03/10] x86, mce: change writer timeout in mce_read Hidetoshi Seto
2009-10-05 6:36 ` [PATCH 04/10] x86, mce: use do-while in mce_log Hidetoshi Seto
2009-10-05 6:37 ` [PATCH 05/10] x86, mce: make mce_log buffer to per-CPU, prep Hidetoshi Seto
2009-10-05 6:38 ` [PATCH 06/10] x86, mce: make mce_log buffer to per-CPU Hidetoshi Seto
2009-10-05 7:06 ` Andi Kleen
2009-10-05 7:50 ` Hidetoshi Seto
2009-10-09 1:45 ` Huang Ying
2009-10-09 5:34 ` Hidetoshi Seto
2009-10-05 6:40 ` [PATCH 07/10] x86, mce: remove for-loop in mce_log Hidetoshi Seto
2009-10-05 6:41 ` [PATCH 08/10] x86, mce: change barriers " Hidetoshi Seto
2009-10-05 6:42 ` [PATCH 09/10] x86, mce: make mce_log buffer to ring buffer Hidetoshi Seto
2009-10-05 6:44 ` [PATCH 10/10] x86, mce: move mce_log_init() into mce_cap_init() Hidetoshi Seto
2009-10-05 7:07 ` Hidetoshi Seto [this message]
2009-10-05 8:51 ` [BUGFIX -v7] x86, MCE: Fix bugs and issues of MCE log ring buffer Frédéric Weisbecker
2009-10-05 15:16 ` Andi Kleen
2009-10-06 5:46 ` Hidetoshi Seto
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4AC99B23.9090701@jp.fujitsu.com \
--to=seto.hidetoshi@jp.fujitsu.com \
--cc=ak@linux.intel.com \
--cc=hpa@zytor.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@elte.hu \
--cc=ying.huang@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.