linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
To: Huang Ying <ying.huang@intel.com>
Cc: Ingo Molnar <mingo@elte.hu>, "H. Peter Anvin" <hpa@zytor.com>,
	Andi Kleen <ak@linux.intel.com>,
	"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>
Subject: Re: [BUGFIX -v7] x86, MCE: Fix bugs and issues of MCE log ring buffer
Date: Mon, 05 Oct 2009 16:07:15 +0900	[thread overview]
Message-ID: <4AC99B23.9090701@jp.fujitsu.com> (raw)
In-Reply-To: <4AC990E1.7030708@jp.fujitsu.com>

This is the diff between Huang's original change and the result of changes
by my patch set.

I'll going to explain what I changed from the original.

> diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
> index 2d5c42a..4b5ef3c 100644
> --- a/arch/x86/include/asm/mce.h
> +++ b/arch/x86/include/asm/mce.h
> @@ -52,7 +52,7 @@
>  #define MCE_INJ_NMI_BROADCAST	(1 << 2)	/* do NMI broadcasting */
>  #define MCE_INJ_EXCEPTION	(1 << 3)	/* raise as exception */
>  
> -/* Fields are zero when not available */
> +/* MCE log entry. Fields are zero when not available. */
>  struct mce {
>  	__u64 status;
>  	__u64 misc;
> @@ -63,12 +63,12 @@ struct mce {
>  	__u64 time;	/* wall time_t when error was detected */
>  	__u8  cpuvendor;	/* cpu vendor as encoded in system.h */
>  	__u8  inject_flags;	/* software inject flags */
> -	__u16  pad;
> +	__u16 pad;
>  	__u32 cpuid;	/* CPUID 1 EAX */
> -	__u8  cs;		/* code segment */
> +	__u8  cs;	/* code segment */
>  	__u8  bank;	/* machine check bank */
>  	__u8  cpu;	/* cpu number; obsolete; use extcpu now */
> -	__u8  finished;   /* entry is valid */
> +	__u8  finished;	/* 1 if write to entry is finished & entry is valid */
>  	__u32 extcpu;	/* linux cpu number that detected the error */
>  	__u32 socketid;	/* CPU socket ID */
>  	__u32 apicid;	/* CPU initial apic ID */
> @@ -76,10 +76,9 @@ struct mce {
>  };
>  
>  /*
> - * This structure contains all data related to the MCE log.  Also
> - * carries a signature to make it easier to find from external
> - * debugging tools.  Each entry is only valid when its finished flag
> - * is set.
> + * This structure contains all data related to the MCE log.  Also carries
> + * a signature to make it easier to find from external debugging tools.
> + * Each entry is only valid when its finished flag is set.
>   */

Small clean up.

>  
>  #define MCE_LOG_LEN		32
> @@ -93,12 +92,18 @@ static inline int mce_log_index(int n)
>  struct mce_log_cpu;
>  
>  struct mce_log {
> -	char signature[12]; /* "MACHINECHEC2" */
> -	unsigned len;	    /* = MCE_LOG_LEN */
> -	unsigned flags;
> -	unsigned recordlen;	/* length of struct mce */
> -	unsigned nr_mcelog_cpus;
> +	char signature[12];		/* "MACHINECHEC2" */
> +
> +	/* points the table of per-CPU buffers */
>  	struct mce_log_cpu **mcelog_cpus;
> +	unsigned int nr_mcelog_cpus;	/* = num_possible_cpus() */
> +
> +	/* spec of per-CPU buffer */
> +	unsigned int header_len; 	/* offset of array "entry" */
> +	unsigned int nr_record;		/* array size (= MCE_LOG_LEN) */
> +	unsigned int record_len;	/* length of struct mce */
> +
> +	unsigned flags;
>  };

Add a member header_len, and renamed "len" to "nr_record" which is easier
to understand.  Please refer the description of patch 6/10.

>  
>  #define MCE_OVERFLOW 0		/* bit 0 in flags means overflow */
> diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
> index 5db3f5b..fad3daa 100644
> --- a/arch/x86/kernel/cpu/mcheck/mce.c
> +++ b/arch/x86/kernel/cpu/mcheck/mce.c
> @@ -122,54 +122,53 @@ EXPORT_PER_CPU_SYMBOL_GPL(mce_fake_banks);
>   * separate MCEs from kernel messages to avoid bogus bug reports.
>   */
>  
> -static struct mce_log mcelog = {
> -	.signature	= MCE_LOG_SIGNATURE,
> -	.len		= MCE_LOG_LEN,
> -	.recordlen	= sizeof(struct mce),
> -};
> -
>  struct mce_log_cpu {
>  	int head;
>  	int tail;
> -	unsigned long flags;
>  	struct mce entry[MCE_LOG_LEN];
>  };

Removed "flags" from per-CPU structure since mce_ioctl only cares global flags
in struct mce_log.

>  
>  DEFINE_PER_CPU(struct mce_log_cpu, mce_log_cpus);
>  
> +static struct mce_log mcelog = {
> +	.signature	= MCE_LOG_SIGNATURE,
> +	.header_len	= offsetof(struct mce_log_cpu, entry),
> +	.nr_record	= MCE_LOG_LEN,
> +	.record_len	= sizeof(struct mce),
> +};
> +
>  void mce_log(struct mce *mce)
>  {
>  	struct mce_log_cpu *mcelog_cpu = &__get_cpu_var(mce_log_cpus);
>  	int head, ihead, tail, next;
>  
> +	/* mce->finished must be set to 0 before written to buffer */
>  	mce->finished = 0;
> -	/*
> -	 * mce->finished must be set to 0 before written to ring
> -	 * buffer
> -	 */
>  	smp_wmb();
> +
>  	do {
>  		head = mcelog_cpu->head;
>  		tail = mcelog_cpu->tail;
>  		ihead = mce_log_index(head);
> +
>  		/*
>  		 * When the buffer fills up discard new entries.
>  		 * Assume that the earlier errors are the more
>  		 * interesting.
>  		 */
>  		if (ihead == mce_log_index(tail) && head != tail) {
> -			set_bit(MCE_OVERFLOW, &mcelog_cpu->flags);
> +			set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog.flags);

Use global flags which is cared by mce_ioctl.

>  			return;
>  		}
>  		next = head == MCE_LOG_LIMIT ? 0 : head + 1;
>  	} while (cmpxchg_local(&mcelog_cpu->head, head, next) != head);
> +
>  	memcpy(mcelog_cpu->entry + ihead, mce, sizeof(struct mce));
> -	/*
> -	 * ".finished" of MCE record in ring buffer must be set after
> -	 * copy
> -	 */
> +
> +	/* ".finished" of MCE record in buffer must be set after copy */
>  	smp_wmb();
>  	mcelog_cpu->entry[ihead].finished = 1;
> +
>  	/* bit 0 of notify_user should be set after finished be set */
>  	smp_wmb();
>  	mce->finished = 1;


Changes from here to ....

> @@ -1195,19 +1194,40 @@ int mce_notify_irq(void)
>  }
>  EXPORT_SYMBOL_GPL(mce_notify_irq);
>  
> -static int mce_banks_init(void)
> +static __cpuinit int mce_banks_init(void)
>  {
>  	int i;
>  
>  	mce_banks = kzalloc(banks * sizeof(struct mce_bank), GFP_KERNEL);
>  	if (!mce_banks)
>  		return -ENOMEM;
> +
>  	for (i = 0; i < banks; i++) {
>  		struct mce_bank *b = &mce_banks[i];
>  
>  		b->ctl = -1ULL;
>  		b->init = 1;
>  	}
> +
> +	return 0;
> +}
> +
> +/*
> + * Initialize MCE per-CPU log buffer
> + */
> +static __cpuinit int mce_log_init(void)
> +{
> +	int cpu;
> +
> +	mcelog.nr_mcelog_cpus = num_possible_cpus();
> +	mcelog.mcelog_cpus = kzalloc(sizeof(void *) * num_possible_cpus(),
> +				     GFP_KERNEL);
> +	if (!mcelog.mcelog_cpus)
> +		return -ENOMEM;
> +
> +	for_each_possible_cpu(cpu)
> +		mcelog.mcelog_cpus[cpu] = &per_cpu(mce_log_cpus, cpu);
> +
>  	return 0;
>  }
>  
> @@ -1234,6 +1254,7 @@ static int __cpuinit mce_cap_init(void)
>  	/* Don't support asymmetric configurations today */
>  	WARN_ON(banks != 0 && b != banks);
>  	banks = b;
> +
>  	if (!mce_banks) {
>  		int err = mce_banks_init();
>  
> @@ -1241,6 +1262,13 @@ static int __cpuinit mce_cap_init(void)
>  			return err;
>  	}
>  
> +	if (!mcelog.mcelog_cpus) {
> +		int err = mce_log_init();
> +
> +		if (err)
> +			return err;
> +	}
> +
>  	/* Use accurate RIP reporting if available. */
>  	if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9)
>  		rip_msr = MSR_IA32_MCG_EIP;
> @@ -1251,22 +1279,6 @@ static int __cpuinit mce_cap_init(void)
>  	return 0;
>  }
>  
> -/*
> - * Initialize MCE per-CPU log buffer
> - */
> -static __cpuinit void mce_log_init(void)
> -{
> -	int cpu;
> -
> -	if (mcelog.mcelog_cpus)
> -		return;
> -	mcelog.nr_mcelog_cpus = num_possible_cpus();
> -	mcelog.mcelog_cpus = kzalloc(sizeof(void *) * num_possible_cpus(),
> -				     GFP_KERNEL);
> -	for_each_possible_cpu(cpu)
> -		mcelog.mcelog_cpus[cpu] = &per_cpu(mce_log_cpus, cpu);
> -}
> -
>  static void mce_init(void)
>  {
>  	mce_banks_t all_banks;
> @@ -1437,7 +1449,6 @@ void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
>  		mce_disabled = 1;
>  		return;
>  	}
> -	mce_log_init();
>  
>  	machine_check_vector = do_machine_check;
>  

... here are what done in patch 10/10.

> @@ -1486,15 +1497,14 @@ static int mce_release(struct inode *inode, struct file *file)
>  	return 0;
>  }
>  
> -static ssize_t mce_read_cpu(struct mce_log_cpu *mcelog_cpu,
> -			    char __user *inubuf, size_t usize)
> +static ssize_t mce_read_cpu(int cpu, char __user *inubuf, size_t usize)
>  {
> +	struct mce_log_cpu *mcelog_cpu = &per_cpu(mce_log_cpus, cpu);

Changed 1st arg to cpu number.

>  	char __user *ubuf = inubuf;
>  	int head, tail, pos, i, err = 0;
>  
>  	head = mcelog_cpu->head;
>  	tail = mcelog_cpu->tail;
> -
>  	if (head == tail)
>  		return 0;
>  
> @@ -1503,13 +1513,14 @@ static ssize_t mce_read_cpu(struct mce_log_cpu *mcelog_cpu,
>  		i = mce_log_index(pos);
>  		if (!mcelog_cpu->entry[i].finished) {
>  			int timeout = WRITER_TIMEOUT_NS;
> +
>  			while (!mcelog_cpu->entry[i].finished) {
>  				if (timeout-- <= 0) {
>  					memset(mcelog_cpu->entry + i, 0,
>  					       sizeof(struct mce));
>  					head = mcelog_cpu->head;
>  					printk(KERN_WARNING "mcelog: timeout "
> -					       "waiting for writer to finish!\n");
> +					     "waiting for writer to finish!\n");
>  					goto timeout;
>  				}
>  				ndelay(1);
> @@ -1538,11 +1549,6 @@ timeout:
>  	return err ? -EFAULT : ubuf - inubuf;
>  }
>  
> -static int mce_empty_cpu(struct mce_log_cpu *mcelog_cpu)
> -{
> -	return mcelog_cpu->head == mcelog_cpu->tail;
> -}
> -
>  static int mce_empty(void)
>  {
>  	int cpu;
> @@ -1550,33 +1556,34 @@ static int mce_empty(void)
>  
>  	for_each_possible_cpu(cpu) {
>  		mcelog_cpu = &per_cpu(mce_log_cpus, cpu);
> -		if (!mce_empty_cpu(mcelog_cpu))
> +		if (mcelog_cpu->head != mcelog_cpu->tail)

Inlined.  Or it would be better to have static inlines in header file.

>  			return 0;
>  	}
>  	return 1;
>  }
>  
> +static DEFINE_MUTEX(mce_read_mutex);
> +
>  static ssize_t mce_read(struct file *filp, char __user *inubuf, size_t usize,
>  			loff_t *off)
>  {
>  	char __user *ubuf = inubuf;
> -	struct mce_log_cpu *mcelog_cpu;
> -	int cpu, new_mce, err = 0;
> -	static DEFINE_MUTEX(mce_read_mutex);
> +	int cpu, err = 0;
> +
> +	/* Only supports full reads right now */
> +	if (*off != 0 || usize < sizeof(struct mce))
> +		return -EINVAL;

Picked up what implicitly dropped.

>  
>  	mutex_lock(&mce_read_mutex);
> -	do {
> -		new_mce = 0;
> +
> +	while (!mce_empty()) {
>  		for_each_possible_cpu(cpu) {
>  			if (usize < sizeof(struct mce))
>  				goto out;
> -			mcelog_cpu = &per_cpu(mce_log_cpus, cpu);
> -			err = mce_read_cpu(mcelog_cpu, ubuf,
> -					   sizeof(struct mce));
> +			err = mce_read_cpu(cpu, ubuf, sizeof(struct mce));
>  			if (err > 0) {
>  				ubuf += sizeof(struct mce);
>  				usize -= sizeof(struct mce);
> -				new_mce = 1;
>  				err = 0;
>  			} else if (err < 0)
>  				goto out;
> @@ -1586,9 +1593,10 @@ static ssize_t mce_read(struct file *filp, char __user *inubuf, size_t usize,
>  			cond_resched();
>  			mutex_lock(&mce_read_mutex);
>  		}
> -	} while (new_mce || !mce_empty());
> +	}

I could not catch the intent of "new_mce," so replaced do-while by
simple while statement.

>  out:
>  	mutex_unlock(&mce_read_mutex);
> +
>  	return err ? err : ubuf - inubuf;
>  }
>  

That's all.

Thanks,
H.Seto


  parent reply	other threads:[~2009-10-05  7:08 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-09-18 10:20 [BUGFIX -v7] x86, MCE: Fix bugs and issues of MCE log ring buffer Huang Ying
2009-09-18 11:09 ` Ingo Molnar
2009-09-21  5:37   ` Huang Ying
2009-09-22 13:39     ` [PATCH] x86: mce: New MCE logging design Ingo Molnar
2009-10-05  6:23 ` [BUGFIX -v7] x86, MCE: Fix bugs and issues of MCE log ring buffer Hidetoshi Seto
2009-10-05  6:33   ` [PATCH 01/10] x86, mce: remove tsc handling from mce_read Hidetoshi Seto
2009-10-05  6:34   ` [PATCH 02/10] x86, mce: mce_read can check args without mutex Hidetoshi Seto
2009-10-05  6:35   ` [PATCH 03/10] x86, mce: change writer timeout in mce_read Hidetoshi Seto
2009-10-05  6:36   ` [PATCH 04/10] x86, mce: use do-while in mce_log Hidetoshi Seto
2009-10-05  6:37   ` [PATCH 05/10] x86, mce: make mce_log buffer to per-CPU, prep Hidetoshi Seto
2009-10-05  6:38   ` [PATCH 06/10] x86, mce: make mce_log buffer to per-CPU Hidetoshi Seto
2009-10-05  7:06     ` Andi Kleen
2009-10-05  7:50       ` Hidetoshi Seto
2009-10-09  1:45         ` Huang Ying
2009-10-09  5:34           ` Hidetoshi Seto
2009-10-05  6:40   ` [PATCH 07/10] x86, mce: remove for-loop in mce_log Hidetoshi Seto
2009-10-05  6:41   ` [PATCH 08/10] x86, mce: change barriers " Hidetoshi Seto
2009-10-05  6:42   ` [PATCH 09/10] x86, mce: make mce_log buffer to ring buffer Hidetoshi Seto
2009-10-05  6:44   ` [PATCH 10/10] x86, mce: move mce_log_init() into mce_cap_init() Hidetoshi Seto
2009-10-05  7:07   ` Hidetoshi Seto [this message]
2009-10-05  8:51   ` [BUGFIX -v7] x86, MCE: Fix bugs and issues of MCE log ring buffer Frédéric Weisbecker
2009-10-05 15:16     ` Andi Kleen
2009-10-06  5:46     ` Hidetoshi Seto

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4AC99B23.9090701@jp.fujitsu.com \
    --to=seto.hidetoshi@jp.fujitsu.com \
    --cc=ak@linux.intel.com \
    --cc=hpa@zytor.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=ying.huang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).