public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] x86_64: early memtest to find bad ram
@ 2008-03-21  6:58 Yinghai Lu
  2008-03-21 12:03 ` Ingo Molnar
                   ` (2 more replies)
  0 siblings, 3 replies; 20+ messages in thread
From: Yinghai Lu @ 2008-03-21  6:58 UTC (permalink / raw)
  To: Andrew Morton, Ingo Molnar, H. Peter Anvin; +Cc: kernel list


Do a simple memory test after init_memory_mapping().

Use find_e820_area_size() to find every RAM range that is not reserved,

and run a few simple bit-pattern tests over it to find bad RAM.

If bad RAM is found, use reserve_early() to exclude that range.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

Index: linux-2.6/arch/x86/kernel/e820_64.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/e820_64.c
+++ linux-2.6/arch/x86/kernel/e820_64.c
@@ -119,6 +119,40 @@ again:
 	return changed;
 }
 
+/* Check for already reserved areas: clip [*addrp, *addrp + *sizep) against early_res[] */
+static inline int
+bad_addr_size(unsigned long *addrp, unsigned long *sizep, unsigned long align)
+{
+	int i;
+	unsigned long addr = *addrp, last;
+	unsigned long size = *sizep;
+	int changed = 0;	/* becomes 1 once addr or size has been adjusted */
+again:	/* every adjustment restarts the scan over all reservations */
+	last = addr + size;
+	for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {	/* an entry with end == 0 stops the scan */
+		struct early_res *r = &early_res[i];
+		if (last > r->start && addr < r->start) {	/* reservation begins inside the range: keep the part before it */
+			size = r->start - addr;
+			changed = 1;
+			goto again;
+		}
+		if (last > r->end && addr < r->end) {	/* range runs past the reservation's end: restart after it */
+			addr = round_up(r->end, align);
+			size = last - addr;
+			changed = 1;
+			goto again;
+		}
+		if (last <= r->end && addr >= r->start) {	/* range lies entirely inside the reservation */
+			(*sizep)++;	/* NOTE(review): looks like this oversizes the range so the caller's end check rejects it - confirm */
+			return 0;	/* local addr/size adjustments are not written back on this path */
+		}
+	}
+	if (changed) {
+		*addrp = addr;
+		*sizep = size;
+	}
+	return changed;	/* 1 only if the trimmed range was written back above */
+}
 /*
  * This function checks if any part of the range <start,end> is mapped
  * with type.
@@ -195,7 +229,7 @@ unsigned long __init find_e820_area(unsi
 		ei_last = ei->addr + ei->size;
 		if (addr < start)
 			addr = round_up(start, align);
-		if (addr > ei_last)
+		if (addr >= ei_last)
 			continue;
 		while (bad_addr(&addr, size, align) && addr+size <= ei_last)
 			;
@@ -210,6 +244,40 @@ unsigned long __init find_e820_area(unsi
 }
 
 /*
+ * Find next free range after *start
+ */
+unsigned long __init find_e820_area_size(unsigned long start, unsigned long *sizep, unsigned long align)
+{
+	int i;
+
+	/* Returns the start of the range and stores its length in *sizep; -1UL if none. */
+	for (i = 0; i < e820.nr_map; i++) {
+		struct e820entry *ei = &e820.map[i];
+		unsigned long addr, last;
+		unsigned long ei_last;
+
+		if (ei->type != E820_RAM)
+			continue;
+		addr = round_up(ei->addr, align);
+		ei_last = ei->addr + ei->size;
+		if (addr < start)
+			addr = round_up(start, align);
+		if (addr >= ei_last)
+			continue;
+		/* Start from everything that is left of this e820 entry ... */
+		*sizep = ei_last - addr;
+		/* ... and let bad_addr_size() trim it around the early reservations. */
+		while (bad_addr_size(&addr, sizep, align) && addr + *sizep <= ei_last)
+			;
+		last = addr + *sizep;
+		/* Nothing usable inside this entry (range ends past it): try the next one. */
+		if (last > ei_last)
+			continue;
+		return addr;
+	}
+	return -1UL;
+}
+/*
  * Find the highest page frame number we have available
  */
 unsigned long __init e820_end_of_ram(void)
Index: linux-2.6/arch/x86/mm/init_64.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/init_64.c
+++ linux-2.6/arch/x86/mm/init_64.c
@@ -427,6 +427,106 @@ static void __init init_gbpages(void)
 		direct_gbpages = 0;
 }
 
+/*
+ * Fill a range with one bit pattern (0..3, otherwise no-op), read it back
+ * and reserve_early() each run of consecutive words that lost the pattern.
+ */
+static void __init memtest(unsigned long start_phys, unsigned long size, unsigned pattern)
+{
+	unsigned long i, val, count, start_phys_aligned;
+	unsigned long *start;
+	unsigned long start_bad = 0, last_bad = 0;
+	unsigned long incr = sizeof(unsigned long);
+
+	switch (pattern) {
+	case 0:
+		val = 0UL;
+		break;
+	case 1:
+		val = -1UL;
+		break;
+	case 2:
+		val = 0x5555555555555555UL;
+		break;
+	case 3:
+		val = 0xaaaaaaaaaaaaaaaaUL;
+		break;
+	default:
+		return;
+	}
+
+	start_phys_aligned = ALIGN(start_phys, incr);
+	/* No room for one aligned word: count below would be 0 or wrap around. */
+	if (size < start_phys_aligned - start_phys + incr)
+		return;
+	count = (size - (start_phys_aligned - start_phys))/incr;
+	start = __va(start_phys_aligned);
+
+	for (i = 0; i < count; i++)
+		start[i] = val;
+	for (i = 0; i < count; i++, start++, start_phys_aligned += incr) {
+		if (*start != val) {
+			if (start_phys_aligned == last_bad + incr) {
+				last_bad += incr;
+			} else {
+				if (start_bad) {
+					printk(KERN_INFO "  %016lx bad mem addr %016lx - %016lx reserved\n",
+						val, start_bad, last_bad + incr);
+					/* reserve_early() takes an end address, not a length */
+					reserve_early(start_bad, last_bad + incr, "BAD RAM");
+				}
+				start_bad = last_bad = start_phys_aligned;
+			}
+		}
+	}
+	if (start_bad) {
+		printk(KERN_INFO "  %016lx bad mem addr %016lx - %016lx reserved\n",
+			val, start_bad, last_bad + incr);
+		reserve_early(start_bad, last_bad + incr, "BAD RAM");
+	}
+}
+
+static int __initdata memtest_pattern;	/* number of patterns early_memtest() runs; 0 = no test */
+static int __init parse_memtest(char *arg)
+{
+	if (arg)
+		memtest_pattern = simple_strtoul(arg, NULL, 0) + 1;	/* value N => N + 1 patterns (0..N) */
+	return 0;
+}
+/* Hook parse_memtest() up as the handler for the "memtest" early parameter. */
+early_param("memtest", parse_memtest);
+
+/*
+ * Run every requested pattern over the unreserved E820_RAM ranges in [start, end).
+ */
+static void __init early_memtest(unsigned long start, unsigned long end)
+{
+	unsigned long t_start, t_size;
+	unsigned pattern;
+
+	if (!memtest_pattern)
+		return;
+
+	printk(KERN_INFO "early_memtest: pattern num %d", memtest_pattern);
+	for (pattern = 0; pattern < memtest_pattern; pattern++) {
+		t_size = 0;
+		for (t_start = start; t_start < end; t_start += t_size) {
+			t_start = find_e820_area_size(t_start, &t_size, 1);
+			/* no free range left below end */
+			if (t_start >= end)
+				break;
+			/* clip the last range to the window being tested */
+			if (t_start + t_size > end)
+				t_size = end - t_start;
+
+			printk(KERN_CONT "\n  %016lx - %016lx pattern %d",
+				t_start, t_start + t_size, pattern);
+			memtest(t_start, t_size, pattern);
+		}
+	}
+	printk(KERN_CONT "\n");
+}
+
 /*
  * Setup the direct mapping of the physical memory at PAGE_OFFSET.
  * This runs before bootmem is initialized and gets pages directly from
@@ -435,8 +535,9 @@ static void __init init_gbpages(void)
 void __init_refok init_memory_mapping(unsigned long start, unsigned long end)
 {
 	unsigned long next;
+	unsigned long start_phys = start, end_phys = end;
 
-	pr_debug("init_memory_mapping\n");
+	printk(KERN_INFO "init_memory_mapping\n");
 
 	/*
 	 * Find space for the kernel direct mapping tables.
@@ -479,6 +580,9 @@ void __init_refok init_memory_mapping(un
 	if (!after_bootmem)
 		reserve_early(table_start << PAGE_SHIFT,
 				 table_end << PAGE_SHIFT, "PGTABLE");
+
+	if (!after_bootmem)
+		early_memtest(start_phys, end_phys);
 }
 
 #ifndef CONFIG_NUMA
Index: linux-2.6/include/asm-x86/e820_64.h
===================================================================
--- linux-2.6.orig/include/asm-x86/e820_64.h
+++ linux-2.6/include/asm-x86/e820_64.h
@@ -16,6 +16,9 @@
 #ifndef __ASSEMBLY__
 extern unsigned long find_e820_area(unsigned long start, unsigned long end, 
 				    unsigned long size, unsigned long align);
+extern unsigned long find_e820_area_size(unsigned long start,
+					 unsigned long *sizep,
+					 unsigned long align);
 extern void add_memory_region(unsigned long start, unsigned long size, 
 			      int type);
 extern void update_memory_range(u64 start, u64 size, unsigned old_type,

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] x86_64: early memtest to find bad ram
  2008-03-21  6:58 [PATCH] x86_64: early memtest to find bad ram Yinghai Lu
@ 2008-03-21 12:03 ` Ingo Molnar
  2008-03-21 13:08   ` H. Peter Anvin
                     ` (2 more replies)
  2008-03-21 22:49 ` Sami Farin
  2008-03-22 20:48 ` Arjan van de Ven
  2 siblings, 3 replies; 20+ messages in thread
From: Ingo Molnar @ 2008-03-21 12:03 UTC (permalink / raw)
  To: yhlu.kernel; +Cc: Andrew Morton, H. Peter Anvin, kernel list


* Yinghai Lu <yhlu.kernel.send@gmail.com> wrote:

> do simple memtest after init_memory_mapping
> 
> use find_e820_area_size to find all ram range that is not reserved.
> 
> and do some simple bits test to find some bad ram.
> 
> if find some bad ram, use reserve_early to exclude that range.

very nice patch! I always thought that this was the proper way to do 
memtest - and we could in fact also do something like this after SMP 
bringup, and hit the memory bus via multiple CPUs. [that will need a 
different enumeration though than e820 maps]

one structural observation: please make this unified functionality, so 
that 32-bit kernels can make use of it too.

a small style nit from scripts/checkpatch.pl:

> +//		printk(KERN_DEBUG "find_e820_area_size : e820 %d [%llx, %lx]\n", i, ei->addr, ei_last);

remove such lines or make them pr_debug(). (checkpatch also found more 
such cases)

also, please add a CONFIG_BOOTPARAM_MEMTEST=y option so that 
distributions can enable this by default in their debug kernels.

i've applied your current version to get some testing, please send delta 
patches against x86/latest.

	Ingo

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] x86_64: early memtest to find bad ram
  2008-03-21 12:03 ` Ingo Molnar
@ 2008-03-21 13:08   ` H. Peter Anvin
  2008-03-21 14:29     ` Ingo Molnar
  2008-03-21 19:08     ` Yinghai Lu
  2008-03-21 21:22   ` Jan Engelhardt
  2008-03-22  0:04   ` Yinghai Lu
  2 siblings, 2 replies; 20+ messages in thread
From: H. Peter Anvin @ 2008-03-21 13:08 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: yhlu.kernel, Andrew Morton, kernel list

Ingo Molnar wrote:
> 
> very nice patch! I always thought that this was the proper way to do 
> memtest - and we could in fact also do something like this after SMP 
> bringup, and hit the memory bus via multiple CPUs. [that will need a 
> different enumeration though than e820 maps]
> 
> one structural observation: please make this unified functionality, so 
> that 32-bit kernels can make use of it too.
> 

Indeed.  Of course, it would also be nice if distros shipped 
bootloader-invoked prekernel test software, like memtest86+, by default.

	-hpa

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] x86_64: early memtest to find bad ram
  2008-03-21 13:08   ` H. Peter Anvin
@ 2008-03-21 14:29     ` Ingo Molnar
  2008-03-22  5:45       ` Willy Tarreau
  2008-03-21 19:08     ` Yinghai Lu
  1 sibling, 1 reply; 20+ messages in thread
From: Ingo Molnar @ 2008-03-21 14:29 UTC (permalink / raw)
  To: H. Peter Anvin; +Cc: yhlu.kernel, Andrew Morton, kernel list


* H. Peter Anvin <hpa@zytor.com> wrote:

>> very nice patch! I always thought that this was the proper way to do 
>> memtest - and we could in fact also do something like this after SMP 
>> bringup, and hit the memory bus via multiple CPUs. [that will need a 
>> different enumeration though than e820 maps]
>>
>> one structural observation: please make this unified functionality, 
>> so that 32-bit kernels can make use of it too.
>>
>
> Indeed.  Of course, it would also be nice if distros shipped 
> bootloader-invoked prekernel test software, like memtest86+, by 
> default.

some do (Fedora for example), but it's still a bit quirky for users to 
invoke and it would be nice to see those results in the kernel log as 
well and flag possibly flaky systems that way. (add a taint bit, etc., 
etc.)

	Ingo

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] x86_64: early memtest to find bad ram
  2008-03-21 13:08   ` H. Peter Anvin
  2008-03-21 14:29     ` Ingo Molnar
@ 2008-03-21 19:08     ` Yinghai Lu
  2008-03-21 19:58       ` H. Peter Anvin
  1 sibling, 1 reply; 20+ messages in thread
From: Yinghai Lu @ 2008-03-21 19:08 UTC (permalink / raw)
  To: H. Peter Anvin; +Cc: Ingo Molnar, Andrew Morton, kernel list

On Fri, Mar 21, 2008 at 6:08 AM, H. Peter Anvin <hpa@zytor.com> wrote:
> Ingo Molnar wrote:
>  >
>  > very nice patch! I always thought that this was the proper way to do
>  > memtest - and we could in fact also do something like this after SMP
>  > bringup, and hit the memory bus via multiple CPUs. [that will need a
>  > different enumeration though than e820 maps]
>  >
>  > one structural observation: please make this unified functionality, so
>  > that 32-bit kernels can make use of it too.
>  >
>
>  Indeed.  Of course, it would also be nice if distros shipped
>  bootloader-invoked prekernel test software, like memtest86+, by default.

The current memtest86 runs in 32-bit mode and only supports 64G of RAM.

I tried to extend it a bit to support 1024G, but that only works on
some machines.
Could it be that the stack provided is not big enough?

YH

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] x86_64: early memtest to find bad ram
  2008-03-21 19:08     ` Yinghai Lu
@ 2008-03-21 19:58       ` H. Peter Anvin
  2008-03-21 20:09         ` Yinghai Lu
  0 siblings, 1 reply; 20+ messages in thread
From: H. Peter Anvin @ 2008-03-21 19:58 UTC (permalink / raw)
  To: Yinghai Lu; +Cc: Ingo Molnar, Andrew Morton, kernel list

Yinghai Lu wrote:
> On Fri, Mar 21, 2008 at 6:08 AM, H. Peter Anvin <hpa@zytor.com> wrote:
>> Ingo Molnar wrote:
>>  >
>>  > very nice patch! I always thought that this was the proper way to do
>>  > memtest - and we could in fact also do something like this after SMP
>>  > bringup, and hit the memory bus via multiple CPUs. [that will need a
>>  > different enumeration though than e820 maps]
>>  >
>>  > one structural observation: please make this unified functionality, so
>>  > that 32-bit kernels can make use of it too.
>>  >
>>
>>  Indeed.  Of course, it would also be nice if distros shipped
>>  bootloader-invoked prekernel test software, like memtest86+, by default.
> 
> the current memtest86 is running in 32 bit mode, and only support 64G ram.
> 
> I tried to expand that a bit, to support 1024g, but it only works on
> some machine.
> could be stack provide is not big enough?
> 

Wonder how hard it would be to make it run 64 bits...

	-hpa

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] x86_64: early memtest to find bad ram
  2008-03-21 19:58       ` H. Peter Anvin
@ 2008-03-21 20:09         ` Yinghai Lu
  0 siblings, 0 replies; 20+ messages in thread
From: Yinghai Lu @ 2008-03-21 20:09 UTC (permalink / raw)
  To: H. Peter Anvin; +Cc: Ingo Molnar, Andrew Morton, kernel list

On Fri, Mar 21, 2008 at 12:58 PM, H. Peter Anvin <hpa@zytor.com> wrote:
>
> Yinghai Lu wrote:
>  > On Fri, Mar 21, 2008 at 6:08 AM, H. Peter Anvin <hpa@zytor.com> wrote:
>  >> Ingo Molnar wrote:
>  >>  >
>  >>  > very nice patch! I always thought that this was the proper way to do
>  >>  > memtest - and we could in fact also do something like this after SMP
>  >>  > bringup, and hit the memory bus via multiple CPUs. [that will need a
>  >>  > different enumeration though than e820 maps]
>  >>  >
>  >>  > one structural observation: please make this unified functionality, so
>  >>  > that 32-bit kernels can make use of it too.
>  >>  >
>  >>
>  >>  Indeed.  Of course, it would also be nice if distros shipped
>  >>  bootloader-invoked prekernel test software, like memtest86+, by default.
>  >
>  > the current memtest86 is running in 32 bit mode, and only support 64G ram.
>  >
>  > I tried to expand that a bit, to support 1024g, but it only works on
>  > some machine.
>  > could be stack provide is not big enough?
>  >
>
>  Wonder how hard it would be to make it run 64 bits...

1. in 32 bit test less than 4g in 32 bit mode
2. switch to 64 bit, set page table under 4g to cover all ram...

YH

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] x86_64: early memtest to find bad ram
  2008-03-21 12:03 ` Ingo Molnar
  2008-03-21 13:08   ` H. Peter Anvin
@ 2008-03-21 21:22   ` Jan Engelhardt
  2008-03-21 21:43     ` Yinghai Lu
  2008-03-22  0:04   ` Yinghai Lu
  2 siblings, 1 reply; 20+ messages in thread
From: Jan Engelhardt @ 2008-03-21 21:22 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: yhlu.kernel, Andrew Morton, H. Peter Anvin, kernel list


On Mar 21 2008 13:03, Ingo Molnar wrote:
> * Yinghai Lu <yhlu.kernel.send@gmail.com> wrote:
>
>> do simple memtest after init_memory_mapping
>> use find_e820_area_size to find all ram range that is not reserved.
>> and do some simple bits test to find some bad ram.
>> if find some bad ram, use reserve_early to exclude that range.
>
> very nice patch! I always thought that this was the proper way to do
> memtest - and we could in fact also do something like this after SMP
> bringup, and hit the memory bus via multiple CPUs. [that will need a
> different enumeration though than e820 maps]

Perhaps this can even be used to provide on-the-fly badram
patch semantics?

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] x86_64: early memtest to find bad ram
  2008-03-21 21:22   ` Jan Engelhardt
@ 2008-03-21 21:43     ` Yinghai Lu
  0 siblings, 0 replies; 20+ messages in thread
From: Yinghai Lu @ 2008-03-21 21:43 UTC (permalink / raw)
  To: Jan Engelhardt; +Cc: Ingo Molnar, Andrew Morton, H. Peter Anvin, kernel list

On Fri, Mar 21, 2008 at 2:22 PM, Jan Engelhardt <jengelh@computergmbh.de> wrote:
>
>  On Mar 21 2008 13:03, Ingo Molnar wrote:
>  > * Yinghai Lu <yhlu.kernel.send@gmail.com> wrote:
>  >
>  >> do simple memtest after init_memory_mapping
>  >> use find_e820_area_size to find all ram range that is not reserved.
>  >> and do some simple bits test to find some bad ram.
>  >> if find some bad ram, use reserve_early to exclude that range.
>  >
>  > very nice patch! I always thought that this was the proper way to do
>  > memtest - and we could in fact also do something like this after SMP
>  > bringup, and hit the memory bus via multiple CPUs. [that will need a
>  > different enumeration though than e820 maps]
>
>  Perhaps this can even be used to provide on-the-fly badram
>  patch semantics?

Yes, but there cannot be too many bad ranges, otherwise the early_res
array will overflow. In that case you need to use memmap=nn$ss to
exclude the ranges already found.

YH

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] x86_64: early memtest to find bad ram
  2008-03-21  6:58 [PATCH] x86_64: early memtest to find bad ram Yinghai Lu
  2008-03-21 12:03 ` Ingo Molnar
@ 2008-03-21 22:49 ` Sami Farin
  2008-03-22 20:48 ` Arjan van de Ven
  2 siblings, 0 replies; 20+ messages in thread
From: Sami Farin @ 2008-03-21 22:49 UTC (permalink / raw)
  To: kernel list

On Thu, Mar 20, 2008 at 23:58:33 -0700, Yinghai Lu wrote:
> 
> do simple memtest after init_memory_mapping
> 
> use find_e820_area_size to find all ram range that is not reserved.
> 
> and do some simple bits test to find some bad ram.
> 
> if find some bad ram, use reserve_early to exclude that range.

Does somebody still remember the bug report in which the fault
was found to be in hardware?  By bisecting, the user got "working"
kernel when movnti was not used to copy data.
Would be neat if also non-temporal moves were done in this early memtest.

Having the memtest feature in kernel is useful,
considering my grub can not load memtest86+ binary, failing with error
"Selected item cannot fit into memory",
with or without the patch at
https://bugzilla.redhat.com/show_bug.cgi?id=237279

I have user-space app to test movnti...  ask if you want it.

-- 
Do what you love because life is too short for anything else.


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] x86_64: early memtest to find bad ram
  2008-03-21 12:03 ` Ingo Molnar
  2008-03-21 13:08   ` H. Peter Anvin
  2008-03-21 21:22   ` Jan Engelhardt
@ 2008-03-22  0:04   ` Yinghai Lu
  2008-03-22 12:04     ` Ingo Molnar
  2 siblings, 1 reply; 20+ messages in thread
From: Yinghai Lu @ 2008-03-22  0:04 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Andrew Morton, H. Peter Anvin, kernel list

On Fri, Mar 21, 2008 at 5:03 AM, Ingo Molnar <mingo@elte.hu> wrote:
>
>  * Yinghai Lu <yhlu.kernel.send@gmail.com> wrote:
>
>  > do simple memtest after init_memory_mapping
>  >
>  > use find_e820_area_size to find all ram range that is not reserved.
>  >
>  > and do some simple bits test to find some bad ram.
>  >
>  > if find some bad ram, use reserve_early to exclude that range.
>
>  very nice patch! I always thought that this was the proper way to do
>  memtest - and we could in fact also do something like this after SMP
>  bringup, and hit the memory bus via multiple CPUs. [that will need a
>  different enumeration though than e820 maps]
or
1. core0/node0 check all memory at first
2. every core0 will check all memory later one by one.

>
>  one structural observation: please make this unified functionality, so
>  that 32-bit kernels can make use of it too.

that will need to use PAE to switch 2G windows ....

>
>  a small style nit from scripts/checkpatch.pl:
>
>
>  > +//           printk(KERN_DEBUG "find_e820_area_size : e820 %d [%llx, %lx]\n", i, ei->addr, ei_last);
>
>  remove such lines or make them pr_debug(). (checkpatch also found more
>  such cases)
>
>  also, please add a CONFIG_BOOTPARAM_MEMTEST=y option so that
>  distributions can enable this by default in their debug kernels.
>
>  i've applied your current version to get some testing, please send delta
>  patches against x86/latest.

thanks. will submit delta patch.

YH

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] x86_64: early memtest to find bad ram
  2008-03-21 14:29     ` Ingo Molnar
@ 2008-03-22  5:45       ` Willy Tarreau
  2008-03-22  6:48         ` Yinghai Lu
  0 siblings, 1 reply; 20+ messages in thread
From: Willy Tarreau @ 2008-03-22  5:45 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: H. Peter Anvin, yhlu.kernel, Andrew Morton, kernel list

On Fri, Mar 21, 2008 at 03:29:19PM +0100, Ingo Molnar wrote:
> 
> * H. Peter Anvin <hpa@zytor.com> wrote:
> 
> >> very nice patch! I always thought that this was the proper way to do 
> >> memtest - and we could in fact also do something like this after SMP 
> >> bringup, and hit the memory bus via multiple CPUs. [that will need a 
> >> different enumeration though than e820 maps]
> >>
> >> one structural observation: please make this unified functionality, 
> >> so that 32-bit kernels can make use of it too.
> >>
> >
> > Indeed.  Of course, it would also be nice if distros shipped 
> > bootloader-invoked prekernel test software, like memtest86+, by 
> > default.
> 
> some do (Fedora for example), but it's still a bit quirky for users to 
> invoke and it would be nice to see those results in the kernel log as 
> well and flag possibly flaky systems that way. (add a taint bit, etc., 
> etc.)

It may even make sense to merge in the full memtest86. The code is small
(both source and binary) and IIRC it shares a lot of init code with x86.
The remaining problem would then be how to maintain its tests up to date.

Willy


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] x86_64: early memtest to find bad ram
  2008-03-22  5:45       ` Willy Tarreau
@ 2008-03-22  6:48         ` Yinghai Lu
  0 siblings, 0 replies; 20+ messages in thread
From: Yinghai Lu @ 2008-03-22  6:48 UTC (permalink / raw)
  To: Willy Tarreau; +Cc: Ingo Molnar, H. Peter Anvin, Andrew Morton, kernel list

On Fri, Mar 21, 2008 at 10:45 PM, Willy Tarreau <w@1wt.eu> wrote:
>
> On Fri, Mar 21, 2008 at 03:29:19PM +0100, Ingo Molnar wrote:
>  >
>  > * H. Peter Anvin <hpa@zytor.com> wrote:
>  >
>  > >> very nice patch! I always thought that this was the proper way to do
>  > >> memtest - and we could in fact also do something like this after SMP
>  > >> bringup, and hit the memory bus via multiple CPUs. [that will need a
>  > >> different enumeration though than e820 maps]
>  > >>
>  > >> one structural observation: please make this unified functionality,
>  > >> so that 32-bit kernels can make use of it too.
>  > >>
>  > >
>  > > Indeed.  Of course, it would also be nice if distros shipped
>  > > bootloader-invoked prekernel test software, like memtest86+, by
>  > > default.
>  >
>  > some do (Fedora for example), but it's still a bit quirky for users to
>  > invoke and it would be nice to see those results in the kernel log as
>  > well and flag possibly flaky systems that way. (add a taint bit, etc.,
>  > etc.)
>
>  It may even make sense to merge in the full memtest86. The code is small
>  (both source and binary) and IIRC it shares a lot of init code with x86.
>  The remaining problem would then be how to maintain its tests up to date.

memtester is another option, and it would be easier to merge.

YH

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] x86_64: early memtest to find bad ram
  2008-03-22  0:04   ` Yinghai Lu
@ 2008-03-22 12:04     ` Ingo Molnar
  2008-03-22 16:59       ` Yinghai Lu
  0 siblings, 1 reply; 20+ messages in thread
From: Ingo Molnar @ 2008-03-22 12:04 UTC (permalink / raw)
  To: Yinghai Lu; +Cc: Andrew Morton, H. Peter Anvin, kernel list


* Yinghai Lu <yhlu.kernel@gmail.com> wrote:

> >  one structural observation: please make this unified functionality, 
> >  so that 32-bit kernels can make use of it too.
> 
> that will need to use PAE to switch 2G windows ....

well, please try some non-PAE, checks-direct-mappings approach - if 
someone wants to extend it to the highmem bits i'm sure it will be done.

	Ingo

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] x86_64: early memtest to find bad ram
  2008-03-22 12:04     ` Ingo Molnar
@ 2008-03-22 16:59       ` Yinghai Lu
  2008-03-25 11:00         ` Ingo Molnar
  0 siblings, 1 reply; 20+ messages in thread
From: Yinghai Lu @ 2008-03-22 16:59 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Andrew Morton, H. Peter Anvin, kernel list

On Sat, Mar 22, 2008 at 5:04 AM, Ingo Molnar <mingo@elte.hu> wrote:
>
>  * Yinghai Lu <yhlu.kernel@gmail.com> wrote:
>
>  > >  one structural observation: please make this unified functionality,
>  > >  so that 32-bit kernels can make use of it too.
>  >
>  > that will need to use PAE to switch 2G windows ....
>
>  well, please try some non-PAE, checks-direct-mappings approach - if
>  someone wants to extend it to the highmem bits i'm sure it will be done.

OK, First need to move some early_res code from e820_64.c to e820_32.c

or we can start to merge them. anyone is working on that?

YH

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] x86_64: early memtest to find bad ram
  2008-03-21  6:58 [PATCH] x86_64: early memtest to find bad ram Yinghai Lu
  2008-03-21 12:03 ` Ingo Molnar
  2008-03-21 22:49 ` Sami Farin
@ 2008-03-22 20:48 ` Arjan van de Ven
  2008-03-22 21:01   ` H. Peter Anvin
  2008-03-22 21:21   ` Yinghai Lu
  2 siblings, 2 replies; 20+ messages in thread
From: Arjan van de Ven @ 2008-03-22 20:48 UTC (permalink / raw)
  To: yhlu.kernel
  Cc: yhlu.kernel.send, Andrew Morton, Ingo Molnar, H. Peter Anvin,
	kernel list

On Thu, 20 Mar 2008 23:58:33 -0700
Yinghai Lu <yhlu.kernel.send@gmail.com> wrote:

> 
> do simple memtest after init_memory_mapping
> 
> use find_e820_area_size to find all ram range that is not reserved.

be careful, there's some special memory that e820 right now says is not reserved,
but still has bios data (the first 4Kb of memory come to mind)


-- 
If you want to reach me at my work email, use arjan@linux.intel.com
For development, discussion and tips for power savings, 
visit http://www.lesswatts.org

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] x86_64: early memtest to find bad ram
  2008-03-22 20:48 ` Arjan van de Ven
@ 2008-03-22 21:01   ` H. Peter Anvin
  2008-03-22 21:21     ` Yinghai Lu
  2008-03-22 21:21   ` Yinghai Lu
  1 sibling, 1 reply; 20+ messages in thread
From: H. Peter Anvin @ 2008-03-22 21:01 UTC (permalink / raw)
  To: Arjan van de Ven
  Cc: yhlu.kernel, yhlu.kernel.send, Andrew Morton, Ingo Molnar,
	kernel list

Arjan van de Ven wrote:
> On Thu, 20 Mar 2008 23:58:33 -0700
> Yinghai Lu <yhlu.kernel.send@gmail.com> wrote:
> 
>> do simple memtest after init_memory_mapping
>>
>> use find_e820_area_size to find all ram range that is not reserved.
> 
> be careful, there's some special memory that e820 right now says is not reserved,
> but still has bios data (the first 4Kb of memory come to mind)
> 

Is that true even after Yinghai's changes?  I have lost track of all the 
patches...

	-hpa

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] x86_64: early memtest to find bad ram
  2008-03-22 20:48 ` Arjan van de Ven
  2008-03-22 21:01   ` H. Peter Anvin
@ 2008-03-22 21:21   ` Yinghai Lu
  1 sibling, 0 replies; 20+ messages in thread
From: Yinghai Lu @ 2008-03-22 21:21 UTC (permalink / raw)
  To: Arjan van de Ven
  Cc: yhlu.kernel.send, Andrew Morton, Ingo Molnar, H. Peter Anvin,
	kernel list

On Sat, Mar 22, 2008 at 1:48 PM, Arjan van de Ven <arjan@infradead.org> wrote:
> On Thu, 20 Mar 2008 23:58:33 -0700
>
> Yinghai Lu <yhlu.kernel.send@gmail.com> wrote:
>
>  >
>
> > do simple memtest after init_memory_mapping
>  >
>  > use find_e820_area_size to find all ram range that is not reserved.
>
>  be careful, there's some special memory that e820 right now says is not reserved,
>  but still has bios data (the first 4Kb of memory come to mind)
only test ranges that have E820_RAM and exclude range that is early_reserved...
so it is safe.

YH

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] x86_64: early memtest to find bad ram
  2008-03-22 21:01   ` H. Peter Anvin
@ 2008-03-22 21:21     ` Yinghai Lu
  0 siblings, 0 replies; 20+ messages in thread
From: Yinghai Lu @ 2008-03-22 21:21 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Arjan van de Ven, yhlu.kernel.send, Andrew Morton, Ingo Molnar,
	kernel list

On Sat, Mar 22, 2008 at 2:01 PM, H. Peter Anvin <hpa@zytor.com> wrote:
> Arjan van de Ven wrote:
>  > On Thu, 20 Mar 2008 23:58:33 -0700
>  > Yinghai Lu <yhlu.kernel.send@gmail.com> wrote:
>  >
>  >> do simple memtest after init_memory_mapping
>  >>
>  >> use find_e820_area_size to find all ram range that is not reserved.
>  >
>  > be careful, there's some special memory that e820 right now says is not reserved,
>  > but still has bios data (the first 4Kb of memory come to mind)
>  >
>
>  Is that true even after Yinghai's changes?  I have lost track of all the
>  patches...

find_e820_area_size should be safe.

YH

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] x86_64: early memtest to find bad ram
  2008-03-22 16:59       ` Yinghai Lu
@ 2008-03-25 11:00         ` Ingo Molnar
  0 siblings, 0 replies; 20+ messages in thread
From: Ingo Molnar @ 2008-03-25 11:00 UTC (permalink / raw)
  To: Yinghai Lu; +Cc: Andrew Morton, H. Peter Anvin, kernel list


* Yinghai Lu <yhlu.kernel@gmail.com> wrote:

> On Sat, Mar 22, 2008 at 5:04 AM, Ingo Molnar <mingo@elte.hu> wrote:
> >
> >  * Yinghai Lu <yhlu.kernel@gmail.com> wrote:
> >
> >  > >  one structural observation: please make this unified functionality,
> >  > >  so that 32-bit kernels can make use of it too.
> >  >
> >  > that will need to use PAE to switch 2G windows ....
> >
> >  well, please try some non-PAE, checks-direct-mappings approach - if 
> >  someone wants to extend it to the highmem bits i'm sure it will be 
> >  done.
> 
> OK, First need to move some early_res code from e820_64.c to e820_32.c

yeah, please do that.

> or we can start to merge them. anyone is working on that?

not that i know of - feel free.

	Ingo

^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2008-03-25 11:01 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-03-21  6:58 [PATCH] x86_64: early memtest to find bad ram Yinghai Lu
2008-03-21 12:03 ` Ingo Molnar
2008-03-21 13:08   ` H. Peter Anvin
2008-03-21 14:29     ` Ingo Molnar
2008-03-22  5:45       ` Willy Tarreau
2008-03-22  6:48         ` Yinghai Lu
2008-03-21 19:08     ` Yinghai Lu
2008-03-21 19:58       ` H. Peter Anvin
2008-03-21 20:09         ` Yinghai Lu
2008-03-21 21:22   ` Jan Engelhardt
2008-03-21 21:43     ` Yinghai Lu
2008-03-22  0:04   ` Yinghai Lu
2008-03-22 12:04     ` Ingo Molnar
2008-03-22 16:59       ` Yinghai Lu
2008-03-25 11:00         ` Ingo Molnar
2008-03-21 22:49 ` Sami Farin
2008-03-22 20:48 ` Arjan van de Ven
2008-03-22 21:01   ` H. Peter Anvin
2008-03-22 21:21     ` Yinghai Lu
2008-03-22 21:21   ` Yinghai Lu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox