linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* Re: [PATCH] x86: mtrr cleanup for converting continuous to discrete layout v2
  2008-04-28 15:28       ` Mika Fischer
@ 2008-04-28  5:50         ` Arjan van de Ven
  2008-04-28 16:01         ` Gabriel C
  1 sibling, 0 replies; 89+ messages in thread
From: Arjan van de Ven @ 2008-04-28  5:50 UTC (permalink / raw)
  To: Mika Fischer
  Cc: Ingo Molnar, yhlu.kernel, Andrew Morton,
	linux-kernel@vger.kernel.org, H. Peter Anvin, Thomas Gleixner,
	Gabriel C

On Mon, 28 Apr 2008 17:28:21 +0200
Mika Fischer <mika.fischer@zoopnet.de> wrote:

> Arjan van de Ven schrieb:
> > yep one should not touch existing MTRRs. If you run out, tough luck.
> > Thats what we have PAT for.
> > Changing them anyway is a deathtrap with various things,
> > suspend/resume being only one of the nasty cases.
> 
> Hm. I currently have to remove the offending (i.e. overlapping my
> video memory) MTRRs and split them so that they have a hole where my
> video memory is.

I'm not arguing that it sometimes works. It's just a big trap to default enable ;(


> 
> Ah, so the new X will be able to use the video memory in
> write-combining mode even if I have an MTRR saying this area is
> uncachable or write-back?

yes.
When it gets released...

^ permalink raw reply	[flat|nested] 89+ messages in thread

* [PATCH] x86: mtrr cleanup for converting continuous to discrete layout
@ 2008-04-28  6:37 Yinghai Lu
  2008-04-28  9:06 ` [PATCH] x86: mtrr cleanup for converting continuous to discrete layout v2 Yinghai Lu
  0 siblings, 1 reply; 89+ messages in thread
From: Yinghai Lu @ 2008-04-28  6:37 UTC (permalink / raw)
  To: Andrew Morton, Ingo Molnar; +Cc: linux-kernel@vger.kernel.org


some BIOS like to use continus MTRR layout, and X driver can not add
WB entries for graphical cards when 4g or more RAM installed.

the patch will change MTRR to discrete.

mtrr_chunk_size= could be used to have smaller continuous block to hold holes.
default is 256m, could be set according to size of graphics card memory.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

Index: linux-2.6/arch/x86/kernel/cpu/mtrr/generic.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/generic.c
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/generic.c
@@ -158,6 +158,20 @@ get_mtrr_var_range(unsigned int index, s
 	rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi);
 }
 
+/*  fill the MSR pair relating to a var range  */
+void fill_mtrr_var_range(unsigned int index,
+		u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi)
+{
+	struct mtrr_var_range *vr;
+
+	vr = mtrr_state.var_ranges;
+
+	vr[index].base_lo = base_lo;
+	vr[index].base_hi = base_hi;
+	vr[index].mask_lo = mask_lo;
+	vr[index].mask_hi = mask_hi;
+}
+
 static void
 get_fixed_ranges(mtrr_type * frs)
 {
Index: linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/main.c
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
@@ -37,6 +37,7 @@
 #include <linux/smp.h>
 #include <linux/cpu.h>
 #include <linux/mutex.h>
+#include <linux/sort.h>
 
 #include <asm/e820.h>
 #include <asm/mtrr.h>
@@ -609,6 +610,348 @@ static struct sysdev_driver mtrr_sysdev_
 	.resume		= mtrr_restore,
 };
 
+static int disable_mtrr_cleanup;
+
+static int __init disable_mtrr_cleanup_setup(char *str)
+{
+	disable_mtrr_cleanup = 1;
+	return 0;
+}
+early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);
+
+#define RANGE_NUM 256
+
+struct res_range {
+	size_t start;
+	size_t end;
+};
+
+static void __init subtract_range(struct res_range *range, size_t start,
+				size_t end)
+{
+	int i;
+	int j;
+
+	for (j = 0; j < RANGE_NUM; j++) {
+		if (!range[j].end)
+			continue;
+
+		if (start <= range[j].start && end >= range[j].end) {
+			range[j].start = 0;
+			range[j].end = 0;
+			continue;
+		}
+
+		if (start <= range[j].start && end < range[j].end && range[j].start < end + 1) {
+			range[j].start = end + 1;
+			continue;
+		}
+
+
+		if (start > range[j].start && end >= range[j].end && range[j].end > start - 1) {
+			range[j].end = start - 1;
+			continue;
+		}
+
+		if (start > range[j].start && end < range[j].end) {
+			/* find the new spare */
+			for (i = 0; i < RANGE_NUM; i++) {
+				if (range[i].end == 0)
+					break;
+			}
+			if (i < RANGE_NUM) {
+				range[i].end = range[j].end;
+				range[i].start = end + 1;
+			} else {
+				printk(KERN_ERR "run of slot in ranges\n");
+			}
+			range[j].end = start - 1;
+			continue;
+		}
+	}
+}
+
+static int __cpuinit cmp_range(const void *x1, const void *x2)
+{
+	const struct res_range *r1 = x1;
+	const struct res_range *r2 = x2;
+	s64 start1, start2;
+
+	start1 = r1->start;
+	start2 = r2->start;
+
+	return start1 - start2;
+}
+
+struct var_mtrr_state {
+	unsigned long range_startk, range_sizek;
+	unsigned long chunk_sizek;
+	unsigned int reg;
+	unsigned address_bits;
+};
+
+static void __init set_var_mtrr(
+	unsigned int reg, unsigned long basek, unsigned long sizek,
+	unsigned char type, unsigned address_bits)
+{
+	u32 base_lo, base_hi, mask_lo, mask_hi;
+	unsigned address_mask_high;
+
+	if (!sizek) {
+		fill_mtrr_var_range(reg, 0, 0, 0, 0);
+		return;
+	}
+
+	address_mask_high = ((1u << (address_bits - 32u)) - 1u);
+
+	base_hi = basek >> 22;
+	base_lo  = basek << 10;
+
+	if (sizek < 4*1024*1024) {
+		mask_hi = address_mask_high;
+		mask_lo = ~((sizek << 10) - 1);
+	} else {
+		mask_hi = address_mask_high & (~((sizek >> 22) - 1));
+		mask_lo = 0;
+	}
+
+	base_lo |= type;
+	mask_lo |= 0x800;
+	fill_mtrr_var_range(reg, base_lo, base_hi, mask_lo, mask_hi);
+}
+
+static unsigned int __init range_to_mtrr(unsigned int reg,
+	unsigned long range_startk, unsigned long range_sizek,
+	unsigned long next_range_startk, unsigned char type, unsigned address_bits)
+{
+	if (!range_sizek || (reg >= num_var_ranges))
+		return reg;
+
+	while (range_sizek) {
+		unsigned long max_align, align;
+		unsigned long sizek;
+		/* Compute the maximum size I can make a range */
+		if (range_startk)
+			max_align = ffs(range_startk) - 1;
+		else
+			max_align = 32;
+		align = fls(range_sizek) - 1;
+		if (align > max_align)
+			align = max_align;
+
+		sizek = 1 << align;
+		printk(KERN_INFO "Setting variable MTRR %d, base: %ldMB, range: %ldMB, type %s\n",
+			reg, range_startk >> 10, sizek >> 10,
+			(type == MTRR_TYPE_UNCACHABLE)?"UC":
+			    ((type == MTRR_TYPE_WRBACK)?"WB":"Other")
+			);
+		set_var_mtrr(reg++, range_startk, sizek, type, address_bits);
+		range_startk += sizek;
+		range_sizek -= sizek;
+		if (reg >= num_var_ranges)
+			break;
+	}
+	return reg;
+}
+
+static void __init range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek)
+{
+	unsigned long hole_basek, hole_sizek;
+	unsigned long range0_basek, range0_sizek;
+	unsigned long range_basek, range_sizek;
+	unsigned long next_basek;
+	unsigned long chunk_sizek;
+
+	hole_basek = 0;
+	hole_sizek = 0;
+	chunk_sizek = state->chunk_sizek;
+	range0_basek = state->range_startk;
+	next_basek = basek;
+
+	/* try to append some small hole */
+	range0_sizek = ALIGN(state->range_sizek, chunk_sizek);
+	if ((range0_sizek == state->range_sizek) ||
+		((range0_basek + range0_sizek > basek) && basek)) {
+			printk(KERN_INFO "rangeX: %016lx - %016lx\n", range0_basek<<10, (range0_basek + range0_sizek)<<10);
+			state->reg = range_to_mtrr(state->reg, range0_basek,
+				range0_sizek, next_basek, MTRR_TYPE_WRBACK, state->address_bits);
+		return;
+	}
+
+	range0_sizek -= chunk_sizek;
+	range_basek = range0_basek + range0_sizek;
+	printk(KERN_INFO "range0: %016lx - %016lx\n", range0_basek<<10, (range0_basek + range0_sizek)<<10);
+	state->reg = range_to_mtrr(state->reg, range0_basek,
+		range0_sizek, range_basek, MTRR_TYPE_WRBACK, state->address_bits);
+
+	range_sizek = chunk_sizek;
+	if (range_sizek - (state->range_sizek - range0_sizek) < (chunk_sizek >> 1)) {
+		hole_sizek = range_sizek - (state->range_sizek - range0_sizek);
+		next_basek = hole_basek = range_basek + range_sizek - hole_sizek;
+	} else {
+		range_sizek = state->range_sizek - range0_sizek;
+	}
+	printk(KERN_INFO "range: %016lx - %016lx\n", range_basek<<10, (range_basek + range_sizek)<<10);
+	state->reg = range_to_mtrr(state->reg, range_basek,
+		range_sizek, next_basek, MTRR_TYPE_WRBACK, state->address_bits);
+	if (hole_sizek) {
+		printk(KERN_INFO "hole: %016lx - %016lx\n", hole_basek<<10, (hole_basek + hole_sizek)<<10);
+		state->reg = range_to_mtrr(state->reg, hole_basek,
+			hole_sizek, basek, MTRR_TYPE_UNCACHABLE, state->address_bits);
+	}
+}
+
+static void __init set_var_mtrr_range(struct var_mtrr_state *state, size_t base_pfn, size_t size_pfn)
+{
+	unsigned long basek, sizek;
+
+	if (state->reg >= num_var_ranges)
+		return;
+
+	basek = base_pfn << (PAGE_SHIFT - 10);
+	sizek = size_pfn << (PAGE_SHIFT - 10);
+
+	/* See if I can merge with the last range */
+	if ((basek <= 1024) || (state->range_startk + state->range_sizek == basek)) {
+		unsigned long endk = basek + sizek;
+		state->range_sizek = endk - state->range_startk;
+		return;
+	}
+	/* Write the range mtrrs */
+	if (state->range_sizek != 0) {
+		range_to_mtrr_with_hole(state, basek);
+
+		state->range_startk = 0;
+		state->range_sizek = 0;
+	}
+	/* Allocate an msr */
+	state->range_startk = basek;
+	state->range_sizek  = sizek;
+}
+
+static u64 mtrr_chunk_size __initdata = (256ULL<<20);
+
+static int __init parse_mtrr_chunk_size_opt(char *p)
+{
+	if (!p)
+		return -EINVAL;
+	mtrr_chunk_size = memparse(p, &p);
+	return 0;
+}
+early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt);
+
+static void __init x86_setup_var_mtrrs(struct res_range *range, int nr_range, unsigned address_bits)
+{
+	struct var_mtrr_state var_state;
+	int i;
+
+	var_state.range_startk = 0;
+	var_state.range_sizek = 0;
+	var_state.reg = 0;
+	var_state.address_bits = address_bits;
+	var_state.chunk_sizek = mtrr_chunk_size >> 10;
+
+	/* Write the range etc */
+	for (i = 0; i < nr_range; i++)
+		set_var_mtrr_range(&var_state, range[i].start, range[i].end - range[i].start + 1);
+
+	/* Write the last range */
+	range_to_mtrr_with_hole(&var_state, 0);
+	printk(KERN_INFO "DONE variable MTRRs\n");
+	/* Clear out the extra MTRR's */
+	while (var_state.reg < num_var_ranges)
+		set_var_mtrr(var_state.reg++, 0, 0, 0, var_state.address_bits);
+}
+
+static int __init mtrr_cleanup(unsigned address_bits)
+{
+	unsigned long i, base, size, def, dummy;
+	mtrr_type type;
+	struct res_range range[RANGE_NUM];
+	int nr_range;
+
+	/* extra one for all 0 */
+	int num[MTRR_NUM_TYPES + 1];
+
+	if (!is_cpu(INTEL) || disable_mtrr_cleanup)
+		return 0;
+	rdmsr(MTRRdefType_MSR, def, dummy);
+	def &= 0xff;
+	if (def != MTRR_TYPE_UNCACHABLE)
+		return 0;
+
+	/* check entries number */
+	memset(num, 0, sizeof(num));
+	for (i = 0; i < num_var_ranges; i++) {
+		mtrr_if->get(i, &base, &size, &type);
+		if (type >= MTRR_NUM_TYPES)
+			continue;
+		if (!size)
+			type = MTRR_NUM_TYPES;
+		num[type]++;
+	}
+
+	/* check if we got UC entries */
+	if (!num[MTRR_TYPE_UNCACHABLE])
+		return 0;
+
+	/* check if we only had WB and UC */
+	if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] !=
+		num_var_ranges - num[MTRR_NUM_TYPES])
+		return 0;
+
+	/*
+	 * get WB ranges at first
+	 * assume BIOS don't give us overlapping WB entries
+	 * or add add_range?
+	 */
+	memset(range, 0, sizeof(range));
+	nr_range = 0;
+	for (i = 0; i < num_var_ranges; i++) {
+		mtrr_if->get(i, &base, &size, &type);
+		if (type != MTRR_TYPE_WRBACK)
+			continue;
+		range[nr_range].start = base;
+		range[nr_range].end = base + size - 1;
+		nr_range++;
+	}
+	printk(KERN_INFO "After WB checking\n");
+	for (i = 0; i < nr_range; i++)
+		printk(KERN_INFO "MTRR MAP PFN: %016lx - %016lx\n", range[i].start, range[i].end + 1);
+
+	/* take out UC ranges */
+	for (i = 0; i < num_var_ranges; i++) {
+		mtrr_if->get(i, &base, &size, &type);
+		if (type != MTRR_TYPE_UNCACHABLE)
+			continue;
+		if (!size)
+			continue;
+		subtract_range(range, base, base + size);
+	}
+	/* get new range num */
+	nr_range = 0;
+	for (i = 0; i < RANGE_NUM; i++) {
+		if (!range[i].end)
+			continue;
+		nr_range++;
+	}
+	printk(KERN_INFO "After UC checking\n");
+	for (i = 0; i < nr_range; i++)
+		printk(KERN_INFO "MTRR MAP PFN: %016lx - %016lx\n", range[i].start, range[i].end + 1);
+
+	/* sort the ranges */
+	sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
+	printk(KERN_INFO "After sorting\n");
+	for (i = 0; i < nr_range; i++)
+		printk(KERN_INFO "MTRR MAP PFN: %016lx - %016lx\n", range[i].start, range[i].end + 1);
+
+	/* convert ranges to var ranges state */
+	x86_setup_var_mtrrs(range, nr_range, address_bits);
+
+	return 1;
+
+}
+
 static int disable_mtrr_trim;
 
 static int __init disable_mtrr_trim_setup(char *str)
@@ -729,18 +1072,21 @@ int __init mtrr_trim_uncached_memory(uns
  */
 void __init mtrr_bp_init(void)
 {
+	u32 phys_addr;
 	init_ifs();
 
+	phys_addr = 32;
+
 	if (cpu_has_mtrr) {
 		mtrr_if = &generic_mtrr_ops;
 		size_or_mask = 0xff000000;	/* 36 bits */
 		size_and_mask = 0x00f00000;
+		phys_addr = 36;
 
 		/* This is an AMD specific MSR, but we assume(hope?) that
 		   Intel will implement it to when they extend the address
 		   bus of the Xeon. */
 		if (cpuid_eax(0x80000000) >= 0x80000008) {
-			u32 phys_addr;
 			phys_addr = cpuid_eax(0x80000008) & 0xff;
 			/* CPUID workaround for Intel 0F33/0F34 CPU */
 			if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
@@ -758,6 +1104,7 @@ void __init mtrr_bp_init(void)
 			   don't support PAE */
 			size_or_mask = 0xfff00000;	/* 32 bits */
 			size_and_mask = 0;
+			phys_addr = 32;
 		}
 	} else {
 		switch (boot_cpu_data.x86_vendor) {
@@ -791,8 +1138,13 @@ void __init mtrr_bp_init(void)
 	if (mtrr_if) {
 		set_num_var_ranges();
 		init_table();
-		if (use_intel())
+		if (use_intel()) {
 			get_mtrr_state();
+
+			if (mtrr_cleanup(phys_addr))
+				mtrr_if->set_all();
+
+		}
 	}
 }
 
Index: linux-2.6/arch/x86/kernel/cpu/mtrr/mtrr.h
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -81,6 +81,8 @@ void set_mtrr_done(struct set_mtrr_conte
 void set_mtrr_cache_disable(struct set_mtrr_context *ctxt);
 void set_mtrr_prepare_save(struct set_mtrr_context *ctxt);
 
+void fill_mtrr_var_range(unsigned int index,
+		u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi);
 void get_mtrr_state(void);
 
 extern void set_mtrr_ops(struct mtrr_ops * ops);
Index: linux-2.6/Documentation/kernel-parameters.txt
===================================================================
--- linux-2.6.orig/Documentation/kernel-parameters.txt
+++ linux-2.6/Documentation/kernel-parameters.txt
@@ -595,6 +595,16 @@ and is between 256 and 4096 characters. 
 			See drivers/char/README.epca and
 			Documentation/digiepca.txt.
 
+	disable_mtrr_cleanup [X86]
+			By default the kernel will adjust mtrr from continuous
+			to discrete layout, to make x server driver could add
+			WB entry later. This parameter disables that behavior,
+			will not touch MTRRs.
+
+	mtrr_chunk_size=nn[KMG] [X86]
+			used for mtrr cleanup. It is largest continous chunk
+			that could hold holes aka. UC entries.
+
 	disable_mtrr_trim [X86, Intel and AMD only]
 			By default the kernel will trim any uncacheable
 			memory out of your available memory pool based on

^ permalink raw reply	[flat|nested] 89+ messages in thread

* [PATCH] x86: mtrr cleanup for converting continuous to discrete layout v2
  2008-04-28  6:37 [PATCH] x86: mtrr cleanup for converting continuous to discrete layout Yinghai Lu
@ 2008-04-28  9:06 ` Yinghai Lu
  2008-04-28 13:08   ` Ingo Molnar
  2008-04-28 19:44   ` [PATCH] x86: mtrr cleanup for converting continuous to discrete layout v3 Yinghai Lu
  0 siblings, 2 replies; 89+ messages in thread
From: Yinghai Lu @ 2008-04-28  9:06 UTC (permalink / raw)
  To: Andrew Morton, Ingo Molnar; +Cc: linux-kernel@vger.kernel.org


some BIOS like to use continus MTRR layout, and X driver can not add
WB entries for graphical cards when 4g or more RAM installed.

the patch will change MTRR to discrete.

mtrr_chunk_size= could be used to have smaller continuous block to hold holes.
default is 256m, could be set according to size of graphics card memory.

v2: fix -1 for UC 

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

Index: linux-2.6/arch/x86/kernel/cpu/mtrr/generic.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/generic.c
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/generic.c
@@ -158,6 +158,20 @@ get_mtrr_var_range(unsigned int index, s
 	rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi);
 }
 
+/*  fill the MSR pair relating to a var range  */
+void fill_mtrr_var_range(unsigned int index,
+		u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi)
+{
+	struct mtrr_var_range *vr;
+
+	vr = mtrr_state.var_ranges;
+
+	vr[index].base_lo = base_lo;
+	vr[index].base_hi = base_hi;
+	vr[index].mask_lo = mask_lo;
+	vr[index].mask_hi = mask_hi;
+}
+
 static void
 get_fixed_ranges(mtrr_type * frs)
 {
Index: linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/main.c
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
@@ -37,6 +37,7 @@
 #include <linux/smp.h>
 #include <linux/cpu.h>
 #include <linux/mutex.h>
+#include <linux/sort.h>
 
 #include <asm/e820.h>
 #include <asm/mtrr.h>
@@ -609,6 +610,348 @@ static struct sysdev_driver mtrr_sysdev_
 	.resume		= mtrr_restore,
 };
 
+static int disable_mtrr_cleanup;
+
+static int __init disable_mtrr_cleanup_setup(char *str)
+{
+	disable_mtrr_cleanup = 1;
+	return 0;
+}
+early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);
+
+#define RANGE_NUM 256
+
+struct res_range {
+	size_t start;
+	size_t end;
+};
+
+static void __init subtract_range(struct res_range *range, size_t start,
+				size_t end)
+{
+	int i;
+	int j;
+
+	for (j = 0; j < RANGE_NUM; j++) {
+		if (!range[j].end)
+			continue;
+
+		if (start <= range[j].start && end >= range[j].end) {
+			range[j].start = 0;
+			range[j].end = 0;
+			continue;
+		}
+
+		if (start <= range[j].start && end < range[j].end && range[j].start < end + 1) {
+			range[j].start = end + 1;
+			continue;
+		}
+
+
+		if (start > range[j].start && end >= range[j].end && range[j].end > start - 1) {
+			range[j].end = start - 1;
+			continue;
+		}
+
+		if (start > range[j].start && end < range[j].end) {
+			/* find the new spare */
+			for (i = 0; i < RANGE_NUM; i++) {
+				if (range[i].end == 0)
+					break;
+			}
+			if (i < RANGE_NUM) {
+				range[i].end = range[j].end;
+				range[i].start = end + 1;
+			} else {
+				printk(KERN_ERR "run of slot in ranges\n");
+			}
+			range[j].end = start - 1;
+			continue;
+		}
+	}
+}
+
+static int __cpuinit cmp_range(const void *x1, const void *x2)
+{
+	const struct res_range *r1 = x1;
+	const struct res_range *r2 = x2;
+	s64 start1, start2;
+
+	start1 = r1->start;
+	start2 = r2->start;
+
+	return start1 - start2;
+}
+
+struct var_mtrr_state {
+	unsigned long range_startk, range_sizek;
+	unsigned long chunk_sizek;
+	unsigned int reg;
+	unsigned address_bits;
+};
+
+static void __init set_var_mtrr(
+	unsigned int reg, unsigned long basek, unsigned long sizek,
+	unsigned char type, unsigned address_bits)
+{
+	u32 base_lo, base_hi, mask_lo, mask_hi;
+	unsigned address_mask_high;
+
+	if (!sizek) {
+		fill_mtrr_var_range(reg, 0, 0, 0, 0);
+		return;
+	}
+
+	address_mask_high = ((1u << (address_bits - 32u)) - 1u);
+
+	base_hi = basek >> 22;
+	base_lo  = basek << 10;
+
+	if (sizek < 4*1024*1024) {
+		mask_hi = address_mask_high;
+		mask_lo = ~((sizek << 10) - 1);
+	} else {
+		mask_hi = address_mask_high & (~((sizek >> 22) - 1));
+		mask_lo = 0;
+	}
+
+	base_lo |= type;
+	mask_lo |= 0x800;
+	fill_mtrr_var_range(reg, base_lo, base_hi, mask_lo, mask_hi);
+}
+
+static unsigned int __init range_to_mtrr(unsigned int reg,
+	unsigned long range_startk, unsigned long range_sizek,
+	unsigned long next_range_startk, unsigned char type, unsigned address_bits)
+{
+	if (!range_sizek || (reg >= num_var_ranges))
+		return reg;
+
+	while (range_sizek) {
+		unsigned long max_align, align;
+		unsigned long sizek;
+		/* Compute the maximum size I can make a range */
+		if (range_startk)
+			max_align = ffs(range_startk) - 1;
+		else
+			max_align = 32;
+		align = fls(range_sizek) - 1;
+		if (align > max_align)
+			align = max_align;
+
+		sizek = 1 << align;
+		printk(KERN_INFO "Setting variable MTRR %d, base: %ldMB, range: %ldMB, type %s\n",
+			reg, range_startk >> 10, sizek >> 10,
+			(type == MTRR_TYPE_UNCACHABLE)?"UC":
+			    ((type == MTRR_TYPE_WRBACK)?"WB":"Other")
+			);
+		set_var_mtrr(reg++, range_startk, sizek, type, address_bits);
+		range_startk += sizek;
+		range_sizek -= sizek;
+		if (reg >= num_var_ranges)
+			break;
+	}
+	return reg;
+}
+
+static void __init range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek)
+{
+	unsigned long hole_basek, hole_sizek;
+	unsigned long range0_basek, range0_sizek;
+	unsigned long range_basek, range_sizek;
+	unsigned long next_basek;
+	unsigned long chunk_sizek;
+
+	hole_basek = 0;
+	hole_sizek = 0;
+	chunk_sizek = state->chunk_sizek;
+	range0_basek = state->range_startk;
+	next_basek = basek;
+
+	/* try to append some small hole */
+	range0_sizek = ALIGN(state->range_sizek, chunk_sizek);
+	if ((range0_sizek == state->range_sizek) ||
+		((range0_basek + range0_sizek > basek) && basek)) {
+			printk(KERN_INFO "rangeX: %016lx - %016lx\n", range0_basek<<10, (range0_basek + range0_sizek)<<10);
+			state->reg = range_to_mtrr(state->reg, range0_basek,
+				range0_sizek, next_basek, MTRR_TYPE_WRBACK, state->address_bits);
+		return;
+	}
+
+	range0_sizek -= chunk_sizek;
+	range_basek = range0_basek + range0_sizek;
+	printk(KERN_INFO "range0: %016lx - %016lx\n", range0_basek<<10, (range0_basek + range0_sizek)<<10);
+	state->reg = range_to_mtrr(state->reg, range0_basek,
+		range0_sizek, range_basek, MTRR_TYPE_WRBACK, state->address_bits);
+
+	range_sizek = chunk_sizek;
+	if (range_sizek - (state->range_sizek - range0_sizek) < (chunk_sizek >> 1)) {
+		hole_sizek = range_sizek - (state->range_sizek - range0_sizek);
+		next_basek = hole_basek = range_basek + range_sizek - hole_sizek;
+	} else {
+		range_sizek = state->range_sizek - range0_sizek;
+	}
+	printk(KERN_INFO "range: %016lx - %016lx\n", range_basek<<10, (range_basek + range_sizek)<<10);
+	state->reg = range_to_mtrr(state->reg, range_basek,
+		range_sizek, next_basek, MTRR_TYPE_WRBACK, state->address_bits);
+	if (hole_sizek) {
+		printk(KERN_INFO "hole: %016lx - %016lx\n", hole_basek<<10, (hole_basek + hole_sizek)<<10);
+		state->reg = range_to_mtrr(state->reg, hole_basek,
+			hole_sizek, basek, MTRR_TYPE_UNCACHABLE, state->address_bits);
+	}
+}
+
+static void __init set_var_mtrr_range(struct var_mtrr_state *state, size_t base_pfn, size_t size_pfn)
+{
+	unsigned long basek, sizek;
+
+	if (state->reg >= num_var_ranges)
+		return;
+
+	basek = base_pfn << (PAGE_SHIFT - 10);
+	sizek = size_pfn << (PAGE_SHIFT - 10);
+
+	/* See if I can merge with the last range */
+	if ((basek <= 1024) || (state->range_startk + state->range_sizek == basek)) {
+		unsigned long endk = basek + sizek;
+		state->range_sizek = endk - state->range_startk;
+		return;
+	}
+	/* Write the range mtrrs */
+	if (state->range_sizek != 0) {
+		range_to_mtrr_with_hole(state, basek);
+
+		state->range_startk = 0;
+		state->range_sizek = 0;
+	}
+	/* Allocate an msr */
+	state->range_startk = basek;
+	state->range_sizek  = sizek;
+}
+
+static u64 mtrr_chunk_size __initdata = (256ULL<<20);
+
+static int __init parse_mtrr_chunk_size_opt(char *p)
+{
+	if (!p)
+		return -EINVAL;
+	mtrr_chunk_size = memparse(p, &p);
+	return 0;
+}
+early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt);
+
+static void __init x86_setup_var_mtrrs(struct res_range *range, int nr_range, unsigned address_bits)
+{
+	struct var_mtrr_state var_state;
+	int i;
+
+	var_state.range_startk = 0;
+	var_state.range_sizek = 0;
+	var_state.reg = 0;
+	var_state.address_bits = address_bits;
+	var_state.chunk_sizek = mtrr_chunk_size >> 10;
+
+	/* Write the range etc */
+	for (i = 0; i < nr_range; i++)
+		set_var_mtrr_range(&var_state, range[i].start, range[i].end - range[i].start + 1);
+
+	/* Write the last range */
+	range_to_mtrr_with_hole(&var_state, 0);
+	printk(KERN_INFO "DONE variable MTRRs\n");
+	/* Clear out the extra MTRR's */
+	while (var_state.reg < num_var_ranges)
+		set_var_mtrr(var_state.reg++, 0, 0, 0, var_state.address_bits);
+}
+
+static int __init mtrr_cleanup(unsigned address_bits)
+{
+	unsigned long i, base, size, def, dummy;
+	mtrr_type type;
+	struct res_range range[RANGE_NUM];
+	int nr_range;
+
+	/* extra one for all 0 */
+	int num[MTRR_NUM_TYPES + 1];
+
+	if (!is_cpu(INTEL) || disable_mtrr_cleanup)
+		return 0;
+	rdmsr(MTRRdefType_MSR, def, dummy);
+	def &= 0xff;
+	if (def != MTRR_TYPE_UNCACHABLE)
+		return 0;
+
+	/* check entries number */
+	memset(num, 0, sizeof(num));
+	for (i = 0; i < num_var_ranges; i++) {
+		mtrr_if->get(i, &base, &size, &type);
+		if (type >= MTRR_NUM_TYPES)
+			continue;
+		if (!size)
+			type = MTRR_NUM_TYPES;
+		num[type]++;
+	}
+
+	/* check if we got UC entries */
+	if (!num[MTRR_TYPE_UNCACHABLE])
+		return 0;
+
+	/* check if we only had WB and UC */
+	if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] !=
+		num_var_ranges - num[MTRR_NUM_TYPES])
+		return 0;
+
+	/*
+	 * get WB ranges at first
+	 * assume BIOS don't give us overlapping WB entries
+	 * or add add_range?
+	 */
+	memset(range, 0, sizeof(range));
+	nr_range = 0;
+	for (i = 0; i < num_var_ranges; i++) {
+		mtrr_if->get(i, &base, &size, &type);
+		if (type != MTRR_TYPE_WRBACK)
+			continue;
+		range[nr_range].start = base;
+		range[nr_range].end = base + size - 1;
+		nr_range++;
+	}
+	printk(KERN_INFO "After WB checking\n");
+	for (i = 0; i < nr_range; i++)
+		printk(KERN_INFO "MTRR MAP PFN: %016lx - %016lx\n", range[i].start, range[i].end + 1);
+
+	/* take out UC ranges */
+	for (i = 0; i < num_var_ranges; i++) {
+		mtrr_if->get(i, &base, &size, &type);
+		if (type != MTRR_TYPE_UNCACHABLE)
+			continue;
+		if (!size)
+			continue;
+		subtract_range(range, base, base + size - 1);
+	}
+	/* get new range num */
+	nr_range = 0;
+	for (i = 0; i < RANGE_NUM; i++) {
+		if (!range[i].end)
+			continue;
+		nr_range++;
+	}
+	printk(KERN_INFO "After UC checking\n");
+	for (i = 0; i < nr_range; i++)
+		printk(KERN_INFO "MTRR MAP PFN: %016lx - %016lx\n", range[i].start, range[i].end + 1);
+
+	/* sort the ranges */
+	sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
+	printk(KERN_INFO "After sorting\n");
+	for (i = 0; i < nr_range; i++)
+		printk(KERN_INFO "MTRR MAP PFN: %016lx - %016lx\n", range[i].start, range[i].end + 1);
+
+	/* convert ranges to var ranges state */
+	x86_setup_var_mtrrs(range, nr_range, address_bits);
+
+	return 1;
+
+}
+
 static int disable_mtrr_trim;
 
 static int __init disable_mtrr_trim_setup(char *str)
@@ -729,18 +1072,21 @@ int __init mtrr_trim_uncached_memory(uns
  */
 void __init mtrr_bp_init(void)
 {
+	u32 phys_addr;
 	init_ifs();
 
+	phys_addr = 32;
+
 	if (cpu_has_mtrr) {
 		mtrr_if = &generic_mtrr_ops;
 		size_or_mask = 0xff000000;	/* 36 bits */
 		size_and_mask = 0x00f00000;
+		phys_addr = 36;
 
 		/* This is an AMD specific MSR, but we assume(hope?) that
 		   Intel will implement it to when they extend the address
 		   bus of the Xeon. */
 		if (cpuid_eax(0x80000000) >= 0x80000008) {
-			u32 phys_addr;
 			phys_addr = cpuid_eax(0x80000008) & 0xff;
 			/* CPUID workaround for Intel 0F33/0F34 CPU */
 			if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
@@ -758,6 +1104,7 @@ void __init mtrr_bp_init(void)
 			   don't support PAE */
 			size_or_mask = 0xfff00000;	/* 32 bits */
 			size_and_mask = 0;
+			phys_addr = 32;
 		}
 	} else {
 		switch (boot_cpu_data.x86_vendor) {
@@ -791,8 +1138,13 @@ void __init mtrr_bp_init(void)
 	if (mtrr_if) {
 		set_num_var_ranges();
 		init_table();
-		if (use_intel())
+		if (use_intel()) {
 			get_mtrr_state();
+
+			if (mtrr_cleanup(phys_addr))
+				mtrr_if->set_all();
+
+		}
 	}
 }
 
Index: linux-2.6/arch/x86/kernel/cpu/mtrr/mtrr.h
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -81,6 +81,8 @@ void set_mtrr_done(struct set_mtrr_conte
 void set_mtrr_cache_disable(struct set_mtrr_context *ctxt);
 void set_mtrr_prepare_save(struct set_mtrr_context *ctxt);
 
+void fill_mtrr_var_range(unsigned int index,
+		u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi);
 void get_mtrr_state(void);
 
 extern void set_mtrr_ops(struct mtrr_ops * ops);
Index: linux-2.6/Documentation/kernel-parameters.txt
===================================================================
--- linux-2.6.orig/Documentation/kernel-parameters.txt
+++ linux-2.6/Documentation/kernel-parameters.txt
@@ -595,6 +595,16 @@ and is between 256 and 4096 characters. 
 			See drivers/char/README.epca and
 			Documentation/digiepca.txt.
 
+	disable_mtrr_cleanup [X86]
+			By default the kernel will adjust mtrr from continuous
+			to discrete layout, to make x server driver could add
+			WB entry later. This parameter disables that behavior,
+			will not touch MTRRs.
+
+	mtrr_chunk_size=nn[KMG] [X86]
+			used for mtrr cleanup. It is largest continous chunk
+			that could hold holes aka. UC entries.
+
 	disable_mtrr_trim [X86, Intel and AMD only]
 			By default the kernel will trim any uncacheable
 			memory out of your available memory pool based on

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH] x86: mtrr cleanup for converting continuous to discrete layout v2
  2008-04-28  9:06 ` [PATCH] x86: mtrr cleanup for converting continuous to discrete layout v2 Yinghai Lu
@ 2008-04-28 13:08   ` Ingo Molnar
  2008-04-28 13:49     ` Arjan van de Ven
  2008-04-28 19:44   ` [PATCH] x86: mtrr cleanup for converting continuous to discrete layout v3 Yinghai Lu
  1 sibling, 1 reply; 89+ messages in thread
From: Ingo Molnar @ 2008-04-28 13:08 UTC (permalink / raw)
  To: yhlu.kernel
  Cc: Andrew Morton, linux-kernel@vger.kernel.org, H. Peter Anvin,
	Thomas Gleixner, Arjan van de Ven


* Yinghai Lu <yhlu.kernel.send@gmail.com> wrote:

> some BIOS like to use continus MTRR layout, and X driver can not add 
> WB entries for graphical cards when 4g or more RAM installed.
> 
> the patch will change MTRR to discrete.
> 
> mtrr_chunk_size= could be used to have smaller continuous block to 
> hold holes. default is 256m, could be set according to size of 
> graphics card memory.

hm.

> +static int disable_mtrr_cleanup;

i think this should be default-disabled. Touching MTRRs on a live system 
could interact with SMM and MCE handlers.

how relevant is this feature with modern Xorg? I thought modern Xorg 
would get its mappings via /sys, hence it would not have to touch MTRRs 
at all.

	Ingo

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH] x86: mtrr cleanup for converting continuous to discrete layout v2
  2008-04-28 13:08   ` Ingo Molnar
@ 2008-04-28 13:49     ` Arjan van de Ven
  2008-04-28 15:28       ` Mika Fischer
  0 siblings, 1 reply; 89+ messages in thread
From: Arjan van de Ven @ 2008-04-28 13:49 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: yhlu.kernel, Andrew Morton, linux-kernel@vger.kernel.org,
	H. Peter Anvin, Thomas Gleixner

On Mon, 28 Apr 2008 15:08:26 +0200
Ingo Molnar <mingo@elte.hu> wrote:

> 
> * Yinghai Lu <yhlu.kernel.send@gmail.com> wrote:
> 
> > some BIOS like to use continus MTRR layout, and X driver can not
> > add WB entries for graphical cards when 4g or more RAM installed.
> > 
> > the patch will change MTRR to discrete.
> > 
> > mtrr_chunk_size= could be used to have smaller continuous block to 
> > hold holes. default is 256m, could be set according to size of 
> > graphics card memory.
> 
> hm.
> 
> > +static int disable_mtrr_cleanup;
> 
> i think this should be default-disabled. Touching MTRRs on a live
> system could interact with SMM and MCE handlers.

yep one should not touch existing MTRRs. If you run out, tough luck.
Thats what we have PAT for.
Changing them anyway is a deathtrap with various things, suspend/resume being only
one of the nasty cases.

> 
> how relevant is this feature with modern Xorg? I thought modern Xorg 
> would get its mappings via /sys, hence it would not have to touch
> MTRRs at all.

that's true for current X, but not for 6 month old X :=(

-- 
If you want to reach me at my work email, use arjan@linux.intel.com
For development, discussion and tips for power savings, 
visit http://www.lesswatts.org

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH] x86: mtrr cleanup for converting continuous to discrete layout v2
  2008-04-28 13:49     ` Arjan van de Ven
@ 2008-04-28 15:28       ` Mika Fischer
  2008-04-28  5:50         ` Arjan van de Ven
  2008-04-28 16:01         ` Gabriel C
  0 siblings, 2 replies; 89+ messages in thread
From: Mika Fischer @ 2008-04-28 15:28 UTC (permalink / raw)
  To: Arjan van de Ven
  Cc: Ingo Molnar, yhlu.kernel, Andrew Morton,
	linux-kernel@vger.kernel.org, H. Peter Anvin, Thomas Gleixner,
	Gabriel C

Arjan van de Ven schrieb:
> yep one should not touch existing MTRRs. If you run out, tough luck.
> Thats what we have PAT for.
> Changing them anyway is a deathtrap with various things, suspend/resume being only
> one of the nasty cases.

Hm. I currently have to remove the offending (i.e. overlapping my video
memory) MTRRs and split them so that they have a hole where my video
memory is.

Only that makes the X server happy, which wants to set up a
write-combining range covering the video memory.

Is there a better workaround?

>> how relevant is this feature with modern Xorg? I thought modern Xorg 
>> would get its mappings via /sys, hence it would not have to touch
>> MTRRs at all.
> 
> that's true for current X, but not for 6 month old X :=(

Ah, so the new X will be able to use the video memory in write-combining
mode even if I have an MTRR saying this area is uncachable or write-back?

If that is the case, then I agree that this patch is not really needed.

Regards,
 Mika

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH] x86: mtrr cleanup for converting continuous to discrete layout v2
  2008-04-28 15:28       ` Mika Fischer
  2008-04-28  5:50         ` Arjan van de Ven
@ 2008-04-28 16:01         ` Gabriel C
  2008-04-28 16:28           ` Mika Fischer
  1 sibling, 1 reply; 89+ messages in thread
From: Gabriel C @ 2008-04-28 16:01 UTC (permalink / raw)
  To: Mika Fischer
  Cc: Arjan van de Ven, Ingo Molnar, yhlu.kernel, Andrew Morton,
	linux-kernel@vger.kernel.org, H. Peter Anvin, Thomas Gleixner

Mika Fischer wrote:
>>> how relevant is this feature with modern Xorg? I thought modern Xorg 
>>> would get its mappings via /sys, hence it would not have to touch
>>> MTRRs at all.
>> that's true for current X, but not for 6 month old X :=(
> 
> Ah, so the new X will be able to use the video memory in write-combining
> mode even if I have an MTRR saying this area is uncachable or write-back?
> 
> If that is the case, then I agree that this patch is not really needed.

I doubt there will be any usable Xorg release soon.
And I doubt peoples will run Xorg , mesa , etc master HEAD to have that feature. 

I think this patch is needed for now but as Ingo said , it should not be default.
Peoples with these sort problems could boot with some option to enables this workaround.

Just my opinion.

> 
> Regards,
>  Mika
> 

Gabriel

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH] x86: mtrr cleanup for converting continuous to discrete layout v2
  2008-04-28 16:01         ` Gabriel C
@ 2008-04-28 16:28           ` Mika Fischer
  0 siblings, 0 replies; 89+ messages in thread
From: Mika Fischer @ 2008-04-28 16:28 UTC (permalink / raw)
  To: Gabriel C
  Cc: Arjan van de Ven, Ingo Molnar, yhlu.kernel, Andrew Morton,
	linux-kernel@vger.kernel.org, H. Peter Anvin, Thomas Gleixner

Gabriel C schrieb:
>> Ah, so the new X will be able to use the video memory in write-combining
>> mode even if I have an MTRR saying this area is uncachable or write-back?
>>
>> If that is the case, then I agree that this patch is not really needed.

It seems not to be the case, so this is moot anyway :)

> I doubt there will be any usable Xorg release soon.
> And I doubt peoples will run Xorg , mesa , etc master HEAD to have that feature. 
> 
> I think this patch is needed for now but as Ingo said , it should not be default.
> Peoples with these sort problems could boot with some option to enables this workaround.

Well, if it is compiled but inactive by default and can be enabled with
a kernel boot option, this is fine too.

But if it's a compile-time option, I doubt that any distro-kernel would
touch it if it's really dangerous. They're probably more likely to
backport the Xorg stuff if the new Xorg does not get finished in time.

Regards,
 Mika

^ permalink raw reply	[flat|nested] 89+ messages in thread

* [PATCH] x86: mtrr cleanup for converting continuous to discrete layout v3
  2008-04-28  9:06 ` [PATCH] x86: mtrr cleanup for converting continuous to discrete layout v2 Yinghai Lu
  2008-04-28 13:08   ` Ingo Molnar
@ 2008-04-28 19:44   ` Yinghai Lu
  2008-04-28 20:15     ` Ingo Molnar
  2008-04-28 20:16     ` [PATCH] x86: mtrr cleanup for converting continuous to discrete layout v4 Yinghai Lu
  1 sibling, 2 replies; 89+ messages in thread
From: Yinghai Lu @ 2008-04-28 19:44 UTC (permalink / raw)
  To: Andrew Morton, Ingo Molnar, H. Peter Anvin, Thomas Gleixner
  Cc: linux-kernel@vger.kernel.org, Gabriel C, Mika Fischer


some BIOS like to use continus MTRR layout, and some X driver can not add
WB entries for graphical cards when 4g or more RAM installed.

the patch will change MTRR to discrete.

mtrr_chunk_size= could be used to have smaller continuous block to hold holes.
default is 256m, could be set according to size of graphics card memory.

v2: fix -1 for UC checking
v3: default to disable, and need use enable_mtrr_cleanup to enable this feature
    skip the var state change warning.
    remove next_basek in range_to_mtrr()

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

Index: linux-2.6/arch/x86/kernel/cpu/mtrr/generic.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/generic.c
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/generic.c
@@ -158,6 +158,20 @@ get_mtrr_var_range(unsigned int index, s
 	rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi);
 }
 
+/*  fill the MSR pair relating to a var range  */
+void fill_mtrr_var_range(unsigned int index,
+		u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi)
+{
+	struct mtrr_var_range *vr;
+
+	vr = mtrr_state.var_ranges;
+
+	vr[index].base_lo = base_lo;
+	vr[index].base_hi = base_hi;
+	vr[index].mask_lo = mask_lo;
+	vr[index].mask_hi = mask_hi;
+}
+
 static void
 get_fixed_ranges(mtrr_type * frs)
 {
Index: linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/main.c
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
@@ -37,6 +37,7 @@
 #include <linux/smp.h>
 #include <linux/cpu.h>
 #include <linux/mutex.h>
+#include <linux/sort.h>
 
 #include <asm/e820.h>
 #include <asm/mtrr.h>
@@ -609,6 +610,345 @@ static struct sysdev_driver mtrr_sysdev_
 	.resume		= mtrr_restore,
 };
 
+static int __initdata enable_mtrr_cleanup;
+
+static int __init enable_mtrr_cleanup_setup(char *str)
+{
+	enable_mtrr_cleanup = 1;
+	return 0;
+}
+early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup);
+
+#define RANGE_NUM 256
+
+struct res_range {
+	size_t start;
+	size_t end;
+};
+
+static void __init subtract_range(struct res_range *range, size_t start,
+				size_t end)
+{
+	int i;
+	int j;
+
+	for (j = 0; j < RANGE_NUM; j++) {
+		if (!range[j].end)
+			continue;
+
+		if (start <= range[j].start && end >= range[j].end) {
+			range[j].start = 0;
+			range[j].end = 0;
+			continue;
+		}
+
+		if (start <= range[j].start && end < range[j].end && range[j].start < end + 1) {
+			range[j].start = end + 1;
+			continue;
+		}
+
+
+		if (start > range[j].start && end >= range[j].end && range[j].end > start - 1) {
+			range[j].end = start - 1;
+			continue;
+		}
+
+		if (start > range[j].start && end < range[j].end) {
+			/* find the new spare */
+			for (i = 0; i < RANGE_NUM; i++) {
+				if (range[i].end == 0)
+					break;
+			}
+			if (i < RANGE_NUM) {
+				range[i].end = range[j].end;
+				range[i].start = end + 1;
+			} else {
+				printk(KERN_ERR "run of slot in ranges\n");
+			}
+			range[j].end = start - 1;
+			continue;
+		}
+	}
+}
+
+static int __cpuinit cmp_range(const void *x1, const void *x2)
+{
+	const struct res_range *r1 = x1;
+	const struct res_range *r2 = x2;
+	s64 start1, start2;
+
+	start1 = r1->start;
+	start2 = r2->start;
+
+	return start1 - start2;
+}
+
+struct var_mtrr_state {
+	unsigned long range_startk, range_sizek;
+	unsigned long chunk_sizek;
+	unsigned int reg;
+	unsigned address_bits;
+};
+
+static void __init set_var_mtrr(
+	unsigned int reg, unsigned long basek, unsigned long sizek,
+	unsigned char type, unsigned address_bits)
+{
+	u32 base_lo, base_hi, mask_lo, mask_hi;
+	unsigned address_mask_high;
+
+	if (!sizek) {
+		fill_mtrr_var_range(reg, 0, 0, 0, 0);
+		return;
+	}
+
+	address_mask_high = ((1u << (address_bits - 32u)) - 1u);
+
+	base_hi = basek >> 22;
+	base_lo  = basek << 10;
+
+	if (sizek < 4*1024*1024) {
+		mask_hi = address_mask_high;
+		mask_lo = ~((sizek << 10) - 1);
+	} else {
+		mask_hi = address_mask_high & (~((sizek >> 22) - 1));
+		mask_lo = 0;
+	}
+
+	base_lo |= type;
+	mask_lo |= 0x800;
+	fill_mtrr_var_range(reg, base_lo, base_hi, mask_lo, mask_hi);
+}
+
+static unsigned int __init range_to_mtrr(unsigned int reg,
+	unsigned long range_startk, unsigned long range_sizek,
+	unsigned char type, unsigned address_bits)
+{
+	if (!range_sizek || (reg >= num_var_ranges))
+		return reg;
+
+	while (range_sizek) {
+		unsigned long max_align, align;
+		unsigned long sizek;
+		/* Compute the maximum size I can make a range */
+		if (range_startk)
+			max_align = ffs(range_startk) - 1;
+		else
+			max_align = 32;
+		align = fls(range_sizek) - 1;
+		if (align > max_align)
+			align = max_align;
+
+		sizek = 1 << align;
+		printk(KERN_INFO "Setting variable MTRR %d, base: %ldMB, range: %ldMB, type %s\n",
+			reg, range_startk >> 10, sizek >> 10,
+			(type == MTRR_TYPE_UNCACHABLE)?"UC":
+			    ((type == MTRR_TYPE_WRBACK)?"WB":"Other")
+			);
+		set_var_mtrr(reg++, range_startk, sizek, type, address_bits);
+		range_startk += sizek;
+		range_sizek -= sizek;
+		if (reg >= num_var_ranges)
+			break;
+	}
+	return reg;
+}
+
+static void __init range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek)
+{
+	unsigned long hole_basek, hole_sizek;
+	unsigned long range0_basek, range0_sizek;
+	unsigned long range_basek, range_sizek;
+	unsigned long chunk_sizek;
+
+	hole_basek = 0;
+	hole_sizek = 0;
+	chunk_sizek = state->chunk_sizek;
+	range0_basek = state->range_startk;
+
+	/* try to append some small hole */
+	range0_sizek = ALIGN(state->range_sizek, chunk_sizek);
+	if ((range0_sizek == state->range_sizek) ||
+		((range0_basek + range0_sizek > basek) && basek)) {
+			printk(KERN_INFO "rangeX: %016lx - %016lx\n", range0_basek<<10, (range0_basek + range0_sizek)<<10);
+			state->reg = range_to_mtrr(state->reg, range0_basek,
+				range0_sizek, MTRR_TYPE_WRBACK, state->address_bits);
+		return;
+	}
+
+	range0_sizek -= chunk_sizek;
+	range_basek = range0_basek + range0_sizek;
+	printk(KERN_INFO "range0: %016lx - %016lx\n", range0_basek<<10, (range0_basek + range0_sizek)<<10);
+	state->reg = range_to_mtrr(state->reg, range0_basek,
+			range0_sizek, MTRR_TYPE_WRBACK, state->address_bits);
+
+	range_sizek = chunk_sizek;
+	if (range_sizek - (state->range_sizek - range0_sizek) < (chunk_sizek >> 1))
+		hole_sizek = range_sizek - (state->range_sizek - range0_sizek);
+	else
+		range_sizek = state->range_sizek - range0_sizek;
+
+	printk(KERN_INFO "range: %016lx - %016lx\n", range_basek<<10, (range_basek + range_sizek)<<10);
+	state->reg = range_to_mtrr(state->reg, range_basek,
+			range_sizek, MTRR_TYPE_WRBACK, state->address_bits);
+	if (hole_sizek) {
+		printk(KERN_INFO "hole: %016lx - %016lx\n", hole_basek<<10, (hole_basek + hole_sizek)<<10);
+		state->reg = range_to_mtrr(state->reg, hole_basek,
+				hole_sizek, MTRR_TYPE_UNCACHABLE, state->address_bits);
+	}
+}
+
+static void __init set_var_mtrr_range(struct var_mtrr_state *state, size_t base_pfn, size_t size_pfn)
+{
+	unsigned long basek, sizek;
+
+	if (state->reg >= num_var_ranges)
+		return;
+
+	basek = base_pfn << (PAGE_SHIFT - 10);
+	sizek = size_pfn << (PAGE_SHIFT - 10);
+
+	/* See if I can merge with the last range */
+	if ((basek <= 1024) || (state->range_startk + state->range_sizek == basek)) {
+		unsigned long endk = basek + sizek;
+		state->range_sizek = endk - state->range_startk;
+		return;
+	}
+	/* Write the range mtrrs */
+	if (state->range_sizek != 0) {
+		range_to_mtrr_with_hole(state, basek);
+
+		state->range_startk = 0;
+		state->range_sizek = 0;
+	}
+	/* Allocate an msr */
+	state->range_startk = basek;
+	state->range_sizek  = sizek;
+}
+
+static u64 mtrr_chunk_size __initdata = (256ULL<<20);
+
+static int __init parse_mtrr_chunk_size_opt(char *p)
+{
+	if (!p)
+		return -EINVAL;
+	mtrr_chunk_size = memparse(p, &p);
+	return 0;
+}
+early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt);
+
+static void __init x86_setup_var_mtrrs(struct res_range *range, int nr_range, unsigned address_bits)
+{
+	struct var_mtrr_state var_state;
+	int i;
+
+	var_state.range_startk = 0;
+	var_state.range_sizek = 0;
+	var_state.reg = 0;
+	var_state.address_bits = address_bits;
+	var_state.chunk_sizek = mtrr_chunk_size >> 10;
+
+	/* Write the range etc */
+	for (i = 0; i < nr_range; i++)
+		set_var_mtrr_range(&var_state, range[i].start, range[i].end - range[i].start + 1);
+
+	/* Write the last range */
+	range_to_mtrr_with_hole(&var_state, 0);
+	printk(KERN_INFO "DONE variable MTRRs\n");
+	/* Clear out the extra MTRR's */
+	while (var_state.reg < num_var_ranges)
+		set_var_mtrr(var_state.reg++, 0, 0, 0, var_state.address_bits);
+}
+
+static int __init mtrr_cleanup(unsigned address_bits)
+{
+	unsigned long i, base, size, def, dummy;
+	mtrr_type type;
+	struct res_range range[RANGE_NUM];
+	int nr_range;
+
+	/* extra one for all 0 */
+	int num[MTRR_NUM_TYPES + 1];
+
+	if (!is_cpu(INTEL) || !enable_mtrr_cleanup)
+		return 0;
+	rdmsr(MTRRdefType_MSR, def, dummy);
+	def &= 0xff;
+	if (def != MTRR_TYPE_UNCACHABLE)
+		return 0;
+
+	/* check entries number */
+	memset(num, 0, sizeof(num));
+	for (i = 0; i < num_var_ranges; i++) {
+		mtrr_if->get(i, &base, &size, &type);
+		if (type >= MTRR_NUM_TYPES)
+			continue;
+		if (!size)
+			type = MTRR_NUM_TYPES;
+		num[type]++;
+	}
+
+	/* check if we got UC entries */
+	if (!num[MTRR_TYPE_UNCACHABLE])
+		return 0;
+
+	/* check if we only had WB and UC */
+	if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] !=
+		num_var_ranges - num[MTRR_NUM_TYPES])
+		return 0;
+
+	/*
+	 * get WB ranges at first
+	 * assume BIOS don't give us overlapping WB entries
+	 * or add add_range?
+	 */
+	memset(range, 0, sizeof(range));
+	nr_range = 0;
+	for (i = 0; i < num_var_ranges; i++) {
+		mtrr_if->get(i, &base, &size, &type);
+		if (type != MTRR_TYPE_WRBACK)
+			continue;
+		range[nr_range].start = base;
+		range[nr_range].end = base + size - 1;
+		nr_range++;
+	}
+	printk(KERN_INFO "After WB checking\n");
+	for (i = 0; i < nr_range; i++)
+		printk(KERN_INFO "MTRR MAP PFN: %016lx - %016lx\n", range[i].start, range[i].end + 1);
+
+	/* take out UC ranges */
+	for (i = 0; i < num_var_ranges; i++) {
+		mtrr_if->get(i, &base, &size, &type);
+		if (type != MTRR_TYPE_UNCACHABLE)
+			continue;
+		if (!size)
+			continue;
+		subtract_range(range, base, base + size - 1);
+	}
+	/* get new range num */
+	nr_range = 0;
+	for (i = 0; i < RANGE_NUM; i++) {
+		if (!range[i].end)
+			continue;
+		nr_range++;
+	}
+	printk(KERN_INFO "After UC checking\n");
+	for (i = 0; i < nr_range; i++)
+		printk(KERN_INFO "MTRR MAP PFN: %016lx - %016lx\n", range[i].start, range[i].end + 1);
+
+	/* sort the ranges */
+	sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
+	printk(KERN_INFO "After sorting\n");
+	for (i = 0; i < nr_range; i++)
+		printk(KERN_INFO "MTRR MAP PFN: %016lx - %016lx\n", range[i].start, range[i].end + 1);
+
+	/* convert ranges to var ranges state */
+	x86_setup_var_mtrrs(range, nr_range, address_bits);
+
+	return 1;
+
+}
+
 static int disable_mtrr_trim;
 
 static int __init disable_mtrr_trim_setup(char *str)
@@ -729,18 +1069,21 @@ int __init mtrr_trim_uncached_memory(uns
  */
 void __init mtrr_bp_init(void)
 {
+	u32 phys_addr;
 	init_ifs();
 
+	phys_addr = 32;
+
 	if (cpu_has_mtrr) {
 		mtrr_if = &generic_mtrr_ops;
 		size_or_mask = 0xff000000;	/* 36 bits */
 		size_and_mask = 0x00f00000;
+		phys_addr = 36;
 
 		/* This is an AMD specific MSR, but we assume(hope?) that
 		   Intel will implement it to when they extend the address
 		   bus of the Xeon. */
 		if (cpuid_eax(0x80000000) >= 0x80000008) {
-			u32 phys_addr;
 			phys_addr = cpuid_eax(0x80000008) & 0xff;
 			/* CPUID workaround for Intel 0F33/0F34 CPU */
 			if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
@@ -758,6 +1101,7 @@ void __init mtrr_bp_init(void)
 			   don't support PAE */
 			size_or_mask = 0xfff00000;	/* 32 bits */
 			size_and_mask = 0;
+			phys_addr = 32;
 		}
 	} else {
 		switch (boot_cpu_data.x86_vendor) {
@@ -791,8 +1135,13 @@ void __init mtrr_bp_init(void)
 	if (mtrr_if) {
 		set_num_var_ranges();
 		init_table();
-		if (use_intel())
+		if (use_intel()) {
 			get_mtrr_state();
+
+			if (mtrr_cleanup(phys_addr))
+				mtrr_if->set_all();
+
+		}
 	}
 }
 
@@ -829,7 +1178,7 @@ static int __init mtrr_init_finialize(vo
 {
 	if (!mtrr_if)
 		return 0;
-	if (use_intel())
+	if (use_intel() && !enable_mtrr_cleanup)
 		mtrr_state_warn();
 	else {
 		/* The CPUs haven't MTRR and seem to not support SMP. They have
Index: linux-2.6/arch/x86/kernel/cpu/mtrr/mtrr.h
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -81,6 +81,8 @@ void set_mtrr_done(struct set_mtrr_conte
 void set_mtrr_cache_disable(struct set_mtrr_context *ctxt);
 void set_mtrr_prepare_save(struct set_mtrr_context *ctxt);
 
+void fill_mtrr_var_range(unsigned int index,
+		u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi);
 void get_mtrr_state(void);
 
 extern void set_mtrr_ops(struct mtrr_ops * ops);
Index: linux-2.6/Documentation/kernel-parameters.txt
===================================================================
--- linux-2.6.orig/Documentation/kernel-parameters.txt
+++ linux-2.6/Documentation/kernel-parameters.txt
@@ -595,6 +595,16 @@ and is between 256 and 4096 characters. 
 			See drivers/char/README.epca and
 			Documentation/digiepca.txt.
 
+	enable_mtrr_cleanup [X86]
+			By default the kernel will not touch mtrr. But we may
+			need adjust mtrr from continuous to discrete layout,
+			to make x server driver could add WB entry later. This
+			parameter enable that behavior, will touch MTRRs.
+
+	mtrr_chunk_size=nn[KMG] [X86]
+			used for mtrr cleanup. It is largest continous chunk
+			that could hold holes aka. UC entries.
+
 	disable_mtrr_trim [X86, Intel and AMD only]
 			By default the kernel will trim any uncacheable
 			memory out of your available memory pool based on

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH] x86: mtrr cleanup for converting continuous to discrete layout v3
  2008-04-28 19:44   ` [PATCH] x86: mtrr cleanup for converting continuous to discrete layout v3 Yinghai Lu
@ 2008-04-28 20:15     ` Ingo Molnar
  2008-04-28 20:18       ` Yinghai Lu
  2008-04-28 20:16     ` [PATCH] x86: mtrr cleanup for converting continuous to discrete layout v4 Yinghai Lu
  1 sibling, 1 reply; 89+ messages in thread
From: Ingo Molnar @ 2008-04-28 20:15 UTC (permalink / raw)
  To: yhlu.kernel
  Cc: Andrew Morton, H. Peter Anvin, Thomas Gleixner,
	linux-kernel@vger.kernel.org, Gabriel C, Mika Fischer


* Yinghai Lu <yhlu.kernel.send@gmail.com> wrote:

> some BIOS like to use continus MTRR layout, and some X driver can not 
> add WB entries for graphical cards when 4g or more RAM installed.
> 
> the patch will change MTRR to discrete.
> 
> mtrr_chunk_size= could be used to have smaller continuous block to 
> hold holes. default is 256m, could be set according to size of 
> graphics card memory.
> 
> v2: fix -1 for UC checking

> v3: default to disable, and need use enable_mtrr_cleanup to enable 
>     this feature
>     skip the var state change warning.
>     remove next_basek in range_to_mtrr()

a boot option is often inconvenient though - could you perhaps also make 
this a Kconfig option, with it defaulting to off? Something like 
CONFIG_MTRR_SANITIZE=y or so? That way distros can make a conscious 
decision as well whether they want this feature (for the Xorg they pick) 
or not.

	Ingo

^ permalink raw reply	[flat|nested] 89+ messages in thread

* [PATCH] x86: mtrr cleanup for converting continuous to discrete layout v4
  2008-04-28 19:44   ` [PATCH] x86: mtrr cleanup for converting continuous to discrete layout v3 Yinghai Lu
  2008-04-28 20:15     ` Ingo Molnar
@ 2008-04-28 20:16     ` Yinghai Lu
  2008-04-28 22:05       ` [PATCH] x86: mtrr cleanup for converting continuous to discrete layout v5 Yinghai Lu
  1 sibling, 1 reply; 89+ messages in thread
From: Yinghai Lu @ 2008-04-28 20:16 UTC (permalink / raw)
  To: Andrew Morton, Ingo Molnar, H. Peter Anvin, Thomas Gleixner
  Cc: linux-kernel@vger.kernel.org, Gabriel C, Mika Fischer


some BIOS like to use continus MTRR layout, and may X driver can not add
WB entries for graphical cards when 4g or more RAM installed.

the patch will change MTRR to discrete.

mtrr_chunk_size= could be used to have smaller continuous block to hold holes.
default is 256m, could be set according to size of graphics card memory.

v2: fix -1 for UC checking
v3: default to disable, and need use enable_mtrr_cleanup to enable this feature
    skip the var state change warning.
    remove next_basek in range_to_mtrr()
v4: correct warning mask.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

Index: linux-2.6/arch/x86/kernel/cpu/mtrr/generic.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/generic.c
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/generic.c
@@ -158,6 +158,20 @@ get_mtrr_var_range(unsigned int index, s
 	rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi);
 }
 
+/*  fill the MSR pair relating to a var range  */
+void fill_mtrr_var_range(unsigned int index,
+		u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi)
+{
+	struct mtrr_var_range *vr;
+
+	vr = mtrr_state.var_ranges;
+
+	vr[index].base_lo = base_lo;
+	vr[index].base_hi = base_hi;
+	vr[index].mask_lo = mask_lo;
+	vr[index].mask_hi = mask_hi;
+}
+
 static void
 get_fixed_ranges(mtrr_type * frs)
 {
Index: linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/main.c
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
@@ -37,6 +37,7 @@
 #include <linux/smp.h>
 #include <linux/cpu.h>
 #include <linux/mutex.h>
+#include <linux/sort.h>
 
 #include <asm/e820.h>
 #include <asm/mtrr.h>
@@ -609,6 +610,345 @@ static struct sysdev_driver mtrr_sysdev_
 	.resume		= mtrr_restore,
 };
 
+static int __initdata enable_mtrr_cleanup;
+
+static int __init enable_mtrr_cleanup_setup(char *str)
+{
+	enable_mtrr_cleanup = 1;
+	return 0;
+}
+early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup);
+
+#define RANGE_NUM 256
+
+struct res_range {
+	size_t start;
+	size_t end;
+};
+
+static void __init subtract_range(struct res_range *range, size_t start,
+				size_t end)
+{
+	int i;
+	int j;
+
+	for (j = 0; j < RANGE_NUM; j++) {
+		if (!range[j].end)
+			continue;
+
+		if (start <= range[j].start && end >= range[j].end) {
+			range[j].start = 0;
+			range[j].end = 0;
+			continue;
+		}
+
+		if (start <= range[j].start && end < range[j].end && range[j].start < end + 1) {
+			range[j].start = end + 1;
+			continue;
+		}
+
+
+		if (start > range[j].start && end >= range[j].end && range[j].end > start - 1) {
+			range[j].end = start - 1;
+			continue;
+		}
+
+		if (start > range[j].start && end < range[j].end) {
+			/* find the new spare */
+			for (i = 0; i < RANGE_NUM; i++) {
+				if (range[i].end == 0)
+					break;
+			}
+			if (i < RANGE_NUM) {
+				range[i].end = range[j].end;
+				range[i].start = end + 1;
+			} else {
+				printk(KERN_ERR "run of slot in ranges\n");
+			}
+			range[j].end = start - 1;
+			continue;
+		}
+	}
+}
+
+static int __cpuinit cmp_range(const void *x1, const void *x2)
+{
+	const struct res_range *r1 = x1;
+	const struct res_range *r2 = x2;
+	s64 start1, start2;
+
+	start1 = r1->start;
+	start2 = r2->start;
+
+	return start1 - start2;
+}
+
+struct var_mtrr_state {
+	unsigned long range_startk, range_sizek;
+	unsigned long chunk_sizek;
+	unsigned int reg;
+	unsigned address_bits;
+};
+
+static void __init set_var_mtrr(
+	unsigned int reg, unsigned long basek, unsigned long sizek,
+	unsigned char type, unsigned address_bits)
+{
+	u32 base_lo, base_hi, mask_lo, mask_hi;
+	unsigned address_mask_high;
+
+	if (!sizek) {
+		fill_mtrr_var_range(reg, 0, 0, 0, 0);
+		return;
+	}
+
+	address_mask_high = ((1u << (address_bits - 32u)) - 1u);
+
+	base_hi = basek >> 22;
+	base_lo  = basek << 10;
+
+	if (sizek < 4*1024*1024) {
+		mask_hi = address_mask_high;
+		mask_lo = ~((sizek << 10) - 1);
+	} else {
+		mask_hi = address_mask_high & (~((sizek >> 22) - 1));
+		mask_lo = 0;
+	}
+
+	base_lo |= type;
+	mask_lo |= 0x800;
+	fill_mtrr_var_range(reg, base_lo, base_hi, mask_lo, mask_hi);
+}
+
+static unsigned int __init range_to_mtrr(unsigned int reg,
+	unsigned long range_startk, unsigned long range_sizek,
+	unsigned char type, unsigned address_bits)
+{
+	if (!range_sizek || (reg >= num_var_ranges))
+		return reg;
+
+	while (range_sizek) {
+		unsigned long max_align, align;
+		unsigned long sizek;
+		/* Compute the maximum size I can make a range */
+		if (range_startk)
+			max_align = ffs(range_startk) - 1;
+		else
+			max_align = 32;
+		align = fls(range_sizek) - 1;
+		if (align > max_align)
+			align = max_align;
+
+		sizek = 1 << align;
+		printk(KERN_INFO "Setting variable MTRR %d, base: %ldMB, range: %ldMB, type %s\n",
+			reg, range_startk >> 10, sizek >> 10,
+			(type == MTRR_TYPE_UNCACHABLE)?"UC":
+			    ((type == MTRR_TYPE_WRBACK)?"WB":"Other")
+			);
+		set_var_mtrr(reg++, range_startk, sizek, type, address_bits);
+		range_startk += sizek;
+		range_sizek -= sizek;
+		if (reg >= num_var_ranges)
+			break;
+	}
+	return reg;
+}
+
+static void __init range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek)
+{
+	unsigned long hole_basek, hole_sizek;
+	unsigned long range0_basek, range0_sizek;
+	unsigned long range_basek, range_sizek;
+	unsigned long chunk_sizek;
+
+	hole_basek = 0;
+	hole_sizek = 0;
+	chunk_sizek = state->chunk_sizek;
+	range0_basek = state->range_startk;
+
+	/* try to append some small hole */
+	range0_sizek = ALIGN(state->range_sizek, chunk_sizek);
+	if ((range0_sizek == state->range_sizek) ||
+		((range0_basek + range0_sizek > basek) && basek)) {
+			printk(KERN_INFO "rangeX: %016lx - %016lx\n", range0_basek<<10, (range0_basek + range0_sizek)<<10);
+			state->reg = range_to_mtrr(state->reg, range0_basek,
+				range0_sizek, MTRR_TYPE_WRBACK, state->address_bits);
+		return;
+	}
+
+	range0_sizek -= chunk_sizek;
+	range_basek = range0_basek + range0_sizek;
+	printk(KERN_INFO "range0: %016lx - %016lx\n", range0_basek<<10, (range0_basek + range0_sizek)<<10);
+	state->reg = range_to_mtrr(state->reg, range0_basek,
+			range0_sizek, MTRR_TYPE_WRBACK, state->address_bits);
+
+	range_sizek = chunk_sizek;
+	if (range_sizek - (state->range_sizek - range0_sizek) < (chunk_sizek >> 1))
+		hole_sizek = range_sizek - (state->range_sizek - range0_sizek);
+	else
+		range_sizek = state->range_sizek - range0_sizek;
+
+	printk(KERN_INFO "range: %016lx - %016lx\n", range_basek<<10, (range_basek + range_sizek)<<10);
+	state->reg = range_to_mtrr(state->reg, range_basek,
+			range_sizek, MTRR_TYPE_WRBACK, state->address_bits);
+	if (hole_sizek) {
+		printk(KERN_INFO "hole: %016lx - %016lx\n", hole_basek<<10, (hole_basek + hole_sizek)<<10);
+		state->reg = range_to_mtrr(state->reg, hole_basek,
+				hole_sizek, MTRR_TYPE_UNCACHABLE, state->address_bits);
+	}
+}
+
+static void __init set_var_mtrr_range(struct var_mtrr_state *state, size_t base_pfn, size_t size_pfn)
+{
+	unsigned long basek, sizek;
+
+	if (state->reg >= num_var_ranges)
+		return;
+
+	basek = base_pfn << (PAGE_SHIFT - 10);
+	sizek = size_pfn << (PAGE_SHIFT - 10);
+
+	/* See if I can merge with the last range */
+	if ((basek <= 1024) || (state->range_startk + state->range_sizek == basek)) {
+		unsigned long endk = basek + sizek;
+		state->range_sizek = endk - state->range_startk;
+		return;
+	}
+	/* Write the range mtrrs */
+	if (state->range_sizek != 0) {
+		range_to_mtrr_with_hole(state, basek);
+
+		state->range_startk = 0;
+		state->range_sizek = 0;
+	}
+	/* Allocate an msr */
+	state->range_startk = basek;
+	state->range_sizek  = sizek;
+}
+
+static u64 mtrr_chunk_size __initdata = (256ULL<<20);
+
+static int __init parse_mtrr_chunk_size_opt(char *p)
+{
+	if (!p)
+		return -EINVAL;
+	mtrr_chunk_size = memparse(p, &p);
+	return 0;
+}
+early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt);
+
+static void __init x86_setup_var_mtrrs(struct res_range *range, int nr_range, unsigned address_bits)
+{
+	struct var_mtrr_state var_state;
+	int i;
+
+	var_state.range_startk = 0;
+	var_state.range_sizek = 0;
+	var_state.reg = 0;
+	var_state.address_bits = address_bits;
+	var_state.chunk_sizek = mtrr_chunk_size >> 10;
+
+	/* Write the range etc */
+	for (i = 0; i < nr_range; i++)
+		set_var_mtrr_range(&var_state, range[i].start, range[i].end - range[i].start + 1);
+
+	/* Write the last range */
+	range_to_mtrr_with_hole(&var_state, 0);
+	printk(KERN_INFO "DONE variable MTRRs\n");
+	/* Clear out the extra MTRR's */
+	while (var_state.reg < num_var_ranges)
+		set_var_mtrr(var_state.reg++, 0, 0, 0, var_state.address_bits);
+}
+
+static int __init mtrr_cleanup(unsigned address_bits)
+{
+	unsigned long i, base, size, def, dummy;
+	mtrr_type type;
+	struct res_range range[RANGE_NUM];
+	int nr_range;
+
+	/* extra one for all 0 */
+	int num[MTRR_NUM_TYPES + 1];
+
+	if (!is_cpu(INTEL) || !enable_mtrr_cleanup)
+		return 0;
+	rdmsr(MTRRdefType_MSR, def, dummy);
+	def &= 0xff;
+	if (def != MTRR_TYPE_UNCACHABLE)
+		return 0;
+
+	/* check entries number */
+	memset(num, 0, sizeof(num));
+	for (i = 0; i < num_var_ranges; i++) {
+		mtrr_if->get(i, &base, &size, &type);
+		if (type >= MTRR_NUM_TYPES)
+			continue;
+		if (!size)
+			type = MTRR_NUM_TYPES;
+		num[type]++;
+	}
+
+	/* check if we got UC entries */
+	if (!num[MTRR_TYPE_UNCACHABLE])
+		return 0;
+
+	/* check if we only had WB and UC */
+	if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] !=
+		num_var_ranges - num[MTRR_NUM_TYPES])
+		return 0;
+
+	/*
+	 * get WB ranges at first
+	 * assume BIOS don't give us overlapping WB entries
+	 * or add add_range?
+	 */
+	memset(range, 0, sizeof(range));
+	nr_range = 0;
+	for (i = 0; i < num_var_ranges; i++) {
+		mtrr_if->get(i, &base, &size, &type);
+		if (type != MTRR_TYPE_WRBACK)
+			continue;
+		range[nr_range].start = base;
+		range[nr_range].end = base + size - 1;
+		nr_range++;
+	}
+	printk(KERN_INFO "After WB checking\n");
+	for (i = 0; i < nr_range; i++)
+		printk(KERN_INFO "MTRR MAP PFN: %016lx - %016lx\n", range[i].start, range[i].end + 1);
+
+	/* take out UC ranges */
+	for (i = 0; i < num_var_ranges; i++) {
+		mtrr_if->get(i, &base, &size, &type);
+		if (type != MTRR_TYPE_UNCACHABLE)
+			continue;
+		if (!size)
+			continue;
+		subtract_range(range, base, base + size - 1);
+	}
+	/* get new range num */
+	nr_range = 0;
+	for (i = 0; i < RANGE_NUM; i++) {
+		if (!range[i].end)
+			continue;
+		nr_range++;
+	}
+	printk(KERN_INFO "After UC checking\n");
+	for (i = 0; i < nr_range; i++)
+		printk(KERN_INFO "MTRR MAP PFN: %016lx - %016lx\n", range[i].start, range[i].end + 1);
+
+	/* sort the ranges */
+	sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
+	printk(KERN_INFO "After sorting\n");
+	for (i = 0; i < nr_range; i++)
+		printk(KERN_INFO "MTRR MAP PFN: %016lx - %016lx\n", range[i].start, range[i].end + 1);
+
+	/* convert ranges to var ranges state */
+	x86_setup_var_mtrrs(range, nr_range, address_bits);
+
+	return 1;
+
+}
+
 static int disable_mtrr_trim;
 
 static int __init disable_mtrr_trim_setup(char *str)
@@ -729,18 +1069,21 @@ int __init mtrr_trim_uncached_memory(uns
  */
 void __init mtrr_bp_init(void)
 {
+	u32 phys_addr;
 	init_ifs();
 
+	phys_addr = 32;
+
 	if (cpu_has_mtrr) {
 		mtrr_if = &generic_mtrr_ops;
 		size_or_mask = 0xff000000;	/* 36 bits */
 		size_and_mask = 0x00f00000;
+		phys_addr = 36;
 
 		/* This is an AMD specific MSR, but we assume(hope?) that
 		   Intel will implement it to when they extend the address
 		   bus of the Xeon. */
 		if (cpuid_eax(0x80000000) >= 0x80000008) {
-			u32 phys_addr;
 			phys_addr = cpuid_eax(0x80000008) & 0xff;
 			/* CPUID workaround for Intel 0F33/0F34 CPU */
 			if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
@@ -758,6 +1101,7 @@ void __init mtrr_bp_init(void)
 			   don't support PAE */
 			size_or_mask = 0xfff00000;	/* 32 bits */
 			size_and_mask = 0;
+			phys_addr = 32;
 		}
 	} else {
 		switch (boot_cpu_data.x86_vendor) {
@@ -791,8 +1135,13 @@ void __init mtrr_bp_init(void)
 	if (mtrr_if) {
 		set_num_var_ranges();
 		init_table();
-		if (use_intel())
+		if (use_intel()) {
 			get_mtrr_state();
+
+			if (mtrr_cleanup(phys_addr))
+				mtrr_if->set_all();
+
+		}
 	}
 }
 
@@ -829,9 +1178,10 @@ static int __init mtrr_init_finialize(vo
 {
 	if (!mtrr_if)
 		return 0;
-	if (use_intel())
-		mtrr_state_warn();
-	else {
+	if (use_intel()) {
+		if (!enable_mtrr_cleanup)
+			mtrr_state_warn();
+	} else {
 		/* The CPUs haven't MTRR and seem to not support SMP. They have
 		 * specific drivers, we use a tricky method to support
 		 * suspend/resume for them.
Index: linux-2.6/arch/x86/kernel/cpu/mtrr/mtrr.h
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -81,6 +81,8 @@ void set_mtrr_done(struct set_mtrr_conte
 void set_mtrr_cache_disable(struct set_mtrr_context *ctxt);
 void set_mtrr_prepare_save(struct set_mtrr_context *ctxt);
 
+void fill_mtrr_var_range(unsigned int index,
+		u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi);
 void get_mtrr_state(void);
 
 extern void set_mtrr_ops(struct mtrr_ops * ops);
Index: linux-2.6/Documentation/kernel-parameters.txt
===================================================================
--- linux-2.6.orig/Documentation/kernel-parameters.txt
+++ linux-2.6/Documentation/kernel-parameters.txt
@@ -595,6 +595,16 @@ and is between 256 and 4096 characters. 
 			See drivers/char/README.epca and
 			Documentation/digiepca.txt.
 
+	enable_mtrr_cleanup [X86]
+			By default the kernel will not touch mtrr. But we may
+			need adjust mtrr from continuous to discrete layout,
+			to make x server driver could add WB entry later. This
+			parameter enable that behavior, will touch MTRRs.
+
+	mtrr_chunk_size=nn[KMG] [X86]
+			used for mtrr cleanup. It is largest continous chunk
+			that could hold holes aka. UC entries.
+
 	disable_mtrr_trim [X86, Intel and AMD only]
 			By default the kernel will trim any uncacheable
 			memory out of your available memory pool based on

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH] x86: mtrr cleanup for converting continuous to discrete layout v3
  2008-04-28 20:15     ` Ingo Molnar
@ 2008-04-28 20:18       ` Yinghai Lu
  2008-04-28 20:29         ` Ingo Molnar
  0 siblings, 1 reply; 89+ messages in thread
From: Yinghai Lu @ 2008-04-28 20:18 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Andrew Morton, H. Peter Anvin, Thomas Gleixner,
	linux-kernel@vger.kernel.org, Gabriel C, Mika Fischer

On Mon, Apr 28, 2008 at 1:15 PM, Ingo Molnar <mingo@elte.hu> wrote:
>
>  * Yinghai Lu <yhlu.kernel.send@gmail.com> wrote:
>
>  > some BIOS like to use continus MTRR layout, and some X driver can not
>  > add WB entries for graphical cards when 4g or more RAM installed.
>  >
>  > the patch will change MTRR to discrete.
>  >
>  > mtrr_chunk_size= could be used to have smaller continuous block to
>  > hold holes. default is 256m, could be set according to size of
>  > graphics card memory.
>  >
>  > v2: fix -1 for UC checking
>
>  > v3: default to disable, and need use enable_mtrr_cleanup to enable
>  >     this feature
>  >     skip the var state change warning.
>  >     remove next_basek in range_to_mtrr()
>
>  a boot option is often inconvenient though - could you perhaps also make
>  this a Kconfig option, with it defaulting to off? Something like
>  CONFIG_MTRR_SANITIZE=y or so? That way distros can make a conscious
>  decision as well whether they want this feature (for the Xorg they pick)
>  or not.

use disable_mtrr_cleanup and CONFIG_MTRR_SANITIZER?

YH

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH] x86: mtrr cleanup for converting continuous to discrete layout v3
  2008-04-28 20:18       ` Yinghai Lu
@ 2008-04-28 20:29         ` Ingo Molnar
  0 siblings, 0 replies; 89+ messages in thread
From: Ingo Molnar @ 2008-04-28 20:29 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Andrew Morton, H. Peter Anvin, Thomas Gleixner,
	linux-kernel@vger.kernel.org, Gabriel C, Mika Fischer


* Yinghai Lu <yhlu.kernel@gmail.com> wrote:

> On Mon, Apr 28, 2008 at 1:15 PM, Ingo Molnar <mingo@elte.hu> wrote:
> >
> >  * Yinghai Lu <yhlu.kernel.send@gmail.com> wrote:
> >
> >  > some BIOS like to use continus MTRR layout, and some X driver can not
> >  > add WB entries for graphical cards when 4g or more RAM installed.
> >  >
> >  > the patch will change MTRR to discrete.
> >  >
> >  > mtrr_chunk_size= could be used to have smaller continuous block to
> >  > hold holes. default is 256m, could be set according to size of
> >  > graphics card memory.
> >  >
> >  > v2: fix -1 for UC checking
> >
> >  > v3: default to disable, and need use enable_mtrr_cleanup to enable
> >  >     this feature
> >  >     skip the var state change warning.
> >  >     remove next_basek in range_to_mtrr()
> >
> >  a boot option is often inconvenient though - could you perhaps also make
> >  this a Kconfig option, with it defaulting to off? Something like
> >  CONFIG_MTRR_SANITIZE=y or so? That way distros can make a conscious
> >  decision as well whether they want this feature (for the Xorg they pick)
> >  or not.
> 
> use disable_mtrr_cleanup and CONFIG_MTRR_SANITIZER?

yeah, please keep both.

in general, the best configurability for any particular new kernel 
feature is a trio of options:

  - .config option (disabled/boot-enabled/boot-disabled)
  - sysctl option
  - boot option

since your new feature runs during early bootup the sysctl vector is 
meaningless - that leaves the boot option and the .config. The .config 
can have 3 states:

   completely-disabled
   disabled-but-boot-option-enable-able
   enabled-but-boot-option-disable-able

depending on how the feature works out, people and distros will 
gravitate towards one of these combinations. We usually do not know it 
ahead of time which one that will be.

	Ingo

^ permalink raw reply	[flat|nested] 89+ messages in thread

* [PATCH] x86: mtrr cleanup for converting continuous to discrete layout v5
  2008-04-28 20:16     ` [PATCH] x86: mtrr cleanup for converting continuous to discrete layout v4 Yinghai Lu
@ 2008-04-28 22:05       ` Yinghai Lu
  2008-04-28 22:36         ` Randy Dunlap
                           ` (4 more replies)
  0 siblings, 5 replies; 89+ messages in thread
From: Yinghai Lu @ 2008-04-28 22:05 UTC (permalink / raw)
  To: Andrew Morton, Ingo Molnar, H. Peter Anvin, Thomas Gleixner,
	Gabriel C
  Cc: linux-kernel@vger.kernel.org, Mika Fischer


some BIOS like to use continus MTRR layout, and may X driver can not add
WB entries for graphical cards when 4g or more RAM installed.

the patch will change MTRR to discrete.

mtrr_chunk_size= could be used to have smaller continuous block to hold holes.
default is 256m, could be set according to size of graphics card memory.

v2: fix -1 for UC checking
v3: default to disable, and need use enable_mtrr_cleanup to enable this feature
    skip the var state change warning.
    remove next_basek in range_to_mtrr()
v4: correct warning mask.
v5: CONFIG_MTRR_SANITIZER

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

Index: linux-2.6/arch/x86/kernel/cpu/mtrr/generic.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/generic.c
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/generic.c
@@ -158,6 +158,20 @@ get_mtrr_var_range(unsigned int index, s
 	rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi);
 }
 
+/*  fill the MSR pair relating to a var range  */
+void fill_mtrr_var_range(unsigned int index,
+		u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi)
+{
+	struct mtrr_var_range *vr;
+
+	vr = mtrr_state.var_ranges;
+
+	vr[index].base_lo = base_lo;
+	vr[index].base_hi = base_hi;
+	vr[index].mask_lo = mask_lo;
+	vr[index].mask_hi = mask_hi;
+}
+
 static void
 get_fixed_ranges(mtrr_type * frs)
 {
Index: linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/main.c
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
@@ -37,6 +37,7 @@
 #include <linux/smp.h>
 #include <linux/cpu.h>
 #include <linux/mutex.h>
+#include <linux/sort.h>
 
 #include <asm/e820.h>
 #include <asm/mtrr.h>
@@ -609,6 +610,366 @@ static struct sysdev_driver mtrr_sysdev_
 	.resume		= mtrr_restore,
 };
 
+#ifdef CONFIG_MTRR_SANITIZER
+
+#ifdef CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT
+static int enable_mtrr_cleanup __initdata = 1;
+#else
+static int enable_mtrr_cleanup __initdata;
+#endif
+
+#else
+
+static int enable_mtrr_cleanup __initdata = -1;
+
+#endif
+
+static int __init disable_mtrr_cleanup_setup(char *str)
+{
+	if (enable_mtrr_cleanup != -1)
+		enable_mtrr_cleanup = 0;
+	return 0;
+}
+early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);
+
+static int __init enable_mtrr_cleanup_setup(char *str)
+{
+	if (enable_mtrr_cleanup != -1)
+		enable_mtrr_cleanup = 1;
+	return 0;
+}
+early_param("enble_mtrr_cleanup", enable_mtrr_cleanup_setup);
+
+#define RANGE_NUM 256
+
+struct res_range {
+	size_t start;
+	size_t end;
+};
+
+static void __init subtract_range(struct res_range *range, size_t start,
+				size_t end)
+{
+	int i;
+	int j;
+
+	for (j = 0; j < RANGE_NUM; j++) {
+		if (!range[j].end)
+			continue;
+
+		if (start <= range[j].start && end >= range[j].end) {
+			range[j].start = 0;
+			range[j].end = 0;
+			continue;
+		}
+
+		if (start <= range[j].start && end < range[j].end && range[j].start < end + 1) {
+			range[j].start = end + 1;
+			continue;
+		}
+
+
+		if (start > range[j].start && end >= range[j].end && range[j].end > start - 1) {
+			range[j].end = start - 1;
+			continue;
+		}
+
+		if (start > range[j].start && end < range[j].end) {
+			/* find the new spare */
+			for (i = 0; i < RANGE_NUM; i++) {
+				if (range[i].end == 0)
+					break;
+			}
+			if (i < RANGE_NUM) {
+				range[i].end = range[j].end;
+				range[i].start = end + 1;
+			} else {
+				printk(KERN_ERR "run of slot in ranges\n");
+			}
+			range[j].end = start - 1;
+			continue;
+		}
+	}
+}
+
+static int __cpuinit cmp_range(const void *x1, const void *x2)
+{
+	const struct res_range *r1 = x1;
+	const struct res_range *r2 = x2;
+	s64 start1, start2;
+
+	start1 = r1->start;
+	start2 = r2->start;
+
+	return start1 - start2;
+}
+
+struct var_mtrr_state {
+	unsigned long range_startk, range_sizek;
+	unsigned long chunk_sizek;
+	unsigned int reg;
+	unsigned address_bits;
+};
+
+static void __init set_var_mtrr(
+	unsigned int reg, unsigned long basek, unsigned long sizek,
+	unsigned char type, unsigned address_bits)
+{
+	u32 base_lo, base_hi, mask_lo, mask_hi;
+	unsigned address_mask_high;
+
+	if (!sizek) {
+		fill_mtrr_var_range(reg, 0, 0, 0, 0);
+		return;
+	}
+
+	address_mask_high = ((1u << (address_bits - 32u)) - 1u);
+
+	base_hi = basek >> 22;
+	base_lo  = basek << 10;
+
+	if (sizek < 4*1024*1024) {
+		mask_hi = address_mask_high;
+		mask_lo = ~((sizek << 10) - 1);
+	} else {
+		mask_hi = address_mask_high & (~((sizek >> 22) - 1));
+		mask_lo = 0;
+	}
+
+	base_lo |= type;
+	mask_lo |= 0x800;
+	fill_mtrr_var_range(reg, base_lo, base_hi, mask_lo, mask_hi);
+}
+
+static unsigned int __init range_to_mtrr(unsigned int reg,
+	unsigned long range_startk, unsigned long range_sizek,
+	unsigned char type, unsigned address_bits)
+{
+	if (!range_sizek || (reg >= num_var_ranges))
+		return reg;
+
+	while (range_sizek) {
+		unsigned long max_align, align;
+		unsigned long sizek;
+		/* Compute the maximum size I can make a range */
+		if (range_startk)
+			max_align = ffs(range_startk) - 1;
+		else
+			max_align = 32;
+		align = fls(range_sizek) - 1;
+		if (align > max_align)
+			align = max_align;
+
+		sizek = 1 << align;
+		printk(KERN_INFO "Setting variable MTRR %d, base: %ldMB, range: %ldMB, type %s\n",
+			reg, range_startk >> 10, sizek >> 10,
+			(type == MTRR_TYPE_UNCACHABLE)?"UC":
+			    ((type == MTRR_TYPE_WRBACK)?"WB":"Other")
+			);
+		set_var_mtrr(reg++, range_startk, sizek, type, address_bits);
+		range_startk += sizek;
+		range_sizek -= sizek;
+		if (reg >= num_var_ranges)
+			break;
+	}
+	return reg;
+}
+
+static void __init range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek)
+{
+	unsigned long hole_basek, hole_sizek;
+	unsigned long range0_basek, range0_sizek;
+	unsigned long range_basek, range_sizek;
+	unsigned long chunk_sizek;
+
+	hole_basek = 0;
+	hole_sizek = 0;
+	chunk_sizek = state->chunk_sizek;
+	range0_basek = state->range_startk;
+
+	/* try to append some small hole */
+	range0_sizek = ALIGN(state->range_sizek, chunk_sizek);
+	if ((range0_sizek == state->range_sizek) ||
+		((range0_basek + range0_sizek > basek) && basek)) {
+			printk(KERN_INFO "rangeX: %016lx - %016lx\n", range0_basek<<10, (range0_basek + range0_sizek)<<10);
+			state->reg = range_to_mtrr(state->reg, range0_basek,
+				range0_sizek, MTRR_TYPE_WRBACK, state->address_bits);
+		return;
+	}
+
+	range0_sizek -= chunk_sizek;
+	range_basek = range0_basek + range0_sizek;
+	printk(KERN_INFO "range0: %016lx - %016lx\n", range0_basek<<10, (range0_basek + range0_sizek)<<10);
+	state->reg = range_to_mtrr(state->reg, range0_basek,
+			range0_sizek, MTRR_TYPE_WRBACK, state->address_bits);
+
+	range_sizek = chunk_sizek;
+	if (range_sizek - (state->range_sizek - range0_sizek) < (chunk_sizek >> 1))
+		hole_sizek = range_sizek - (state->range_sizek - range0_sizek);
+	else
+		range_sizek = state->range_sizek - range0_sizek;
+
+	printk(KERN_INFO "range: %016lx - %016lx\n", range_basek<<10, (range_basek + range_sizek)<<10);
+	state->reg = range_to_mtrr(state->reg, range_basek,
+			range_sizek, MTRR_TYPE_WRBACK, state->address_bits);
+	if (hole_sizek) {
+		printk(KERN_INFO "hole: %016lx - %016lx\n", hole_basek<<10, (hole_basek + hole_sizek)<<10);
+		state->reg = range_to_mtrr(state->reg, hole_basek,
+				hole_sizek, MTRR_TYPE_UNCACHABLE, state->address_bits);
+	}
+}
+
+static void __init set_var_mtrr_range(struct var_mtrr_state *state, size_t base_pfn, size_t size_pfn)
+{
+	unsigned long basek, sizek;
+
+	if (state->reg >= num_var_ranges)
+		return;
+
+	basek = base_pfn << (PAGE_SHIFT - 10);
+	sizek = size_pfn << (PAGE_SHIFT - 10);
+
+	/* See if I can merge with the last range */
+	if ((basek <= 1024) || (state->range_startk + state->range_sizek == basek)) {
+		unsigned long endk = basek + sizek;
+		state->range_sizek = endk - state->range_startk;
+		return;
+	}
+	/* Write the range mtrrs */
+	if (state->range_sizek != 0) {
+		range_to_mtrr_with_hole(state, basek);
+
+		state->range_startk = 0;
+		state->range_sizek = 0;
+	}
+	/* Allocate an msr */
+	state->range_startk = basek;
+	state->range_sizek  = sizek;
+}
+
+static u64 mtrr_chunk_size __initdata = (256ULL<<20);
+
+static int __init parse_mtrr_chunk_size_opt(char *p)
+{
+	if (!p)
+		return -EINVAL;
+	mtrr_chunk_size = memparse(p, &p);
+	return 0;
+}
+early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt);
+
+static void __init x86_setup_var_mtrrs(struct res_range *range, int nr_range, unsigned address_bits)
+{
+	struct var_mtrr_state var_state;
+	int i;
+
+	var_state.range_startk = 0;
+	var_state.range_sizek = 0;
+	var_state.reg = 0;
+	var_state.address_bits = address_bits;
+	var_state.chunk_sizek = mtrr_chunk_size >> 10;
+
+	/* Write the range etc */
+	for (i = 0; i < nr_range; i++)
+		set_var_mtrr_range(&var_state, range[i].start, range[i].end - range[i].start + 1);
+
+	/* Write the last range */
+	range_to_mtrr_with_hole(&var_state, 0);
+	printk(KERN_INFO "DONE variable MTRRs\n");
+	/* Clear out the extra MTRR's */
+	while (var_state.reg < num_var_ranges)
+		set_var_mtrr(var_state.reg++, 0, 0, 0, var_state.address_bits);
+}
+
+static int __init mtrr_cleanup(unsigned address_bits)
+{
+	unsigned long i, base, size, def, dummy;
+	mtrr_type type;
+	struct res_range range[RANGE_NUM];
+	int nr_range;
+
+	/* extra one for all 0 */
+	int num[MTRR_NUM_TYPES + 1];
+
+	if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1)
+		return 0;
+	rdmsr(MTRRdefType_MSR, def, dummy);
+	def &= 0xff;
+	if (def != MTRR_TYPE_UNCACHABLE)
+		return 0;
+
+	/* check entries number */
+	memset(num, 0, sizeof(num));
+	for (i = 0; i < num_var_ranges; i++) {
+		mtrr_if->get(i, &base, &size, &type);
+		if (type >= MTRR_NUM_TYPES)
+			continue;
+		if (!size)
+			type = MTRR_NUM_TYPES;
+		num[type]++;
+	}
+
+	/* check if we got UC entries */
+	if (!num[MTRR_TYPE_UNCACHABLE])
+		return 0;
+
+	/* check if we only had WB and UC */
+	if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] !=
+		num_var_ranges - num[MTRR_NUM_TYPES])
+		return 0;
+
+	/*
+	 * get WB ranges at first
+	 * assume BIOS don't give us overlapping WB entries
+	 * or add add_range?
+	 */
+	memset(range, 0, sizeof(range));
+	nr_range = 0;
+	for (i = 0; i < num_var_ranges; i++) {
+		mtrr_if->get(i, &base, &size, &type);
+		if (type != MTRR_TYPE_WRBACK)
+			continue;
+		range[nr_range].start = base;
+		range[nr_range].end = base + size - 1;
+		nr_range++;
+	}
+	printk(KERN_INFO "After WB checking\n");
+	for (i = 0; i < nr_range; i++)
+		printk(KERN_INFO "MTRR MAP PFN: %016lx - %016lx\n", range[i].start, range[i].end + 1);
+
+	/* take out UC ranges */
+	for (i = 0; i < num_var_ranges; i++) {
+		mtrr_if->get(i, &base, &size, &type);
+		if (type != MTRR_TYPE_UNCACHABLE)
+			continue;
+		if (!size)
+			continue;
+		subtract_range(range, base, base + size - 1);
+	}
+	/* get new range num */
+	nr_range = 0;
+	for (i = 0; i < RANGE_NUM; i++) {
+		if (!range[i].end)
+			continue;
+		nr_range++;
+	}
+	printk(KERN_INFO "After UC checking\n");
+	for (i = 0; i < nr_range; i++)
+		printk(KERN_INFO "MTRR MAP PFN: %016lx - %016lx\n", range[i].start, range[i].end + 1);
+
+	/* sort the ranges */
+	sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
+	printk(KERN_INFO "After sorting\n");
+	for (i = 0; i < nr_range; i++)
+		printk(KERN_INFO "MTRR MAP PFN: %016lx - %016lx\n", range[i].start, range[i].end + 1);
+
+	/* convert ranges to var ranges state */
+	x86_setup_var_mtrrs(range, nr_range, address_bits);
+
+	return 1;
+
+}
+
 static int disable_mtrr_trim;
 
 static int __init disable_mtrr_trim_setup(char *str)
@@ -729,18 +1090,21 @@ int __init mtrr_trim_uncached_memory(uns
  */
 void __init mtrr_bp_init(void)
 {
+	u32 phys_addr;
 	init_ifs();
 
+	phys_addr = 32;
+
 	if (cpu_has_mtrr) {
 		mtrr_if = &generic_mtrr_ops;
 		size_or_mask = 0xff000000;	/* 36 bits */
 		size_and_mask = 0x00f00000;
+		phys_addr = 36;
 
 		/* This is an AMD specific MSR, but we assume(hope?) that
 		   Intel will implement it to when they extend the address
 		   bus of the Xeon. */
 		if (cpuid_eax(0x80000000) >= 0x80000008) {
-			u32 phys_addr;
 			phys_addr = cpuid_eax(0x80000008) & 0xff;
 			/* CPUID workaround for Intel 0F33/0F34 CPU */
 			if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
@@ -758,6 +1122,7 @@ void __init mtrr_bp_init(void)
 			   don't support PAE */
 			size_or_mask = 0xfff00000;	/* 32 bits */
 			size_and_mask = 0;
+			phys_addr = 32;
 		}
 	} else {
 		switch (boot_cpu_data.x86_vendor) {
@@ -791,8 +1156,13 @@ void __init mtrr_bp_init(void)
 	if (mtrr_if) {
 		set_num_var_ranges();
 		init_table();
-		if (use_intel())
+		if (use_intel()) {
 			get_mtrr_state();
+
+			if (mtrr_cleanup(phys_addr))
+				mtrr_if->set_all();
+
+		}
 	}
 }
 
@@ -829,9 +1199,10 @@ static int __init mtrr_init_finialize(vo
 {
 	if (!mtrr_if)
 		return 0;
-	if (use_intel())
-		mtrr_state_warn();
-	else {
+	if (use_intel()) {
+		if (enable_mtrr_cleanup < 1)
+			mtrr_state_warn();
+	} else {
 		/* The CPUs haven't MTRR and seem to not support SMP. They have
 		 * specific drivers, we use a tricky method to support
 		 * suspend/resume for them.
Index: linux-2.6/arch/x86/kernel/cpu/mtrr/mtrr.h
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -81,6 +81,8 @@ void set_mtrr_done(struct set_mtrr_conte
 void set_mtrr_cache_disable(struct set_mtrr_context *ctxt);
 void set_mtrr_prepare_save(struct set_mtrr_context *ctxt);
 
+void fill_mtrr_var_range(unsigned int index,
+		u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi);
 void get_mtrr_state(void);
 
 extern void set_mtrr_ops(struct mtrr_ops * ops);
Index: linux-2.6/Documentation/kernel-parameters.txt
===================================================================
--- linux-2.6.orig/Documentation/kernel-parameters.txt
+++ linux-2.6/Documentation/kernel-parameters.txt
@@ -595,6 +595,16 @@ and is between 256 and 4096 characters. 
 			See drivers/char/README.epca and
 			Documentation/digiepca.txt.
 
+	disable_mtrr_cleanup [X86]
+	enable_mtrr_cleanup [X86]
+			The kernel try to adjust MTRR layout from continuous
+			to discrete, to make x server driver could add WB entry
+			later. This parameter enable/disable that.
+
+	mtrr_chunk_size=nn[KMG] [X86]
+			used for mtrr cleanup. It is largest continous chunk
+			that could hold holes aka. UC entries.
+
 	disable_mtrr_trim [X86, Intel and AMD only]
 			By default the kernel will trim any uncacheable
 			memory out of your available memory pool based on
Index: linux-2.6/arch/x86/Kconfig
===================================================================
--- linux-2.6.orig/arch/x86/Kconfig
+++ linux-2.6/arch/x86/Kconfig
@@ -1035,6 +1035,32 @@ config MTRR
 
 	  See <file:Documentation/mtrr.txt> for more information.
 
+config MTRR_SANITIZER
+	def_bool y
+	prompt "MTRR cleanup support"
+	depends on MTRR
+	help
+	  Convert MTRR layout from continuous to discrete, so some X driver
+	  could add WB entries.
+
+	  Say N here if you see bootup problems (boot crash, boot hang,
+	  spontaneous reboots).
+
+	  could be disabled with disable_mtrr_cleanup. also mtrr_chunk_size
+	  could be used to send largest mtrr entry size for continuous block
+	  to hold holes (aka. UC entries)
+
+	  If unsure, say Y.
+
+config MTRR_SANITIZER_ENABLE_DEFAULT
+	bool
+	prompt "MTRR cleanup enable default"
+	depends on MTRR_SANITIZER
+	help
+	  Enable mtrr cleanup by default
+
+	  If unsure, say Y.
+
 config X86_PAT
 	bool
 	prompt "x86 PAT support"

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH] x86: mtrr cleanup for converting continuous to discrete layout v5
  2008-04-28 22:05       ` [PATCH] x86: mtrr cleanup for converting continuous to discrete layout v5 Yinghai Lu
@ 2008-04-28 22:36         ` Randy Dunlap
  2008-04-28 22:47           ` Yinghai Lu
  2008-04-29  2:42         ` Andrew Morton
                           ` (3 subsequent siblings)
  4 siblings, 1 reply; 89+ messages in thread
From: Randy Dunlap @ 2008-04-28 22:36 UTC (permalink / raw)
  To: yhlu.kernel
  Cc: Yinghai Lu, Andrew Morton, Ingo Molnar, H. Peter Anvin,
	Thomas Gleixner, Gabriel C, linux-kernel@vger.kernel.org,
	Mika Fischer

On Mon, 28 Apr 2008 15:05:05 -0700 Yinghai Lu wrote:

> Index: linux-2.6/Documentation/kernel-parameters.txt
> ===================================================================
> --- linux-2.6.orig/Documentation/kernel-parameters.txt
> +++ linux-2.6/Documentation/kernel-parameters.txt
> @@ -595,6 +595,16 @@ and is between 256 and 4096 characters. 
>  			See drivers/char/README.epca and
>  			Documentation/digiepca.txt.
>  
> +	disable_mtrr_cleanup [X86]
> +	enable_mtrr_cleanup [X86]
> +			The kernel try to adjust MTRR layout from continuous

                                   tries

> +			to discrete, to make x server driver could add WB entry

                                             X server driver able to add WB entry

> +			later. This parameter enable/disable that.

                                              enables/disables

> +
> +	mtrr_chunk_size=nn[KMG] [X86]
> +			used for mtrr cleanup. It is largest continous chunk
> +			that could hold holes aka. UC entries.
> +
>  	disable_mtrr_trim [X86, Intel and AMD only]
>  			By default the kernel will trim any uncacheable
>  			memory out of your available memory pool based on
> Index: linux-2.6/arch/x86/Kconfig
> ===================================================================
> --- linux-2.6.orig/arch/x86/Kconfig
> +++ linux-2.6/arch/x86/Kconfig
> @@ -1035,6 +1035,32 @@ config MTRR
>  
>  	  See <file:Documentation/mtrr.txt> for more information.
>  
> +config MTRR_SANITIZER
> +	def_bool y
> +	prompt "MTRR cleanup support"
> +	depends on MTRR
> +	help
> +	  Convert MTRR layout from continuous to discrete, so some X driver
> +	  could add WB entries.
> +
> +	  Say N here if you see bootup problems (boot crash, boot hang,
> +	  spontaneous reboots).
> +
> +	  could be disabled with disable_mtrr_cleanup. also mtrr_chunk_size

          Could                                        Also

> +	  could be used to send largest mtrr entry size for continuous block
> +	  to hold holes (aka. UC entries)
> +
> +	  If unsure, say Y.
> +
> +config MTRR_SANITIZER_ENABLE_DEFAULT
> +	bool
> +	prompt "MTRR cleanup enable default"

	prompt "Enable MTRR cleanup"
or
	prompt "Enable MTRR cleanup by default"

> +	depends on MTRR_SANITIZER
> +	help
> +	  Enable mtrr cleanup by default

                                 default.

> +
> +	  If unsure, say Y.
> +
>  config X86_PAT
>  	bool
>  	prompt "x86 PAT support"

---
~Randy

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH] x86: mtrr cleanup for converting continuous to discrete layout v5
  2008-04-28 22:36         ` Randy Dunlap
@ 2008-04-28 22:47           ` Yinghai Lu
  0 siblings, 0 replies; 89+ messages in thread
From: Yinghai Lu @ 2008-04-28 22:47 UTC (permalink / raw)
  To: Randy Dunlap
  Cc: Yinghai Lu, Andrew Morton, Ingo Molnar, H. Peter Anvin,
	Thomas Gleixner, Gabriel C, linux-kernel@vger.kernel.org,
	Mika Fischer

On Mon, Apr 28, 2008 at 3:36 PM, Randy Dunlap <randy.dunlap@oracle.com> wrote:
> On Mon, 28 Apr 2008 15:05:05 -0700 Yinghai Lu wrote:
>
>  > Index: linux-2.6/Documentation/kernel-parameters.txt
>  > ===================================================================
>  > --- linux-2.6.orig/Documentation/kernel-parameters.txt
>  > +++ linux-2.6/Documentation/kernel-parameters.txt
>  > @@ -595,6 +595,16 @@ and is between 256 and 4096 characters.
>  >                       See drivers/char/README.epca and
>  >                       Documentation/digiepca.txt.
>  >
>  > +     disable_mtrr_cleanup [X86]
>  > +     enable_mtrr_cleanup [X86]
>  > +                     The kernel try to adjust MTRR layout from continuous
>
>                                    tries
>
>
>  > +                     to discrete, to make x server driver could add WB entry
>
>                                              X server driver able to add WB entry
>
>
>  > +                     later. This parameter enable/disable that.
>
>                                               enables/disables
>
>
>  > +
>  > +     mtrr_chunk_size=nn[KMG] [X86]
>  > +                     used for mtrr cleanup. It is largest continous chunk
>  > +                     that could hold holes aka. UC entries.
>  > +
>  >       disable_mtrr_trim [X86, Intel and AMD only]
>  >                       By default the kernel will trim any uncacheable
>  >                       memory out of your available memory pool based on
>  > Index: linux-2.6/arch/x86/Kconfig
>  > ===================================================================
>  > --- linux-2.6.orig/arch/x86/Kconfig
>  > +++ linux-2.6/arch/x86/Kconfig
>  > @@ -1035,6 +1035,32 @@ config MTRR
>  >
>  >         See <file:Documentation/mtrr.txt> for more information.
>  >
>  > +config MTRR_SANITIZER
>  > +     def_bool y
>  > +     prompt "MTRR cleanup support"
>  > +     depends on MTRR
>  > +     help
>  > +       Convert MTRR layout from continuous to discrete, so some X driver
>  > +       could add WB entries.
>  > +
>  > +       Say N here if you see bootup problems (boot crash, boot hang,
>  > +       spontaneous reboots).
>  > +
>  > +       could be disabled with disable_mtrr_cleanup. also mtrr_chunk_size
>
>           Could                                        Also
>
>
>  > +       could be used to send largest mtrr entry size for continuous block
>  > +       to hold holes (aka. UC entries)
>  > +
>  > +       If unsure, say Y.
>  > +
>  > +config MTRR_SANITIZER_ENABLE_DEFAULT
>  > +     bool
>  > +     prompt "MTRR cleanup enable default"
>
>         prompt "Enable MTRR cleanup"
>  or
>         prompt "Enable MTRR cleanup by default"
>
>
>  > +     depends on MTRR_SANITIZER
>  > +     help
>  > +       Enable mtrr cleanup by default
>
>                                  default.
>
>  > +
>  > +       If unsure, say Y.
>  > +
>  >  config X86_PAT
>  >       bool
>  >       prompt "x86 PAT support"

thanks.

YH

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH] x86: mtrr cleanup for converting continuous to discrete layout v5
  2008-04-28 22:05       ` [PATCH] x86: mtrr cleanup for converting continuous to discrete layout v5 Yinghai Lu
  2008-04-28 22:36         ` Randy Dunlap
@ 2008-04-29  2:42         ` Andrew Morton
  2008-04-29  3:01           ` Yinghai Lu
       [not found]         ` <200804290157.30651.yhlu.kernel@gmail.com>
                           ` (2 subsequent siblings)
  4 siblings, 1 reply; 89+ messages in thread
From: Andrew Morton @ 2008-04-29  2:42 UTC (permalink / raw)
  To: yhlu.kernel
  Cc: Yinghai Lu, Ingo Molnar, H. Peter Anvin, Thomas Gleixner,
	Gabriel C, linux-kernel@vger.kernel.org, Mika Fischer

On Mon, 28 Apr 2008 15:05:05 -0700 Yinghai Lu <yhlu.kernel.send@gmail.com> wrote:

> 
> some BIOS like to use continus MTRR layout, and may X driver can not add
> WB entries for graphical cards when 4g or more RAM installed.
> 
> the patch will change MTRR to discrete.
> 
> mtrr_chunk_size= could be used to have smaller continuous block to hold holes.
> default is 256m, could be set according to size of graphics card memory.
> 
> v2: fix -1 for UC checking
> v3: default to disable, and need use enable_mtrr_cleanup to enable this feature
>     skip the var state change warning.
>     remove next_basek in range_to_mtrr()
> v4: correct warning mask.
> v5: CONFIG_MTRR_SANITIZER
> 
> Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

> +#ifdef CONFIG_MTRR_SANITIZER
> +
> +#ifdef CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT

I don't think these newly-added config items should exist, sorry.  But
then, the changelog does't describe _why_ they exist (it should!) and I
probably missed it in the discusson.

Anyone who distributes a kernel will need to enable both
CONFIG_MTRR_SANITIZER and CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT, so the
config items are only useful for saving a bit of kernel text in custom
kernel builds.

> +static int enable_mtrr_cleanup __initdata = 1;
> +#else
> +static int enable_mtrr_cleanup __initdata;

The disable_mtrr_cleanup and enable_mtrr_cleanup boot options are also
problematic.  We really really want this stuff to all happen automatically.

What happens with this sort of thing is that people's machines misbehave
and I expect most of them never find out about the magic option.  They
give up on Linux or use a different computer or use a different distro
which happened to set the option the other way, etc, etc.  Some people will
think to do a bit of googling and might stumble across the option after a
while.

It's all rather user-unfriendly and we should try really hard to just make
things work.  Is this at all possible?


Anyway.  I think the problem which you have identified is solveable in
userspace, isn't it?  Read the existing mtrr settings and rewrite them in a
better form?  If so, we could prepare a little program which does that and
make the X people and distributors aware of it.  This has the significant
advantage that it will fix pre-2.6.26 kernels too.

> +#endif
> +
> +#else
> +
> +static int enable_mtrr_cleanup __initdata = -1;
> +
> +#endif
> +
> +static int __init disable_mtrr_cleanup_setup(char *str)
> +{
> +	if (enable_mtrr_cleanup != -1)
> +		enable_mtrr_cleanup = 0;
> +	return 0;
> +}
> +early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);
> +
> +static int __init enable_mtrr_cleanup_setup(char *str)
> +{
> +	if (enable_mtrr_cleanup != -1)
> +		enable_mtrr_cleanup = 1;
> +	return 0;
> +}
> +early_param("enble_mtrr_cleanup", enable_mtrr_cleanup_setup);
> +
> +#define RANGE_NUM 256
> +
> +struct res_range {
> +	size_t start;
> +	size_t end;
> +};

size_t is a surprising choice of type.

> +static void __init subtract_range(struct res_range *range, size_t start,
> +				size_t end)
> +{
> +	int i;
> +	int j;
> +
> +	for (j = 0; j < RANGE_NUM; j++) {
> +		if (!range[j].end)
> +			continue;
> +
> +		if (start <= range[j].start && end >= range[j].end) {
> +			range[j].start = 0;
> +			range[j].end = 0;
> +			continue;
> +		}
> +
> +		if (start <= range[j].start && end < range[j].end && range[j].start < end + 1) {

We prefer that code remain within 0 columns, please.

> +			range[j].start = end + 1;
> +			continue;
> +		}
> +
> +
> +		if (start > range[j].start && end >= range[j].end && range[j].end > start - 1) {
> +			range[j].end = start - 1;
> +			continue;
> +		}
> +
> +		if (start > range[j].start && end < range[j].end) {
> +			/* find the new spare */
> +			for (i = 0; i < RANGE_NUM; i++) {
> +				if (range[i].end == 0)
> +					break;
> +			}
> +			if (i < RANGE_NUM) {
> +				range[i].end = range[j].end;
> +				range[i].start = end + 1;
> +			} else {
> +				printk(KERN_ERR "run of slot in ranges\n");
> +			}
> +			range[j].end = start - 1;
> +			continue;
> +		}
> +	}
> +}
> +
> +static int __cpuinit cmp_range(const void *x1, const void *x2)

You wanted __init here.



^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH] x86: mtrr cleanup for converting continuous to discrete layout v5
  2008-04-29  2:42         ` Andrew Morton
@ 2008-04-29  3:01           ` Yinghai Lu
  0 siblings, 0 replies; 89+ messages in thread
From: Yinghai Lu @ 2008-04-29  3:01 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Yinghai Lu, Ingo Molnar, H. Peter Anvin, Thomas Gleixner,
	Gabriel C, linux-kernel@vger.kernel.org, Mika Fischer

On Mon, Apr 28, 2008 at 7:42 PM, Andrew Morton
<akpm@linux-foundation.org> wrote:
> On Mon, 28 Apr 2008 15:05:05 -0700 Yinghai Lu <yhlu.kernel.send@gmail.com> wrote:
>
>  >
>  > some BIOS like to use continus MTRR layout, and may X driver can not add
>  > WB entries for graphical cards when 4g or more RAM installed.
>  >
>  > the patch will change MTRR to discrete.
>  >
>  > mtrr_chunk_size= could be used to have smaller continuous block to hold holes.
>  > default is 256m, could be set according to size of graphics card memory.
>  >
>  > v2: fix -1 for UC checking
>  > v3: default to disable, and need use enable_mtrr_cleanup to enable this feature
>  >     skip the var state change warning.
>  >     remove next_basek in range_to_mtrr()
>  > v4: correct warning mask.
>  > v5: CONFIG_MTRR_SANITIZER
>  >
>  > Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
>
>
> > +#ifdef CONFIG_MTRR_SANITIZER
>  > +
>  > +#ifdef CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT
>
>  I don't think these newly-added config items should exist, sorry.  But
>  then, the changelog does't describe _why_ they exist (it should!) and I
>  probably missed it in the discusson.
>
>  Anyone who distributes a kernel will need to enable both
>  CONFIG_MTRR_SANITIZER and CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT, so the
>  config items are only useful for saving a bit of kernel text in custom
>  kernel builds.
>
>
>  > +static int enable_mtrr_cleanup __initdata = 1;
>  > +#else
>  > +static int enable_mtrr_cleanup __initdata;
>
>  The disable_mtrr_cleanup and enable_mtrr_cleanup boot options are also
>  problematic.  We really really want this stuff to all happen automatically.
>
>  What happens with this sort of thing is that people's machines misbehave
>  and I expect most of them never find out about the magic option.  They
>  give up on Linux or use a different computer or use a different distro
>  which happened to set the option the other way, etc, etc.  Some people will
>  think to do a bit of googling and might stumble across the option after a
>  while.
>
>  It's all rather user-unfriendly and we should try really hard to just make
>  things work.  Is this at all possible?

Eric or Andi said it is too risky to touch mtrr.

>
>
>  Anyway.  I think the problem which you have identified is solveable in
>  userspace, isn't it?  Read the existing mtrr settings and rewrite them in a
>  better form?  If so, we could prepare a little program which does that and
>  make the X people and distributors aware of it.  This has the significant
>  advantage that it will fix pre-2.6.26 kernels too.

sounds good.

YH

^ permalink raw reply	[flat|nested] 89+ messages in thread

* [PATCH 2/2] x86: fix trimming e820 with MTRR holes.
       [not found]         ` <200804290157.30651.yhlu.kernel@gmail.com>
@ 2008-04-29  8:59           ` Yinghai Lu
  2008-04-29 11:35             ` Ingo Molnar
  2008-04-30  3:25             ` [PATCH] x86: fix trimming e820 with MTRR holes. - fix Yinghai Lu
  0 siblings, 2 replies; 89+ messages in thread
From: Yinghai Lu @ 2008-04-29  8:59 UTC (permalink / raw)
  To: Andrew Morton, Ingo Molnar, H. Peter Anvin, Thomas Gleixner,
	Gabriel C, Mika Fischer
  Cc: linux-kernel@vger.kernel.org


converting MTRR layout from continous to discrete, some time could run out of
MTRRs. So add gran_sizek to prevent that by dumpping small RAM piece less than
gran_sizek.

previous trimming only can handle highest_pfn from mtrr to end_pfn from e820.
when have more than 4g RAM installed, there will be holes below 4g. so need to
check ram below 4g is coverred well.

need to be applied after
	[PATCH] x86: mtrr cleanup for converting continuous to discrete layout v7

Signed-off-by: Yinghai Lu <yinghai.lu@gmail.com>

Index: linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/main.c
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
@@ -1093,6 +1093,17 @@ int __init amd_special_default_mtrr(void
 	return 0;
 }
 
+static u64 __init real_trim_memory(unsigned long start_pfn, unsigned long limit_pfn)
+{
+	u64 trim_start, trim_size;
+	trim_start =  start_pfn;
+	trim_start <<= PAGE_SHIFT;
+	trim_size = limit_pfn;
+	trim_size <<= PAGE_SHIFT;
+	trim_size -= trim_start;
+	return update_memory_range(trim_start, trim_size, E820_RAM,
+				E820_RESERVED);
+}
 /**
  * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs
  * @end_pfn: ending page frame number
@@ -1108,8 +1119,13 @@ int __init mtrr_trim_uncached_memory(uns
 {
 	unsigned long i, base, size, highest_pfn = 0, def, dummy;
 	mtrr_type type;
-	u64 trim_start, trim_size;
+	struct res_range range[RANGE_NUM];
+	int nr_range;
+	u64 total_real_trim_size;
+	int changed;
 
+	/* extra one for all 0 */
+	int num[MTRR_NUM_TYPES + 1];
 	/*
 	 * Make sure we only trim uncachable memory on machines that
 	 * support the Intel MTRR architecture:
@@ -1121,9 +1137,6 @@ int __init mtrr_trim_uncached_memory(uns
 	if (def != MTRR_TYPE_UNCACHABLE)
 		return 0;
 
-	if (amd_special_default_mtrr())
-		return 0;
-
 	/* Find highest cached pfn */
 	for (i = 0; i < num_var_ranges; i++) {
 		mtrr_if->get(i, &base, &size, &type);
@@ -1143,26 +1156,80 @@ int __init mtrr_trim_uncached_memory(uns
 		return 0;
 	}
 
-	if (highest_pfn < end_pfn) {
+	/* check entries number */
+	memset(num, 0, sizeof(num));
+	for (i = 0; i < num_var_ranges; i++) {
+		mtrr_if->get(i, &base, &size, &type);
+		if (type >= MTRR_NUM_TYPES)
+			continue;
+		if (!size)
+			type = MTRR_NUM_TYPES;
+		num[type]++;
+	}
+
+	/* no entry for WB? */
+	if (!num[MTRR_TYPE_WRBACK])
+		return 0;
+
+	/* check if we only had WB and UC */
+	if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] !=
+		num_var_ranges - num[MTRR_NUM_TYPES])
+		return 0;
+
+	memset(range, 0, sizeof(range));
+	nr_range = 0;
+	if (mtrr_tom2) {
+		range[nr_range].start = (1ULL<<(32 - PAGE_SHIFT));
+		range[nr_range].end = (mtrr_tom2 >> PAGE_SHIFT) - 1;
+		if (highest_pfn < range[nr_range].end + 1)
+			highest_pfn = range[nr_range].end + 1;
+		nr_range++;
+	}
+	nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0);
+
+	changed = 0;
+	total_real_trim_size = 0;
+
+	/* check the top at first */
+	i = nr_range - 1;
+	if (range[i].end + 1 < end_pfn) {
+			total_real_trim_size += real_trim_memory(range[i].end + 1, end_pfn);
+	}
+
+	if (total_real_trim_size) {
 		printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover"
-			" all of memory, losing %luMB of RAM.\n",
-			(end_pfn - highest_pfn) >> (20 - PAGE_SHIFT));
+			" all of memory, losing %lluMB of RAM.\n",
+			total_real_trim_size >> 20);
 
 		WARN_ON(1);
 
-		printk(KERN_INFO "update e820 for mtrr\n");
-		trim_start = highest_pfn;
-		trim_start <<= PAGE_SHIFT;
-		trim_size = end_pfn;
-		trim_size <<= PAGE_SHIFT;
-		trim_size -= trim_start;
-		update_memory_range(trim_start, trim_size, E820_RAM,
-					E820_RESERVED);
+		printk(KERN_INFO "update e820 for mtrr -- end_pfn\n");
 		update_e820();
-		return 1;
+		changed = 1;
 	}
 
-	return 0;
+	total_real_trim_size = 0;
+	if (range[0].start)
+		total_real_trim_size += real_trim_memory(0, range[0].start);
+
+	for (i = 0; i < nr_range - 1; i--) {
+		if (range[i].end + 1 < range[i+1].start)
+			total_real_trim_size += real_trim_memory(range[i].end + 1, range[i+1].start);
+	}
+
+	if (total_real_trim_size) {
+		printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover"
+			" all of memory, losing %lluMB of RAM.\n",
+			total_real_trim_size >> 20);
+
+		WARN_ON(1);
+
+		printk(KERN_INFO "update e820 for mtrr -- holes\n");
+		update_e820();
+		changed = 1;
+	}
+
+	return changed;
 }
 
 /**
Index: linux-2.6/arch/x86/kernel/e820_32.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/e820_32.c
+++ linux-2.6/arch/x86/kernel/e820_32.c
@@ -736,10 +736,11 @@ static int __init parse_memmap(char *arg
 	return 0;
 }
 early_param("memmap", parse_memmap);
-void __init update_memory_range(u64 start, u64 size, unsigned old_type,
+u64 __init update_memory_range(u64 start, u64 size, unsigned old_type,
 				unsigned new_type)
 {
 	int i;
+	u64 real_updated_size = 0;
 
 	BUG_ON(old_type == new_type);
 
@@ -751,6 +752,7 @@ void __init update_memory_range(u64 star
 		/* totally covered? */
 		if (ei->addr >= start && ei->size <= size) {
 			ei->type = new_type;
+			real_updated_size += ei->size;
 			continue;
 		}
 		/* partially covered */
@@ -760,7 +762,10 @@ void __init update_memory_range(u64 star
 			continue;
 		add_memory_region(final_start, final_end - final_start,
 					 new_type);
+		real_updated_size += find_end - final_start;
 	}
+
+	return real_update_size;
 }
 void __init update_e820(void)
 {
Index: linux-2.6/arch/x86/kernel/e820_64.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/e820_64.c
+++ linux-2.6/arch/x86/kernel/e820_64.c
@@ -829,10 +829,11 @@ void __init finish_e820_parsing(void)
 	}
 }
 
-void __init update_memory_range(u64 start, u64 size, unsigned old_type,
+u64 __init update_memory_range(u64 start, u64 size, unsigned old_type,
 				unsigned new_type)
 {
 	int i;
+	u64 real_updated_size = 0;
 
 	BUG_ON(old_type == new_type);
 
@@ -844,6 +845,7 @@ void __init update_memory_range(u64 star
 		/* totally covered? */
 		if (ei->addr >= start && ei->size <= size) {
 			ei->type = new_type;
+			real_updated_size += ei->size;
 			continue;
 		}
 		/* partially covered */
@@ -853,7 +855,9 @@ void __init update_memory_range(u64 star
 			continue;
 		add_memory_region(final_start, final_end - final_start,
 					 new_type);
+		real_updated_size += final_end - final_start;
 	}
+	return real_updated_size;
 }
 
 void __init update_e820(void)
Index: linux-2.6/include/asm-x86/e820_32.h
===================================================================
--- linux-2.6.orig/include/asm-x86/e820_32.h
+++ linux-2.6/include/asm-x86/e820_32.h
@@ -28,7 +28,7 @@ extern void propagate_e820_map(void);
 extern void register_bootmem_low_pages(unsigned long max_low_pfn);
 extern void add_memory_region(unsigned long long start,
 			      unsigned long long size, int type);
-extern void update_memory_range(u64 start, u64 size, unsigned old_type,
+extern u64 update_memory_range(u64 start, u64 size, unsigned old_type,
 				unsigned new_type);
 extern void e820_register_memory(void);
 extern void limit_regions(unsigned long long size);
Index: linux-2.6/include/asm-x86/e820_64.h
===================================================================
--- linux-2.6.orig/include/asm-x86/e820_64.h
+++ linux-2.6/include/asm-x86/e820_64.h
@@ -21,7 +21,7 @@ extern unsigned long find_e820_area_size
 					 unsigned long align);
 extern void add_memory_region(unsigned long start, unsigned long size,
 			      int type);
-extern void update_memory_range(u64 start, u64 size, unsigned old_type,
+extern u64 update_memory_range(u64 start, u64 size, unsigned old_type,
 				unsigned new_type);
 extern void setup_memory_region(void);
 extern void contig_e820_setup(void);

^ permalink raw reply	[flat|nested] 89+ messages in thread

* [PATCH 1/2] x86: mtrr cleanup for converting continuous to discrete layout v7
  2008-04-28 22:05       ` [PATCH] x86: mtrr cleanup for converting continuous to discrete layout v5 Yinghai Lu
                           ` (2 preceding siblings ...)
       [not found]         ` <200804290157.30651.yhlu.kernel@gmail.com>
@ 2008-04-29  9:00         ` Yinghai Lu
  2008-04-29  9:47           ` Gabriel C
  2008-04-29 10:52           ` [PATCH 1/2] x86: mtrr cleanup for converting continuous to discrete layout v8 Yinghai Lu
  2008-04-29 19:00         ` [PATCH] x86: mtrr cleanup for converting continuous to discrete layout v5 Eric W. Biederman
  4 siblings, 2 replies; 89+ messages in thread
From: Yinghai Lu @ 2008-04-29  9:00 UTC (permalink / raw)
  To: Andrew Morton, Ingo Molnar, H. Peter Anvin, Thomas Gleixner,
	Gabriel C, Mika Fischer
  Cc: linux-kernel@vger.kernel.org


some BIOS like to use continus MTRR layout, and may X driver can not add
WB entries for graphical cards when 4g or more RAM installed.

the patch will change MTRR to discrete.

mtrr_chunk_size= could be used to have smaller continuous block to hold holes.
default is 256m, could be set according to size of graphics card memory.

v2: fix -1 for UC checking
v3: default to disable, and need use enable_mtrr_cleanup to enable this feature
    skip the var state change warning.
    remove next_basek in range_to_mtrr()
v4: correct warning mask.
v5: CONFIG_MTRR_SANITIZER
v6: fix 1g, 2g, 512 aligment with extra hole
v7: gran_sizek to prevent running out of MTRRs.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

Index: linux-2.6/arch/x86/kernel/cpu/mtrr/generic.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/generic.c
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/generic.c
@@ -37,7 +37,7 @@ static struct fixed_range_block fixed_ra
 static unsigned long smp_changes_mask;
 static struct mtrr_state mtrr_state = {};
 static int mtrr_state_set;
-static u64 tom2;
+u64 mtrr_tom2;
 
 #undef MODULE_PARAM_PREFIX
 #define MODULE_PARAM_PREFIX "mtrr."
@@ -139,8 +139,8 @@ u8 mtrr_type_lookup(u64 start, u64 end)
 		}
 	}
 
-	if (tom2) {
-		if (start >= (1ULL<<32) && (end < tom2))
+	if (mtrr_tom2) {
+		if (start >= (1ULL<<32) && (end < mtrr_tom2))
 			return MTRR_TYPE_WRBACK;
 	}
 
@@ -158,6 +158,20 @@ get_mtrr_var_range(unsigned int index, s
 	rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi);
 }
 
+/*  fill the MSR pair relating to a var range  */
+void fill_mtrr_var_range(unsigned int index,
+		u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi)
+{
+	struct mtrr_var_range *vr;
+
+	vr = mtrr_state.var_ranges;
+
+	vr[index].base_lo = base_lo;
+	vr[index].base_hi = base_hi;
+	vr[index].mask_lo = mask_lo;
+	vr[index].mask_hi = mask_hi;
+}
+
 static void
 get_fixed_ranges(mtrr_type * frs)
 {
@@ -216,10 +230,10 @@ void __init get_mtrr_state(void)
 		unsigned lo, hi;
 		/* TOP_MEM2 */
 		rdmsr(MSR_K8_TOP_MEM2, lo, hi);
-		tom2 = hi;
-		tom2 <<= 32;
-		tom2 |= lo;
-		tom2 &= 0xffffff8000000ULL;
+		mtrr_tom2 = hi;
+		mtrr_tom2 <<= 32;
+		mtrr_tom2 |= lo;
+		mtrr_tom2 &= 0xffffff8000000ULL;
 	}
 	if (mtrr_show) {
 		int high_width;
@@ -251,9 +265,9 @@ void __init get_mtrr_state(void)
 			else
 				printk(KERN_INFO "MTRR %u disabled\n", i);
 		}
-		if (tom2) {
+		if (mtrr_tom2) {
 			printk(KERN_INFO "TOM2: %016llx aka %lldM\n",
-					  tom2, tom2>>20);
+					  mtrr_tom2, mtrr_tom2>>20);
 		}
 	}
 	mtrr_state_set = 1;
Index: linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/main.c
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
@@ -37,6 +37,7 @@
 #include <linux/smp.h>
 #include <linux/cpu.h>
 #include <linux/mutex.h>
+#include <linux/sort.h>
 
 #include <asm/e820.h>
 #include <asm/mtrr.h>
@@ -609,6 +610,450 @@ static struct sysdev_driver mtrr_sysdev_
 	.resume		= mtrr_restore,
 };
 
+#ifdef CONFIG_MTRR_SANITIZER
+
+#ifdef CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT
+static int enable_mtrr_cleanup __initdata = 1;
+#else
+static int enable_mtrr_cleanup __initdata;
+#endif
+
+#else
+
+static int enable_mtrr_cleanup __initdata = -1;
+
+#endif
+
+static int __init disable_mtrr_cleanup_setup(char *str)
+{
+	if (enable_mtrr_cleanup != -1)
+		enable_mtrr_cleanup = 0;
+	return 0;
+}
+early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);
+
+static int __init enable_mtrr_cleanup_setup(char *str)
+{
+	if (enable_mtrr_cleanup != -1)
+		enable_mtrr_cleanup = 1;
+	return 0;
+}
+early_param("enble_mtrr_cleanup", enable_mtrr_cleanup_setup);
+
+#define RANGE_NUM 256
+
+struct res_range {
+	unsigned long start;
+	unsigned long end;
+};
+
+static int __init add_range(struct res_range *range, int nr_range, unsigned long start,
+			      unsigned long end, int merge)
+{
+	int i;
+
+	if (!merge)
+		goto addit;
+
+	/* try to merge it with old one */
+	for (i = 0; i < nr_range; i++) {
+		unsigned long final_start, final_end;
+		unsigned long common_start, common_end;
+
+		if (!range[i].end)
+			continue;
+
+		common_start = max(range[i].start, start);
+		common_end = min(range[i].end, end);
+		if (common_start > common_end + 1)
+			continue;
+
+		final_start = min(range[i].start, start);
+		final_end = max(range[i].end, end);
+
+		range[i].start = final_start;
+		range[i].end =  final_end;
+		return nr_range;
+	}
+
+addit:
+	/* need to add that */
+	if (nr_range >= RANGE_NUM)
+		return nr_range;
+
+	range[nr_range].start = start;
+	range[nr_range].end = end;
+
+	nr_range++;
+
+	return nr_range;
+
+}
+static void __init subtract_range(struct res_range *range, unsigned long start,
+				unsigned long end)
+{
+	int i;
+	int j;
+
+	for (j = 0; j < RANGE_NUM; j++) {
+		if (!range[j].end)
+			continue;
+
+		if (start <= range[j].start && end >= range[j].end) {
+			range[j].start = 0;
+			range[j].end = 0;
+			continue;
+		}
+
+		if (start <= range[j].start && end < range[j].end && range[j].start < end + 1) {
+			range[j].start = end + 1;
+			continue;
+		}
+
+
+		if (start > range[j].start && end >= range[j].end && range[j].end > start - 1) {
+			range[j].end = start - 1;
+			continue;
+		}
+
+		if (start > range[j].start && end < range[j].end) {
+			/* find the new spare */
+			for (i = 0; i < RANGE_NUM; i++) {
+				if (range[i].end == 0)
+					break;
+			}
+			if (i < RANGE_NUM) {
+				range[i].end = range[j].end;
+				range[i].start = end + 1;
+			} else {
+				printk(KERN_ERR "run of slot in ranges\n");
+			}
+			range[j].end = start - 1;
+			continue;
+		}
+	}
+}
+
+static int __init cmp_range(const void *x1, const void *x2)
+{
+	const struct res_range *r1 = x1;
+	const struct res_range *r2 = x2;
+	long start1, start2;
+
+	start1 = r1->start;
+	start2 = r2->start;
+
+	return start1 - start2;
+}
+
+struct var_mtrr_state {
+	unsigned long range_startk, range_sizek;
+	unsigned long chunk_sizek;
+	unsigned long gran_sizek;
+	unsigned int reg;
+	unsigned address_bits;
+};
+
+static void __init set_var_mtrr(
+	unsigned int reg, unsigned long basek, unsigned long sizek,
+	unsigned char type, unsigned address_bits)
+{
+	u32 base_lo, base_hi, mask_lo, mask_hi;
+	unsigned address_mask_high;
+
+	if (!sizek) {
+		fill_mtrr_var_range(reg, 0, 0, 0, 0);
+		return;
+	}
+
+	address_mask_high = ((1u << (address_bits - 32u)) - 1u);
+
+	base_hi = basek >> 22;
+	base_lo  = basek << 10;
+
+	if (sizek < 4*1024*1024) {
+		mask_hi = address_mask_high;
+		mask_lo = ~((sizek << 10) - 1);
+	} else {
+		mask_hi = address_mask_high & (~((sizek >> 22) - 1));
+		mask_lo = 0;
+	}
+
+	base_lo |= type;
+	mask_lo |= 0x800;
+	fill_mtrr_var_range(reg, base_lo, base_hi, mask_lo, mask_hi);
+}
+
+static unsigned int __init range_to_mtrr(unsigned int reg,
+	unsigned long range_startk, unsigned long range_sizek,
+	unsigned char type, unsigned address_bits)
+{
+	if (!range_sizek || (reg >= num_var_ranges))
+		return reg;
+
+	while (range_sizek) {
+		unsigned long max_align, align;
+		unsigned long sizek;
+		/* Compute the maximum size I can make a range */
+		if (range_startk)
+			max_align = ffs(range_startk) - 1;
+		else
+			max_align = 32;
+		align = fls(range_sizek) - 1;
+		if (align > max_align)
+			align = max_align;
+
+		sizek = 1 << align;
+		printk(KERN_INFO "Setting variable MTRR %d, base: %ldMB, range: %ldMB, type %s\n",
+			reg, range_startk >> 10, sizek >> 10,
+			(type == MTRR_TYPE_UNCACHABLE)?"UC":
+			    ((type == MTRR_TYPE_WRBACK)?"WB":"Other")
+			);
+		set_var_mtrr(reg++, range_startk, sizek, type, address_bits);
+		range_startk += sizek;
+		range_sizek -= sizek;
+		if (reg >= num_var_ranges)
+			break;
+	}
+	return reg;
+}
+
+static void __init range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek)
+{
+	unsigned long hole_basek, hole_sizek;
+	unsigned long range0_basek, range0_sizek;
+	unsigned long range_basek, range_sizek;
+	unsigned long chunk_sizek;
+	unsigned long gran_sizek;
+
+	hole_basek = 0;
+	hole_sizek = 0;
+	chunk_sizek = state->chunk_sizek;
+	gran_sizek = state->gran_sizek;
+
+	/* align with gran size, prevent small block used up MTRRs */
+	range_basek = ALIGN(state->range_startk, gran_sizek);
+	if (range_basek > basek)
+		return;
+	range_sizek = ALIGN(state->range_sizek - (range_basek - range_basek), gran_sizek);
+	while (range_basek + range_sizek > basek) {
+		range_sizek -= gran_sizek;
+		if (!range_sizek)
+			return;
+	}
+	state->range_startk = range_basek;
+	state->range_sizek = range_sizek;
+
+	/* try to append some small hole */
+	range0_basek = state->range_startk;
+	range0_sizek = ALIGN(state->range_sizek, chunk_sizek);
+	if ((range0_sizek == state->range_sizek) ||
+	    ((range0_basek + range0_sizek - chunk_sizek > basek) && basek)) {
+			printk(KERN_INFO "rangeX: %016lx - %016lx\n", range0_basek<<10, (range0_basek + state->range_sizek)<<10);
+			state->reg = range_to_mtrr(state->reg, range0_basek,
+				state->range_sizek, MTRR_TYPE_WRBACK, state->address_bits);
+		return;
+	}
+
+
+	range0_sizek -= chunk_sizek;
+	range_basek = range0_basek + range0_sizek;
+	printk(KERN_INFO "range0: %016lx - %016lx\n", range0_basek<<10, (range0_basek + range0_sizek)<<10);
+	state->reg = range_to_mtrr(state->reg, range0_basek,
+			range0_sizek, MTRR_TYPE_WRBACK, state->address_bits);
+
+	range_sizek = chunk_sizek;
+	if (range_sizek - (state->range_sizek - range0_sizek) < (chunk_sizek >> 1))
+		hole_sizek = range_sizek - (state->range_sizek - range0_sizek);
+	else
+		range_sizek = state->range_sizek - range0_sizek;
+
+	printk(KERN_INFO "range: %016lx - %016lx\n", range_basek<<10, (range_basek + range_sizek)<<10);
+	state->reg = range_to_mtrr(state->reg, range_basek,
+			range_sizek, MTRR_TYPE_WRBACK, state->address_bits);
+	if (hole_sizek) {
+		printk(KERN_INFO "hole: %016lx - %016lx\n", hole_basek<<10, (hole_basek + hole_sizek)<<10);
+		state->reg = range_to_mtrr(state->reg, hole_basek,
+				hole_sizek, MTRR_TYPE_UNCACHABLE, state->address_bits);
+	}
+}
+
+static void __init set_var_mtrr_range(struct var_mtrr_state *state, unsigned long base_pfn, unsigned long size_pfn)
+{
+	unsigned long basek, sizek;
+
+	if (state->reg >= num_var_ranges)
+		return;
+
+	basek = base_pfn << (PAGE_SHIFT - 10);
+	sizek = size_pfn << (PAGE_SHIFT - 10);
+
+	/* See if I can merge with the last range */
+	if ((basek <= 1024) || (state->range_startk + state->range_sizek == basek)) {
+		unsigned long endk = basek + sizek;
+		state->range_sizek = endk - state->range_startk;
+		return;
+	}
+	/* Write the range mtrrs */
+	if (state->range_sizek != 0) {
+		range_to_mtrr_with_hole(state, basek);
+
+		state->range_startk = 0;
+		state->range_sizek = 0;
+	}
+	/* Allocate an msr */
+	state->range_startk = basek;
+	state->range_sizek  = sizek;
+}
+
+/* mininum size of mtrr block that can take hole */
+static u64 mtrr_chunk_size __initdata = (256ULL<<20);
+
+static int __init parse_mtrr_chunk_size_opt(char *p)
+{
+	if (!p)
+		return -EINVAL;
+	mtrr_chunk_size = memparse(p, &p);
+	return 0;
+}
+early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt);
+
+/* granity of mtrr of block */
+static u64 mtrr_gran_size __initdata = (64ULL<<20);
+
+static int __init parse_mtrr_gran_size_opt(char *p)
+{
+	if (!p)
+		return -EINVAL;
+	mtrr_gran_size = memparse(p, &p);
+	return 0;
+}
+early_param("mtrr_gran_size", parse_mtrr_gran_size_opt);
+
+static void __init x86_setup_var_mtrrs(struct res_range *range, int nr_range, unsigned address_bits)
+{
+	struct var_mtrr_state var_state;
+	int i;
+
+	var_state.range_startk = 0;
+	var_state.range_sizek = 0;
+	var_state.reg = 0;
+	var_state.address_bits = address_bits;
+	var_state.chunk_sizek = mtrr_chunk_size >> 10;
+	var_state.gran_sizek = mtrr_gran_size >> 10;
+
+	/* Write the range etc */
+	for (i = 0; i < nr_range; i++)
+		set_var_mtrr_range(&var_state, range[i].start, range[i].end - range[i].start + 1);
+
+	/* Write the last range */
+	range_to_mtrr_with_hole(&var_state, 0);
+	printk(KERN_INFO "DONE variable MTRRs\n");
+	/* Clear out the extra MTRR's */
+	while (var_state.reg < num_var_ranges)
+		set_var_mtrr(var_state.reg++, 0, 0, 0, var_state.address_bits);
+}
+
+static int __init x86_get_mtrr_mem_range(struct res_range *range, int nr_range, unsigned long extra_remove_base, unsigned long extra_remove_size)
+{
+	unsigned long i, base, size;
+	mtrr_type type;
+
+	for (i = 0; i < num_var_ranges; i++) {
+		mtrr_if->get(i, &base, &size, &type);
+		if (type != MTRR_TYPE_WRBACK)
+			continue;
+		nr_range = add_range(range, nr_range, base, base + size - 1, 1);
+	}
+	printk(KERN_INFO "After WB checking\n");
+	for (i = 0; i < nr_range; i++)
+		printk(KERN_INFO "MTRR MAP PFN: %016lx - %016lx\n", range[i].start, range[i].end + 1);
+
+	/* take out UC ranges */
+	for (i = 0; i < num_var_ranges; i++) {
+		mtrr_if->get(i, &base, &size, &type);
+		if (type != MTRR_TYPE_UNCACHABLE)
+			continue;
+		if (!size)
+			continue;
+		subtract_range(range, base, base + size - 1);
+	}
+	if (extra_remove_size)
+		subtract_range(range, extra_remove_base,  extra_remove_base + extra_remove_size  - 1);
+
+	/* get new range num */
+	nr_range = 0;
+	for (i = 0; i < RANGE_NUM; i++) {
+		if (!range[i].end)
+			continue;
+		nr_range++;
+	}
+	printk(KERN_INFO "After UC checking\n");
+	for (i = 0; i < nr_range; i++)
+		printk(KERN_INFO "MTRR MAP PFN: %016lx - %016lx\n", range[i].start, range[i].end + 1);
+
+	/* sort the ranges */
+	sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
+	printk(KERN_INFO "After sorting\n");
+	for (i = 0; i < nr_range; i++)
+		printk(KERN_INFO "MTRR MAP PFN: %016lx - %016lx\n", range[i].start, range[i].end + 1);
+
+	return nr_range;
+}
+
+static int __init mtrr_cleanup(unsigned address_bits)
+{
+	unsigned long i, base, size, def, dummy;
+	mtrr_type type;
+	struct res_range range[RANGE_NUM];
+	int nr_range;
+	unsigned long extra_remove_base, extra_remove_size;
+
+	/* extra one for all 0 */
+	int num[MTRR_NUM_TYPES + 1];
+
+	if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1)
+		return 0;
+	rdmsr(MTRRdefType_MSR, def, dummy);
+	def &= 0xff;
+	if (def != MTRR_TYPE_UNCACHABLE)
+		return 0;
+
+	/* check entries number */
+	memset(num, 0, sizeof(num));
+	for (i = 0; i < num_var_ranges; i++) {
+		mtrr_if->get(i, &base, &size, &type);
+		if (type >= MTRR_NUM_TYPES)
+			continue;
+		if (!size)
+			type = MTRR_NUM_TYPES;
+		num[type]++;
+	}
+
+	/* check if we got UC entries */
+	if (!num[MTRR_TYPE_UNCACHABLE])
+		return 0;
+
+	/* check if we only had WB and UC */
+	if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] !=
+		num_var_ranges - num[MTRR_NUM_TYPES])
+		return 0;
+
+	memset(range, 0, sizeof(range));
+	extra_remove_size = 0;
+	if (mtrr_tom2) {
+		extra_remove_base = 1 << (32 - PAGE_SHIFT);
+		extra_remove_size = (mtrr_tom2>>PAGE_SHIFT) - extra_remove_base;
+	}
+	nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base, extra_remove_size);
+
+	/* convert ranges to var ranges state */
+	x86_setup_var_mtrrs(range, nr_range, address_bits);
+
+	return 1;
+
+}
+
 static int disable_mtrr_trim;
 
 static int __init disable_mtrr_trim_setup(char *str)
@@ -729,18 +1174,21 @@ int __init mtrr_trim_uncached_memory(uns
  */
 void __init mtrr_bp_init(void)
 {
+	u32 phys_addr;
 	init_ifs();
 
+	phys_addr = 32;
+
 	if (cpu_has_mtrr) {
 		mtrr_if = &generic_mtrr_ops;
 		size_or_mask = 0xff000000;	/* 36 bits */
 		size_and_mask = 0x00f00000;
+		phys_addr = 36;
 
 		/* This is an AMD specific MSR, but we assume(hope?) that
 		   Intel will implement it to when they extend the address
 		   bus of the Xeon. */
 		if (cpuid_eax(0x80000000) >= 0x80000008) {
-			u32 phys_addr;
 			phys_addr = cpuid_eax(0x80000008) & 0xff;
 			/* CPUID workaround for Intel 0F33/0F34 CPU */
 			if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
@@ -758,6 +1206,7 @@ void __init mtrr_bp_init(void)
 			   don't support PAE */
 			size_or_mask = 0xfff00000;	/* 32 bits */
 			size_and_mask = 0;
+			phys_addr = 32;
 		}
 	} else {
 		switch (boot_cpu_data.x86_vendor) {
@@ -791,8 +1240,13 @@ void __init mtrr_bp_init(void)
 	if (mtrr_if) {
 		set_num_var_ranges();
 		init_table();
-		if (use_intel())
+		if (use_intel()) {
 			get_mtrr_state();
+
+			if (mtrr_cleanup(phys_addr))
+				mtrr_if->set_all();
+
+		}
 	}
 }
 
@@ -829,9 +1283,10 @@ static int __init mtrr_init_finialize(vo
 {
 	if (!mtrr_if)
 		return 0;
-	if (use_intel())
-		mtrr_state_warn();
-	else {
+	if (use_intel()) {
+		if (enable_mtrr_cleanup < 1)
+			mtrr_state_warn();
+	} else {
 		/* The CPUs haven't MTRR and seem to not support SMP. They have
 		 * specific drivers, we use a tricky method to support
 		 * suspend/resume for them.
Index: linux-2.6/arch/x86/kernel/cpu/mtrr/mtrr.h
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -81,6 +81,8 @@ void set_mtrr_done(struct set_mtrr_conte
 void set_mtrr_cache_disable(struct set_mtrr_context *ctxt);
 void set_mtrr_prepare_save(struct set_mtrr_context *ctxt);
 
+void fill_mtrr_var_range(unsigned int index,
+		u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi);
 void get_mtrr_state(void);
 
 extern void set_mtrr_ops(struct mtrr_ops * ops);
@@ -92,6 +94,7 @@ extern struct mtrr_ops * mtrr_if;
 #define use_intel()	(mtrr_if && mtrr_if->use_intel_if == 1)
 
 extern unsigned int num_var_ranges;
+extern u64 mtrr_tom2;
 
 void mtrr_state_warn(void);
 const char *mtrr_attrib_to_str(int x);
Index: linux-2.6/Documentation/kernel-parameters.txt
===================================================================
--- linux-2.6.orig/Documentation/kernel-parameters.txt
+++ linux-2.6/Documentation/kernel-parameters.txt
@@ -595,6 +595,20 @@ and is between 256 and 4096 characters. 
 			See drivers/char/README.epca and
 			Documentation/digiepca.txt.
 
+	disable_mtrr_cleanup [X86]
+	enable_mtrr_cleanup [X86]
+			The kernel tries to adjust MTRR layout from continuous
+			to discrete, to make X server driver able to add WB
+			entry later. This parameter enables/disables that.
+
+	mtrr_chunk_size=nn[KMG] [X86]
+			used for mtrr cleanup. It is largest continous chunk
+			that could hold holes aka. UC entries.
+
+	mtrr_gran_size=nn[KMG] [X86]
+			used for mtrr cleanup. It is granity of mtrr block.
+			Big value could prevent small alignment use up MTRRs.
+
 	disable_mtrr_trim [X86, Intel and AMD only]
 			By default the kernel will trim any uncacheable
 			memory out of your available memory pool based on
Index: linux-2.6/arch/x86/Kconfig
===================================================================
--- linux-2.6.orig/arch/x86/Kconfig
+++ linux-2.6/arch/x86/Kconfig
@@ -1035,6 +1035,32 @@ config MTRR
 
 	  See <file:Documentation/mtrr.txt> for more information.
 
+config MTRR_SANITIZER
+	def_bool y
+	prompt "MTRR cleanup support"
+	depends on MTRR
+	help
+	  Convert MTRR layout from continuous to discrete, so some X driver
+	  could add WB entries.
+
+	  Say N here if you see bootup problems (boot crash, boot hang,
+	  spontaneous reboots).
+
+	  Could be disabled with disable_mtrr_cleanup. Also mtrr_chunk_size
+	  could be used to send largest mtrr entry size for continuous block
+	  to hold holes (aka. UC entries)
+
+	  If unsure, say Y.
+
+config MTRR_SANITIZER_ENABLE_DEFAULT
+	def_bool y
+	prompt "Enable MTRR cleanup by default"
+	depends on MTRR_SANITIZER
+	help
+	  Enable mtrr cleanup by default
+
+	  If unsure, say Y.
+
 config X86_PAT
 	bool
 	prompt "x86 PAT support"

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH 1/2] x86: mtrr cleanup for converting continuous to discrete layout v7
  2008-04-29  9:00         ` [PATCH 1/2] x86: mtrr cleanup for converting continuous to discrete layout v7 Yinghai Lu
@ 2008-04-29  9:47           ` Gabriel C
  2008-04-29 10:30             ` Yinghai Lu
  2008-04-29 10:52           ` [PATCH 1/2] x86: mtrr cleanup for converting continuous to discrete layout v8 Yinghai Lu
  1 sibling, 1 reply; 89+ messages in thread
From: Gabriel C @ 2008-04-29  9:47 UTC (permalink / raw)
  To: yhlu.kernel
  Cc: Andrew Morton, Ingo Molnar, H. Peter Anvin, Thomas Gleixner,
	Mika Fischer, linux-kernel@vger.kernel.org

Yinghai Lu wrote:
> some BIOS like to use continus MTRR layout, and may X driver can not add
> WB entries for graphical cards when 4g or more RAM installed.
> 
> the patch will change MTRR to discrete.
> 
> mtrr_chunk_size= could be used to have smaller continuous block to hold holes.
> default is 256m, could be set according to size of graphics card memory.
> 
> v2: fix -1 for UC checking
> v3: default to disable, and need use enable_mtrr_cleanup to enable this feature
>     skip the var state change warning.
>     remove next_basek in range_to_mtrr()
> v4: correct warning mask.
> v5: CONFIG_MTRR_SANITIZER
> v6: fix 1g, 2g, 512 aligment with extra hole
> v7: gran_sizek to prevent running out of MTRRs.
> 

With this version ( and patch http://lkml.org/lkml/2008/4/29/97 ) applyed on latest linus git tree
the box OOPS'es early.

Sorry I don't have time right now to write down the part of the OOPS I can see on monitor , I can try to find
some time later.

In any way OOPS'es on __free_one_page+0x191/0x21e


Gabriel

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH 1/2] x86: mtrr cleanup for converting continuous to discrete layout v7
  2008-04-29  9:47           ` Gabriel C
@ 2008-04-29 10:30             ` Yinghai Lu
  2008-04-29 10:56               ` Yinghai Lu
  0 siblings, 1 reply; 89+ messages in thread
From: Yinghai Lu @ 2008-04-29 10:30 UTC (permalink / raw)
  To: Gabriel C
  Cc: Andrew Morton, Ingo Molnar, H. Peter Anvin, Thomas Gleixner,
	Mika Fischer, linux-kernel@vger.kernel.org

On Tue, Apr 29, 2008 at 2:47 AM, Gabriel C <nix.or.die@googlemail.com> wrote:
> Yinghai Lu wrote:
>  > some BIOS like to use continus MTRR layout, and may X driver can not add
>  > WB entries for graphical cards when 4g or more RAM installed.
>  >
>  > the patch will change MTRR to discrete.
>  >
>  > mtrr_chunk_size= could be used to have smaller continuous block to hold holes.
>  > default is 256m, could be set according to size of graphics card memory.
>  >
>  > v2: fix -1 for UC checking
>  > v3: default to disable, and need use enable_mtrr_cleanup to enable this feature
>  >     skip the var state change warning.
>  >     remove next_basek in range_to_mtrr()
>  > v4: correct warning mask.
>  > v5: CONFIG_MTRR_SANITIZER
>  > v6: fix 1g, 2g, 512 aligment with extra hole
>  > v7: gran_sizek to prevent running out of MTRRs.
>  >
>
>  With this version ( and patch http://lkml.org/lkml/2008/4/29/97 ) applyed on latest linus git tree
>  the box OOPS'es early.
>
>  Sorry I don't have time right now to write down the part of the OOPS I can see on monitor , I can try to find
>  some time later.
>
>  In any way OOPS'es on __free_one_page+0x191/0x21e

thanks. found one problem with hole_basek ...

will send you v8, and hope it will be last version.

YH

^ permalink raw reply	[flat|nested] 89+ messages in thread

* [PATCH 1/2] x86: mtrr cleanup for converting continuous to discrete layout v8
  2008-04-29  9:00         ` [PATCH 1/2] x86: mtrr cleanup for converting continuous to discrete layout v7 Yinghai Lu
  2008-04-29  9:47           ` Gabriel C
@ 2008-04-29 10:52           ` Yinghai Lu
  2008-04-29 13:07             ` Ingo Molnar
                               ` (2 more replies)
  1 sibling, 3 replies; 89+ messages in thread
From: Yinghai Lu @ 2008-04-29 10:52 UTC (permalink / raw)
  To: Andrew Morton, Ingo Molnar, H. Peter Anvin, Thomas Gleixner,
	Gabriel C, Mika Fischer
  Cc: linux-kernel@vger.kernel.org


some BIOS like to use continus MTRR layout, and X driver can not add
WB entries for graphical cards when 4g or more RAM installed.

the patch will change MTRR to discrete.

mtrr_chunk_size= could be used to have smaller continuous block to hold holes.
default is 256m, could be set according to size of graphics card memory.

mtrr_gran_size= could be used to send smallest mtrr block to avoid run out of MTRRs

v2: fix -1 for UC checking
v3: default to disable, and need use enable_mtrr_cleanup to enable this feature
    skip the var state change warning.
    remove next_basek in range_to_mtrr()
v4: correct warning mask.
v5: CONFIG_MTRR_SANITIZER
v6: fix 1g, 2g, 512 aligment with extra hole
v7: gran_sizek to prevent running out of MTRRs.
v8: fix hole_basek caculation caused when removing next_basek
    gran_sizek using when basek is 0.

need to apply
	[PATCH] x86: fix trimming e820 with MTRR holes.
right after this one.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

Index: linux-2.6/arch/x86/kernel/cpu/mtrr/generic.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/generic.c
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/generic.c
@@ -37,7 +37,7 @@ static struct fixed_range_block fixed_ra
 static unsigned long smp_changes_mask;
 static struct mtrr_state mtrr_state = {};
 static int mtrr_state_set;
-static u64 tom2;
+u64 mtrr_tom2;
 
 #undef MODULE_PARAM_PREFIX
 #define MODULE_PARAM_PREFIX "mtrr."
@@ -139,8 +139,8 @@ u8 mtrr_type_lookup(u64 start, u64 end)
 		}
 	}
 
-	if (tom2) {
-		if (start >= (1ULL<<32) && (end < tom2))
+	if (mtrr_tom2) {
+		if (start >= (1ULL<<32) && (end < mtrr_tom2))
 			return MTRR_TYPE_WRBACK;
 	}
 
@@ -158,6 +158,20 @@ get_mtrr_var_range(unsigned int index, s
 	rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi);
 }
 
+/*  fill the MSR pair relating to a var range  */
+void fill_mtrr_var_range(unsigned int index,
+		u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi)
+{
+	struct mtrr_var_range *vr;
+
+	vr = mtrr_state.var_ranges;
+
+	vr[index].base_lo = base_lo;
+	vr[index].base_hi = base_hi;
+	vr[index].mask_lo = mask_lo;
+	vr[index].mask_hi = mask_hi;
+}
+
 static void
 get_fixed_ranges(mtrr_type * frs)
 {
@@ -216,10 +230,10 @@ void __init get_mtrr_state(void)
 		unsigned lo, hi;
 		/* TOP_MEM2 */
 		rdmsr(MSR_K8_TOP_MEM2, lo, hi);
-		tom2 = hi;
-		tom2 <<= 32;
-		tom2 |= lo;
-		tom2 &= 0xffffff8000000ULL;
+		mtrr_tom2 = hi;
+		mtrr_tom2 <<= 32;
+		mtrr_tom2 |= lo;
+		mtrr_tom2 &= 0xffffff8000000ULL;
 	}
 	if (mtrr_show) {
 		int high_width;
@@ -251,9 +265,9 @@ void __init get_mtrr_state(void)
 			else
 				printk(KERN_INFO "MTRR %u disabled\n", i);
 		}
-		if (tom2) {
+		if (mtrr_tom2) {
 			printk(KERN_INFO "TOM2: %016llx aka %lldM\n",
-					  tom2, tom2>>20);
+					  mtrr_tom2, mtrr_tom2>>20);
 		}
 	}
 	mtrr_state_set = 1;
Index: linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/main.c
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
@@ -37,6 +37,7 @@
 #include <linux/smp.h>
 #include <linux/cpu.h>
 #include <linux/mutex.h>
+#include <linux/sort.h>
 
 #include <asm/e820.h>
 #include <asm/mtrr.h>
@@ -609,6 +610,452 @@ static struct sysdev_driver mtrr_sysdev_
 	.resume		= mtrr_restore,
 };
 
+#ifdef CONFIG_MTRR_SANITIZER
+
+#ifdef CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT
+static int enable_mtrr_cleanup __initdata = 1;
+#else
+static int enable_mtrr_cleanup __initdata;
+#endif
+
+#else
+
+static int enable_mtrr_cleanup __initdata = -1;
+
+#endif
+
+static int __init disable_mtrr_cleanup_setup(char *str)
+{
+	if (enable_mtrr_cleanup != -1)
+		enable_mtrr_cleanup = 0;
+	return 0;
+}
+early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);
+
+static int __init enable_mtrr_cleanup_setup(char *str)
+{
+	if (enable_mtrr_cleanup != -1)
+		enable_mtrr_cleanup = 1;
+	return 0;
+}
+early_param("enble_mtrr_cleanup", enable_mtrr_cleanup_setup);
+
+#define RANGE_NUM 256
+
+struct res_range {
+	unsigned long start;
+	unsigned long end;
+};
+
+static int __init add_range(struct res_range *range, int nr_range, unsigned long start,
+			      unsigned long end, int merge)
+{
+	int i;
+
+	if (!merge)
+		goto addit;
+
+	/* try to merge it with old one */
+	for (i = 0; i < nr_range; i++) {
+		unsigned long final_start, final_end;
+		unsigned long common_start, common_end;
+
+		if (!range[i].end)
+			continue;
+
+		common_start = max(range[i].start, start);
+		common_end = min(range[i].end, end);
+		if (common_start > common_end + 1)
+			continue;
+
+		final_start = min(range[i].start, start);
+		final_end = max(range[i].end, end);
+
+		range[i].start = final_start;
+		range[i].end =  final_end;
+		return nr_range;
+	}
+
+addit:
+	/* need to add that */
+	if (nr_range >= RANGE_NUM)
+		return nr_range;
+
+	range[nr_range].start = start;
+	range[nr_range].end = end;
+
+	nr_range++;
+
+	return nr_range;
+
+}
+static void __init subtract_range(struct res_range *range, unsigned long start,
+				unsigned long end)
+{
+	int i;
+	int j;
+
+	for (j = 0; j < RANGE_NUM; j++) {
+		if (!range[j].end)
+			continue;
+
+		if (start <= range[j].start && end >= range[j].end) {
+			range[j].start = 0;
+			range[j].end = 0;
+			continue;
+		}
+
+		if (start <= range[j].start && end < range[j].end && range[j].start < end + 1) {
+			range[j].start = end + 1;
+			continue;
+		}
+
+
+		if (start > range[j].start && end >= range[j].end && range[j].end > start - 1) {
+			range[j].end = start - 1;
+			continue;
+		}
+
+		if (start > range[j].start && end < range[j].end) {
+			/* find the new spare */
+			for (i = 0; i < RANGE_NUM; i++) {
+				if (range[i].end == 0)
+					break;
+			}
+			if (i < RANGE_NUM) {
+				range[i].end = range[j].end;
+				range[i].start = end + 1;
+			} else {
+				printk(KERN_ERR "run of slot in ranges\n");
+			}
+			range[j].end = start - 1;
+			continue;
+		}
+	}
+}
+
+static int __init cmp_range(const void *x1, const void *x2)
+{
+	const struct res_range *r1 = x1;
+	const struct res_range *r2 = x2;
+	long start1, start2;
+
+	start1 = r1->start;
+	start2 = r2->start;
+
+	return start1 - start2;
+}
+
+struct var_mtrr_state {
+	unsigned long range_startk, range_sizek;
+	unsigned long chunk_sizek;
+	unsigned long gran_sizek;
+	unsigned int reg;
+	unsigned address_bits;
+};
+
+static void __init set_var_mtrr(
+	unsigned int reg, unsigned long basek, unsigned long sizek,
+	unsigned char type, unsigned address_bits)
+{
+	u32 base_lo, base_hi, mask_lo, mask_hi;
+	unsigned address_mask_high;
+
+	if (!sizek) {
+		fill_mtrr_var_range(reg, 0, 0, 0, 0);
+		return;
+	}
+
+	address_mask_high = ((1u << (address_bits - 32u)) - 1u);
+
+	base_hi = basek >> 22;
+	base_lo  = basek << 10;
+
+	if (sizek < 4*1024*1024) {
+		mask_hi = address_mask_high;
+		mask_lo = ~((sizek << 10) - 1);
+	} else {
+		mask_hi = address_mask_high & (~((sizek >> 22) - 1));
+		mask_lo = 0;
+	}
+
+	base_lo |= type;
+	mask_lo |= 0x800;
+	fill_mtrr_var_range(reg, base_lo, base_hi, mask_lo, mask_hi);
+}
+
+static unsigned int __init range_to_mtrr(unsigned int reg,
+	unsigned long range_startk, unsigned long range_sizek,
+	unsigned char type, unsigned address_bits)
+{
+	if (!range_sizek || (reg >= num_var_ranges))
+		return reg;
+
+	while (range_sizek) {
+		unsigned long max_align, align;
+		unsigned long sizek;
+		/* Compute the maximum size I can make a range */
+		if (range_startk)
+			max_align = ffs(range_startk) - 1;
+		else
+			max_align = 32;
+		align = fls(range_sizek) - 1;
+		if (align > max_align)
+			align = max_align;
+
+		sizek = 1 << align;
+		printk(KERN_INFO "Setting variable MTRR %d, base: %ldMB, range: %ldMB, type %s\n",
+			reg, range_startk >> 10, sizek >> 10,
+			(type == MTRR_TYPE_UNCACHABLE)?"UC":
+			    ((type == MTRR_TYPE_WRBACK)?"WB":"Other")
+			);
+		set_var_mtrr(reg++, range_startk, sizek, type, address_bits);
+		range_startk += sizek;
+		range_sizek -= sizek;
+		if (reg >= num_var_ranges)
+			break;
+	}
+	return reg;
+}
+
+static void __init range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek)
+{
+	unsigned long hole_basek, hole_sizek;
+	unsigned long range0_basek, range0_sizek;
+	unsigned long range_basek, range_sizek;
+	unsigned long chunk_sizek;
+	unsigned long gran_sizek;
+
+	hole_basek = 0;
+	hole_sizek = 0;
+	chunk_sizek = state->chunk_sizek;
+	gran_sizek = state->gran_sizek;
+
+	/* align with gran size, prevent small block used up MTRRs */
+	range_basek = ALIGN(state->range_startk, gran_sizek);
+	if ((range_basek > basek) && basek)
+		return;
+	range_sizek = ALIGN(state->range_sizek - (range_basek - state->range_startk), gran_sizek);
+
+	while (range_basek + range_sizek > (state->range_startk + state->range_sizek)) {
+		range_sizek -= gran_sizek;
+		if (!range_sizek)
+			return;
+	}
+	state->range_startk = range_basek;
+	state->range_sizek = range_sizek;
+
+	/* try to append some small hole */
+	range0_basek = state->range_startk;
+	range0_sizek = ALIGN(state->range_sizek, chunk_sizek);
+	if ((range0_sizek == state->range_sizek) ||
+	    ((range0_basek + range0_sizek - chunk_sizek > basek) && basek)) {
+			printk(KERN_INFO "rangeX: %016lx - %016lx\n", range0_basek<<10, (range0_basek + state->range_sizek)<<10);
+			state->reg = range_to_mtrr(state->reg, range0_basek,
+				state->range_sizek, MTRR_TYPE_WRBACK, state->address_bits);
+		return;
+	}
+
+
+	range0_sizek -= chunk_sizek;
+	printk(KERN_INFO "range0: %016lx - %016lx\n", range0_basek<<10, (range0_basek + range0_sizek)<<10);
+	state->reg = range_to_mtrr(state->reg, range0_basek,
+			range0_sizek, MTRR_TYPE_WRBACK, state->address_bits);
+
+	range_basek = range0_basek + range0_sizek;
+	range_sizek = chunk_sizek;
+	if (range_sizek - (state->range_sizek - range0_sizek) < (chunk_sizek >> 1)) {
+		hole_sizek = range_sizek - (state->range_sizek - range0_sizek);
+		hole_basek = range_basek + range_sizek - hole_sizek;
+	} else
+		range_sizek = state->range_sizek - range0_sizek;
+
+	printk(KERN_INFO "range: %016lx - %016lx\n", range_basek<<10, (range_basek + range_sizek)<<10);
+	state->reg = range_to_mtrr(state->reg, range_basek,
+			range_sizek, MTRR_TYPE_WRBACK, state->address_bits);
+	if (hole_sizek) {
+		printk(KERN_INFO "hole: %016lx - %016lx\n", hole_basek<<10, (hole_basek + hole_sizek)<<10);
+		state->reg = range_to_mtrr(state->reg, hole_basek,
+				hole_sizek, MTRR_TYPE_UNCACHABLE, state->address_bits);
+	}
+}
+
+static void __init set_var_mtrr_range(struct var_mtrr_state *state, unsigned long base_pfn, unsigned long size_pfn)
+{
+	unsigned long basek, sizek;
+
+	if (state->reg >= num_var_ranges)
+		return;
+
+	basek = base_pfn << (PAGE_SHIFT - 10);
+	sizek = size_pfn << (PAGE_SHIFT - 10);
+
+	/* See if I can merge with the last range */
+	if ((basek <= 1024) || (state->range_startk + state->range_sizek == basek)) {
+		unsigned long endk = basek + sizek;
+		state->range_sizek = endk - state->range_startk;
+		return;
+	}
+	/* Write the range mtrrs */
+	if (state->range_sizek != 0) {
+		range_to_mtrr_with_hole(state, basek);
+
+		state->range_startk = 0;
+		state->range_sizek = 0;
+	}
+	/* Allocate an msr */
+	state->range_startk = basek;
+	state->range_sizek  = sizek;
+}
+
+/* mininum size of mtrr block that can take hole */
+static u64 mtrr_chunk_size __initdata = (256ULL<<20);
+
+static int __init parse_mtrr_chunk_size_opt(char *p)
+{
+	if (!p)
+		return -EINVAL;
+	mtrr_chunk_size = memparse(p, &p);
+	return 0;
+}
+early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt);
+
+/* granity of mtrr of block */
+static u64 mtrr_gran_size __initdata = (64ULL<<20);
+
+static int __init parse_mtrr_gran_size_opt(char *p)
+{
+	if (!p)
+		return -EINVAL;
+	mtrr_gran_size = memparse(p, &p);
+	return 0;
+}
+early_param("mtrr_gran_size", parse_mtrr_gran_size_opt);
+
+static void __init x86_setup_var_mtrrs(struct res_range *range, int nr_range, unsigned address_bits)
+{
+	struct var_mtrr_state var_state;
+	int i;
+
+	var_state.range_startk = 0;
+	var_state.range_sizek = 0;
+	var_state.reg = 0;
+	var_state.address_bits = address_bits;
+	var_state.chunk_sizek = mtrr_chunk_size >> 10;
+	var_state.gran_sizek = mtrr_gran_size >> 10;
+
+	/* Write the range etc */
+	for (i = 0; i < nr_range; i++)
+		set_var_mtrr_range(&var_state, range[i].start, range[i].end - range[i].start + 1);
+
+	/* Write the last range */
+	range_to_mtrr_with_hole(&var_state, 0);
+	printk(KERN_INFO "DONE variable MTRRs\n");
+	/* Clear out the extra MTRR's */
+	while (var_state.reg < num_var_ranges)
+		set_var_mtrr(var_state.reg++, 0, 0, 0, var_state.address_bits);
+}
+
+static int __init x86_get_mtrr_mem_range(struct res_range *range, int nr_range, unsigned long extra_remove_base, unsigned long extra_remove_size)
+{
+	unsigned long i, base, size;
+	mtrr_type type;
+
+	for (i = 0; i < num_var_ranges; i++) {
+		mtrr_if->get(i, &base, &size, &type);
+		if (type != MTRR_TYPE_WRBACK)
+			continue;
+		nr_range = add_range(range, nr_range, base, base + size - 1, 1);
+	}
+	printk(KERN_INFO "After WB checking\n");
+	for (i = 0; i < nr_range; i++)
+		printk(KERN_INFO "MTRR MAP PFN: %016lx - %016lx\n", range[i].start, range[i].end + 1);
+
+	/* take out UC ranges */
+	for (i = 0; i < num_var_ranges; i++) {
+		mtrr_if->get(i, &base, &size, &type);
+		if (type != MTRR_TYPE_UNCACHABLE)
+			continue;
+		if (!size)
+			continue;
+		subtract_range(range, base, base + size - 1);
+	}
+	if (extra_remove_size)
+		subtract_range(range, extra_remove_base,  extra_remove_base + extra_remove_size  - 1);
+
+	/* get new range num */
+	nr_range = 0;
+	for (i = 0; i < RANGE_NUM; i++) {
+		if (!range[i].end)
+			continue;
+		nr_range++;
+	}
+	printk(KERN_INFO "After UC checking\n");
+	for (i = 0; i < nr_range; i++)
+		printk(KERN_INFO "MTRR MAP PFN: %016lx - %016lx\n", range[i].start, range[i].end + 1);
+
+	/* sort the ranges */
+	sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
+	printk(KERN_INFO "After sorting\n");
+	for (i = 0; i < nr_range; i++)
+		printk(KERN_INFO "MTRR MAP PFN: %016lx - %016lx\n", range[i].start, range[i].end + 1);
+
+	return nr_range;
+}
+
+static int __init mtrr_cleanup(unsigned address_bits)
+{
+	unsigned long i, base, size, def, dummy;
+	mtrr_type type;
+	struct res_range range[RANGE_NUM];
+	int nr_range;
+	unsigned long extra_remove_base, extra_remove_size;
+
+	/* extra one for all 0 */
+	int num[MTRR_NUM_TYPES + 1];
+
+	if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1)
+		return 0;
+	rdmsr(MTRRdefType_MSR, def, dummy);
+	def &= 0xff;
+	if (def != MTRR_TYPE_UNCACHABLE)
+		return 0;
+
+	/* check entries number */
+	memset(num, 0, sizeof(num));
+	for (i = 0; i < num_var_ranges; i++) {
+		mtrr_if->get(i, &base, &size, &type);
+		if (type >= MTRR_NUM_TYPES)
+			continue;
+		if (!size)
+			type = MTRR_NUM_TYPES;
+		num[type]++;
+	}
+
+	/* check if we got UC entries */
+	if (!num[MTRR_TYPE_UNCACHABLE])
+		return 0;
+
+	/* check if we only had WB and UC */
+	if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] !=
+		num_var_ranges - num[MTRR_NUM_TYPES])
+		return 0;
+
+	memset(range, 0, sizeof(range));
+	extra_remove_size = 0;
+	if (mtrr_tom2) {
+		extra_remove_base = 1 << (32 - PAGE_SHIFT);
+		extra_remove_size = (mtrr_tom2>>PAGE_SHIFT) - extra_remove_base;
+	}
+	nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base, extra_remove_size);
+
+	/* convert ranges to var ranges state */
+	x86_setup_var_mtrrs(range, nr_range, address_bits);
+
+	return 1;
+
+}
+
 static int disable_mtrr_trim;
 
 static int __init disable_mtrr_trim_setup(char *str)
@@ -729,18 +1176,21 @@ int __init mtrr_trim_uncached_memory(uns
  */
 void __init mtrr_bp_init(void)
 {
+	u32 phys_addr;
 	init_ifs();
 
+	phys_addr = 32;
+
 	if (cpu_has_mtrr) {
 		mtrr_if = &generic_mtrr_ops;
 		size_or_mask = 0xff000000;	/* 36 bits */
 		size_and_mask = 0x00f00000;
+		phys_addr = 36;
 
 		/* This is an AMD specific MSR, but we assume(hope?) that
 		   Intel will implement it to when they extend the address
 		   bus of the Xeon. */
 		if (cpuid_eax(0x80000000) >= 0x80000008) {
-			u32 phys_addr;
 			phys_addr = cpuid_eax(0x80000008) & 0xff;
 			/* CPUID workaround for Intel 0F33/0F34 CPU */
 			if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
@@ -758,6 +1208,7 @@ void __init mtrr_bp_init(void)
 			   don't support PAE */
 			size_or_mask = 0xfff00000;	/* 32 bits */
 			size_and_mask = 0;
+			phys_addr = 32;
 		}
 	} else {
 		switch (boot_cpu_data.x86_vendor) {
@@ -791,8 +1242,13 @@ void __init mtrr_bp_init(void)
 	if (mtrr_if) {
 		set_num_var_ranges();
 		init_table();
-		if (use_intel())
+		if (use_intel()) {
 			get_mtrr_state();
+
+			if (mtrr_cleanup(phys_addr))
+				mtrr_if->set_all();
+
+		}
 	}
 }
 
@@ -829,9 +1285,10 @@ static int __init mtrr_init_finialize(vo
 {
 	if (!mtrr_if)
 		return 0;
-	if (use_intel())
-		mtrr_state_warn();
-	else {
+	if (use_intel()) {
+		if (enable_mtrr_cleanup < 1)
+			mtrr_state_warn();
+	} else {
 		/* The CPUs haven't MTRR and seem to not support SMP. They have
 		 * specific drivers, we use a tricky method to support
 		 * suspend/resume for them.
Index: linux-2.6/arch/x86/kernel/cpu/mtrr/mtrr.h
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -81,6 +81,8 @@ void set_mtrr_done(struct set_mtrr_conte
 void set_mtrr_cache_disable(struct set_mtrr_context *ctxt);
 void set_mtrr_prepare_save(struct set_mtrr_context *ctxt);
 
+void fill_mtrr_var_range(unsigned int index,
+		u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi);
 void get_mtrr_state(void);
 
 extern void set_mtrr_ops(struct mtrr_ops * ops);
@@ -92,6 +94,7 @@ extern struct mtrr_ops * mtrr_if;
 #define use_intel()	(mtrr_if && mtrr_if->use_intel_if == 1)
 
 extern unsigned int num_var_ranges;
+extern u64 mtrr_tom2;
 
 void mtrr_state_warn(void);
 const char *mtrr_attrib_to_str(int x);
Index: linux-2.6/Documentation/kernel-parameters.txt
===================================================================
--- linux-2.6.orig/Documentation/kernel-parameters.txt
+++ linux-2.6/Documentation/kernel-parameters.txt
@@ -595,6 +595,20 @@ and is between 256 and 4096 characters. 
 			See drivers/char/README.epca and
 			Documentation/digiepca.txt.
 
+	disable_mtrr_cleanup [X86]
+	enable_mtrr_cleanup [X86]
+			The kernel tries to adjust MTRR layout from continuous
+			to discrete, to make X server driver able to add WB
+			entry later. This parameter enables/disables that.
+
+	mtrr_chunk_size=nn[KMG] [X86]
+			used for mtrr cleanup. It is largest continous chunk
+			that could hold holes aka. UC entries.
+
+	mtrr_gran_size=nn[KMG] [X86]
+			used for mtrr cleanup. It is granity of mtrr block.
+			Big value could prevent small alignment use up MTRRs.
+
 	disable_mtrr_trim [X86, Intel and AMD only]
 			By default the kernel will trim any uncacheable
 			memory out of your available memory pool based on
Index: linux-2.6/arch/x86/Kconfig
===================================================================
--- linux-2.6.orig/arch/x86/Kconfig
+++ linux-2.6/arch/x86/Kconfig
@@ -1035,6 +1035,32 @@ config MTRR
 
 	  See <file:Documentation/mtrr.txt> for more information.
 
+config MTRR_SANITIZER
+	def_bool y
+	prompt "MTRR cleanup support"
+	depends on MTRR
+	help
+	  Convert MTRR layout from continuous to discrete, so some X driver
+	  could add WB entries.
+
+	  Say N here if you see bootup problems (boot crash, boot hang,
+	  spontaneous reboots).
+
+	  Could be disabled with disable_mtrr_cleanup. Also mtrr_chunk_size
+	  could be used to send largest mtrr entry size for continuous block
+	  to hold holes (aka. UC entries)
+
+	  If unsure, say Y.
+
+config MTRR_SANITIZER_ENABLE_DEFAULT
+	def_bool y
+	prompt "Enable MTRR cleanup by default"
+	depends on MTRR_SANITIZER
+	help
+	  Enable mtrr cleanup by default
+
+	  If unsure, say Y.
+
 config X86_PAT
 	bool
 	prompt "x86 PAT support"

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH 1/2] x86: mtrr cleanup for converting continuous to discrete layout v7
  2008-04-29 10:30             ` Yinghai Lu
@ 2008-04-29 10:56               ` Yinghai Lu
  2008-04-29 11:26                 ` Ingo Molnar
  2008-04-29 11:51                 ` Gabriel C
  0 siblings, 2 replies; 89+ messages in thread
From: Yinghai Lu @ 2008-04-29 10:56 UTC (permalink / raw)
  To: Gabriel C
  Cc: Andrew Morton, Ingo Molnar, H. Peter Anvin, Thomas Gleixner,
	Mika Fischer, linux-kernel@vger.kernel.org

On Tue, Apr 29, 2008 at 3:30 AM, Yinghai Lu <yhlu.kernel@gmail.com> wrote:
>
> On Tue, Apr 29, 2008 at 2:47 AM, Gabriel C <nix.or.die@googlemail.com> wrote:
>  > Yinghai Lu wrote:
>  >  > some BIOS like to use continus MTRR layout, and may X driver can not add
>  >  > WB entries for graphical cards when 4g or more RAM installed.
>  >  >
>  >  > the patch will change MTRR to discrete.
>  >  >
>  >  > mtrr_chunk_size= could be used to have smaller continuous block to hold holes.
>  >  > default is 256m, could be set according to size of graphics card memory.
>  >  >
>  >  > v2: fix -1 for UC checking
>  >  > v3: default to disable, and need use enable_mtrr_cleanup to enable this feature
>  >  >     skip the var state change warning.
>  >  >     remove next_basek in range_to_mtrr()
>  >  > v4: correct warning mask.
>  >  > v5: CONFIG_MTRR_SANITIZER
>  >  > v6: fix 1g, 2g, 512 aligment with extra hole
>  >  > v7: gran_sizek to prevent running out of MTRRs.
>  >  >
>  >
>  >  With this version ( and patch http://lkml.org/lkml/2008/4/29/97 ) applyed on latest linus git tree
>  >  the box OOPS'es early.
>  >
>  >  Sorry I don't have time right now to write down the part of the OOPS I can see on monitor , I can try to find
>  >  some time later.
>  >
>  >  In any way OOPS'es on __free_one_page+0x191/0x21e
>
>  thanks. found one problem with hole_basek ...
>
>  will send you v8, and hope it will be last version.

please try v8, it should get rid of the 8m entry. it need patch
http://lkml.org/lkml/2008/4/29/97 too.

Thanks

Yinghai Lu

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH 1/2] x86: mtrr cleanup for converting continuous to discrete layout v7
  2008-04-29 10:56               ` Yinghai Lu
@ 2008-04-29 11:26                 ` Ingo Molnar
  2008-04-29 11:51                 ` Gabriel C
  1 sibling, 0 replies; 89+ messages in thread
From: Ingo Molnar @ 2008-04-29 11:26 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Gabriel C, Andrew Morton, H. Peter Anvin, Thomas Gleixner,
	Mika Fischer, linux-kernel@vger.kernel.org


* Yinghai Lu <yhlu.kernel@gmail.com> wrote:

> >  will send you v8, and hope it will be last version.
> 
> please try v8, it should get rid of the 8m entry. it need patch 
> http://lkml.org/lkml/2008/4/29/97 too.

thanks, applied both of them, in the right order and pushed out the 
tree.

	Ingo

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH 2/2] x86: fix trimming e820 with MTRR holes.
  2008-04-29  8:59           ` [PATCH 2/2] x86: fix trimming e820 with MTRR holes Yinghai Lu
@ 2008-04-29 11:35             ` Ingo Molnar
  2008-04-29 17:18               ` Yinghai Lu
  2008-04-30  3:25             ` [PATCH] x86: fix trimming e820 with MTRR holes. - fix Yinghai Lu
  1 sibling, 1 reply; 89+ messages in thread
From: Ingo Molnar @ 2008-04-29 11:35 UTC (permalink / raw)
  To: yhlu.kernel
  Cc: Andrew Morton, H. Peter Anvin, Thomas Gleixner, Gabriel C,
	Mika Fischer, linux-kernel@vger.kernel.org


* Yinghai Lu <yhlu.kernel.send@gmail.com> wrote:

> @@ -760,7 +762,10 @@ void __init update_memory_range(u64 star
>  			continue;
>  		add_memory_region(final_start, final_end - final_start,
>  					 new_type);
> +		real_updated_size += find_end - final_start;

s/find_end/final_end

i hope it boots ;-)

	Ingo

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH 1/2] x86: mtrr cleanup for converting continuous to discrete layout v7
  2008-04-29 10:56               ` Yinghai Lu
  2008-04-29 11:26                 ` Ingo Molnar
@ 2008-04-29 11:51                 ` Gabriel C
  2008-04-29 17:11                   ` Yinghai Lu
  1 sibling, 1 reply; 89+ messages in thread
From: Gabriel C @ 2008-04-29 11:51 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Andrew Morton, Ingo Molnar, H. Peter Anvin, Thomas Gleixner,
	Mika Fischer, linux-kernel@vger.kernel.org

Yinghai Lu wrote:
> On Tue, Apr 29, 2008 at 3:30 AM, Yinghai Lu <yhlu.kernel@gmail.com> wrote:
>> On Tue, Apr 29, 2008 at 2:47 AM, Gabriel C <nix.or.die@googlemail.com> wrote:
>>  > Yinghai Lu wrote:
>>  >  > some BIOS like to use continus MTRR layout, and may X driver can not add
>>  >  > WB entries for graphical cards when 4g or more RAM installed.
>>  >  >
>>  >  > the patch will change MTRR to discrete.
>>  >  >
>>  >  > mtrr_chunk_size= could be used to have smaller continuous block to hold holes.
>>  >  > default is 256m, could be set according to size of graphics card memory.
>>  >  >
>>  >  > v2: fix -1 for UC checking
>>  >  > v3: default to disable, and need use enable_mtrr_cleanup to enable this feature
>>  >  >     skip the var state change warning.
>>  >  >     remove next_basek in range_to_mtrr()
>>  >  > v4: correct warning mask.
>>  >  > v5: CONFIG_MTRR_SANITIZER
>>  >  > v6: fix 1g, 2g, 512 aligment with extra hole
>>  >  > v7: gran_sizek to prevent running out of MTRRs.
>>  >  >
>>  >
>>  >  With this version ( and patch http://lkml.org/lkml/2008/4/29/97 ) applyed on latest linus git tree
>>  >  the box OOPS'es early.
>>  >
>>  >  Sorry I don't have time right now to write down the part of the OOPS I can see on monitor , I can try to find
>>  >  some time later.
>>  >
>>  >  In any way OOPS'es on __free_one_page+0x191/0x21e
>>
>>  thanks. found one problem with hole_basek ...
>>
>>  will send you v8, and hope it will be last version.
> 
> please try v8, it should get rid of the 8m entry. it need patch
> http://lkml.org/lkml/2008/4/29/97 too.

Box does boot with v8 but now I get that warning you fixed in v2 again =):

....
[    0.000000] Linux version 2.6.25-06058-ga01e035-dirty (crazy@thor) (gcc version 4.3.0 (Frugalware Linux) ) #805 SMP PREEMPT Tue Apr 29 13:04:49 CEST 2008
[    0.000000] Command line: root=/dev/sdb1 ro debug vga=0x317
[    0.000000] BIOS-provided physical RAM map:
[    0.000000]  BIOS-e820: 0000000000000000 - 000000000009cc00 (usable)
[    0.000000]  BIOS-e820: 000000000009cc00 - 00000000000a0000 (reserved)
[    0.000000]  BIOS-e820: 00000000000e4000 - 0000000000100000 (reserved)
[    0.000000]  BIOS-e820: 0000000000100000 - 00000000cf550000 (usable)
[    0.000000]  BIOS-e820: 00000000cf550000 - 00000000cf55e000 (ACPI data)
[    0.000000]  BIOS-e820: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
[    0.000000]  BIOS-e820: 00000000cf5e0000 - 00000000cf600000 (reserved)
[    0.000000]  BIOS-e820: 00000000fee00000 - 00000000fee01000 (reserved)
[    0.000000]  BIOS-e820: 00000000ffc00000 - 0000000100000000 (reserved)
[    0.000000]  BIOS-e820: 0000000100000000 - 000000012c000000 (usable)
[    0.000000] Entering add_active_range(0, 0, 156) 0 entries of 256 used
[    0.000000] Entering add_active_range(0, 256, 849232) 1 entries of 256 used
[    0.000000] Entering add_active_range(0, 1048576, 1228800) 2 entries of 256 used
[    0.000000] max_pfn_mapped = 1228800
[    0.000000] x86 PAT enabled: cpu 0, old 0x7040600070406, new 0x7010600070106
[    0.000000] After WB checking
[    0.000000] MTRR MAP PFN: 0000000000000000 - 000000000012c000
[    0.000000] After UC checking
[    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
[    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
[    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
[    0.000000] After sorting
[    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
[    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
[    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
[    0.000000] range0: 0000000000000000 - 00000000c0000000
[    0.000000] Setting variable MTRR 0, base: 0MB, range: 2048MB, type WB
[    0.000000] Setting variable MTRR 1, base: 2048MB, range: 1024MB, type WB
[    0.000000] range: 00000000c0000000 - 00000000d0000000
[    0.000000] Setting variable MTRR 2, base: 3072MB, range: 256MB, type WB
[    0.000000] hole: 00000000cc000000 - 00000000d0000000
[    0.000000] Setting variable MTRR 3, base: 3264MB, range: 64MB, type UC
[    0.000000] rangeX: 00000000d0000000 - 00000000d0000000
[    0.000000] range0: 0000000100000000 - 0000000120000000
[    0.000000] Setting variable MTRR 4, base: 4096MB, range: 512MB, type WB
[    0.000000] range: 0000000120000000 - 0000000130000000
[    0.000000] Setting variable MTRR 5, base: 4608MB, range: 256MB, type WB
[    0.000000] hole: 000000012c000000 - 0000000130000000
[    0.000000] Setting variable MTRR 6, base: 4800MB, range: 64MB, type UC
[    0.000000] DONE variable MTRRs
[    0.000000] x86 PAT enabled: cpu 0, old 0x7010600070106, new 0x7010600070106
[    0.000000] After WB checking
[    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000d0000
[    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000130000
[    0.000000] After UC checking
[    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cc000
[    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
[    0.000000] After sorting
[    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cc000
[    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
[    0.000000] WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing 757MB of RAM.
[    0.000000] ------------[ cut here ]------------
[    0.000000] WARNING: at arch/x86/kernel/cpu/mtrr/main.c:1227 mtrr_trim_uncached_memory+0x30c/0x35a()
[    0.000000] Modules linked in:
[    0.000000] Pid: 0, comm: swapper Not tainted 2.6.25-06058-ga01e035-dirty #805
[    0.000000]
[    0.000000] Call Trace:
[    0.000000]  [<ffffffff8022d661>] warn_on_slowpath+0x51/0x78
[    0.000000]  [<ffffffff8067f0dc>] x86_get_mtrr_mem_range+0x241/0x25c
[    0.000000]  [<ffffffff8067d93e>] update_memory_range+0x95/0xb7
[    0.000000]  [<ffffffff8067fb02>] mtrr_trim_uncached_memory+0x30c/0x35a
[    0.000000]  [<ffffffff8067ca43>] setup_arch+0x280/0x4a2
[    0.000000]  [<ffffffff80676aa6>] start_kernel+0x6e/0x308
[    0.000000]  [<ffffffff80676432>] x86_64_start_kernel+0x241/0x24a
[    0.000000]
[    0.000000] ---[ end trace ca143223eefdc828 ]---
[    0.000000] update e820 for mtrr -- holes
[    0.000000] modified physical RAM map:
[    0.000000]  modified: 0000000000000000 - 000000000009cc00 (usable)
[    0.000000]  modified: 000000000009cc00 - 00000000000a0000 (reserved)
[    0.000000]  modified: 00000000000e4000 - 0000000000100000 (reserved)
[    0.000000]  modified: 0000000000100000 - 00000000cc000000 (usable)
[    0.000000]  modified: 00000000cc000000 - 00000000cf550000 (reserved)
[    0.000000]  modified: 00000000cf550000 - 00000000cf55e000 (ACPI data)
[    0.000000]  modified: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
[    0.000000]  modified: 00000000cf5e0000 - 00000000cf600000 (reserved)
[    0.000000]  modified: 00000000fee00000 - 00000000fee01000 (reserved)
[    0.000000]  modified: 00000000ffc00000 - 000000012c000000 (reserved)
[    0.000000] Entering add_active_range(0, 0, 156) 3 entries of 256 used
[    0.000000] Entering add_active_range(0, 256, 835584) 3 entries of 256 used
[    0.000000] max_pfn_mapped = 1228800
[    0.000000] init_memory_mapping 

...
                                                                                                                       > 
> Thanks
> 
> Yinghai Lu
> 


Gabriel

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH 1/2] x86: mtrr cleanup for converting continuous to discrete layout v8
  2008-04-29 10:52           ` [PATCH 1/2] x86: mtrr cleanup for converting continuous to discrete layout v8 Yinghai Lu
@ 2008-04-29 13:07             ` Ingo Molnar
  2008-04-29 17:25               ` Yinghai Lu
  2008-04-29 20:46             ` Randy Dunlap
  2008-04-30  3:25             ` [PATCH] x86: mtrr cleanup for converting continuous to discrete layout v8 - fix Yinghai Lu
  2 siblings, 1 reply; 89+ messages in thread
From: Ingo Molnar @ 2008-04-29 13:07 UTC (permalink / raw)
  To: yhlu.kernel
  Cc: Andrew Morton, H. Peter Anvin, Thomas Gleixner, Gabriel C,
	Mika Fischer, linux-kernel@vger.kernel.org


a few minor cleanliness observations:

> +#ifdef CONFIG_MTRR_SANITIZER
> +
> +#ifdef CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT
> +static int enable_mtrr_cleanup __initdata = 1;
> +#else
> +static int enable_mtrr_cleanup __initdata;
> +#endif
> +
> +#else
> +
> +static int enable_mtrr_cleanup __initdata = -1;
> +
> +#endif

this should be a single:

 #ifdef CONFIG_MTRR_SANITIZER
 static int mtrr_cleanup_enabled = CONFIG_MTRR_SANITIZER_DEFAULT;
 #endif

block.

> +#define RANGE_NUM 256

small explaination (comment) about what the limit means.

> +static int __init add_range(struct res_range *range, int nr_range, unsigned long start,
> +			      unsigned long end, int merge)

looks cleaner this way:

  static int __init
  add_range(struct res_range *range, int nr_range, unsigned long start,
            unsigned long end, int merge)

> +{
> +	int i;
> +
> +	if (!merge)
> +		goto addit;
> +
> +	/* try to merge it with old one */
> +	for (i = 0; i < nr_range; i++) {
> +		unsigned long final_start, final_end;
> +		unsigned long common_start, common_end;
> +
> +		if (!range[i].end)
> +			continue;
> +
> +		common_start = max(range[i].start, start);
> +		common_end = min(range[i].end, end);
> +		if (common_start > common_end + 1)
> +			continue;
> +
> +		final_start = min(range[i].start, start);
> +		final_end = max(range[i].end, end);
> +
> +		range[i].start = final_start;
> +		range[i].end =  final_end;
> +		return nr_range;
> +	}
> +
> +addit:

perhaps factor out the loop into a separate function and avoid the goto.

> +static void __init subtract_range(struct res_range *range, unsigned long start,
> +				unsigned long end)

should be:

 static void __init
 subtract_range(struct res_range *range, unsigned long start,
                unsigned long end)

> +	int i;
> +	int j;

can be:

	int i, j;

> +		}
> +
> +

stale newline.

> +		if (start > range[j].start && end >= range[j].end && range[j].end > start - 1) {

should be some sort of more readable in_range() check?

> +			range[j].end = start - 1;
> +			continue;
> +		}
> +
> +		if (start > range[j].start && end < range[j].end) {
> +			/* find the new spare */
> +			for (i = 0; i < RANGE_NUM; i++) {
> +				if (range[i].end == 0)
> +					break;
> +			}
> +			if (i < RANGE_NUM) {
> +				range[i].end = range[j].end;
> +				range[i].start = end + 1;
> +			} else {
> +				printk(KERN_ERR "run of slot in ranges\n");
> +			}
> +			range[j].end = start - 1;
> +			continue;
> +		}
> +	}
> +}

> +struct var_mtrr_state {
> +	unsigned long range_startk, range_sizek;
> +	unsigned long chunk_sizek;
> +	unsigned long gran_sizek;
> +	unsigned int reg;
> +	unsigned address_bits;
> +};

s/unsigned address_bits/unsigned int address_bits/

also move range_sizek on a separate line.

plus we tend to align structures this way:

> +struct var_mtrr_state {
> +	unsigned long		range_startk;
> +	unsigned long		range_sizek;
> +	unsigned long		chunk_sizek;
> +	unsigned long		gran_sizek;
> +	unsigned int		reg;
> +	unsigned int		address_bits;
> +};

(to put the types and field names into a visually more consistent form)

> +static void __init set_var_mtrr(
> +	unsigned int reg, unsigned long basek, unsigned long sizek,
> +	unsigned char type, unsigned address_bits)

should be:

 static void __init
 set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
              unsigned char type, unsigned address_bits)

> +	u32 base_lo, base_hi, mask_lo, mask_hi;
> +	unsigned address_mask_high;

s/unsigned/unsigned int

hm, will this work on 64-bit? Above-4G is controlled via separate 
mechanisms though so i guess it does.

> +	address_mask_high = ((1u << (address_bits - 32u)) - 1u);

use alignment macros instead.

> +		unsigned long sizek;
> +		/* Compute the maximum size I can make a range */
> +		if (range_startk)

put extra newline between variable definition and code.

> +	var_state.range_startk = 0;
> +	var_state.range_sizek = 0;
> +	var_state.reg = 0;
> +	var_state.address_bits = address_bits;
> +	var_state.chunk_sizek = mtrr_chunk_size >> 10;
> +	var_state.gran_sizek = mtrr_gran_size >> 10;

initialization looks nicer with vertical alignment, i.e.:

> +	var_state.range_startk	= 0;
> +	var_state.range_sizek	= 0;
> +	var_state.reg		= 0;
> +	var_state.address_bits	= address_bits;
> +	var_state.chunk_sizek	= mtrr_chunk_size >> 10;
> +	var_state.gran_sizek	= mtrr_gran_size >> 10;

> +	/* Clear out the extra MTRR's */
> +	while (var_state.reg < num_var_ranges)
> +		set_var_mtrr(var_state.reg++, 0, 0, 0, var_state.address_bits);

the ++ is a hard to notice side-effect of the loop. It's cleaner to 
separate it out or to have a for() loop for it.

> +static int __init mtrr_cleanup(unsigned address_bits)
> +{
> +	unsigned long i, base, size, def, dummy;
> +	mtrr_type type;
> +	struct res_range range[RANGE_NUM];
> +	int nr_range;
> +	unsigned long extra_remove_base, extra_remove_size;

try to use a 'christmas tree' ordering of variables, i.e.:

> +	unsigned long extra_remove_base, extra_remove_size;
> +	unsigned long i, base, size, def, dummy;
> +	struct res_range range[RANGE_NUM];
> +	mtrr_type type;
> +	int nr_range;

> +	return 1;
> +
> +}

superfluous newline.

all in one, this is a very useful and nice feature.

	Ingo

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH 1/2] x86: mtrr cleanup for converting continuous to discrete layout v7
  2008-04-29 11:51                 ` Gabriel C
@ 2008-04-29 17:11                   ` Yinghai Lu
  2008-04-29 20:25                     ` Gabriel C
  0 siblings, 1 reply; 89+ messages in thread
From: Yinghai Lu @ 2008-04-29 17:11 UTC (permalink / raw)
  To: Gabriel C
  Cc: Andrew Morton, Ingo Molnar, H. Peter Anvin, Thomas Gleixner,
	Mika Fischer, linux-kernel@vger.kernel.org

On Tue, Apr 29, 2008 at 4:51 AM, Gabriel C <nix.or.die@googlemail.com> wrote:
>
> Yinghai Lu wrote:
>  > On Tue, Apr 29, 2008 at 3:30 AM, Yinghai Lu <yhlu.kernel@gmail.com> wrote:
>  >> On Tue, Apr 29, 2008 at 2:47 AM, Gabriel C <nix.or.die@googlemail.com> wrote:
>  >>  > Yinghai Lu wrote:
>  >>  >  > some BIOS like to use continus MTRR layout, and may X driver can not add
>  >>  >  > WB entries for graphical cards when 4g or more RAM installed.
>  >>  >  >
>  >>  >  > the patch will change MTRR to discrete.
>  >>  >  >
>  >>  >  > mtrr_chunk_size= could be used to have smaller continuous block to hold holes.
>  >>  >  > default is 256m, could be set according to size of graphics card memory.
>  >>  >  >
>  >>  >  > v2: fix -1 for UC checking
>  >>  >  > v3: default to disable, and need use enable_mtrr_cleanup to enable this feature
>  >>  >  >     skip the var state change warning.
>  >>  >  >     remove next_basek in range_to_mtrr()
>  >>  >  > v4: correct warning mask.
>  >>  >  > v5: CONFIG_MTRR_SANITIZER
>  >>  >  > v6: fix 1g, 2g, 512 aligment with extra hole
>  >>  >  > v7: gran_sizek to prevent running out of MTRRs.
>  >>  >  >
>  >>  >
>  >>  >  With this version ( and patch http://lkml.org/lkml/2008/4/29/97 ) applyed on latest linus git tree
>  >>  >  the box OOPS'es early.
>  >>  >
>  >>  >  Sorry I don't have time right now to write down the part of the OOPS I can see on monitor , I can try to find
>  >>  >  some time later.
>  >>  >
>  >>  >  In any way OOPS'es on __free_one_page+0x191/0x21e
>  >>
>  >>  thanks. found one problem with hole_basek ...
>  >>
>  >>  will send you v8, and hope it will be last version.
>  >
>  > please try v8, it should get rid of the 8m entry. it need patch
>  > http://lkml.org/lkml/2008/4/29/97 too.
>
>  Box does boot with v8 but now I get that warning you fixed in v2 again =):
>
>  ....
>  [    0.000000] Linux version 2.6.25-06058-ga01e035-dirty (crazy@thor) (gcc version 4.3.0 (Frugalware Linux) ) #805 SMP PREEMPT Tue Apr 29 13:04:49 CEST 2008
>  [    0.000000] Command line: root=/dev/sdb1 ro debug vga=0x317
>  [    0.000000] BIOS-provided physical RAM map:
>  [    0.000000]  BIOS-e820: 0000000000000000 - 000000000009cc00 (usable)
>  [    0.000000]  BIOS-e820: 000000000009cc00 - 00000000000a0000 (reserved)
>  [    0.000000]  BIOS-e820: 00000000000e4000 - 0000000000100000 (reserved)
>  [    0.000000]  BIOS-e820: 0000000000100000 - 00000000cf550000 (usable)
>  [    0.000000]  BIOS-e820: 00000000cf550000 - 00000000cf55e000 (ACPI data)
>  [    0.000000]  BIOS-e820: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
>  [    0.000000]  BIOS-e820: 00000000cf5e0000 - 00000000cf600000 (reserved)
>  [    0.000000]  BIOS-e820: 00000000fee00000 - 00000000fee01000 (reserved)
>  [    0.000000]  BIOS-e820: 00000000ffc00000 - 0000000100000000 (reserved)
>  [    0.000000]  BIOS-e820: 0000000100000000 - 000000012c000000 (usable)
>  [    0.000000] Entering add_active_range(0, 0, 156) 0 entries of 256 used
>  [    0.000000] Entering add_active_range(0, 256, 849232) 1 entries of 256 used
>  [    0.000000] Entering add_active_range(0, 1048576, 1228800) 2 entries of 256 used
>  [    0.000000] max_pfn_mapped = 1228800
>  [    0.000000] x86 PAT enabled: cpu 0, old 0x7040600070406, new 0x7010600070106
>  [    0.000000] After WB checking
>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 000000000012c000
>  [    0.000000] After UC checking
>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>  [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>  [    0.000000] After sorting
>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>  [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>  [    0.000000] range0: 0000000000000000 - 00000000c0000000
>  [    0.000000] Setting variable MTRR 0, base: 0MB, range: 2048MB, type WB
>  [    0.000000] Setting variable MTRR 1, base: 2048MB, range: 1024MB, type WB
>  [    0.000000] range: 00000000c0000000 - 00000000d0000000
>  [    0.000000] Setting variable MTRR 2, base: 3072MB, range: 256MB, type WB
>  [    0.000000] hole: 00000000cc000000 - 00000000d0000000
>  [    0.000000] Setting variable MTRR 3, base: 3264MB, range: 64MB, type UC
>  [    0.000000] rangeX: 00000000d0000000 - 00000000d0000000
>  [    0.000000] range0: 0000000100000000 - 0000000120000000
>  [    0.000000] Setting variable MTRR 4, base: 4096MB, range: 512MB, type WB
>  [    0.000000] range: 0000000120000000 - 0000000130000000
>  [    0.000000] Setting variable MTRR 5, base: 4608MB, range: 256MB, type WB
>  [    0.000000] hole: 000000012c000000 - 0000000130000000
>  [    0.000000] Setting variable MTRR 6, base: 4800MB, range: 64MB, type UC
>  [    0.000000] DONE variable MTRRs
>  [    0.000000] x86 PAT enabled: cpu 0, old 0x7010600070106, new 0x7010600070106
>  [    0.000000] After WB checking
>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000d0000
>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000130000
>  [    0.000000] After UC checking
>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cc000
>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>  [    0.000000] After sorting
>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cc000
>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>  [    0.000000] WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing 757MB of RAM.

so
>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>  [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
===>
>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cc000
>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000

please try
mtrr_chunk_size=512m mtrr_gran_size=128m
or
mtrr_chunk_size=256m mtrr_gran_size=128m

YH

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH 2/2] x86: fix trimming e820 with MTRR holes.
  2008-04-29 11:35             ` Ingo Molnar
@ 2008-04-29 17:18               ` Yinghai Lu
  2008-04-29 17:20                 ` Yinghai Lu
  0 siblings, 1 reply; 89+ messages in thread
From: Yinghai Lu @ 2008-04-29 17:18 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Andrew Morton, H. Peter Anvin, Thomas Gleixner, Gabriel C,
	Mika Fischer, linux-kernel@vger.kernel.org

On Tue, Apr 29, 2008 at 4:35 AM, Ingo Molnar <mingo@elte.hu> wrote:
>
>  * Yinghai Lu <yhlu.kernel.send@gmail.com> wrote:
>
>  > @@ -760,7 +762,10 @@ void __init update_memory_range(u64 star
>  >                       continue;
>  >               add_memory_region(final_start, final_end - final_start,
>  >                                        new_type);
>  > +             real_updated_size += find_end - final_start;
>
>  s/find_end/final_end
>
>  i hope it boots ;-)

thanks

that report trimmed size. so it still boot. but report wrong trimmed
size. it will upset Gabriel.

it passed compiling, ..., that's funny.

YH

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH 2/2] x86: fix trimming e820 with MTRR holes.
  2008-04-29 17:18               ` Yinghai Lu
@ 2008-04-29 17:20                 ` Yinghai Lu
  0 siblings, 0 replies; 89+ messages in thread
From: Yinghai Lu @ 2008-04-29 17:20 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Andrew Morton, H. Peter Anvin, Thomas Gleixner, Gabriel C,
	Mika Fischer, linux-kernel@vger.kernel.org

On Tue, Apr 29, 2008 at 10:18 AM, Yinghai Lu <yhlu.kernel@gmail.com> wrote:
>
> On Tue, Apr 29, 2008 at 4:35 AM, Ingo Molnar <mingo@elte.hu> wrote:
>  >
>  >  * Yinghai Lu <yhlu.kernel.send@gmail.com> wrote:
>  >
>  >  > @@ -760,7 +762,10 @@ void __init update_memory_range(u64 star
>  >  >                       continue;
>  >  >               add_memory_region(final_start, final_end - final_start,
>  >  >                                        new_type);
>  >  > +             real_updated_size += find_end - final_start;
>  >
>  >  s/find_end/final_end
>  >
>  >  i hope it boots ;-)
>
>  thanks
>

that is in 32bit...

will send you new one.

YH

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH 1/2] x86: mtrr cleanup for converting continuous to discrete layout v8
  2008-04-29 13:07             ` Ingo Molnar
@ 2008-04-29 17:25               ` Yinghai Lu
  0 siblings, 0 replies; 89+ messages in thread
From: Yinghai Lu @ 2008-04-29 17:25 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Andrew Morton, H. Peter Anvin, Thomas Gleixner, Gabriel C,
	Mika Fischer, linux-kernel@vger.kernel.org

On Tue, Apr 29, 2008 at 6:07 AM, Ingo Molnar <mingo@elte.hu> wrote:
>
>  a few minor cleanliness observations:
>
>
>  > +#ifdef CONFIG_MTRR_SANITIZER
>  > +
>  > +#ifdef CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT
>  > +static int enable_mtrr_cleanup __initdata = 1;
>  > +#else
>  > +static int enable_mtrr_cleanup __initdata;
>  > +#endif
>  > +
>  > +#else
>  > +
>  > +static int enable_mtrr_cleanup __initdata = -1;
>  > +
>  > +#endif
>
>  this should be a single:
>
>   #ifdef CONFIG_MTRR_SANITIZER
>   static int mtrr_cleanup_enabled = CONFIG_MTRR_SANITIZER_DEFAULT;
>   #endif
>
>  block.
>
>  > +#define RANGE_NUM 256
>
>  small explaination (comment) about what the limit means.
>
>
>  > +static int __init add_range(struct res_range *range, int nr_range, unsigned long start,
>  > +                           unsigned long end, int merge)
>
>  looks cleaner this way:
>
>
>   static int __init
>   add_range(struct res_range *range, int nr_range, unsigned long start,
>             unsigned long end, int merge)
>
>  > +{
>  > +     int i;
>  > +
>  > +     if (!merge)
>  > +             goto addit;
>  > +
>  > +     /* try to merge it with old one */
>  > +     for (i = 0; i < nr_range; i++) {
>  > +             unsigned long final_start, final_end;
>  > +             unsigned long common_start, common_end;
>  > +
>  > +             if (!range[i].end)
>  > +                     continue;
>  > +
>  > +             common_start = max(range[i].start, start);
>  > +             common_end = min(range[i].end, end);
>  > +             if (common_start > common_end + 1)
>  > +                     continue;
>  > +
>  > +             final_start = min(range[i].start, start);
>  > +             final_end = max(range[i].end, end);
>  > +
>  > +             range[i].start = final_start;
>  > +             range[i].end =  final_end;
>  > +             return nr_range;
>  > +     }
>  > +
>  > +addit:
>
>  perhaps factor out the loop into a separate function and avoid the goto.
>
>
>  > +static void __init subtract_range(struct res_range *range, unsigned long start,
>  > +                             unsigned long end)
>
>  should be:
>
>
>   static void __init
>   subtract_range(struct res_range *range, unsigned long start,
>                 unsigned long end)
>
>  > +     int i;
>  > +     int j;
>
>  can be:
>
>         int i, j;
>
>  > +             }
>  > +
>  > +
>
>  stale newline.
>
>
>  > +             if (start > range[j].start && end >= range[j].end && range[j].end > start - 1) {
>
>  should be some sort of more readable in_range() check?
>
>
>  > +                     range[j].end = start - 1;
>  > +                     continue;
>  > +             }
>  > +
>  > +             if (start > range[j].start && end < range[j].end) {
>  > +                     /* find the new spare */
>  > +                     for (i = 0; i < RANGE_NUM; i++) {
>  > +                             if (range[i].end == 0)
>  > +                                     break;
>  > +                     }
>  > +                     if (i < RANGE_NUM) {
>  > +                             range[i].end = range[j].end;
>  > +                             range[i].start = end + 1;
>  > +                     } else {
>  > +                             printk(KERN_ERR "run of slot in ranges\n");
>  > +                     }
>  > +                     range[j].end = start - 1;
>  > +                     continue;
>  > +             }
>  > +     }
>  > +}
>
>
> > +struct var_mtrr_state {
>  > +     unsigned long range_startk, range_sizek;
>  > +     unsigned long chunk_sizek;
>  > +     unsigned long gran_sizek;
>  > +     unsigned int reg;
>  > +     unsigned address_bits;
>  > +};
>
>  s/unsigned address_bits/unsigned int address_bits/
>
>  also move range_sizek on a separate line.
>
>  plus we tend to align structures this way:
>
>  > +struct var_mtrr_state {
>  > +     unsigned long           range_startk;
>  > +     unsigned long           range_sizek;
>
> > +     unsigned long           chunk_sizek;
>  > +     unsigned long           gran_sizek;
>  > +     unsigned int            reg;
>  > +     unsigned int            address_bits;
>  > +};
>
>  (to put the types and field names into a visually more consistent form)
>
>
>  > +static void __init set_var_mtrr(
>  > +     unsigned int reg, unsigned long basek, unsigned long sizek,
>  > +     unsigned char type, unsigned address_bits)
>
>  should be:
>
>
>   static void __init
>   set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
>               unsigned char type, unsigned address_bits)
>
>  > +     u32 base_lo, base_hi, mask_lo, mask_hi;
>  > +     unsigned address_mask_high;
>
>  s/unsigned/unsigned int
>
>  hm, will this work on 64-bit? Above-4G is controlled via separate
>  mechanisms though so i guess it does.
>
>
>  > +     address_mask_high = ((1u << (address_bits - 32u)) - 1u);
>
>  use alignment macros instead.
>
>
>  > +             unsigned long sizek;
>  > +             /* Compute the maximum size I can make a range */
>  > +             if (range_startk)
>
>  put extra newline between variable definition and code.
>
>
>  > +     var_state.range_startk = 0;
>  > +     var_state.range_sizek = 0;
>  > +     var_state.reg = 0;
>  > +     var_state.address_bits = address_bits;
>  > +     var_state.chunk_sizek = mtrr_chunk_size >> 10;
>  > +     var_state.gran_sizek = mtrr_gran_size >> 10;
>
>  initialization looks nicer with vertical alignment, i.e.:
>
>
>  > +     var_state.range_startk  = 0;
>  > +     var_state.range_sizek   = 0;
>  > +     var_state.reg           = 0;
>  > +     var_state.address_bits  = address_bits;
>  > +     var_state.chunk_sizek   = mtrr_chunk_size >> 10;
>  > +     var_state.gran_sizek    = mtrr_gran_size >> 10;
>
>
> > +     /* Clear out the extra MTRR's */
>  > +     while (var_state.reg < num_var_ranges)
>  > +             set_var_mtrr(var_state.reg++, 0, 0, 0, var_state.address_bits);
>
>  the ++ is a hard to notice side-effect of the loop. It's cleaner to
>  separate it out or to have a for() loop for it.
>
>
>  > +static int __init mtrr_cleanup(unsigned address_bits)
>  > +{
>  > +     unsigned long i, base, size, def, dummy;
>  > +     mtrr_type type;
>  > +     struct res_range range[RANGE_NUM];
>  > +     int nr_range;
>  > +     unsigned long extra_remove_base, extra_remove_size;
>
>  try to use a 'christmas tree' ordering of variables, i.e.:
>
>
>  > +     unsigned long extra_remove_base, extra_remove_size;
>
> > +     unsigned long i, base, size, def, dummy;
>
> > +     struct res_range range[RANGE_NUM];
>  > +     mtrr_type type;
>  > +     int nr_range;
>
>  > +     return 1;
>  > +
>  > +}
>
>  superfluous newline.
>
>  all in one, this is a very useful and nice feature.

thanks. will submit a new one with fix.

YH

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH] x86: mtrr cleanup for converting continuous to discrete layout v5
  2008-04-28 22:05       ` [PATCH] x86: mtrr cleanup for converting continuous to discrete layout v5 Yinghai Lu
                           ` (3 preceding siblings ...)
  2008-04-29  9:00         ` [PATCH 1/2] x86: mtrr cleanup for converting continuous to discrete layout v7 Yinghai Lu
@ 2008-04-29 19:00         ` Eric W. Biederman
  2008-04-29 20:04           ` Yinghai Lu
  4 siblings, 1 reply; 89+ messages in thread
From: Eric W. Biederman @ 2008-04-29 19:00 UTC (permalink / raw)
  To: yhlu.kernel
  Cc: Andrew Morton, Ingo Molnar, H. Peter Anvin, Thomas Gleixner,
	Gabriel C, linux-kernel@vger.kernel.org, Mika Fischer

Yinghai Lu <yhlu.kernel.send@gmail.com> writes:

> some BIOS like to use continus MTRR layout, and may X driver can not add
> WB entries for graphical cards when 4g or more RAM installed.
>
> the patch will change MTRR to discrete.
>
> mtrr_chunk_size= could be used to have smaller continuous block to hold holes.
> default is 256m, could be set according to size of graphics card memory.
>
> v2: fix -1 for UC checking
> v3: default to disable, and need use enable_mtrr_cleanup to enable this feature
>     skip the var state change warning.
>     remove next_basek in range_to_mtrr()
> v4: correct warning mask.
> v5: CONFIG_MTRR_SANITIZER
>
> Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

Skimming through the code it looks fairly sane.

I do think it would be good to split this patch into two pieces.
1) The mtrr rewriter/sanitizer/normalize.
   All it does it should do is rewrite the MTRRs with a
   semantically equivalent value.  This code should always be
   safe and work on any system with MTRRs.

   This works around otherwise sane bios's that simply prefer
   to have contiguous MTRRs.

   I don't see a reason why this code should be configurable.

   This approach avoids earlier concerns because it starts
   with the existing MTRR layout and not with the e820 map.

2) The mtrr_chunk_size code that rounds things off and allows
   us to use discrete MTRRs by reducing some RAM to uncacheable.
   Because it makes things uncacheable it has potentially bad
   side effects on performance and thus potentially bad side
   effects on functionality.  For areas like the SMM and ACPI
   especially as they usually occur at the end of RAM just
   below 4G.

   The chunk size code should be configurable and default to off
   because it has potential side effects.  A KConfig option may
   also be appropriate.  It asks an interesting trade off question
   do you want your BIOS to be fast or X.


Eric

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH] x86: mtrr cleanup for converting continuous to discrete layout v5
  2008-04-29 19:00         ` [PATCH] x86: mtrr cleanup for converting continuous to discrete layout v5 Eric W. Biederman
@ 2008-04-29 20:04           ` Yinghai Lu
  2008-04-29 20:29             ` Eric W. Biederman
  0 siblings, 1 reply; 89+ messages in thread
From: Yinghai Lu @ 2008-04-29 20:04 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Andrew Morton, Ingo Molnar, H. Peter Anvin, Thomas Gleixner,
	Gabriel C, linux-kernel@vger.kernel.org, Mika Fischer

On Tue, Apr 29, 2008 at 12:00 PM, Eric W. Biederman
<ebiederm@xmission.com> wrote:
> Yinghai Lu <yhlu.kernel.send@gmail.com> writes:
>
>  > some BIOS like to use continus MTRR layout, and may X driver can not add
>  > WB entries for graphical cards when 4g or more RAM installed.
>  >
>  > the patch will change MTRR to discrete.
>  >
>  > mtrr_chunk_size= could be used to have smaller continuous block to hold holes.
>  > default is 256m, could be set according to size of graphics card memory.
>  >
>  > v2: fix -1 for UC checking
>  > v3: default to disable, and need use enable_mtrr_cleanup to enable this feature
>  >     skip the var state change warning.
>  >     remove next_basek in range_to_mtrr()
>  > v4: correct warning mask.
>  > v5: CONFIG_MTRR_SANITIZER
>  >
>  > Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
>
>  Skimming through the code it looks fairly sane.
>
>  I do think it would be good to split this patch into two pieces.
>  1) The mtrr rewriter/sanitizer/normalize.
>    All it does it should do is rewrite the MTRRs with a
>    semantically equivalent value.  This code should always be
>    safe and work on any system with MTRRs.
>
>    This works around otherwise sane bios's that simply prefer
>    to have contiguous MTRRs.
>
>    I don't see a reason why this code should be configurable.
>
>    This approach avoids earlier concerns because it starts
>    with the existing MTRR layout and not with the e820 map.
>
>  2) The mtrr_chunk_size code that rounds things off and allows
>    us to use discrete MTRRs by reducing some RAM to uncacheable.
>    Because it makes things uncacheable it has potentially bad
>    side effects on performance and thus potentially bad side
>    effects on functionality.  For areas like the SMM and ACPI
>    especially as they usually occur at the end of RAM just
>    below 4G.
>
>    The chunk size code should be configurable and default to off
>    because it has potential side effects.  A KConfig option may
>    also be appropriate.  It asks an interesting trade off question
>    do you want your BIOS to be fast or X.

(less memory + fast X) or  (more 8M RAM + slow...)

YH

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH 1/2] x86: mtrr cleanup for converting continuous to discrete layout v7
  2008-04-29 17:11                   ` Yinghai Lu
@ 2008-04-29 20:25                     ` Gabriel C
  2008-04-29 21:49                       ` Yinghai Lu
  0 siblings, 1 reply; 89+ messages in thread
From: Gabriel C @ 2008-04-29 20:25 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Andrew Morton, Ingo Molnar, H. Peter Anvin, Thomas Gleixner,
	Mika Fischer, linux-kernel@vger.kernel.org

Yinghai Lu wrote:
> On Tue, Apr 29, 2008 at 4:51 AM, Gabriel C <nix.or.die@googlemail.com> wrote:
>> Yinghai Lu wrote:
>>  > On Tue, Apr 29, 2008 at 3:30 AM, Yinghai Lu <yhlu.kernel@gmail.com> wrote:
>>  >> On Tue, Apr 29, 2008 at 2:47 AM, Gabriel C <nix.or.die@googlemail.com> wrote:
>>  >>  > Yinghai Lu wrote:
>>  >>  >  > some BIOS like to use continus MTRR layout, and may X driver can not add
>>  >>  >  > WB entries for graphical cards when 4g or more RAM installed.
>>  >>  >  >
>>  >>  >  > the patch will change MTRR to discrete.
>>  >>  >  >
>>  >>  >  > mtrr_chunk_size= could be used to have smaller continuous block to hold holes.
>>  >>  >  > default is 256m, could be set according to size of graphics card memory.
>>  >>  >  >
>>  >>  >  > v2: fix -1 for UC checking
>>  >>  >  > v3: default to disable, and need use enable_mtrr_cleanup to enable this feature
>>  >>  >  >     skip the var state change warning.
>>  >>  >  >     remove next_basek in range_to_mtrr()
>>  >>  >  > v4: correct warning mask.
>>  >>  >  > v5: CONFIG_MTRR_SANITIZER
>>  >>  >  > v6: fix 1g, 2g, 512 aligment with extra hole
>>  >>  >  > v7: gran_sizek to prevent running out of MTRRs.
>>  >>  >  >
>>  >>  >
>>  >>  >  With this version ( and patch http://lkml.org/lkml/2008/4/29/97 ) applyed on latest linus git tree
>>  >>  >  the box OOPS'es early.
>>  >>  >
>>  >>  >  Sorry I don't have time right now to write down the part of the OOPS I can see on monitor , I can try to find
>>  >>  >  some time later.
>>  >>  >
>>  >>  >  In any way OOPS'es on __free_one_page+0x191/0x21e
>>  >>
>>  >>  thanks. found one problem with hole_basek ...
>>  >>
>>  >>  will send you v8, and hope it will be last version.
>>  >
>>  > please try v8, it should get rid of the 8m entry. it need patch
>>  > http://lkml.org/lkml/2008/4/29/97 too.
>>
>>  Box does boot with v8 but now I get that warning you fixed in v2 again =):
>>
>>  ....
>>  [    0.000000] Linux version 2.6.25-06058-ga01e035-dirty (crazy@thor) (gcc version 4.3.0 (Frugalware Linux) ) #805 SMP PREEMPT Tue Apr 29 13:04:49 CEST 2008
>>  [    0.000000] Command line: root=/dev/sdb1 ro debug vga=0x317
>>  [    0.000000] BIOS-provided physical RAM map:
>>  [    0.000000]  BIOS-e820: 0000000000000000 - 000000000009cc00 (usable)
>>  [    0.000000]  BIOS-e820: 000000000009cc00 - 00000000000a0000 (reserved)
>>  [    0.000000]  BIOS-e820: 00000000000e4000 - 0000000000100000 (reserved)
>>  [    0.000000]  BIOS-e820: 0000000000100000 - 00000000cf550000 (usable)
>>  [    0.000000]  BIOS-e820: 00000000cf550000 - 00000000cf55e000 (ACPI data)
>>  [    0.000000]  BIOS-e820: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
>>  [    0.000000]  BIOS-e820: 00000000cf5e0000 - 00000000cf600000 (reserved)
>>  [    0.000000]  BIOS-e820: 00000000fee00000 - 00000000fee01000 (reserved)
>>  [    0.000000]  BIOS-e820: 00000000ffc00000 - 0000000100000000 (reserved)
>>  [    0.000000]  BIOS-e820: 0000000100000000 - 000000012c000000 (usable)
>>  [    0.000000] Entering add_active_range(0, 0, 156) 0 entries of 256 used
>>  [    0.000000] Entering add_active_range(0, 256, 849232) 1 entries of 256 used
>>  [    0.000000] Entering add_active_range(0, 1048576, 1228800) 2 entries of 256 used
>>  [    0.000000] max_pfn_mapped = 1228800
>>  [    0.000000] x86 PAT enabled: cpu 0, old 0x7040600070406, new 0x7010600070106
>>  [    0.000000] After WB checking
>>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 000000000012c000
>>  [    0.000000] After UC checking
>>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>>  [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>>  [    0.000000] After sorting
>>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>>  [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>>  [    0.000000] range0: 0000000000000000 - 00000000c0000000
>>  [    0.000000] Setting variable MTRR 0, base: 0MB, range: 2048MB, type WB
>>  [    0.000000] Setting variable MTRR 1, base: 2048MB, range: 1024MB, type WB
>>  [    0.000000] range: 00000000c0000000 - 00000000d0000000
>>  [    0.000000] Setting variable MTRR 2, base: 3072MB, range: 256MB, type WB
>>  [    0.000000] hole: 00000000cc000000 - 00000000d0000000
>>  [    0.000000] Setting variable MTRR 3, base: 3264MB, range: 64MB, type UC
>>  [    0.000000] rangeX: 00000000d0000000 - 00000000d0000000
>>  [    0.000000] range0: 0000000100000000 - 0000000120000000
>>  [    0.000000] Setting variable MTRR 4, base: 4096MB, range: 512MB, type WB
>>  [    0.000000] range: 0000000120000000 - 0000000130000000
>>  [    0.000000] Setting variable MTRR 5, base: 4608MB, range: 256MB, type WB
>>  [    0.000000] hole: 000000012c000000 - 0000000130000000
>>  [    0.000000] Setting variable MTRR 6, base: 4800MB, range: 64MB, type UC
>>  [    0.000000] DONE variable MTRRs
>>  [    0.000000] x86 PAT enabled: cpu 0, old 0x7010600070106, new 0x7010600070106
>>  [    0.000000] After WB checking
>>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000d0000
>>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000130000
>>  [    0.000000] After UC checking
>>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cc000
>>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>>  [    0.000000] After sorting
>>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cc000
>>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>>  [    0.000000] WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing 757MB of RAM.
> 
> so
>>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>>  [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
> ===>
>>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cc000
>>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
> 
> please try

Now I have 2 warnings 

> mtrr_chunk_size=512m mtrr_gran_size=128m

...

[    0.000000] Command line: root=/dev/sdb1 ro debug vga=0x317 mtrr_chunk_size=512m mtrr_gran_size=128m 3
[    0.000000] BIOS-provided physical RAM map:
[    0.000000]  BIOS-e820: 0000000000000000 - 000000000009cc00 (usable)
[    0.000000]  BIOS-e820: 000000000009cc00 - 00000000000a0000 (reserved)
[    0.000000]  BIOS-e820: 00000000000e4000 - 0000000000100000 (reserved)
[    0.000000]  BIOS-e820: 0000000000100000 - 00000000cf550000 (usable)
[    0.000000]  BIOS-e820: 00000000cf550000 - 00000000cf55e000 (ACPI data)
[    0.000000]  BIOS-e820: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
[    0.000000]  BIOS-e820: 00000000cf5e0000 - 00000000cf600000 (reserved)
[    0.000000]  BIOS-e820: 00000000fee00000 - 00000000fee01000 (reserved)
[    0.000000]  BIOS-e820: 00000000ffc00000 - 0000000100000000 (reserved)
[    0.000000]  BIOS-e820: 0000000100000000 - 000000012c000000 (usable)
[    0.000000] Entering add_active_range(0, 0, 156) 0 entries of 256 used
[    0.000000] Entering add_active_range(0, 256, 849232) 1 entries of 256 used
[    0.000000] Entering add_active_range(0, 1048576, 1228800) 2 entries of 256 used
[    0.000000] max_pfn_mapped = 1228800
[    0.000000] x86 PAT enabled: cpu 0, old 0x7040600070406, new 0x7010600070106
[    0.000000] After WB checking
[    0.000000] MTRR MAP PFN: 0000000000000000 - 000000000012c000
[    0.000000] After UC checking
[    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
[    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
[    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
[    0.000000] After sorting
[    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
[    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
[    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
[    0.000000] range0: 0000000000000000 - 00000000c0000000
[    0.000000] Setting variable MTRR 0, base: 0MB, range: 2048MB, type WB
[    0.000000] Setting variable MTRR 1, base: 2048MB, range: 1024MB, type WB
[    0.000000] range: 00000000c0000000 - 00000000c8000000
[    0.000000] Setting variable MTRR 2, base: 3072MB, range: 128MB, type WB
[    0.000000] rangeX: 00000000d0000000 - 00000000d0000000
[    0.000000] range0: 0000000100000000 - 0000000120000000
[    0.000000] Setting variable MTRR 3, base: 4096MB, range: 512MB, type WB
[    0.000000] range: 0000000120000000 - 0000000128000000
[    0.000000] Setting variable MTRR 4, base: 4608MB, range: 128MB, type WB
[    0.000000] DONE variable MTRRs
[    0.000000] x86 PAT enabled: cpu 0, old 0x7010600070106, new 0x7010600070106
[    0.000000] After WB checking
[    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000c8000
[    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000128000
[    0.000000] After UC checking
[    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000c8000
[    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000128000
[    0.000000] After sorting
[    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000c8000
[    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000128000
[    0.000000] WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing 64MB of RAM.
[    0.000000] ------------[ cut here ]------------
[    0.000000] WARNING: at arch/x86/kernel/cpu/mtrr/main.c:1206 mtrr_trim_uncached_memory+0x25b/0x35a()
[    0.000000] Modules linked in:
[    0.000000] Pid: 0, comm: swapper Not tainted 2.6.25-06058-ga01e035-dirty #805
[    0.000000]
[    0.000000] Call Trace:
[    0.000000]  [<ffffffff8022d661>] warn_on_slowpath+0x51/0x78
[    0.000000]  [<ffffffff8067f0dc>] x86_get_mtrr_mem_range+0x241/0x25c
[    0.000000]  [<ffffffff8067d93e>] update_memory_range+0x95/0xb7
[    0.000000]  [<ffffffff8067fa51>] mtrr_trim_uncached_memory+0x25b/0x35a
[    0.000000]  [<ffffffff8067ca43>] setup_arch+0x280/0x4a2
[    0.000000]  [<ffffffff80676aa6>] start_kernel+0x6e/0x308
[    0.000000]  [<ffffffff80676432>] x86_64_start_kernel+0x241/0x24a
[    0.000000]
[    0.000000] ---[ end trace ca143223eefdc828 ]---
[    0.000000] update e820 for mtrr -- end_pfn
[    0.000000] modified physical RAM map:
[    0.000000]  modified: 0000000000000000 - 000000000009cc00 (usable)
[    0.000000]  modified: 000000000009cc00 - 00000000000a0000 (reserved)
[    0.000000]  modified: 00000000000e4000 - 0000000000100000 (reserved)
[    0.000000]  modified: 0000000000100000 - 00000000cf550000 (usable)
[    0.000000]  modified: 00000000cf550000 - 00000000cf55e000 (ACPI data)
[    0.000000]  modified: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
[    0.000000]  modified: 00000000cf5e0000 - 00000000cf600000 (reserved)
[    0.000000]  modified: 00000000fee00000 - 00000000fee01000 (reserved)
[    0.000000]  modified: 00000000ffc00000 - 0000000100000000 (reserved)
[    0.000000]  modified: 0000000100000000 - 0000000128000000 (usable)
[    0.000000]  modified: 0000000128000000 - 000000012c000000 (reserved)
[    0.000000] WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing 757MB of RAM.
[    0.000000] ------------[ cut here ]------------
[    0.000000] WARNING: at arch/x86/kernel/cpu/mtrr/main.c:1227 mtrr_trim_uncached_memory+0x30c/0x35a()
[    0.000000] Modules linked in:
[    0.000000] Pid: 0, comm: swapper Not tainted 2.6.25-06058-ga01e035-dirty #805
[    0.000000]
[    0.000000] Call Trace:
[    0.000000]  [<ffffffff8022d661>] warn_on_slowpath+0x51/0x78
[    0.000000]  [<ffffffff8067f0dc>] x86_get_mtrr_mem_range+0x241/0x25c
[    0.000000]  [<ffffffff8067d93e>] update_memory_range+0x95/0xb7
[    0.000000]  [<ffffffff8067fb02>] mtrr_trim_uncached_memory+0x30c/0x35a
[    0.000000]  [<ffffffff8067ca43>] setup_arch+0x280/0x4a2
[    0.000000]  [<ffffffff80676aa6>] start_kernel+0x6e/0x308
[    0.000000]  [<ffffffff80676432>] x86_64_start_kernel+0x241/0x24a
[    0.000000]
[    0.000000] ---[ end trace ca143223eefdc828 ]---
[    0.000000] update e820 for mtrr -- holes
[    0.000000] modified physical RAM map:
[    0.000000]  modified: 0000000000000000 - 000000000009cc00 (usable)
[    0.000000]  modified: 000000000009cc00 - 00000000000a0000 (reserved)
[    0.000000]  modified: 00000000000e4000 - 0000000000100000 (reserved)
[    0.000000]  modified: 0000000000100000 - 00000000c8000000 (usable)
[    0.000000]  modified: 00000000c8000000 - 00000000cf550000 (reserved)
[    0.000000]  modified: 00000000cf550000 - 00000000cf55e000 (ACPI data)
[    0.000000]  modified: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
[    0.000000]  modified: 00000000cf5e0000 - 00000000cf600000 (reserved)
[    0.000000]  modified: 00000000fee00000 - 00000000fee01000 (reserved)
[    0.000000]  modified: 00000000ffc00000 - 000000012c000000 (reserved)
[    0.000000] Entering add_active_range(0, 0, 156) 3 entries of 256 used
[    0.000000] Entering add_active_range(0, 256, 819200) 3 entries of 256 used
[    0.000000] max_pfn_mapped = 1228800

...

> or
> mtrr_chunk_size=256m mtrr_gran_size=128m

...

[    0.000000] Command line: root=/dev/sdb1 ro debug vga=0x317 mtrr_chunk_size=256m mtrr_gran_size=128m 3
[    0.000000] BIOS-provided physical RAM map:
[    0.000000]  BIOS-e820: 0000000000000000 - 000000000009cc00 (usable)
[    0.000000]  BIOS-e820: 000000000009cc00 - 00000000000a0000 (reserved)
[    0.000000]  BIOS-e820: 00000000000e4000 - 0000000000100000 (reserved)
[    0.000000]  BIOS-e820: 0000000000100000 - 00000000cf550000 (usable)
[    0.000000]  BIOS-e820: 00000000cf550000 - 00000000cf55e000 (ACPI data)
[    0.000000]  BIOS-e820: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
[    0.000000]  BIOS-e820: 00000000cf5e0000 - 00000000cf600000 (reserved)
[    0.000000]  BIOS-e820: 00000000fee00000 - 00000000fee01000 (reserved)
[    0.000000]  BIOS-e820: 00000000ffc00000 - 0000000100000000 (reserved)
[    0.000000]  BIOS-e820: 0000000100000000 - 000000012c000000 (usable)
[    0.000000] Entering add_active_range(0, 0, 156) 0 entries of 256 used
[    0.000000] Entering add_active_range(0, 256, 849232) 1 entries of 256 used
[    0.000000] Entering add_active_range(0, 1048576, 1228800) 2 entries of 256 used
[    0.000000] max_pfn_mapped = 1228800
[    0.000000] x86 PAT enabled: cpu 0, old 0x7040600070406, new 0x7010600070106
[    0.000000] After WB checking
[    0.000000] MTRR MAP PFN: 0000000000000000 - 000000000012c000
[    0.000000] After UC checking
[    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
[    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
[    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
[    0.000000] After sorting
[    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
[    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
[    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
[    0.000000] range0: 0000000000000000 - 00000000c0000000
[    0.000000] Setting variable MTRR 0, base: 0MB, range: 2048MB, type WB
[    0.000000] Setting variable MTRR 1, base: 2048MB, range: 1024MB, type WB
[    0.000000] range: 00000000c0000000 - 00000000c8000000
[    0.000000] Setting variable MTRR 2, base: 3072MB, range: 128MB, type WB
[    0.000000] rangeX: 00000000d0000000 - 00000000d0000000
[    0.000000] range0: 0000000100000000 - 0000000120000000
[    0.000000] Setting variable MTRR 3, base: 4096MB, range: 512MB, type WB
[    0.000000] range: 0000000120000000 - 0000000128000000
[    0.000000] Setting variable MTRR 4, base: 4608MB, range: 128MB, type WB
[    0.000000] DONE variable MTRRs
[    0.000000] x86 PAT enabled: cpu 0, old 0x7010600070106, new 0x7010600070106
[    0.000000] After WB checking
[    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000c8000
[    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000128000
[    0.000000] After UC checking
[    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000c8000
[    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000128000
[    0.000000] After sorting
[    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000c8000
[    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000128000
[    0.000000] WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing 64MB of RAM.
[    0.000000] ------------[ cut here ]------------
[    0.000000] WARNING: at arch/x86/kernel/cpu/mtrr/main.c:1206 mtrr_trim_uncached_memory+0x25b/0x35a()
[    0.000000] Modules linked in:
[    0.000000] Pid: 0, comm: swapper Not tainted 2.6.25-06058-ga01e035-dirty #805
[    0.000000]
[    0.000000] Call Trace:
[    0.000000]  [<ffffffff8022d661>] warn_on_slowpath+0x51/0x78
[    0.000000]  [<ffffffff8067f0dc>] x86_get_mtrr_mem_range+0x241/0x25c
[    0.000000]  [<ffffffff8067d93e>] update_memory_range+0x95/0xb7
[    0.000000]  [<ffffffff8067fa51>] mtrr_trim_uncached_memory+0x25b/0x35a
[    0.000000]  [<ffffffff8067ca43>] setup_arch+0x280/0x4a2
[    0.000000]  [<ffffffff80676aa6>] start_kernel+0x6e/0x308
[    0.000000]  [<ffffffff80676432>] x86_64_start_kernel+0x241/0x24a
[    0.000000]
[    0.000000] ---[ end trace ca143223eefdc828 ]---
[    0.000000] update e820 for mtrr -- end_pfn
[    0.000000] modified physical RAM map:
[    0.000000]  modified: 0000000000000000 - 000000000009cc00 (usable)
[    0.000000]  modified: 000000000009cc00 - 00000000000a0000 (reserved)
[    0.000000]  modified: 00000000000e4000 - 0000000000100000 (reserved)
[    0.000000]  modified: 0000000000100000 - 00000000cf550000 (usable)
[    0.000000]  modified: 00000000cf550000 - 00000000cf55e000 (ACPI data)
[    0.000000]  modified: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
[    0.000000]  modified: 00000000cf5e0000 - 00000000cf600000 (reserved)
[    0.000000]  modified: 00000000fee00000 - 00000000fee01000 (reserved)
[    0.000000]  modified: 00000000ffc00000 - 0000000100000000 (reserved)
[    0.000000]  modified: 0000000100000000 - 0000000128000000 (usable)
[    0.000000]  modified: 0000000128000000 - 000000012c000000 (reserved)
[    0.000000] WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing 757MB of RAM.
[    0.000000] ------------[ cut here ]------------
[    0.000000] WARNING: at arch/x86/kernel/cpu/mtrr/main.c:1227 mtrr_trim_uncached_memory+0x30c/0x35a()
[    0.000000] Modules linked in:
[    0.000000] Pid: 0, comm: swapper Not tainted 2.6.25-06058-ga01e035-dirty #805
[    0.000000]
[    0.000000] Call Trace:
[    0.000000]  [<ffffffff8022d661>] warn_on_slowpath+0x51/0x78
[    0.000000]  [<ffffffff8067f0dc>] x86_get_mtrr_mem_range+0x241/0x25c
[    0.000000]  [<ffffffff8067d93e>] update_memory_range+0x95/0xb7
[    0.000000]  [<ffffffff8067fb02>] mtrr_trim_uncached_memory+0x30c/0x35a
[    0.000000]  [<ffffffff8067ca43>] setup_arch+0x280/0x4a2
[    0.000000]  [<ffffffff80676aa6>] start_kernel+0x6e/0x308
[    0.000000]  [<ffffffff80676432>] x86_64_start_kernel+0x241/0x24a
[    0.000000]
[    0.000000] ---[ end trace ca143223eefdc828 ]---
[    0.000000] update e820 for mtrr -- holes
[    0.000000] modified physical RAM map:
[    0.000000]  modified: 0000000000000000 - 000000000009cc00 (usable)
[    0.000000]  modified: 000000000009cc00 - 00000000000a0000 (reserved)
[    0.000000]  modified: 00000000000e4000 - 0000000000100000 (reserved)
[    0.000000]  modified: 0000000000100000 - 00000000c8000000 (usable)
[    0.000000]  modified: 00000000c8000000 - 00000000cf550000 (reserved)
[    0.000000]  modified: 00000000cf550000 - 00000000cf55e000 (ACPI data)
[    0.000000]  modified: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
[    0.000000]  modified: 00000000cf5e0000 - 00000000cf600000 (reserved)
[    0.000000]  modified: 00000000fee00000 - 00000000fee01000 (reserved)
[    0.000000]  modified: 00000000ffc00000 - 000000012c000000 (reserved)
[    0.000000] Entering add_active_range(0, 0, 156) 3 entries of 256 used
[    0.000000] Entering add_active_range(0, 256, 819200) 3 entries of 256 used
[    0.000000] max_pfn_mapped = 1228800

...

> 
> YH
> 


Gabriel

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH] x86: mtrr cleanup for converting continuous to discrete layout v5
  2008-04-29 20:04           ` Yinghai Lu
@ 2008-04-29 20:29             ` Eric W. Biederman
  2008-04-29 21:57               ` Yinghai Lu
  0 siblings, 1 reply; 89+ messages in thread
From: Eric W. Biederman @ 2008-04-29 20:29 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Andrew Morton, Ingo Molnar, H. Peter Anvin, Thomas Gleixner,
	Gabriel C, linux-kernel@vger.kernel.org, Mika Fischer

"Yinghai Lu" <yhlu.kernel@gmail.com> writes:

> (less memory + fast X) or  (more 8M RAM + slow...)

Yes. That is the basic question.  Not all X drivers need it and
potentially the current kernel drm modules can use the
PAT infrastructure that has been merged.

Further a SMM monitor running 100 times or more slower may cause
problems if SMM mode is entered frequently, slowing down the entire
system not just X.

So if you don't have X or you have a crazy SMM monitor this can
be an issue.

Eric

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH 1/2] x86: mtrr cleanup for converting continuous to discrete layout v8
  2008-04-29 10:52           ` [PATCH 1/2] x86: mtrr cleanup for converting continuous to discrete layout v8 Yinghai Lu
  2008-04-29 13:07             ` Ingo Molnar
@ 2008-04-29 20:46             ` Randy Dunlap
  2008-04-29 21:54               ` Yinghai Lu
  2008-04-30  3:25             ` [PATCH] x86: mtrr cleanup for converting continuous to discrete layout v8 - fix Yinghai Lu
  2 siblings, 1 reply; 89+ messages in thread
From: Randy Dunlap @ 2008-04-29 20:46 UTC (permalink / raw)
  To: yhlu.kernel
  Cc: Yinghai Lu, Andrew Morton, Ingo Molnar, H. Peter Anvin,
	Thomas Gleixner, Gabriel C, Mika Fischer,
	linux-kernel@vger.kernel.org

On Tue, 29 Apr 2008 03:52:33 -0700 Yinghai Lu wrote:

> Index: linux-2.6/Documentation/kernel-parameters.txt
> ===================================================================
> --- linux-2.6.orig/Documentation/kernel-parameters.txt
> +++ linux-2.6/Documentation/kernel-parameters.txt
> @@ -595,6 +595,20 @@ and is between 256 and 4096 characters. 
>  			See drivers/char/README.epca and
>  			Documentation/digiepca.txt.
>  
> +	disable_mtrr_cleanup [X86]
> +	enable_mtrr_cleanup [X86]
> +			The kernel tries to adjust MTRR layout from continuous
> +			to discrete, to make X server driver able to add WB
> +			entry later. This parameter enables/disables that.
> +
> +	mtrr_chunk_size=nn[KMG] [X86]
> +			used for mtrr cleanup. It is largest continous chunk
> +			that could hold holes aka. UC entries.
> +
> +	mtrr_gran_size=nn[KMG] [X86]
> +			used for mtrr cleanup. It is granity of mtrr block.

s/granity/granularity/
I think that's what you mean/want.

How does someone know that size/granity/whatever of an mtrr block?


> +			Big value could prevent small alignment use up MTRRs.
> +
>  	disable_mtrr_trim [X86, Intel and AMD only]
>  			By default the kernel will trim any uncacheable
>  			memory out of your available memory pool based on

---
~Randy

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH 1/2] x86: mtrr cleanup for converting continuous to discrete layout v7
  2008-04-29 20:25                     ` Gabriel C
@ 2008-04-29 21:49                       ` Yinghai Lu
  2008-04-29 23:56                         ` Gabriel C
  0 siblings, 1 reply; 89+ messages in thread
From: Yinghai Lu @ 2008-04-29 21:49 UTC (permalink / raw)
  To: Gabriel C
  Cc: Andrew Morton, Ingo Molnar, H. Peter Anvin, Thomas Gleixner,
	Mika Fischer, linux-kernel@vger.kernel.org

[-- Attachment #1: Type: text/plain, Size: 13885 bytes --]

On Tue, Apr 29, 2008 at 1:25 PM, Gabriel C <nix.or.die@googlemail.com> wrote:
>
> Yinghai Lu wrote:
>  > On Tue, Apr 29, 2008 at 4:51 AM, Gabriel C <nix.or.die@googlemail.com> wrote:
>  >> Yinghai Lu wrote:
>  >>  > On Tue, Apr 29, 2008 at 3:30 AM, Yinghai Lu <yhlu.kernel@gmail.com> wrote:
>  >>  >> On Tue, Apr 29, 2008 at 2:47 AM, Gabriel C <nix.or.die@googlemail.com> wrote:
>  >>  >>  > Yinghai Lu wrote:
>  >>  >>  >  > some BIOS like to use continus MTRR layout, and may X driver can not add
>  >>  >>  >  > WB entries for graphical cards when 4g or more RAM installed.
>  >>  >>  >  >
>  >>  >>  >  > the patch will change MTRR to discrete.
>  >>  >>  >  >
>  >>  >>  >  > mtrr_chunk_size= could be used to have smaller continuous block to hold holes.
>  >>  >>  >  > default is 256m, could be set according to size of graphics card memory.
>  >>  >>  >  >
>  >>  >>  >  > v2: fix -1 for UC checking
>  >>  >>  >  > v3: default to disable, and need use enable_mtrr_cleanup to enable this feature
>  >>  >>  >  >     skip the var state change warning.
>  >>  >>  >  >     remove next_basek in range_to_mtrr()
>  >>  >>  >  > v4: correct warning mask.
>  >>  >>  >  > v5: CONFIG_MTRR_SANITIZER
>  >>  >>  >  > v6: fix 1g, 2g, 512 aligment with extra hole
>  >>  >>  >  > v7: gran_sizek to prevent running out of MTRRs.
>  >>  >>  >  >
>  >>  >>  >
>  >>  >>  >  With this version ( and patch http://lkml.org/lkml/2008/4/29/97 ) applyed on latest linus git tree
>  >>  >>  >  the box OOPS'es early.
>  >>  >>  >
>  >>  >>  >  Sorry I don't have time right now to write down the part of the OOPS I can see on monitor , I can try to find
>  >>  >>  >  some time later.
>  >>  >>  >
>  >>  >>  >  In any way OOPS'es on __free_one_page+0x191/0x21e
>  >>  >>
>  >>  >>  thanks. found one problem with hole_basek ...
>  >>  >>
>  >>  >>  will send you v8, and hope it will be last version.
>  >>  >
>  >>  > please try v8, it should get rid of the 8m entry. it need patch
>  >>  > http://lkml.org/lkml/2008/4/29/97 too.
>  >>
>  >>  Box does boot with v8 but now I get that warning you fixed in v2 again =):
>  >>
>  >>  ....
>  >>  [    0.000000] Linux version 2.6.25-06058-ga01e035-dirty (crazy@thor) (gcc version 4.3.0 (Frugalware Linux) ) #805 SMP PREEMPT Tue Apr 29 13:04:49 CEST 2008
>  >>  [    0.000000] Command line: root=/dev/sdb1 ro debug vga=0x317
>  >>  [    0.000000] BIOS-provided physical RAM map:
>  >>  [    0.000000]  BIOS-e820: 0000000000000000 - 000000000009cc00 (usable)
>  >>  [    0.000000]  BIOS-e820: 000000000009cc00 - 00000000000a0000 (reserved)
>  >>  [    0.000000]  BIOS-e820: 00000000000e4000 - 0000000000100000 (reserved)
>  >>  [    0.000000]  BIOS-e820: 0000000000100000 - 00000000cf550000 (usable)
>  >>  [    0.000000]  BIOS-e820: 00000000cf550000 - 00000000cf55e000 (ACPI data)
>  >>  [    0.000000]  BIOS-e820: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
>  >>  [    0.000000]  BIOS-e820: 00000000cf5e0000 - 00000000cf600000 (reserved)
>  >>  [    0.000000]  BIOS-e820: 00000000fee00000 - 00000000fee01000 (reserved)
>  >>  [    0.000000]  BIOS-e820: 00000000ffc00000 - 0000000100000000 (reserved)
>  >>  [    0.000000]  BIOS-e820: 0000000100000000 - 000000012c000000 (usable)
>  >>  [    0.000000] Entering add_active_range(0, 0, 156) 0 entries of 256 used
>  >>  [    0.000000] Entering add_active_range(0, 256, 849232) 1 entries of 256 used
>  >>  [    0.000000] Entering add_active_range(0, 1048576, 1228800) 2 entries of 256 used
>  >>  [    0.000000] max_pfn_mapped = 1228800
>  >>  [    0.000000] x86 PAT enabled: cpu 0, old 0x7040600070406, new 0x7010600070106
>  >>  [    0.000000] After WB checking
>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 000000000012c000
>  >>  [    0.000000] After UC checking
>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>  >>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>  >>  [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>  >>  [    0.000000] After sorting
>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>  >>  [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>  >>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>  >>  [    0.000000] range0: 0000000000000000 - 00000000c0000000
>  >>  [    0.000000] Setting variable MTRR 0, base: 0MB, range: 2048MB, type WB
>  >>  [    0.000000] Setting variable MTRR 1, base: 2048MB, range: 1024MB, type WB
>  >>  [    0.000000] range: 00000000c0000000 - 00000000d0000000
>  >>  [    0.000000] Setting variable MTRR 2, base: 3072MB, range: 256MB, type WB
>  >>  [    0.000000] hole: 00000000cc000000 - 00000000d0000000
>  >>  [    0.000000] Setting variable MTRR 3, base: 3264MB, range: 64MB, type UC
>  >>  [    0.000000] rangeX: 00000000d0000000 - 00000000d0000000
>  >>  [    0.000000] range0: 0000000100000000 - 0000000120000000
>  >>  [    0.000000] Setting variable MTRR 4, base: 4096MB, range: 512MB, type WB
>  >>  [    0.000000] range: 0000000120000000 - 0000000130000000
>  >>  [    0.000000] Setting variable MTRR 5, base: 4608MB, range: 256MB, type WB
>  >>  [    0.000000] hole: 000000012c000000 - 0000000130000000
>  >>  [    0.000000] Setting variable MTRR 6, base: 4800MB, range: 64MB, type UC
>  >>  [    0.000000] DONE variable MTRRs
>  >>  [    0.000000] x86 PAT enabled: cpu 0, old 0x7010600070106, new 0x7010600070106
>  >>  [    0.000000] After WB checking
>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000d0000
>  >>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000130000
>  >>  [    0.000000] After UC checking
>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cc000
>  >>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>  >>  [    0.000000] After sorting
>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cc000
>  >>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>  >>  [    0.000000] WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing 757MB of RAM.
>  >
>  > so
>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>  >>  [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>  >>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>  > ===>
>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cc000
>  >>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>  >
>  > please try
>
>  Now I have 2 warnings
>
>
>  > mtrr_chunk_size=512m mtrr_gran_size=128m
>
>  ...
>
>  [    0.000000] Command line: root=/dev/sdb1 ro debug vga=0x317 mtrr_chunk_size=512m mtrr_gran_size=128m 3
>
>
> [    0.000000] BIOS-provided physical RAM map:
>  [    0.000000]  BIOS-e820: 0000000000000000 - 000000000009cc00 (usable)
>  [    0.000000]  BIOS-e820: 000000000009cc00 - 00000000000a0000 (reserved)
>  [    0.000000]  BIOS-e820: 00000000000e4000 - 0000000000100000 (reserved)
>  [    0.000000]  BIOS-e820: 0000000000100000 - 00000000cf550000 (usable)
>  [    0.000000]  BIOS-e820: 00000000cf550000 - 00000000cf55e000 (ACPI data)
>  [    0.000000]  BIOS-e820: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
>  [    0.000000]  BIOS-e820: 00000000cf5e0000 - 00000000cf600000 (reserved)
>  [    0.000000]  BIOS-e820: 00000000fee00000 - 00000000fee01000 (reserved)
>  [    0.000000]  BIOS-e820: 00000000ffc00000 - 0000000100000000 (reserved)
>  [    0.000000]  BIOS-e820: 0000000100000000 - 000000012c000000 (usable)
>  [    0.000000] Entering add_active_range(0, 0, 156) 0 entries of 256 used
>  [    0.000000] Entering add_active_range(0, 256, 849232) 1 entries of 256 used
>  [    0.000000] Entering add_active_range(0, 1048576, 1228800) 2 entries of 256 used
>  [    0.000000] max_pfn_mapped = 1228800
>  [    0.000000] x86 PAT enabled: cpu 0, old 0x7040600070406, new 0x7010600070106
>  [    0.000000] After WB checking
>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 000000000012c000
>  [    0.000000] After UC checking
>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>  [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>  [    0.000000] After sorting
>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>  [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>  [    0.000000] range0: 0000000000000000 - 00000000c0000000
>  [    0.000000] Setting variable MTRR 0, base: 0MB, range: 2048MB, type WB
>  [    0.000000] Setting variable MTRR 1, base: 2048MB, range: 1024MB, type WB
>  [    0.000000] range: 00000000c0000000 - 00000000c8000000
>  [    0.000000] Setting variable MTRR 2, base: 3072MB, range: 128MB, type WB
>
> [    0.000000] rangeX: 00000000d0000000 - 00000000d0000000
>  [    0.000000] range0: 0000000100000000 - 0000000120000000
>  [    0.000000] Setting variable MTRR 3, base: 4096MB, range: 512MB, type WB
>  [    0.000000] range: 0000000120000000 - 0000000128000000
>  [    0.000000] Setting variable MTRR 4, base: 4608MB, range: 128MB, type WB
>
> [    0.000000] DONE variable MTRRs
>  [    0.000000] x86 PAT enabled: cpu 0, old 0x7010600070106, new 0x7010600070106
>  [    0.000000] After WB checking
>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000c8000
>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000128000
>
> [    0.000000] After UC checking
>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000c8000
>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000128000
>  [    0.000000] After sorting
>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000c8000
>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000128000
>  [    0.000000] WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing 64MB of RAM.
>  [    0.000000] ------------[ cut here ]------------
>  [    0.000000] WARNING: at arch/x86/kernel/cpu/mtrr/main.c:1206 mtrr_trim_uncached_memory+0x25b/0x35a()
>
> [    0.000000] Modules linked in:
>  [    0.000000] Pid: 0, comm: swapper Not tainted 2.6.25-06058-ga01e035-dirty #805
>  [    0.000000]
>  [    0.000000] Call Trace:
>  [    0.000000]  [<ffffffff8022d661>] warn_on_slowpath+0x51/0x78
>  [    0.000000]  [<ffffffff8067f0dc>] x86_get_mtrr_mem_range+0x241/0x25c
>  [    0.000000]  [<ffffffff8067d93e>] update_memory_range+0x95/0xb7
>  [    0.000000]  [<ffffffff8067fa51>] mtrr_trim_uncached_memory+0x25b/0x35a
>
> [    0.000000]  [<ffffffff8067ca43>] setup_arch+0x280/0x4a2
>  [    0.000000]  [<ffffffff80676aa6>] start_kernel+0x6e/0x308
>  [    0.000000]  [<ffffffff80676432>] x86_64_start_kernel+0x241/0x24a
>  [    0.000000]
>  [    0.000000] ---[ end trace ca143223eefdc828 ]---
>  [    0.000000] update e820 for mtrr -- end_pfn
>  [    0.000000] modified physical RAM map:
>  [    0.000000]  modified: 0000000000000000 - 000000000009cc00 (usable)
>  [    0.000000]  modified: 000000000009cc00 - 00000000000a0000 (reserved)
>  [    0.000000]  modified: 00000000000e4000 - 0000000000100000 (reserved)
>  [    0.000000]  modified: 0000000000100000 - 00000000cf550000 (usable)
>  [    0.000000]  modified: 00000000cf550000 - 00000000cf55e000 (ACPI data)
>  [    0.000000]  modified: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
>  [    0.000000]  modified: 00000000cf5e0000 - 00000000cf600000 (reserved)
>  [    0.000000]  modified: 00000000fee00000 - 00000000fee01000 (reserved)
>  [    0.000000]  modified: 00000000ffc00000 - 0000000100000000 (reserved)
>  [    0.000000]  modified: 0000000100000000 - 0000000128000000 (usable)
>  [    0.000000]  modified: 0000000128000000 - 000000012c000000 (reserved)
>
> [    0.000000] WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing 757MB of RAM.
>
> [    0.000000] ------------[ cut here ]------------
>  [    0.000000] WARNING: at arch/x86/kernel/cpu/mtrr/main.c:1227 mtrr_trim_uncached_memory+0x30c/0x35a()
>  [    0.000000] Modules linked in:
>  [    0.000000] Pid: 0, comm: swapper Not tainted 2.6.25-06058-ga01e035-dirty #805
>  [    0.000000]
>  [    0.000000] Call Trace:
>  [    0.000000]  [<ffffffff8022d661>] warn_on_slowpath+0x51/0x78
>  [    0.000000]  [<ffffffff8067f0dc>] x86_get_mtrr_mem_range+0x241/0x25c
>  [    0.000000]  [<ffffffff8067d93e>] update_memory_range+0x95/0xb7
>  [    0.000000]  [<ffffffff8067fb02>] mtrr_trim_uncached_memory+0x30c/0x35a
>  [    0.000000]  [<ffffffff8067ca43>] setup_arch+0x280/0x4a2
>  [    0.000000]  [<ffffffff80676aa6>] start_kernel+0x6e/0x308
>  [    0.000000]  [<ffffffff80676432>] x86_64_start_kernel+0x241/0x24a
>  [    0.000000]
>  [    0.000000] ---[ end trace ca143223eefdc828 ]---
>  [    0.000000] update e820 for mtrr -- holes
>  [    0.000000] modified physical RAM map:
>  [    0.000000]  modified: 0000000000000000 - 000000000009cc00 (usable)
>  [    0.000000]  modified: 000000000009cc00 - 00000000000a0000 (reserved)
>  [    0.000000]  modified: 00000000000e4000 - 0000000000100000 (reserved)
>  [    0.000000]  modified: 0000000000100000 - 00000000c8000000 (usable)
>  [    0.000000]  modified: 00000000c8000000 - 00000000cf550000 (reserved)
>  [    0.000000]  modified: 00000000cf550000 - 00000000cf55e000 (ACPI data)
>  [    0.000000]  modified: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
>  [    0.000000]  modified: 00000000cf5e0000 - 00000000cf600000 (reserved)
>  [    0.000000]  modified: 00000000fee00000 - 00000000fee01000 (reserved)
>  [    0.000000]  modified: 00000000ffc00000 - 000000012c000000 (reserved)
>
> [    0.000000] Entering add_active_range(0, 0, 156) 3 entries of 256 used
>  [    0.000000] Entering add_active_range(0, 256, 819200) 3 entries of 256 used

please try attached trim_holes_fix.patch..., it will fix the trim hole problem.

then check if X server works well.

then try mtrr_cleanup_fix.patch for... ==> address ingo request about style etc.

Thanks

Yinghai Lu

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: trim_holes_fix.patch --]
[-- Type: text/x-patch; name=trim_holes_fix.patch, Size: 573 bytes --]

Index: linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/main.c
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
@@ -1214,7 +1230,7 @@ int __init mtrr_trim_uncached_memory(uns
 	if (range[0].start)
 		total_real_trim_size += real_trim_memory(0, range[0].start);
 
-	for (i = 0; i < nr_range - 1; i--) {
+	for (i = 0; i < nr_range - 1; i++) {
 		if (range[i].end + 1 < range[i+1].start)
 			total_real_trim_size += real_trim_memory(range[i].end + 1, range[i+1].start);
 	}

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #3: mtrr_cleanup_fix.patch --]
[-- Type: text/x-patch; name=mtrr_cleanup_fix.patch, Size: 10197 bytes --]

Index: linux-2.6/arch/x86/Kconfig
===================================================================
--- linux-2.6.orig/arch/x86/Kconfig
+++ linux-2.6/arch/x86/Kconfig
@@ -1092,13 +1092,12 @@ config MTRR_SANITIZER
 	  If unsure, say Y.
 
 config MTRR_SANITIZER_ENABLE_DEFAULT
-	def_bool y
-	prompt "Enable MTRR cleanup by default"
+	int "MTRR cleanup enable value (0-1)"
+	range 0 1
+	default "1"
 	depends on MTRR_SANITIZER
 	help
-	  Enable mtrr cleanup by default
-
-	  If unsure, say Y.
+	  Enable mtrr cleanup default value
 
 config X86_PAT
 	bool
Index: linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/main.c
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
@@ -611,17 +611,9 @@ static struct sysdev_driver mtrr_sysdev_
 };
 
 #ifdef CONFIG_MTRR_SANITIZER
-
-#ifdef CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT
-static int enable_mtrr_cleanup __initdata = 1;
-#else
-static int enable_mtrr_cleanup __initdata;
-#endif
-
+static int enable_mtrr_cleanup __initdata = CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT;
 #else
-
 static int enable_mtrr_cleanup __initdata = -1;
-
 #endif
 
 static int __init disable_mtrr_cleanup_setup(char *str)
@@ -640,6 +632,7 @@ static int __init enable_mtrr_cleanup_se
 }
 early_param("enble_mtrr_cleanup", enable_mtrr_cleanup_setup);
 
+/* should be related to MTRR_VAR_RANGES nums */
 #define RANGE_NUM 256
 
 struct res_range {
@@ -647,13 +640,27 @@ struct res_range {
 	unsigned long end;
 };
 
-static int __init add_range(struct res_range *range, int nr_range, unsigned long start,
-			      unsigned long end, int merge)
+static int __init
+add_range(struct res_range *range, int nr_range, unsigned long start,
+			      unsigned long end)
 {
-	int i;
+	/* out of slots */
+	if (nr_range >= RANGE_NUM)
+		return nr_range;
 
-	if (!merge)
-		goto addit;
+	range[nr_range].start = start;
+	range[nr_range].end = end;
+
+	nr_range++;
+
+	return nr_range;
+}
+
+static int __init
+add_range_with_merge(struct res_range *range, int nr_range, unsigned long start,
+			      unsigned long end)
+{
+	int i;
 
 	/* try to merge it with old one */
 	for (i = 0; i < nr_range; i++) {
@@ -676,24 +683,14 @@ static int __init add_range(struct res_r
 		return nr_range;
 	}
 
-addit:
 	/* need to add that */
-	if (nr_range >= RANGE_NUM)
-		return nr_range;
-
-	range[nr_range].start = start;
-	range[nr_range].end = end;
-
-	nr_range++;
-
-	return nr_range;
-
+	return add_range(range, nr_range, start, end);
 }
-static void __init subtract_range(struct res_range *range, unsigned long start,
-				unsigned long end)
+
+static void __init
+subtract_range(struct res_range *range, unsigned long start, unsigned long end)
 {
-	int i;
-	int j;
+	int i, j;
 
 	for (j = 0; j < RANGE_NUM; j++) {
 		if (!range[j].end)
@@ -747,46 +744,47 @@ static int __init cmp_range(const void *
 }
 
 struct var_mtrr_state {
-	unsigned long range_startk, range_sizek;
-	unsigned long chunk_sizek;
-	unsigned long gran_sizek;
-	unsigned int reg;
-	unsigned address_bits;
+	unsigned long	range_startk;
+	unsigned long	range_sizek;
+	unsigned long	chunk_sizek;
+	unsigned long	gran_sizek;
+	unsigned int	reg;
+	unsigned int	address_bits;
 };
 
-static void __init set_var_mtrr(
-	unsigned int reg, unsigned long basek, unsigned long sizek,
-	unsigned char type, unsigned address_bits)
+static void __init
+set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
+		unsigned char type, unsigned address_bits)
 {
 	u32 base_lo, base_hi, mask_lo, mask_hi;
-	unsigned address_mask_high;
+	u64 base, mask;
 
 	if (!sizek) {
 		fill_mtrr_var_range(reg, 0, 0, 0, 0);
 		return;
 	}
 
-	address_mask_high = ((1u << (address_bits - 32u)) - 1u);
+	mask = (1ULL << address_bits) - 1;
+	mask &= ~((((u64)sizek) << 10) - 1);
 
-	base_hi = basek >> 22;
-	base_lo  = basek << 10;
+	base  = ((u64)basek) << 10;
 
-	if (sizek < 4*1024*1024) {
-		mask_hi = address_mask_high;
-		mask_lo = ~((sizek << 10) - 1);
-	} else {
-		mask_hi = address_mask_high & (~((sizek >> 22) - 1));
-		mask_lo = 0;
-	}
+	base |= type;
+	mask |= 0x800;
+
+	base_lo = base & ((1ULL<<32) - 1);
+	base_hi = base >> 32;
+
+	mask_lo = mask & ((1ULL<<32) - 1);
+	mask_hi = mask >> 32;
 
-	base_lo |= type;
-	mask_lo |= 0x800;
 	fill_mtrr_var_range(reg, base_lo, base_hi, mask_lo, mask_hi);
 }
 
-static unsigned int __init range_to_mtrr(unsigned int reg,
-	unsigned long range_startk, unsigned long range_sizek,
-	unsigned char type, unsigned address_bits)
+static unsigned int __init
+range_to_mtrr(unsigned int reg, unsigned long range_startk,
+	      unsigned long range_sizek, unsigned char type,
+	      unsigned address_bits)
 {
 	if (!range_sizek || (reg >= num_var_ranges))
 		return reg;
@@ -794,6 +792,7 @@ static unsigned int __init range_to_mtrr
 	while (range_sizek) {
 		unsigned long max_align, align;
 		unsigned long sizek;
+
 		/* Compute the maximum size I can make a range */
 		if (range_startk)
 			max_align = ffs(range_startk) - 1;
@@ -818,7 +817,8 @@ static unsigned int __init range_to_mtrr
 	return reg;
 }
 
-static void __init range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek)
+static void __init
+range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek)
 {
 	unsigned long hole_basek, hole_sizek;
 	unsigned long range0_basek, range0_sizek;
@@ -848,23 +848,31 @@ static void __init range_to_mtrr_with_ho
 	/* try to append some small hole */
 	range0_basek = state->range_startk;
 	range0_sizek = ALIGN(state->range_sizek, chunk_sizek);
-	if ((range0_sizek == state->range_sizek) ||
-	    ((range0_basek + range0_sizek - chunk_sizek > basek) && basek)) {
+	if (range0_sizek == state->range_sizek) {
 			printk(KERN_INFO "rangeX: %016lx - %016lx\n", range0_basek<<10, (range0_basek + state->range_sizek)<<10);
 			state->reg = range_to_mtrr(state->reg, range0_basek,
 				state->range_sizek, MTRR_TYPE_WRBACK, state->address_bits);
 		return;
+	} else if (basek) {
+	    while (range0_basek + range0_sizek - chunk_sizek > basek) {
+		range0_sizek -= chunk_sizek;
+		if (!range0_sizek)
+			break;
+	    }
 	}
 
 
-	range0_sizek -= chunk_sizek;
+	if (range0_sizek > chunk_sizek)
+		range0_sizek -= chunk_sizek;
 	printk(KERN_INFO "range0: %016lx - %016lx\n", range0_basek<<10, (range0_basek + range0_sizek)<<10);
 	state->reg = range_to_mtrr(state->reg, range0_basek,
 			range0_sizek, MTRR_TYPE_WRBACK, state->address_bits);
 
 	range_basek = range0_basek + range0_sizek;
 	range_sizek = chunk_sizek;
-	if (range_sizek - (state->range_sizek - range0_sizek) < (chunk_sizek >> 1)) {
+
+	if ((range_sizek - (state->range_sizek - range0_sizek) < (chunk_sizek >> 1)) &&
+	    (range_basek + range_sizek <= basek)) {
 		hole_sizek = range_sizek - (state->range_sizek - range0_sizek);
 		hole_basek = range_basek + range_sizek - hole_sizek;
 	} else
@@ -880,7 +888,9 @@ static void __init range_to_mtrr_with_ho
 	}
 }
 
-static void __init set_var_mtrr_range(struct var_mtrr_state *state, unsigned long base_pfn, unsigned long size_pfn)
+static void __init
+set_var_mtrr_range(struct var_mtrr_state *state, unsigned long base_pfn,
+		   unsigned long size_pfn)
 {
 	unsigned long basek, sizek;
 
@@ -921,7 +931,7 @@ static int __init parse_mtrr_chunk_size_
 early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt);
 
 /* granity of mtrr of block */
-static u64 mtrr_gran_size __initdata = (64ULL<<20);
+static u64 mtrr_gran_size __initdata = (1ULL<<20);
 
 static int __init parse_mtrr_gran_size_opt(char *p)
 {
@@ -932,17 +942,19 @@ static int __init parse_mtrr_gran_size_o
 }
 early_param("mtrr_gran_size", parse_mtrr_gran_size_opt);
 
-static void __init x86_setup_var_mtrrs(struct res_range *range, int nr_range, unsigned address_bits)
+static void __init
+x86_setup_var_mtrrs(struct res_range *range, int nr_range,
+		    unsigned address_bits)
 {
 	struct var_mtrr_state var_state;
 	int i;
 
-	var_state.range_startk = 0;
-	var_state.range_sizek = 0;
-	var_state.reg = 0;
-	var_state.address_bits = address_bits;
-	var_state.chunk_sizek = mtrr_chunk_size >> 10;
-	var_state.gran_sizek = mtrr_gran_size >> 10;
+	var_state.range_startk	= 0;
+	var_state.range_sizek	= 0;
+	var_state.reg		= 0;
+	var_state.address_bits	= address_bits;
+	var_state.chunk_sizek	= mtrr_chunk_size >> 10;
+	var_state.gran_sizek	= mtrr_gran_size >> 10;
 
 	/* Write the range etc */
 	for (i = 0; i < nr_range; i++)
@@ -952,11 +964,16 @@ static void __init x86_setup_var_mtrrs(s
 	range_to_mtrr_with_hole(&var_state, 0);
 	printk(KERN_INFO "DONE variable MTRRs\n");
 	/* Clear out the extra MTRR's */
-	while (var_state.reg < num_var_ranges)
-		set_var_mtrr(var_state.reg++, 0, 0, 0, var_state.address_bits);
+	while (var_state.reg < num_var_ranges) {
+		set_var_mtrr(var_state.reg, 0, 0, 0, var_state.address_bits);
+		var_state.reg++;
+	}
 }
 
-static int __init x86_get_mtrr_mem_range(struct res_range *range, int nr_range, unsigned long extra_remove_base, unsigned long extra_remove_size)
+static int __init
+x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
+		       unsigned long extra_remove_base,
+		       unsigned long extra_remove_size)
 {
 	unsigned long i, base, size;
 	mtrr_type type;
@@ -965,7 +982,7 @@ static int __init x86_get_mtrr_mem_range
 		mtrr_if->get(i, &base, &size, &type);
 		if (type != MTRR_TYPE_WRBACK)
 			continue;
-		nr_range = add_range(range, nr_range, base, base + size - 1, 1);
+		nr_range = add_range_with_merge(range, nr_range, base, base + size - 1);
 	}
 	printk(KERN_INFO "After WB checking\n");
 	for (i = 0; i < nr_range; i++)
@@ -1005,11 +1022,11 @@ static int __init x86_get_mtrr_mem_range
 
 static int __init mtrr_cleanup(unsigned address_bits)
 {
+	unsigned long extra_remove_base, extra_remove_size;
 	unsigned long i, base, size, def, dummy;
-	mtrr_type type;
 	struct res_range range[RANGE_NUM];
+	mtrr_type type;
 	int nr_range;
-	unsigned long extra_remove_base, extra_remove_size;
 
 	/* extra one for all 0 */
 	int num[MTRR_NUM_TYPES + 1];
@@ -1053,7 +1070,6 @@ static int __init mtrr_cleanup(unsigned 
 	x86_setup_var_mtrrs(range, nr_range, address_bits);
 
 	return 1;
-
 }
 
 static int disable_mtrr_trim;

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH 1/2] x86: mtrr cleanup for converting continuous to discrete layout v8
  2008-04-29 20:46             ` Randy Dunlap
@ 2008-04-29 21:54               ` Yinghai Lu
  0 siblings, 0 replies; 89+ messages in thread
From: Yinghai Lu @ 2008-04-29 21:54 UTC (permalink / raw)
  To: Randy Dunlap
  Cc: Yinghai Lu, Andrew Morton, Ingo Molnar, H. Peter Anvin,
	Thomas Gleixner, Gabriel C, Mika Fischer,
	linux-kernel@vger.kernel.org

On Tue, Apr 29, 2008 at 1:46 PM, Randy Dunlap <randy.dunlap@oracle.com> wrote:
> On Tue, 29 Apr 2008 03:52:33 -0700 Yinghai Lu wrote:
>
>  > Index: linux-2.6/Documentation/kernel-parameters.txt
>  > ===================================================================
>  > --- linux-2.6.orig/Documentation/kernel-parameters.txt
>  > +++ linux-2.6/Documentation/kernel-parameters.txt
>  > @@ -595,6 +595,20 @@ and is between 256 and 4096 characters.
>  >                       See drivers/char/README.epca and
>  >                       Documentation/digiepca.txt.
>  >
>  > +     disable_mtrr_cleanup [X86]
>  > +     enable_mtrr_cleanup [X86]
>  > +                     The kernel tries to adjust MTRR layout from continuous
>  > +                     to discrete, to make X server driver able to add WB
>  > +                     entry later. This parameter enables/disables that.
>  > +
>  > +     mtrr_chunk_size=nn[KMG] [X86]
>  > +                     used for mtrr cleanup. It is largest continous chunk
>  > +                     that could hold holes aka. UC entries.
>  > +
>  > +     mtrr_gran_size=nn[KMG] [X86]
>  > +                     used for mtrr cleanup. It is granity of mtrr block.
>
>  s/granity/granularity/
>  I think that's what you mean/want.

thanks.

>
>  How does someone know that size/granity/whatever of an mtrr block?

considering to auto test to find optimal value for mtrr_chunk_size and
mtrr_gran_size...

YH

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH] x86: mtrr cleanup for converting continuous to discrete layout v5
  2008-04-29 20:29             ` Eric W. Biederman
@ 2008-04-29 21:57               ` Yinghai Lu
  2008-04-29 22:09                 ` Ingo Molnar
  2008-04-29 22:14                 ` Eric W. Biederman
  0 siblings, 2 replies; 89+ messages in thread
From: Yinghai Lu @ 2008-04-29 21:57 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Andrew Morton, Ingo Molnar, H. Peter Anvin, Thomas Gleixner,
	Gabriel C, linux-kernel@vger.kernel.org, Mika Fischer

On Tue, Apr 29, 2008 at 1:29 PM, Eric W. Biederman
<ebiederm@xmission.com> wrote:
> "Yinghai Lu" <yhlu.kernel@gmail.com> writes:
>
>  > (less memory + fast X) or  (more 8M RAM + slow...)
>
>  Yes. That is the basic question.  Not all X drivers need it and
>  potentially the current kernel drm modules can use the
>  PAT infrastructure that has been merged.
>
>  Further a SMM monitor running 100 times or more slower may cause
>  problems if SMM mode is entered frequently, slowing down the entire
>  system not just X.
>
>  So if you don't have X or you have a crazy SMM monitor this can
>  be an issue.

agreed. so that feature is compiled in but disable by default.

BTW: is any chance for OS to disable SMI etc? to verify is the
unstatbility is caused by SMI?

YH

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH] x86: mtrr cleanup for converting continuous to discrete layout v5
  2008-04-29 21:57               ` Yinghai Lu
@ 2008-04-29 22:09                 ` Ingo Molnar
  2008-04-29 22:18                   ` Yinghai Lu
  2008-04-29 22:14                 ` Eric W. Biederman
  1 sibling, 1 reply; 89+ messages in thread
From: Ingo Molnar @ 2008-04-29 22:09 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Eric W. Biederman, Andrew Morton, H. Peter Anvin, Thomas Gleixner,
	Gabriel C, linux-kernel@vger.kernel.org, Mika Fischer


* Yinghai Lu <yhlu.kernel@gmail.com> wrote:

> agreed. so that feature is compiled in but disable by default.
> 
> BTW: is any chance for OS to disable SMI etc? to verify is the 
> unstatbility is caused by SMI?

i dont think there's any documented way for that. SMI might be the 
mechanism that ensures blue-smoke type of system reliability (CPU fan, 
temperature, etc.) so it would be extremely dangerous to mess with it.

	Ingo

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH] x86: mtrr cleanup for converting continuous to discrete layout v5
  2008-04-29 21:57               ` Yinghai Lu
  2008-04-29 22:09                 ` Ingo Molnar
@ 2008-04-29 22:14                 ` Eric W. Biederman
  2008-04-29 22:54                   ` Thomas Gleixner
  1 sibling, 1 reply; 89+ messages in thread
From: Eric W. Biederman @ 2008-04-29 22:14 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Andrew Morton, Ingo Molnar, H. Peter Anvin, Thomas Gleixner,
	Gabriel C, linux-kernel@vger.kernel.org, Mika Fischer

"Yinghai Lu" <yhlu.kernel@gmail.com> writes:

> On Tue, Apr 29, 2008 at 1:29 PM, Eric W. Biederman
> <ebiederm@xmission.com> wrote:
>> "Yinghai Lu" <yhlu.kernel@gmail.com> writes:
>>
>>  > (less memory + fast X) or  (more 8M RAM + slow...)
>>
>>  Yes. That is the basic question.  Not all X drivers need it and
>>  potentially the current kernel drm modules can use the
>>  PAT infrastructure that has been merged.
>>
>>  Further a SMM monitor running 100 times or more slower may cause
>>  problems if SMM mode is entered frequently, slowing down the entire
>>  system not just X.
>>
>>  So if you don't have X or you have a crazy SMM monitor this can
>>  be an issue.
>
> agreed. so that feature is compiled in but disable by default.
>
> BTW: is any chance for OS to disable SMI etc? to verify is the
> unstatbility is caused by SMI?

Not in general no.  Frequently you can get at the registers that
will enable/disable an SMI but that is chipset specific.

Think of SMM mode is a lightweight hypervisor that we can't get rid
of, if you want to understand the worst case.

In theory SMM mode is completely unnecessary as soon as we enable
ACPI.  In practice ACPI appears to frequently trap into SMM mode.

Eric


^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH] x86: mtrr cleanup for converting continuous to discrete layout v5
  2008-04-29 22:09                 ` Ingo Molnar
@ 2008-04-29 22:18                   ` Yinghai Lu
  0 siblings, 0 replies; 89+ messages in thread
From: Yinghai Lu @ 2008-04-29 22:18 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Eric W. Biederman, Andrew Morton, H. Peter Anvin, Thomas Gleixner,
	Gabriel C, linux-kernel@vger.kernel.org, Mika Fischer

On Tue, Apr 29, 2008 at 3:09 PM, Ingo Molnar <mingo@elte.hu> wrote:
>
>  * Yinghai Lu <yhlu.kernel@gmail.com> wrote:
>
>  > agreed. so that feature is compiled in but disable by default.
>  >
>  > BTW: is any chance for OS to disable SMI etc? to verify is the
>  > unstatbility is caused by SMI?
>
>  i dont think there's any documented way for that. SMI might be the
>  mechanism that ensures blue-smoke type of system reliability (CPU fan,
>  temperature, etc.) so it would be extremely dangerous to mess with it.

then that is bad and sick HW design.

for example. cpu fan is supposed to full speed, if SW send insane
instruction and lose connection.
also, CPU should shutdown by thermal strip is FAN is stopped.

when we were working on LinuxBIOS, found one MB cpu fan need to be
started by BIOS, and another one is auto full speed if BIOS don't
touch it. We always like the second design.

YH

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH] x86: mtrr cleanup for converting continuous to discrete layout v5
  2008-04-29 22:14                 ` Eric W. Biederman
@ 2008-04-29 22:54                   ` Thomas Gleixner
  2008-04-30  1:16                     ` Eric W. Biederman
  0 siblings, 1 reply; 89+ messages in thread
From: Thomas Gleixner @ 2008-04-29 22:54 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Yinghai Lu, Andrew Morton, Ingo Molnar, H. Peter Anvin, Gabriel C,
	linux-kernel@vger.kernel.org, Mika Fischer

On Tue, 29 Apr 2008, Eric W. Biederman wrote:
> Think of SMM mode is a lightweight hypervisor that we can't get rid
> of, if you want to understand the worst case.
> 
> In theory SMM mode is completely unnecessary as soon as we enable
> ACPI.  In practice ACPI appears to frequently trap into SMM mode.

SMM does more than that. It emulates legacy hardware and fixes
chip(set) bugs as well. Disabling it just makes your box stop
working. There are certain types of systems where essential safety
nets rely on SMIs (you can deep-fry P4s by disabling SMIs).

Thanks,
	tglx

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH 1/2] x86: mtrr cleanup for converting continuous to discrete layout v7
  2008-04-29 21:49                       ` Yinghai Lu
@ 2008-04-29 23:56                         ` Gabriel C
  2008-04-30  0:06                           ` Gabriel C
  2008-04-30  0:13                           ` Yinghai Lu
  0 siblings, 2 replies; 89+ messages in thread
From: Gabriel C @ 2008-04-29 23:56 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Andrew Morton, Ingo Molnar, H. Peter Anvin, Thomas Gleixner,
	Mika Fischer, linux-kernel@vger.kernel.org

Yinghai Lu wrote:
> On Tue, Apr 29, 2008 at 1:25 PM, Gabriel C <nix.or.die@googlemail.com> wrote:
>> Yinghai Lu wrote:
>>  > On Tue, Apr 29, 2008 at 4:51 AM, Gabriel C <nix.or.die@googlemail.com> wrote:
>>  >> Yinghai Lu wrote:
>>  >>  > On Tue, Apr 29, 2008 at 3:30 AM, Yinghai Lu <yhlu.kernel@gmail.com> wrote:
>>  >>  >> On Tue, Apr 29, 2008 at 2:47 AM, Gabriel C <nix.or.die@googlemail.com> wrote:
>>  >>  >>  > Yinghai Lu wrote:
>>  >>  >>  >  > some BIOS like to use continus MTRR layout, and may X driver can not add
>>  >>  >>  >  > WB entries for graphical cards when 4g or more RAM installed.
>>  >>  >>  >  >
>>  >>  >>  >  > the patch will change MTRR to discrete.
>>  >>  >>  >  >
>>  >>  >>  >  > mtrr_chunk_size= could be used to have smaller continuous block to hold holes.
>>  >>  >>  >  > default is 256m, could be set according to size of graphics card memory.
>>  >>  >>  >  >
>>  >>  >>  >  > v2: fix -1 for UC checking
>>  >>  >>  >  > v3: default to disable, and need use enable_mtrr_cleanup to enable this feature
>>  >>  >>  >  >     skip the var state change warning.
>>  >>  >>  >  >     remove next_basek in range_to_mtrr()
>>  >>  >>  >  > v4: correct warning mask.
>>  >>  >>  >  > v5: CONFIG_MTRR_SANITIZER
>>  >>  >>  >  > v6: fix 1g, 2g, 512 aligment with extra hole
>>  >>  >>  >  > v7: gran_sizek to prevent running out of MTRRs.
>>  >>  >>  >  >
>>  >>  >>  >
>>  >>  >>  >  With this version ( and patch http://lkml.org/lkml/2008/4/29/97 ) applyed on latest linus git tree
>>  >>  >>  >  the box OOPS'es early.
>>  >>  >>  >
>>  >>  >>  >  Sorry I don't have time right now to write down the part of the OOPS I can see on monitor , I can try to find
>>  >>  >>  >  some time later.
>>  >>  >>  >
>>  >>  >>  >  In any way OOPS'es on __free_one_page+0x191/0x21e
>>  >>  >>
>>  >>  >>  thanks. found one problem with hole_basek ...
>>  >>  >>
>>  >>  >>  will send you v8, and hope it will be last version.
>>  >>  >
>>  >>  > please try v8, it should get rid of the 8m entry. it need patch
>>  >>  > http://lkml.org/lkml/2008/4/29/97 too.
>>  >>
>>  >>  Box does boot with v8 but now I get that warning you fixed in v2 again =):
>>  >>
>>  >>  ....
>>  >>  [    0.000000] Linux version 2.6.25-06058-ga01e035-dirty (crazy@thor) (gcc version 4.3.0 (Frugalware Linux) ) #805 SMP PREEMPT Tue Apr 29 13:04:49 CEST 2008
>>  >>  [    0.000000] Command line: root=/dev/sdb1 ro debug vga=0x317
>>  >>  [    0.000000] BIOS-provided physical RAM map:
>>  >>  [    0.000000]  BIOS-e820: 0000000000000000 - 000000000009cc00 (usable)
>>  >>  [    0.000000]  BIOS-e820: 000000000009cc00 - 00000000000a0000 (reserved)
>>  >>  [    0.000000]  BIOS-e820: 00000000000e4000 - 0000000000100000 (reserved)
>>  >>  [    0.000000]  BIOS-e820: 0000000000100000 - 00000000cf550000 (usable)
>>  >>  [    0.000000]  BIOS-e820: 00000000cf550000 - 00000000cf55e000 (ACPI data)
>>  >>  [    0.000000]  BIOS-e820: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
>>  >>  [    0.000000]  BIOS-e820: 00000000cf5e0000 - 00000000cf600000 (reserved)
>>  >>  [    0.000000]  BIOS-e820: 00000000fee00000 - 00000000fee01000 (reserved)
>>  >>  [    0.000000]  BIOS-e820: 00000000ffc00000 - 0000000100000000 (reserved)
>>  >>  [    0.000000]  BIOS-e820: 0000000100000000 - 000000012c000000 (usable)
>>  >>  [    0.000000] Entering add_active_range(0, 0, 156) 0 entries of 256 used
>>  >>  [    0.000000] Entering add_active_range(0, 256, 849232) 1 entries of 256 used
>>  >>  [    0.000000] Entering add_active_range(0, 1048576, 1228800) 2 entries of 256 used
>>  >>  [    0.000000] max_pfn_mapped = 1228800
>>  >>  [    0.000000] x86 PAT enabled: cpu 0, old 0x7040600070406, new 0x7010600070106
>>  >>  [    0.000000] After WB checking
>>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 000000000012c000
>>  >>  [    0.000000] After UC checking
>>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>>  >>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>>  >>  [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>>  >>  [    0.000000] After sorting
>>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>>  >>  [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>>  >>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>>  >>  [    0.000000] range0: 0000000000000000 - 00000000c0000000
>>  >>  [    0.000000] Setting variable MTRR 0, base: 0MB, range: 2048MB, type WB
>>  >>  [    0.000000] Setting variable MTRR 1, base: 2048MB, range: 1024MB, type WB
>>  >>  [    0.000000] range: 00000000c0000000 - 00000000d0000000
>>  >>  [    0.000000] Setting variable MTRR 2, base: 3072MB, range: 256MB, type WB
>>  >>  [    0.000000] hole: 00000000cc000000 - 00000000d0000000
>>  >>  [    0.000000] Setting variable MTRR 3, base: 3264MB, range: 64MB, type UC
>>  >>  [    0.000000] rangeX: 00000000d0000000 - 00000000d0000000
>>  >>  [    0.000000] range0: 0000000100000000 - 0000000120000000
>>  >>  [    0.000000] Setting variable MTRR 4, base: 4096MB, range: 512MB, type WB
>>  >>  [    0.000000] range: 0000000120000000 - 0000000130000000
>>  >>  [    0.000000] Setting variable MTRR 5, base: 4608MB, range: 256MB, type WB
>>  >>  [    0.000000] hole: 000000012c000000 - 0000000130000000
>>  >>  [    0.000000] Setting variable MTRR 6, base: 4800MB, range: 64MB, type UC
>>  >>  [    0.000000] DONE variable MTRRs
>>  >>  [    0.000000] x86 PAT enabled: cpu 0, old 0x7010600070106, new 0x7010600070106
>>  >>  [    0.000000] After WB checking
>>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000d0000
>>  >>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000130000
>>  >>  [    0.000000] After UC checking
>>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cc000
>>  >>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>>  >>  [    0.000000] After sorting
>>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cc000
>>  >>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>>  >>  [    0.000000] WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing 757MB of RAM.
>>  >
>>  > so
>>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>>  >>  [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>>  >>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>>  > ===>
>>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cc000
>>  >>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>>  >
>>  > please try
>>
>>  Now I have 2 warnings
>>
>>
>>  > mtrr_chunk_size=512m mtrr_gran_size=128m
>>
>>  ...
>>
>>  [    0.000000] Command line: root=/dev/sdb1 ro debug vga=0x317 mtrr_chunk_size=512m mtrr_gran_size=128m 3
>>
>>
>> [    0.000000] BIOS-provided physical RAM map:
>>  [    0.000000]  BIOS-e820: 0000000000000000 - 000000000009cc00 (usable)
>>  [    0.000000]  BIOS-e820: 000000000009cc00 - 00000000000a0000 (reserved)
>>  [    0.000000]  BIOS-e820: 00000000000e4000 - 0000000000100000 (reserved)
>>  [    0.000000]  BIOS-e820: 0000000000100000 - 00000000cf550000 (usable)
>>  [    0.000000]  BIOS-e820: 00000000cf550000 - 00000000cf55e000 (ACPI data)
>>  [    0.000000]  BIOS-e820: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
>>  [    0.000000]  BIOS-e820: 00000000cf5e0000 - 00000000cf600000 (reserved)
>>  [    0.000000]  BIOS-e820: 00000000fee00000 - 00000000fee01000 (reserved)
>>  [    0.000000]  BIOS-e820: 00000000ffc00000 - 0000000100000000 (reserved)
>>  [    0.000000]  BIOS-e820: 0000000100000000 - 000000012c000000 (usable)
>>  [    0.000000] Entering add_active_range(0, 0, 156) 0 entries of 256 used
>>  [    0.000000] Entering add_active_range(0, 256, 849232) 1 entries of 256 used
>>  [    0.000000] Entering add_active_range(0, 1048576, 1228800) 2 entries of 256 used
>>  [    0.000000] max_pfn_mapped = 1228800
>>  [    0.000000] x86 PAT enabled: cpu 0, old 0x7040600070406, new 0x7010600070106
>>  [    0.000000] After WB checking
>>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 000000000012c000
>>  [    0.000000] After UC checking
>>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>>  [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>>  [    0.000000] After sorting
>>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>>  [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>>  [    0.000000] range0: 0000000000000000 - 00000000c0000000
>>  [    0.000000] Setting variable MTRR 0, base: 0MB, range: 2048MB, type WB
>>  [    0.000000] Setting variable MTRR 1, base: 2048MB, range: 1024MB, type WB
>>  [    0.000000] range: 00000000c0000000 - 00000000c8000000
>>  [    0.000000] Setting variable MTRR 2, base: 3072MB, range: 128MB, type WB
>>
>> [    0.000000] rangeX: 00000000d0000000 - 00000000d0000000
>>  [    0.000000] range0: 0000000100000000 - 0000000120000000
>>  [    0.000000] Setting variable MTRR 3, base: 4096MB, range: 512MB, type WB
>>  [    0.000000] range: 0000000120000000 - 0000000128000000
>>  [    0.000000] Setting variable MTRR 4, base: 4608MB, range: 128MB, type WB
>>
>> [    0.000000] DONE variable MTRRs
>>  [    0.000000] x86 PAT enabled: cpu 0, old 0x7010600070106, new 0x7010600070106
>>  [    0.000000] After WB checking
>>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000c8000
>>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000128000
>>
>> [    0.000000] After UC checking
>>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000c8000
>>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000128000
>>  [    0.000000] After sorting
>>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000c8000
>>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000128000
>>  [    0.000000] WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing 64MB of RAM.
>>  [    0.000000] ------------[ cut here ]------------
>>  [    0.000000] WARNING: at arch/x86/kernel/cpu/mtrr/main.c:1206 mtrr_trim_uncached_memory+0x25b/0x35a()
>>
>> [    0.000000] Modules linked in:
>>  [    0.000000] Pid: 0, comm: swapper Not tainted 2.6.25-06058-ga01e035-dirty #805
>>  [    0.000000]
>>  [    0.000000] Call Trace:
>>  [    0.000000]  [<ffffffff8022d661>] warn_on_slowpath+0x51/0x78
>>  [    0.000000]  [<ffffffff8067f0dc>] x86_get_mtrr_mem_range+0x241/0x25c
>>  [    0.000000]  [<ffffffff8067d93e>] update_memory_range+0x95/0xb7
>>  [    0.000000]  [<ffffffff8067fa51>] mtrr_trim_uncached_memory+0x25b/0x35a
>>
>> [    0.000000]  [<ffffffff8067ca43>] setup_arch+0x280/0x4a2
>>  [    0.000000]  [<ffffffff80676aa6>] start_kernel+0x6e/0x308
>>  [    0.000000]  [<ffffffff80676432>] x86_64_start_kernel+0x241/0x24a
>>  [    0.000000]
>>  [    0.000000] ---[ end trace ca143223eefdc828 ]---
>>  [    0.000000] update e820 for mtrr -- end_pfn
>>  [    0.000000] modified physical RAM map:
>>  [    0.000000]  modified: 0000000000000000 - 000000000009cc00 (usable)
>>  [    0.000000]  modified: 000000000009cc00 - 00000000000a0000 (reserved)
>>  [    0.000000]  modified: 00000000000e4000 - 0000000000100000 (reserved)
>>  [    0.000000]  modified: 0000000000100000 - 00000000cf550000 (usable)
>>  [    0.000000]  modified: 00000000cf550000 - 00000000cf55e000 (ACPI data)
>>  [    0.000000]  modified: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
>>  [    0.000000]  modified: 00000000cf5e0000 - 00000000cf600000 (reserved)
>>  [    0.000000]  modified: 00000000fee00000 - 00000000fee01000 (reserved)
>>  [    0.000000]  modified: 00000000ffc00000 - 0000000100000000 (reserved)
>>  [    0.000000]  modified: 0000000100000000 - 0000000128000000 (usable)
>>  [    0.000000]  modified: 0000000128000000 - 000000012c000000 (reserved)
>>
>> [    0.000000] WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing 757MB of RAM.
>>
>> [    0.000000] ------------[ cut here ]------------
>>  [    0.000000] WARNING: at arch/x86/kernel/cpu/mtrr/main.c:1227 mtrr_trim_uncached_memory+0x30c/0x35a()
>>  [    0.000000] Modules linked in:
>>  [    0.000000] Pid: 0, comm: swapper Not tainted 2.6.25-06058-ga01e035-dirty #805
>>  [    0.000000]
>>  [    0.000000] Call Trace:
>>  [    0.000000]  [<ffffffff8022d661>] warn_on_slowpath+0x51/0x78
>>  [    0.000000]  [<ffffffff8067f0dc>] x86_get_mtrr_mem_range+0x241/0x25c
>>  [    0.000000]  [<ffffffff8067d93e>] update_memory_range+0x95/0xb7
>>  [    0.000000]  [<ffffffff8067fb02>] mtrr_trim_uncached_memory+0x30c/0x35a
>>  [    0.000000]  [<ffffffff8067ca43>] setup_arch+0x280/0x4a2
>>  [    0.000000]  [<ffffffff80676aa6>] start_kernel+0x6e/0x308
>>  [    0.000000]  [<ffffffff80676432>] x86_64_start_kernel+0x241/0x24a
>>  [    0.000000]
>>  [    0.000000] ---[ end trace ca143223eefdc828 ]---
>>  [    0.000000] update e820 for mtrr -- holes
>>  [    0.000000] modified physical RAM map:
>>  [    0.000000]  modified: 0000000000000000 - 000000000009cc00 (usable)
>>  [    0.000000]  modified: 000000000009cc00 - 00000000000a0000 (reserved)
>>  [    0.000000]  modified: 00000000000e4000 - 0000000000100000 (reserved)
>>  [    0.000000]  modified: 0000000000100000 - 00000000c8000000 (usable)
>>  [    0.000000]  modified: 00000000c8000000 - 00000000cf550000 (reserved)
>>  [    0.000000]  modified: 00000000cf550000 - 00000000cf55e000 (ACPI data)
>>  [    0.000000]  modified: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
>>  [    0.000000]  modified: 00000000cf5e0000 - 00000000cf600000 (reserved)
>>  [    0.000000]  modified: 00000000fee00000 - 00000000fee01000 (reserved)
>>  [    0.000000]  modified: 00000000ffc00000 - 000000012c000000 (reserved)
>>
>> [    0.000000] Entering add_active_range(0, 0, 156) 3 entries of 256 used
>>  [    0.000000] Entering add_active_range(0, 256, 819200) 3 entries of 256 used
> 
> please try attached trim_holes_fix.patch..., it will fix the trim hole problem.

Tested but warning is still there. I try to boot with mtrr_chunk_size / mtrr_gran_size and see what I get. 

...

[    0.000000] Linux version 2.6.25-06589-gc65a350-dirty (crazy@thor) (gcc version 4.3.0 (Frugalware Linux) ) #808 SMP PREEMPT Wed Apr 30 01:37:38 CEST 2008
[    0.000000] Command line: root=/dev/sdb1 ro debug vga=0x317
[    0.000000] BIOS-provided physical RAM map:
[    0.000000]  BIOS-e820: 0000000000000000 - 000000000009cc00 (usable)
[    0.000000]  BIOS-e820: 000000000009cc00 - 00000000000a0000 (reserved)
[    0.000000]  BIOS-e820: 00000000000e4000 - 0000000000100000 (reserved)
[    0.000000]  BIOS-e820: 0000000000100000 - 00000000cf550000 (usable)
[    0.000000]  BIOS-e820: 00000000cf550000 - 00000000cf55e000 (ACPI data)
[    0.000000]  BIOS-e820: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
[    0.000000]  BIOS-e820: 00000000cf5e0000 - 00000000cf600000 (reserved)
[    0.000000]  BIOS-e820: 00000000fee00000 - 00000000fee01000 (reserved)
[    0.000000]  BIOS-e820: 00000000ffc00000 - 0000000100000000 (reserved)
[    0.000000]  BIOS-e820: 0000000100000000 - 000000012c000000 (usable)
[    0.000000] Entering add_active_range(0, 0, 156) 0 entries of 256 used
[    0.000000] Entering add_active_range(0, 256, 849232) 1 entries of 256 used
[    0.000000] Entering add_active_range(0, 1048576, 1228800) 2 entries of 256 used
[    0.000000] max_pfn_mapped = 1228800
[    0.000000] x86 PAT enabled: cpu 0, old 0x7040600070406, new 0x7010600070106
[    0.000000] After WB checking
[    0.000000] MTRR MAP PFN: 0000000000000000 - 000000000012c000
[    0.000000] After UC checking
[    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
[    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
[    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
[    0.000000] After sorting
[    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
[    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
[    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
[    0.000000] range0: 0000000000000000 - 00000000c0000000
[    0.000000] Setting variable MTRR 0, base: 0MB, range: 2048MB, type WB
[    0.000000] Setting variable MTRR 1, base: 2048MB, range: 1024MB, type WB
[    0.000000] range: 00000000c0000000 - 00000000d0000000
[    0.000000] Setting variable MTRR 2, base: 3072MB, range: 256MB, type WB
[    0.000000] hole: 00000000cc000000 - 00000000d0000000
[    0.000000] Setting variable MTRR 3, base: 3264MB, range: 64MB, type UC
[    0.000000] rangeX: 00000000d0000000 - 00000000d0000000
[    0.000000] range0: 0000000100000000 - 0000000120000000
[    0.000000] Setting variable MTRR 4, base: 4096MB, range: 512MB, type WB
[    0.000000] range: 0000000120000000 - 0000000130000000
[    0.000000] Setting variable MTRR 5, base: 4608MB, range: 256MB, type WB
[    0.000000] hole: 000000012c000000 - 0000000130000000
[    0.000000] Setting variable MTRR 6, base: 4800MB, range: 64MB, type UC
[    0.000000] DONE variable MTRRs
[    0.000000] x86 PAT enabled: cpu 0, old 0x7010600070106, new 0x7010600070106
[    0.000000] After WB checking
[    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000d0000
[    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000130000
[    0.000000] After UC checking
[    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cc000
[    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
[    0.000000] After sorting
[    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cc000
[    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
[    0.000000] WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing 757MB of RAM.
[    0.000000] ------------[ cut here ]------------
[    0.000000] WARNING: at arch/x86/kernel/cpu/mtrr/main.c:1227 mtrr_trim_uncached_memory+0x313/0x361()
[    0.000000] Modules linked in:
[    0.000000] Pid: 0, comm: swapper Not tainted 2.6.25-06589-gc65a350-dirty #808
[    0.000000]
[    0.000000] Call Trace:
[    0.000000]  [<ffffffff8022d699>] warn_on_slowpath+0x51/0x8c
[    0.000000]  [<ffffffff806810b4>] x86_get_mtrr_mem_range+0x241/0x25c
[    0.000000]  [<ffffffff8067f915>] update_memory_range+0x95/0xb7
[    0.000000]  [<ffffffff80681ae1>] mtrr_trim_uncached_memory+0x313/0x361
[    0.000000]  [<ffffffff8067ea16>] setup_arch+0x280/0x4a6
[    0.000000]  [<ffffffff80678a3d>] start_kernel+0x6e/0x30d
[    0.000000]  [<ffffffff80678432>] x86_64_start_kernel+0x241/0x24a
[    0.000000]
[    0.000000] ---[ end trace 4eaa2a86a8e2da22 ]---
[    0.000000] update e820 for mtrr -- holes
[    0.000000] modified physical RAM map:
[    0.000000]  modified: 0000000000000000 - 000000000009cc00 (usable)
[    0.000000]  modified: 000000000009cc00 - 00000000000a0000 (reserved)
[    0.000000]  modified: 00000000000e4000 - 0000000000100000 (reserved)
[    0.000000]  modified: 0000000000100000 - 00000000cc000000 (usable)
[    0.000000]  modified: 00000000cc000000 - 00000000cf550000 (reserved)
[    0.000000]  modified: 00000000cf550000 - 00000000cf55e000 (ACPI data)
[    0.000000]  modified: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
[    0.000000]  modified: 00000000cf5e0000 - 00000000cf600000 (reserved)
[    0.000000]  modified: 00000000fee00000 - 00000000fee01000 (reserved)
[    0.000000]  modified: 00000000ffc00000 - 000000012c000000 (reserved)
[    0.000000] Entering add_active_range(0, 0, 156) 3 entries of 256 used
[    0.000000] Entering add_active_range(0, 256, 835584) 3 entries of 256 used
[    0.000000] max_pfn_mapped = 1228800
[    0.000000] init_memory_mapping

...

> 
> then check if X server works well.

X seems to work fine even with that warning ..

> 
> then try mtrr_cleanup_fix.patch for... ==> address ingo request about style etc.

I will try this one after some reboots.

> 
> Thanks
> 
> Yinghai Lu
> 


Gabriel

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH 1/2] x86: mtrr cleanup for converting continuous to discrete layout v7
  2008-04-29 23:56                         ` Gabriel C
@ 2008-04-30  0:06                           ` Gabriel C
  2008-04-30  0:38                             ` Yinghai Lu
  2008-04-30  0:13                           ` Yinghai Lu
  1 sibling, 1 reply; 89+ messages in thread
From: Gabriel C @ 2008-04-30  0:06 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Andrew Morton, Ingo Molnar, H. Peter Anvin, Thomas Gleixner,
	Mika Fischer, linux-kernel@vger.kernel.org

Gabriel C wrote:
> Yinghai Lu wrote:
>> On Tue, Apr 29, 2008 at 1:25 PM, Gabriel C <nix.or.die@googlemail.com> wrote:
>>> Yinghai Lu wrote:
>>>  > On Tue, Apr 29, 2008 at 4:51 AM, Gabriel C <nix.or.die@googlemail.com> wrote:
>>>  >> Yinghai Lu wrote:
>>>  >>  > On Tue, Apr 29, 2008 at 3:30 AM, Yinghai Lu <yhlu.kernel@gmail.com> wrote:
>>>  >>  >> On Tue, Apr 29, 2008 at 2:47 AM, Gabriel C <nix.or.die@googlemail.com> wrote:
>>>  >>  >>  > Yinghai Lu wrote:
>>>  >>  >>  >  > some BIOS like to use continus MTRR layout, and may X driver can not add
>>>  >>  >>  >  > WB entries for graphical cards when 4g or more RAM installed.
>>>  >>  >>  >  >
>>>  >>  >>  >  > the patch will change MTRR to discrete.
>>>  >>  >>  >  >
>>>  >>  >>  >  > mtrr_chunk_size= could be used to have smaller continuous block to hold holes.
>>>  >>  >>  >  > default is 256m, could be set according to size of graphics card memory.
>>>  >>  >>  >  >
>>>  >>  >>  >  > v2: fix -1 for UC checking
>>>  >>  >>  >  > v3: default to disable, and need use enable_mtrr_cleanup to enable this feature
>>>  >>  >>  >  >     skip the var state change warning.
>>>  >>  >>  >  >     remove next_basek in range_to_mtrr()
>>>  >>  >>  >  > v4: correct warning mask.
>>>  >>  >>  >  > v5: CONFIG_MTRR_SANITIZER
>>>  >>  >>  >  > v6: fix 1g, 2g, 512 aligment with extra hole
>>>  >>  >>  >  > v7: gran_sizek to prevent running out of MTRRs.
>>>  >>  >>  >  >
>>>  >>  >>  >
>>>  >>  >>  >  With this version ( and patch http://lkml.org/lkml/2008/4/29/97 ) applyed on latest linus git tree
>>>  >>  >>  >  the box OOPS'es early.
>>>  >>  >>  >
>>>  >>  >>  >  Sorry I don't have time right now to write down the part of the OOPS I can see on monitor , I can try to find
>>>  >>  >>  >  some time later.
>>>  >>  >>  >
>>>  >>  >>  >  In any way OOPS'es on __free_one_page+0x191/0x21e
>>>  >>  >>
>>>  >>  >>  thanks. found one problem with hole_basek ...
>>>  >>  >>
>>>  >>  >>  will send you v8, and hope it will be last version.
>>>  >>  >
>>>  >>  > please try v8, it should get rid of the 8m entry. it need patch
>>>  >>  > http://lkml.org/lkml/2008/4/29/97 too.
>>>  >>
>>>  >>  Box does boot with v8 but now I get that warning you fixed in v2 again =):
>>>  >>
>>>  >>  ....
>>>  >>  [    0.000000] Linux version 2.6.25-06058-ga01e035-dirty (crazy@thor) (gcc version 4.3.0 (Frugalware Linux) ) #805 SMP PREEMPT Tue Apr 29 13:04:49 CEST 2008
>>>  >>  [    0.000000] Command line: root=/dev/sdb1 ro debug vga=0x317
>>>  >>  [    0.000000] BIOS-provided physical RAM map:
>>>  >>  [    0.000000]  BIOS-e820: 0000000000000000 - 000000000009cc00 (usable)
>>>  >>  [    0.000000]  BIOS-e820: 000000000009cc00 - 00000000000a0000 (reserved)
>>>  >>  [    0.000000]  BIOS-e820: 00000000000e4000 - 0000000000100000 (reserved)
>>>  >>  [    0.000000]  BIOS-e820: 0000000000100000 - 00000000cf550000 (usable)
>>>  >>  [    0.000000]  BIOS-e820: 00000000cf550000 - 00000000cf55e000 (ACPI data)
>>>  >>  [    0.000000]  BIOS-e820: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
>>>  >>  [    0.000000]  BIOS-e820: 00000000cf5e0000 - 00000000cf600000 (reserved)
>>>  >>  [    0.000000]  BIOS-e820: 00000000fee00000 - 00000000fee01000 (reserved)
>>>  >>  [    0.000000]  BIOS-e820: 00000000ffc00000 - 0000000100000000 (reserved)
>>>  >>  [    0.000000]  BIOS-e820: 0000000100000000 - 000000012c000000 (usable)
>>>  >>  [    0.000000] Entering add_active_range(0, 0, 156) 0 entries of 256 used
>>>  >>  [    0.000000] Entering add_active_range(0, 256, 849232) 1 entries of 256 used
>>>  >>  [    0.000000] Entering add_active_range(0, 1048576, 1228800) 2 entries of 256 used
>>>  >>  [    0.000000] max_pfn_mapped = 1228800
>>>  >>  [    0.000000] x86 PAT enabled: cpu 0, old 0x7040600070406, new 0x7010600070106
>>>  >>  [    0.000000] After WB checking
>>>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 000000000012c000
>>>  >>  [    0.000000] After UC checking
>>>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>>>  >>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>>>  >>  [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>>>  >>  [    0.000000] After sorting
>>>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>>>  >>  [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>>>  >>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>>>  >>  [    0.000000] range0: 0000000000000000 - 00000000c0000000
>>>  >>  [    0.000000] Setting variable MTRR 0, base: 0MB, range: 2048MB, type WB
>>>  >>  [    0.000000] Setting variable MTRR 1, base: 2048MB, range: 1024MB, type WB
>>>  >>  [    0.000000] range: 00000000c0000000 - 00000000d0000000
>>>  >>  [    0.000000] Setting variable MTRR 2, base: 3072MB, range: 256MB, type WB
>>>  >>  [    0.000000] hole: 00000000cc000000 - 00000000d0000000
>>>  >>  [    0.000000] Setting variable MTRR 3, base: 3264MB, range: 64MB, type UC
>>>  >>  [    0.000000] rangeX: 00000000d0000000 - 00000000d0000000
>>>  >>  [    0.000000] range0: 0000000100000000 - 0000000120000000
>>>  >>  [    0.000000] Setting variable MTRR 4, base: 4096MB, range: 512MB, type WB
>>>  >>  [    0.000000] range: 0000000120000000 - 0000000130000000
>>>  >>  [    0.000000] Setting variable MTRR 5, base: 4608MB, range: 256MB, type WB
>>>  >>  [    0.000000] hole: 000000012c000000 - 0000000130000000
>>>  >>  [    0.000000] Setting variable MTRR 6, base: 4800MB, range: 64MB, type UC
>>>  >>  [    0.000000] DONE variable MTRRs
>>>  >>  [    0.000000] x86 PAT enabled: cpu 0, old 0x7010600070106, new 0x7010600070106
>>>  >>  [    0.000000] After WB checking
>>>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000d0000
>>>  >>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000130000
>>>  >>  [    0.000000] After UC checking
>>>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cc000
>>>  >>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>>>  >>  [    0.000000] After sorting
>>>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cc000
>>>  >>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>>>  >>  [    0.000000] WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing 757MB of RAM.
>>>  >
>>>  > so
>>>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>>>  >>  [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>>>  >>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>>>  > ===>
>>>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cc000
>>>  >>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>>>  >
>>>  > please try
>>>
>>>  Now I have 2 warnings
>>>
>>>
>>>  > mtrr_chunk_size=512m mtrr_gran_size=128m
>>>
>>>  ...
>>>
>>>  [    0.000000] Command line: root=/dev/sdb1 ro debug vga=0x317 mtrr_chunk_size=512m mtrr_gran_size=128m 3
>>>
>>>
>>> [    0.000000] BIOS-provided physical RAM map:
>>>  [    0.000000]  BIOS-e820: 0000000000000000 - 000000000009cc00 (usable)
>>>  [    0.000000]  BIOS-e820: 000000000009cc00 - 00000000000a0000 (reserved)
>>>  [    0.000000]  BIOS-e820: 00000000000e4000 - 0000000000100000 (reserved)
>>>  [    0.000000]  BIOS-e820: 0000000000100000 - 00000000cf550000 (usable)
>>>  [    0.000000]  BIOS-e820: 00000000cf550000 - 00000000cf55e000 (ACPI data)
>>>  [    0.000000]  BIOS-e820: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
>>>  [    0.000000]  BIOS-e820: 00000000cf5e0000 - 00000000cf600000 (reserved)
>>>  [    0.000000]  BIOS-e820: 00000000fee00000 - 00000000fee01000 (reserved)
>>>  [    0.000000]  BIOS-e820: 00000000ffc00000 - 0000000100000000 (reserved)
>>>  [    0.000000]  BIOS-e820: 0000000100000000 - 000000012c000000 (usable)
>>>  [    0.000000] Entering add_active_range(0, 0, 156) 0 entries of 256 used
>>>  [    0.000000] Entering add_active_range(0, 256, 849232) 1 entries of 256 used
>>>  [    0.000000] Entering add_active_range(0, 1048576, 1228800) 2 entries of 256 used
>>>  [    0.000000] max_pfn_mapped = 1228800
>>>  [    0.000000] x86 PAT enabled: cpu 0, old 0x7040600070406, new 0x7010600070106
>>>  [    0.000000] After WB checking
>>>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 000000000012c000
>>>  [    0.000000] After UC checking
>>>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>>>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>>>  [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>>>  [    0.000000] After sorting
>>>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>>>  [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>>>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>>>  [    0.000000] range0: 0000000000000000 - 00000000c0000000
>>>  [    0.000000] Setting variable MTRR 0, base: 0MB, range: 2048MB, type WB
>>>  [    0.000000] Setting variable MTRR 1, base: 2048MB, range: 1024MB, type WB
>>>  [    0.000000] range: 00000000c0000000 - 00000000c8000000
>>>  [    0.000000] Setting variable MTRR 2, base: 3072MB, range: 128MB, type WB
>>>
>>> [    0.000000] rangeX: 00000000d0000000 - 00000000d0000000
>>>  [    0.000000] range0: 0000000100000000 - 0000000120000000
>>>  [    0.000000] Setting variable MTRR 3, base: 4096MB, range: 512MB, type WB
>>>  [    0.000000] range: 0000000120000000 - 0000000128000000
>>>  [    0.000000] Setting variable MTRR 4, base: 4608MB, range: 128MB, type WB
>>>
>>> [    0.000000] DONE variable MTRRs
>>>  [    0.000000] x86 PAT enabled: cpu 0, old 0x7010600070106, new 0x7010600070106
>>>  [    0.000000] After WB checking
>>>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000c8000
>>>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000128000
>>>
>>> [    0.000000] After UC checking
>>>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000c8000
>>>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000128000
>>>  [    0.000000] After sorting
>>>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000c8000
>>>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000128000
>>>  [    0.000000] WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing 64MB of RAM.
>>>  [    0.000000] ------------[ cut here ]------------
>>>  [    0.000000] WARNING: at arch/x86/kernel/cpu/mtrr/main.c:1206 mtrr_trim_uncached_memory+0x25b/0x35a()
>>>
>>> [    0.000000] Modules linked in:
>>>  [    0.000000] Pid: 0, comm: swapper Not tainted 2.6.25-06058-ga01e035-dirty #805
>>>  [    0.000000]
>>>  [    0.000000] Call Trace:
>>>  [    0.000000]  [<ffffffff8022d661>] warn_on_slowpath+0x51/0x78
>>>  [    0.000000]  [<ffffffff8067f0dc>] x86_get_mtrr_mem_range+0x241/0x25c
>>>  [    0.000000]  [<ffffffff8067d93e>] update_memory_range+0x95/0xb7
>>>  [    0.000000]  [<ffffffff8067fa51>] mtrr_trim_uncached_memory+0x25b/0x35a
>>>
>>> [    0.000000]  [<ffffffff8067ca43>] setup_arch+0x280/0x4a2
>>>  [    0.000000]  [<ffffffff80676aa6>] start_kernel+0x6e/0x308
>>>  [    0.000000]  [<ffffffff80676432>] x86_64_start_kernel+0x241/0x24a
>>>  [    0.000000]
>>>  [    0.000000] ---[ end trace ca143223eefdc828 ]---
>>>  [    0.000000] update e820 for mtrr -- end_pfn
>>>  [    0.000000] modified physical RAM map:
>>>  [    0.000000]  modified: 0000000000000000 - 000000000009cc00 (usable)
>>>  [    0.000000]  modified: 000000000009cc00 - 00000000000a0000 (reserved)
>>>  [    0.000000]  modified: 00000000000e4000 - 0000000000100000 (reserved)
>>>  [    0.000000]  modified: 0000000000100000 - 00000000cf550000 (usable)
>>>  [    0.000000]  modified: 00000000cf550000 - 00000000cf55e000 (ACPI data)
>>>  [    0.000000]  modified: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
>>>  [    0.000000]  modified: 00000000cf5e0000 - 00000000cf600000 (reserved)
>>>  [    0.000000]  modified: 00000000fee00000 - 00000000fee01000 (reserved)
>>>  [    0.000000]  modified: 00000000ffc00000 - 0000000100000000 (reserved)
>>>  [    0.000000]  modified: 0000000100000000 - 0000000128000000 (usable)
>>>  [    0.000000]  modified: 0000000128000000 - 000000012c000000 (reserved)
>>>
>>> [    0.000000] WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing 757MB of RAM.
>>>
>>> [    0.000000] ------------[ cut here ]------------
>>>  [    0.000000] WARNING: at arch/x86/kernel/cpu/mtrr/main.c:1227 mtrr_trim_uncached_memory+0x30c/0x35a()
>>>  [    0.000000] Modules linked in:
>>>  [    0.000000] Pid: 0, comm: swapper Not tainted 2.6.25-06058-ga01e035-dirty #805
>>>  [    0.000000]
>>>  [    0.000000] Call Trace:
>>>  [    0.000000]  [<ffffffff8022d661>] warn_on_slowpath+0x51/0x78
>>>  [    0.000000]  [<ffffffff8067f0dc>] x86_get_mtrr_mem_range+0x241/0x25c
>>>  [    0.000000]  [<ffffffff8067d93e>] update_memory_range+0x95/0xb7
>>>  [    0.000000]  [<ffffffff8067fb02>] mtrr_trim_uncached_memory+0x30c/0x35a
>>>  [    0.000000]  [<ffffffff8067ca43>] setup_arch+0x280/0x4a2
>>>  [    0.000000]  [<ffffffff80676aa6>] start_kernel+0x6e/0x308
>>>  [    0.000000]  [<ffffffff80676432>] x86_64_start_kernel+0x241/0x24a
>>>  [    0.000000]
>>>  [    0.000000] ---[ end trace ca143223eefdc828 ]---
>>>  [    0.000000] update e820 for mtrr -- holes
>>>  [    0.000000] modified physical RAM map:
>>>  [    0.000000]  modified: 0000000000000000 - 000000000009cc00 (usable)
>>>  [    0.000000]  modified: 000000000009cc00 - 00000000000a0000 (reserved)
>>>  [    0.000000]  modified: 00000000000e4000 - 0000000000100000 (reserved)
>>>  [    0.000000]  modified: 0000000000100000 - 00000000c8000000 (usable)
>>>  [    0.000000]  modified: 00000000c8000000 - 00000000cf550000 (reserved)
>>>  [    0.000000]  modified: 00000000cf550000 - 00000000cf55e000 (ACPI data)
>>>  [    0.000000]  modified: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
>>>  [    0.000000]  modified: 00000000cf5e0000 - 00000000cf600000 (reserved)
>>>  [    0.000000]  modified: 00000000fee00000 - 00000000fee01000 (reserved)
>>>  [    0.000000]  modified: 00000000ffc00000 - 000000012c000000 (reserved)
>>>
>>> [    0.000000] Entering add_active_range(0, 0, 156) 3 entries of 256 used
>>>  [    0.000000] Entering add_active_range(0, 256, 819200) 3 entries of 256 used
>> please try attached trim_holes_fix.patch..., it will fix the trim hole problem.
> 
> Tested but warning is still there. I try to boot with mtrr_chunk_size / mtrr_gran_size and see what I get. 
> 
> ...
> 
> [    0.000000] Linux version 2.6.25-06589-gc65a350-dirty (crazy@thor) (gcc version 4.3.0 (Frugalware Linux) ) #808 SMP PREEMPT Wed Apr 30 01:37:38 CEST 2008
> [    0.000000] Command line: root=/dev/sdb1 ro debug vga=0x317
> [    0.000000] BIOS-provided physical RAM map:
> [    0.000000]  BIOS-e820: 0000000000000000 - 000000000009cc00 (usable)
> [    0.000000]  BIOS-e820: 000000000009cc00 - 00000000000a0000 (reserved)
> [    0.000000]  BIOS-e820: 00000000000e4000 - 0000000000100000 (reserved)
> [    0.000000]  BIOS-e820: 0000000000100000 - 00000000cf550000 (usable)
> [    0.000000]  BIOS-e820: 00000000cf550000 - 00000000cf55e000 (ACPI data)
> [    0.000000]  BIOS-e820: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
> [    0.000000]  BIOS-e820: 00000000cf5e0000 - 00000000cf600000 (reserved)
> [    0.000000]  BIOS-e820: 00000000fee00000 - 00000000fee01000 (reserved)
> [    0.000000]  BIOS-e820: 00000000ffc00000 - 0000000100000000 (reserved)
> [    0.000000]  BIOS-e820: 0000000100000000 - 000000012c000000 (usable)
> [    0.000000] Entering add_active_range(0, 0, 156) 0 entries of 256 used
> [    0.000000] Entering add_active_range(0, 256, 849232) 1 entries of 256 used
> [    0.000000] Entering add_active_range(0, 1048576, 1228800) 2 entries of 256 used
> [    0.000000] max_pfn_mapped = 1228800
> [    0.000000] x86 PAT enabled: cpu 0, old 0x7040600070406, new 0x7010600070106
> [    0.000000] After WB checking
> [    0.000000] MTRR MAP PFN: 0000000000000000 - 000000000012c000
> [    0.000000] After UC checking
> [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
> [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
> [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
> [    0.000000] After sorting
> [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
> [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
> [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
> [    0.000000] range0: 0000000000000000 - 00000000c0000000
> [    0.000000] Setting variable MTRR 0, base: 0MB, range: 2048MB, type WB
> [    0.000000] Setting variable MTRR 1, base: 2048MB, range: 1024MB, type WB
> [    0.000000] range: 00000000c0000000 - 00000000d0000000
> [    0.000000] Setting variable MTRR 2, base: 3072MB, range: 256MB, type WB
> [    0.000000] hole: 00000000cc000000 - 00000000d0000000
> [    0.000000] Setting variable MTRR 3, base: 3264MB, range: 64MB, type UC
> [    0.000000] rangeX: 00000000d0000000 - 00000000d0000000
> [    0.000000] range0: 0000000100000000 - 0000000120000000
> [    0.000000] Setting variable MTRR 4, base: 4096MB, range: 512MB, type WB
> [    0.000000] range: 0000000120000000 - 0000000130000000
> [    0.000000] Setting variable MTRR 5, base: 4608MB, range: 256MB, type WB
> [    0.000000] hole: 000000012c000000 - 0000000130000000
> [    0.000000] Setting variable MTRR 6, base: 4800MB, range: 64MB, type UC
> [    0.000000] DONE variable MTRRs
> [    0.000000] x86 PAT enabled: cpu 0, old 0x7010600070106, new 0x7010600070106
> [    0.000000] After WB checking
> [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000d0000
> [    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000130000
> [    0.000000] After UC checking
> [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cc000
> [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
> [    0.000000] After sorting
> [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cc000
> [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
> [    0.000000] WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing 757MB of RAM.
> [    0.000000] ------------[ cut here ]------------
> [    0.000000] WARNING: at arch/x86/kernel/cpu/mtrr/main.c:1227 mtrr_trim_uncached_memory+0x313/0x361()
> [    0.000000] Modules linked in:
> [    0.000000] Pid: 0, comm: swapper Not tainted 2.6.25-06589-gc65a350-dirty #808
> [    0.000000]
> [    0.000000] Call Trace:
> [    0.000000]  [<ffffffff8022d699>] warn_on_slowpath+0x51/0x8c
> [    0.000000]  [<ffffffff806810b4>] x86_get_mtrr_mem_range+0x241/0x25c
> [    0.000000]  [<ffffffff8067f915>] update_memory_range+0x95/0xb7
> [    0.000000]  [<ffffffff80681ae1>] mtrr_trim_uncached_memory+0x313/0x361
> [    0.000000]  [<ffffffff8067ea16>] setup_arch+0x280/0x4a6
> [    0.000000]  [<ffffffff80678a3d>] start_kernel+0x6e/0x30d
> [    0.000000]  [<ffffffff80678432>] x86_64_start_kernel+0x241/0x24a
> [    0.000000]
> [    0.000000] ---[ end trace 4eaa2a86a8e2da22 ]---
> [    0.000000] update e820 for mtrr -- holes
> [    0.000000] modified physical RAM map:
> [    0.000000]  modified: 0000000000000000 - 000000000009cc00 (usable)
> [    0.000000]  modified: 000000000009cc00 - 00000000000a0000 (reserved)
> [    0.000000]  modified: 00000000000e4000 - 0000000000100000 (reserved)
> [    0.000000]  modified: 0000000000100000 - 00000000cc000000 (usable)
> [    0.000000]  modified: 00000000cc000000 - 00000000cf550000 (reserved)
> [    0.000000]  modified: 00000000cf550000 - 00000000cf55e000 (ACPI data)
> [    0.000000]  modified: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
> [    0.000000]  modified: 00000000cf5e0000 - 00000000cf600000 (reserved)
> [    0.000000]  modified: 00000000fee00000 - 00000000fee01000 (reserved)
> [    0.000000]  modified: 00000000ffc00000 - 000000012c000000 (reserved)
> [    0.000000] Entering add_active_range(0, 0, 156) 3 entries of 256 used
> [    0.000000] Entering add_active_range(0, 256, 835584) 3 entries of 256 used
> [    0.000000] max_pfn_mapped = 1228800
> [    0.000000] init_memory_mapping
> 
> ...
> 

with mtrr_chunk_size=256m mtrr_gran_size=128m I'm getting :

...

[    0.000000] BIOS-provided physical RAM map:
[    0.000000]  BIOS-e820: 0000000000000000 - 000000000009cc00 (usable)
[    0.000000]  BIOS-e820: 000000000009cc00 - 00000000000a0000 (reserved)
[    0.000000]  BIOS-e820: 00000000000e4000 - 0000000000100000 (reserved)
[    0.000000]  BIOS-e820: 0000000000100000 - 00000000cf550000 (usable)
[    0.000000]  BIOS-e820: 00000000cf550000 - 00000000cf55e000 (ACPI data)
[    0.000000]  BIOS-e820: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
[    0.000000]  BIOS-e820: 00000000cf5e0000 - 00000000cf600000 (reserved)
[    0.000000]  BIOS-e820: 00000000fee00000 - 00000000fee01000 (reserved)
[    0.000000]  BIOS-e820: 00000000ffc00000 - 0000000100000000 (reserved)
[    0.000000]  BIOS-e820: 0000000100000000 - 000000012c000000 (usable)
[    0.000000] Entering add_active_range(0, 0, 156) 0 entries of 256 used
[    0.000000] Entering add_active_range(0, 256, 849232) 1 entries of 256 used
[    0.000000] Entering add_active_range(0, 1048576, 1228800) 2 entries of 256 used
[    0.000000] max_pfn_mapped = 1228800
[    0.000000] x86 PAT enabled: cpu 0, old 0x7040600070406, new 0x7010600070106
[    0.000000] After WB checking
[    0.000000] MTRR MAP PFN: 0000000000000000 - 000000000012c000
[    0.000000] After UC checking
[    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
[    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
[    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
[    0.000000] After sorting
[    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
[    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
[    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
[    0.000000] range0: 0000000000000000 - 00000000c0000000
[    0.000000] Setting variable MTRR 0, base: 0MB, range: 2048MB, type WB
[    0.000000] Setting variable MTRR 1, base: 2048MB, range: 1024MB, type WB
[    0.000000] range: 00000000c0000000 - 00000000c8000000
[    0.000000] Setting variable MTRR 2, base: 3072MB, range: 128MB, type WB
[    0.000000] rangeX: 00000000d0000000 - 00000000d0000000
[    0.000000] range0: 0000000100000000 - 0000000120000000
[    0.000000] Setting variable MTRR 3, base: 4096MB, range: 512MB, type WB
[    0.000000] range: 0000000120000000 - 0000000128000000
[    0.000000] Setting variable MTRR 4, base: 4608MB, range: 128MB, type WB
[    0.000000] DONE variable MTRRs
[    0.000000] x86 PAT enabled: cpu 0, old 0x7010600070106, new 0x7010600070106
[    0.000000] After WB checking
[    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000c8000
[    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000128000
[    0.000000] After UC checking
[    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000c8000
[    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000128000
[    0.000000] After sorting
[    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000c8000
[    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000128000
[    0.000000] WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing 64MB of RAM.
[    0.000000] ------------[ cut here ]------------
[    0.000000] WARNING: at arch/x86/kernel/cpu/mtrr/main.c:1206 mtrr_trim_uncached_memory+0x25b/0x361()
[    0.000000] Modules linked in:
[    0.000000] Pid: 0, comm: swapper Not tainted 2.6.25-06589-gc65a350-dirty #808
[    0.000000]
[    0.000000] Call Trace:
[    0.000000]  [<ffffffff8022d699>] warn_on_slowpath+0x51/0x8c
[    0.000000]  [<ffffffff806810b4>] x86_get_mtrr_mem_range+0x241/0x25c
[    0.000000]  [<ffffffff8067f915>] update_memory_range+0x95/0xb7
[    0.000000]  [<ffffffff80681a29>] mtrr_trim_uncached_memory+0x25b/0x361
[    0.000000]  [<ffffffff8067ea16>] setup_arch+0x280/0x4a6
[    0.000000]  [<ffffffff80678a3d>] start_kernel+0x6e/0x30d
[    0.000000]  [<ffffffff80678432>] x86_64_start_kernel+0x241/0x24a
[    0.000000]
[    0.000000] ---[ end trace 4eaa2a86a8e2da22 ]---
[    0.000000] update e820 for mtrr -- end_pfn
[    0.000000] modified physical RAM map:
[    0.000000]  modified: 0000000000000000 - 000000000009cc00 (usable)
[    0.000000]  modified: 000000000009cc00 - 00000000000a0000 (reserved)
[    0.000000]  modified: 00000000000e4000 - 0000000000100000 (reserved)
[    0.000000]  modified: 0000000000100000 - 00000000cf550000 (usable)
[    0.000000]  modified: 00000000cf550000 - 00000000cf55e000 (ACPI data)
[    0.000000]  modified: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
[    0.000000]  modified: 00000000cf5e0000 - 00000000cf600000 (reserved)
[    0.000000]  modified: 00000000fee00000 - 00000000fee01000 (reserved)
[    0.000000]  modified: 00000000ffc00000 - 0000000100000000 (reserved)
[    0.000000]  modified: 0000000100000000 - 0000000128000000 (usable)
[    0.000000]  modified: 0000000128000000 - 000000012c000000 (reserved)
[    0.000000] WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing 757MB of RAM.
[    0.000000] ------------[ cut here ]------------
[    0.000000] WARNING: at arch/x86/kernel/cpu/mtrr/main.c:1227 mtrr_trim_uncached_memory+0x313/0x361()
[    0.000000] Modules linked in:
[    0.000000] Pid: 0, comm: swapper Tainted: G        W 2.6.25-06589-gc65a350-dirty #808
[    0.000000]
[    0.000000] Call Trace:
[    0.000000]  [<ffffffff8022d699>] warn_on_slowpath+0x51/0x8c
[    0.000000]  [<ffffffff806810b4>] x86_get_mtrr_mem_range+0x241/0x25c
[    0.000000]  [<ffffffff8067f915>] update_memory_range+0x95/0xb7
[    0.000000]  [<ffffffff80681ae1>] mtrr_trim_uncached_memory+0x313/0x361
[    0.000000]  [<ffffffff8067ea16>] setup_arch+0x280/0x4a6
[    0.000000]  [<ffffffff80678a3d>] start_kernel+0x6e/0x30d
[    0.000000]  [<ffffffff80678432>] x86_64_start_kernel+0x241/0x24a
[    0.000000]
[    0.000000] ---[ end trace 4eaa2a86a8e2da22 ]---
[    0.000000] update e820 for mtrr -- holes
[    0.000000] modified physical RAM map:
[    0.000000]  modified: 0000000000000000 - 000000000009cc00 (usable)
[    0.000000]  modified: 000000000009cc00 - 00000000000a0000 (reserved)
[    0.000000]  modified: 00000000000e4000 - 0000000000100000 (reserved)
[    0.000000]  modified: 0000000000100000 - 00000000c8000000 (usable)
[    0.000000]  modified: 00000000c8000000 - 00000000cf550000 (reserved)
[    0.000000]  modified: 00000000cf550000 - 00000000cf55e000 (ACPI data)
[    0.000000]  modified: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
[    0.000000]  modified: 00000000cf5e0000 - 00000000cf600000 (reserved)
[    0.000000]  modified: 00000000fee00000 - 00000000fee01000 (reserved)
[    0.000000]  modified: 00000000ffc00000 - 000000012c000000 (reserved)
[    0.000000] Entering add_active_range(0, 0, 156) 3 entries of 256 used
[    0.000000] Entering add_active_range(0, 256, 819200) 3 entries of 256 used
[    0.000000] max_pfn_mapped = 1228800
[    0.000000] init_memory_mapping                                                                                                                          

...

Gabriel

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH 1/2] x86: mtrr cleanup for converting continuous to discrete layout v7
  2008-04-29 23:56                         ` Gabriel C
  2008-04-30  0:06                           ` Gabriel C
@ 2008-04-30  0:13                           ` Yinghai Lu
  1 sibling, 0 replies; 89+ messages in thread
From: Yinghai Lu @ 2008-04-30  0:13 UTC (permalink / raw)
  To: Gabriel C
  Cc: Andrew Morton, Ingo Molnar, H. Peter Anvin, Thomas Gleixner,
	Mika Fischer, linux-kernel@vger.kernel.org

On Tue, Apr 29, 2008 at 4:56 PM, Gabriel C <nix.or.die@googlemail.com> wrote:
>
> Yinghai Lu wrote:
>  > On Tue, Apr 29, 2008 at 1:25 PM, Gabriel C <nix.or.die@googlemail.com> wrote:
>  >> Yinghai Lu wrote:
>  >>  > On Tue, Apr 29, 2008 at 4:51 AM, Gabriel C <nix.or.die@googlemail.com> wrote:
>  >>  >> Yinghai Lu wrote:
>  >>  >>  > On Tue, Apr 29, 2008 at 3:30 AM, Yinghai Lu <yhlu.kernel@gmail.com> wrote:
>  >>  >>  >> On Tue, Apr 29, 2008 at 2:47 AM, Gabriel C <nix.or.die@googlemail.com> wrote:
>  >>  >>  >>  > Yinghai Lu wrote:
>  >>  >>  >>  >  > some BIOS like to use continus MTRR layout, and may X driver can not add
>  >>  >>  >>  >  > WB entries for graphical cards when 4g or more RAM installed.
>  >>  >>  >>  >  >
>  >>  >>  >>  >  > the patch will change MTRR to discrete.
>  >>  >>  >>  >  >
>  >>  >>  >>  >  > mtrr_chunk_size= could be used to have smaller continuous block to hold holes.
>  >>  >>  >>  >  > default is 256m, could be set according to size of graphics card memory.
>  >>  >>  >>  >  >
>  >>  >>  >>  >  > v2: fix -1 for UC checking
>  >>  >>  >>  >  > v3: default to disable, and need use enable_mtrr_cleanup to enable this feature
>  >>  >>  >>  >  >     skip the var state change warning.
>  >>  >>  >>  >  >     remove next_basek in range_to_mtrr()
>  >>  >>  >>  >  > v4: correct warning mask.
>  >>  >>  >>  >  > v5: CONFIG_MTRR_SANITIZER
>  >>  >>  >>  >  > v6: fix 1g, 2g, 512 aligment with extra hole
>  >>  >>  >>  >  > v7: gran_sizek to prevent running out of MTRRs.
>  >>  >>  >>  >  >
>  >>  >>  >>  >
>  >>  >>  >>  >  With this version ( and patch http://lkml.org/lkml/2008/4/29/97 ) applyed on latest linus git tree
>  >>  >>  >>  >  the box OOPS'es early.
>  >>  >>  >>  >
>  >>  >>  >>  >  Sorry I don't have time right now to write down the part of the OOPS I can see on monitor , I can try to find
>  >>  >>  >>  >  some time later.
>  >>  >>  >>  >
>  >>  >>  >>  >  In any way OOPS'es on __free_one_page+0x191/0x21e
>  >>  >>  >>
>  >>  >>  >>  thanks. found one problem with hole_basek ...
>  >>  >>  >>
>  >>  >>  >>  will send you v8, and hope it will be last version.
>  >>  >>  >
>  >>  >>  > please try v8, it should get rid of the 8m entry. it need patch
>  >>  >>  > http://lkml.org/lkml/2008/4/29/97 too.
>  >>  >>
>  >>  >>  Box does boot with v8 but now I get that warning you fixed in v2 again =):
>  >>  >>
>  >>  >>  ....
>  >>  >>  [    0.000000] Linux version 2.6.25-06058-ga01e035-dirty (crazy@thor) (gcc version 4.3.0 (Frugalware Linux) ) #805 SMP PREEMPT Tue Apr 29 13:04:49 CEST 2008
>  >>  >>  [    0.000000] Command line: root=/dev/sdb1 ro debug vga=0x317
>  >>  >>  [    0.000000] BIOS-provided physical RAM map:
>  >>  >>  [    0.000000]  BIOS-e820: 0000000000000000 - 000000000009cc00 (usable)
>  >>  >>  [    0.000000]  BIOS-e820: 000000000009cc00 - 00000000000a0000 (reserved)
>  >>  >>  [    0.000000]  BIOS-e820: 00000000000e4000 - 0000000000100000 (reserved)
>  >>  >>  [    0.000000]  BIOS-e820: 0000000000100000 - 00000000cf550000 (usable)
>  >>  >>  [    0.000000]  BIOS-e820: 00000000cf550000 - 00000000cf55e000 (ACPI data)
>  >>  >>  [    0.000000]  BIOS-e820: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
>  >>  >>  [    0.000000]  BIOS-e820: 00000000cf5e0000 - 00000000cf600000 (reserved)
>  >>  >>  [    0.000000]  BIOS-e820: 00000000fee00000 - 00000000fee01000 (reserved)
>  >>  >>  [    0.000000]  BIOS-e820: 00000000ffc00000 - 0000000100000000 (reserved)
>  >>  >>  [    0.000000]  BIOS-e820: 0000000100000000 - 000000012c000000 (usable)
>  >>  >>  [    0.000000] Entering add_active_range(0, 0, 156) 0 entries of 256 used
>  >>  >>  [    0.000000] Entering add_active_range(0, 256, 849232) 1 entries of 256 used
>  >>  >>  [    0.000000] Entering add_active_range(0, 1048576, 1228800) 2 entries of 256 used
>  >>  >>  [    0.000000] max_pfn_mapped = 1228800
>  >>  >>  [    0.000000] x86 PAT enabled: cpu 0, old 0x7040600070406, new 0x7010600070106
>  >>  >>  [    0.000000] After WB checking
>  >>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 000000000012c000
>  >>  >>  [    0.000000] After UC checking
>  >>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>  >>  >>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>  >>  >>  [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>  >>  >>  [    0.000000] After sorting
>  >>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>  >>  >>  [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>  >>  >>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>  >>  >>  [    0.000000] range0: 0000000000000000 - 00000000c0000000
>  >>  >>  [    0.000000] Setting variable MTRR 0, base: 0MB, range: 2048MB, type WB
>  >>  >>  [    0.000000] Setting variable MTRR 1, base: 2048MB, range: 1024MB, type WB
>  >>  >>  [    0.000000] range: 00000000c0000000 - 00000000d0000000
>  >>  >>  [    0.000000] Setting variable MTRR 2, base: 3072MB, range: 256MB, type WB
>  >>  >>  [    0.000000] hole: 00000000cc000000 - 00000000d0000000
>  >>  >>  [    0.000000] Setting variable MTRR 3, base: 3264MB, range: 64MB, type UC
>  >>  >>  [    0.000000] rangeX: 00000000d0000000 - 00000000d0000000
>  >>  >>  [    0.000000] range0: 0000000100000000 - 0000000120000000
>  >>  >>  [    0.000000] Setting variable MTRR 4, base: 4096MB, range: 512MB, type WB
>  >>  >>  [    0.000000] range: 0000000120000000 - 0000000130000000
>  >>  >>  [    0.000000] Setting variable MTRR 5, base: 4608MB, range: 256MB, type WB
>  >>  >>  [    0.000000] hole: 000000012c000000 - 0000000130000000
>  >>  >>  [    0.000000] Setting variable MTRR 6, base: 4800MB, range: 64MB, type UC
>  >>  >>  [    0.000000] DONE variable MTRRs
>  >>  >>  [    0.000000] x86 PAT enabled: cpu 0, old 0x7010600070106, new 0x7010600070106
>  >>  >>  [    0.000000] After WB checking
>  >>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000d0000
>  >>  >>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000130000
>  >>  >>  [    0.000000] After UC checking
>  >>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cc000
>  >>  >>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>  >>  >>  [    0.000000] After sorting
>  >>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cc000
>  >>  >>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>  >>  >>  [    0.000000] WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing 757MB of RAM.
>  >>  >
>  >>  > so
>  >>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>  >>  >>  [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>  >>  >>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>  >>  > ===>
>  >>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cc000
>  >>  >>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>  >>  >
>  >>  > please try
>  >>
>  >>  Now I have 2 warnings
>  >>
>  >>
>  >>  > mtrr_chunk_size=512m mtrr_gran_size=128m
>  >>
>  >>  ...
>  >>
>  >>  [    0.000000] Command line: root=/dev/sdb1 ro debug vga=0x317 mtrr_chunk_size=512m mtrr_gran_size=128m 3
>  >>
>  >>
>  >> [    0.000000] BIOS-provided physical RAM map:
>  >>  [    0.000000]  BIOS-e820: 0000000000000000 - 000000000009cc00 (usable)
>  >>  [    0.000000]  BIOS-e820: 000000000009cc00 - 00000000000a0000 (reserved)
>  >>  [    0.000000]  BIOS-e820: 00000000000e4000 - 0000000000100000 (reserved)
>  >>  [    0.000000]  BIOS-e820: 0000000000100000 - 00000000cf550000 (usable)
>  >>  [    0.000000]  BIOS-e820: 00000000cf550000 - 00000000cf55e000 (ACPI data)
>  >>  [    0.000000]  BIOS-e820: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
>  >>  [    0.000000]  BIOS-e820: 00000000cf5e0000 - 00000000cf600000 (reserved)
>  >>  [    0.000000]  BIOS-e820: 00000000fee00000 - 00000000fee01000 (reserved)
>  >>  [    0.000000]  BIOS-e820: 00000000ffc00000 - 0000000100000000 (reserved)
>  >>  [    0.000000]  BIOS-e820: 0000000100000000 - 000000012c000000 (usable)
>  >>  [    0.000000] Entering add_active_range(0, 0, 156) 0 entries of 256 used
>  >>  [    0.000000] Entering add_active_range(0, 256, 849232) 1 entries of 256 used
>  >>  [    0.000000] Entering add_active_range(0, 1048576, 1228800) 2 entries of 256 used
>  >>  [    0.000000] max_pfn_mapped = 1228800
>  >>  [    0.000000] x86 PAT enabled: cpu 0, old 0x7040600070406, new 0x7010600070106
>  >>  [    0.000000] After WB checking
>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 000000000012c000
>  >>  [    0.000000] After UC checking
>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>  >>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>  >>  [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>  >>  [    0.000000] After sorting
>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>  >>  [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>  >>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>  >>  [    0.000000] range0: 0000000000000000 - 00000000c0000000
>  >>  [    0.000000] Setting variable MTRR 0, base: 0MB, range: 2048MB, type WB
>  >>  [    0.000000] Setting variable MTRR 1, base: 2048MB, range: 1024MB, type WB
>  >>  [    0.000000] range: 00000000c0000000 - 00000000c8000000
>  >>  [    0.000000] Setting variable MTRR 2, base: 3072MB, range: 128MB, type WB
>  >>
>  >> [    0.000000] rangeX: 00000000d0000000 - 00000000d0000000
>  >>  [    0.000000] range0: 0000000100000000 - 0000000120000000
>  >>  [    0.000000] Setting variable MTRR 3, base: 4096MB, range: 512MB, type WB
>  >>  [    0.000000] range: 0000000120000000 - 0000000128000000
>  >>  [    0.000000] Setting variable MTRR 4, base: 4608MB, range: 128MB, type WB
>  >>
>  >> [    0.000000] DONE variable MTRRs
>  >>  [    0.000000] x86 PAT enabled: cpu 0, old 0x7010600070106, new 0x7010600070106
>  >>  [    0.000000] After WB checking
>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000c8000
>  >>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000128000
>  >>
>  >> [    0.000000] After UC checking
>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000c8000
>  >>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000128000
>  >>  [    0.000000] After sorting
>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000c8000
>  >>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000128000
>  >>  [    0.000000] WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing 64MB of RAM.
>  >>  [    0.000000] ------------[ cut here ]------------
>  >>  [    0.000000] WARNING: at arch/x86/kernel/cpu/mtrr/main.c:1206 mtrr_trim_uncached_memory+0x25b/0x35a()
>  >>
>  >> [    0.000000] Modules linked in:
>  >>  [    0.000000] Pid: 0, comm: swapper Not tainted 2.6.25-06058-ga01e035-dirty #805
>  >>  [    0.000000]
>  >>  [    0.000000] Call Trace:
>  >>  [    0.000000]  [<ffffffff8022d661>] warn_on_slowpath+0x51/0x78
>  >>  [    0.000000]  [<ffffffff8067f0dc>] x86_get_mtrr_mem_range+0x241/0x25c
>  >>  [    0.000000]  [<ffffffff8067d93e>] update_memory_range+0x95/0xb7
>  >>  [    0.000000]  [<ffffffff8067fa51>] mtrr_trim_uncached_memory+0x25b/0x35a
>  >>
>  >> [    0.000000]  [<ffffffff8067ca43>] setup_arch+0x280/0x4a2
>  >>  [    0.000000]  [<ffffffff80676aa6>] start_kernel+0x6e/0x308
>  >>  [    0.000000]  [<ffffffff80676432>] x86_64_start_kernel+0x241/0x24a
>  >>  [    0.000000]
>  >>  [    0.000000] ---[ end trace ca143223eefdc828 ]---
>  >>  [    0.000000] update e820 for mtrr -- end_pfn
>  >>  [    0.000000] modified physical RAM map:
>  >>  [    0.000000]  modified: 0000000000000000 - 000000000009cc00 (usable)
>  >>  [    0.000000]  modified: 000000000009cc00 - 00000000000a0000 (reserved)
>  >>  [    0.000000]  modified: 00000000000e4000 - 0000000000100000 (reserved)
>  >>  [    0.000000]  modified: 0000000000100000 - 00000000cf550000 (usable)
>  >>  [    0.000000]  modified: 00000000cf550000 - 00000000cf55e000 (ACPI data)
>  >>  [    0.000000]  modified: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
>  >>  [    0.000000]  modified: 00000000cf5e0000 - 00000000cf600000 (reserved)
>  >>  [    0.000000]  modified: 00000000fee00000 - 00000000fee01000 (reserved)
>  >>  [    0.000000]  modified: 00000000ffc00000 - 0000000100000000 (reserved)
>  >>  [    0.000000]  modified: 0000000100000000 - 0000000128000000 (usable)
>  >>  [    0.000000]  modified: 0000000128000000 - 000000012c000000 (reserved)
>  >>
>  >> [    0.000000] WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing 757MB of RAM.
>  >>
>  >> [    0.000000] ------------[ cut here ]------------
>  >>  [    0.000000] WARNING: at arch/x86/kernel/cpu/mtrr/main.c:1227 mtrr_trim_uncached_memory+0x30c/0x35a()
>  >>  [    0.000000] Modules linked in:
>  >>  [    0.000000] Pid: 0, comm: swapper Not tainted 2.6.25-06058-ga01e035-dirty #805
>  >>  [    0.000000]
>  >>  [    0.000000] Call Trace:
>  >>  [    0.000000]  [<ffffffff8022d661>] warn_on_slowpath+0x51/0x78
>  >>  [    0.000000]  [<ffffffff8067f0dc>] x86_get_mtrr_mem_range+0x241/0x25c
>  >>  [    0.000000]  [<ffffffff8067d93e>] update_memory_range+0x95/0xb7
>  >>  [    0.000000]  [<ffffffff8067fb02>] mtrr_trim_uncached_memory+0x30c/0x35a
>  >>  [    0.000000]  [<ffffffff8067ca43>] setup_arch+0x280/0x4a2
>  >>  [    0.000000]  [<ffffffff80676aa6>] start_kernel+0x6e/0x308
>  >>  [    0.000000]  [<ffffffff80676432>] x86_64_start_kernel+0x241/0x24a
>  >>  [    0.000000]
>  >>  [    0.000000] ---[ end trace ca143223eefdc828 ]---
>  >>  [    0.000000] update e820 for mtrr -- holes
>  >>  [    0.000000] modified physical RAM map:
>  >>  [    0.000000]  modified: 0000000000000000 - 000000000009cc00 (usable)
>  >>  [    0.000000]  modified: 000000000009cc00 - 00000000000a0000 (reserved)
>  >>  [    0.000000]  modified: 00000000000e4000 - 0000000000100000 (reserved)
>  >>  [    0.000000]  modified: 0000000000100000 - 00000000c8000000 (usable)
>  >>  [    0.000000]  modified: 00000000c8000000 - 00000000cf550000 (reserved)
>  >>  [    0.000000]  modified: 00000000cf550000 - 00000000cf55e000 (ACPI data)
>  >>  [    0.000000]  modified: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
>  >>  [    0.000000]  modified: 00000000cf5e0000 - 00000000cf600000 (reserved)
>  >>  [    0.000000]  modified: 00000000fee00000 - 00000000fee01000 (reserved)
>  >>  [    0.000000]  modified: 00000000ffc00000 - 000000012c000000 (reserved)
>  >>
>  >> [    0.000000] Entering add_active_range(0, 0, 156) 3 entries of 256 used
>  >>  [    0.000000] Entering add_active_range(0, 256, 819200) 3 entries of 256 used
>  >
>  > please try attached trim_holes_fix.patch..., it will fix the trim hole problem.
>
>  Tested but warning is still there. I try to boot with mtrr_chunk_size / mtrr_gran_size and see what I get.

warning is OK, we can remove that if we are using.

YH

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH 1/2] x86: mtrr cleanup for converting continuous to discrete layout v7
  2008-04-30  0:06                           ` Gabriel C
@ 2008-04-30  0:38                             ` Yinghai Lu
  2008-04-30  1:02                               ` Gabriel C
  0 siblings, 1 reply; 89+ messages in thread
From: Yinghai Lu @ 2008-04-30  0:38 UTC (permalink / raw)
  To: Gabriel C
  Cc: Andrew Morton, Ingo Molnar, H. Peter Anvin, Thomas Gleixner,
	Mika Fischer, linux-kernel@vger.kernel.org

[-- Attachment #1: Type: text/plain, Size: 28208 bytes --]

On Tue, Apr 29, 2008 at 5:06 PM, Gabriel C <nix.or.die@googlemail.com> wrote:
>
> Gabriel C wrote:
>  > Yinghai Lu wrote:
>  >> On Tue, Apr 29, 2008 at 1:25 PM, Gabriel C <nix.or.die@googlemail.com> wrote:
>  >>> Yinghai Lu wrote:
>  >>>  > On Tue, Apr 29, 2008 at 4:51 AM, Gabriel C <nix.or.die@googlemail.com> wrote:
>  >>>  >> Yinghai Lu wrote:
>  >>>  >>  > On Tue, Apr 29, 2008 at 3:30 AM, Yinghai Lu <yhlu.kernel@gmail.com> wrote:
>  >>>  >>  >> On Tue, Apr 29, 2008 at 2:47 AM, Gabriel C <nix.or.die@googlemail.com> wrote:
>  >>>  >>  >>  > Yinghai Lu wrote:
>  >>>  >>  >>  >  > some BIOS like to use continus MTRR layout, and may X driver can not add
>  >>>  >>  >>  >  > WB entries for graphical cards when 4g or more RAM installed.
>  >>>  >>  >>  >  >
>  >>>  >>  >>  >  > the patch will change MTRR to discrete.
>  >>>  >>  >>  >  >
>  >>>  >>  >>  >  > mtrr_chunk_size= could be used to have smaller continuous block to hold holes.
>  >>>  >>  >>  >  > default is 256m, could be set according to size of graphics card memory.
>  >>>  >>  >>  >  >
>  >>>  >>  >>  >  > v2: fix -1 for UC checking
>  >>>  >>  >>  >  > v3: default to disable, and need use enable_mtrr_cleanup to enable this feature
>  >>>  >>  >>  >  >     skip the var state change warning.
>  >>>  >>  >>  >  >     remove next_basek in range_to_mtrr()
>  >>>  >>  >>  >  > v4: correct warning mask.
>  >>>  >>  >>  >  > v5: CONFIG_MTRR_SANITIZER
>  >>>  >>  >>  >  > v6: fix 1g, 2g, 512 aligment with extra hole
>  >>>  >>  >>  >  > v7: gran_sizek to prevent running out of MTRRs.
>  >>>  >>  >>  >  >
>  >>>  >>  >>  >
>  >>>  >>  >>  >  With this version ( and patch http://lkml.org/lkml/2008/4/29/97 ) applyed on latest linus git tree
>  >>>  >>  >>  >  the box OOPS'es early.
>  >>>  >>  >>  >
>  >>>  >>  >>  >  Sorry I don't have time right now to write down the part of the OOPS I can see on monitor , I can try to find
>  >>>  >>  >>  >  some time later.
>  >>>  >>  >>  >
>  >>>  >>  >>  >  In any way OOPS'es on __free_one_page+0x191/0x21e
>  >>>  >>  >>
>  >>>  >>  >>  thanks. found one problem with hole_basek ...
>  >>>  >>  >>
>  >>>  >>  >>  will send you v8, and hope it will be last version.
>  >>>  >>  >
>  >>>  >>  > please try v8, it should get rid of the 8m entry. it need patch
>  >>>  >>  > http://lkml.org/lkml/2008/4/29/97 too.
>  >>>  >>
>  >>>  >>  Box does boot with v8 but now I get that warning you fixed in v2 again =):
>  >>>  >>
>  >>>  >>  ....
>  >>>  >>  [    0.000000] Linux version 2.6.25-06058-ga01e035-dirty (crazy@thor) (gcc version 4.3.0 (Frugalware Linux) ) #805 SMP PREEMPT Tue Apr 29 13:04:49 CEST 2008
>  >>>  >>  [    0.000000] Command line: root=/dev/sdb1 ro debug vga=0x317
>  >>>  >>  [    0.000000] BIOS-provided physical RAM map:
>  >>>  >>  [    0.000000]  BIOS-e820: 0000000000000000 - 000000000009cc00 (usable)
>  >>>  >>  [    0.000000]  BIOS-e820: 000000000009cc00 - 00000000000a0000 (reserved)
>  >>>  >>  [    0.000000]  BIOS-e820: 00000000000e4000 - 0000000000100000 (reserved)
>  >>>  >>  [    0.000000]  BIOS-e820: 0000000000100000 - 00000000cf550000 (usable)
>  >>>  >>  [    0.000000]  BIOS-e820: 00000000cf550000 - 00000000cf55e000 (ACPI data)
>  >>>  >>  [    0.000000]  BIOS-e820: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
>  >>>  >>  [    0.000000]  BIOS-e820: 00000000cf5e0000 - 00000000cf600000 (reserved)
>  >>>  >>  [    0.000000]  BIOS-e820: 00000000fee00000 - 00000000fee01000 (reserved)
>  >>>  >>  [    0.000000]  BIOS-e820: 00000000ffc00000 - 0000000100000000 (reserved)
>  >>>  >>  [    0.000000]  BIOS-e820: 0000000100000000 - 000000012c000000 (usable)
>  >>>  >>  [    0.000000] Entering add_active_range(0, 0, 156) 0 entries of 256 used
>  >>>  >>  [    0.000000] Entering add_active_range(0, 256, 849232) 1 entries of 256 used
>  >>>  >>  [    0.000000] Entering add_active_range(0, 1048576, 1228800) 2 entries of 256 used
>  >>>  >>  [    0.000000] max_pfn_mapped = 1228800
>  >>>  >>  [    0.000000] x86 PAT enabled: cpu 0, old 0x7040600070406, new 0x7010600070106
>  >>>  >>  [    0.000000] After WB checking
>  >>>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 000000000012c000
>  >>>  >>  [    0.000000] After UC checking
>  >>>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>  >>>  >>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>  >>>  >>  [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>  >>>  >>  [    0.000000] After sorting
>  >>>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>  >>>  >>  [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>  >>>  >>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>  >>>  >>  [    0.000000] range0: 0000000000000000 - 00000000c0000000
>  >>>  >>  [    0.000000] Setting variable MTRR 0, base: 0MB, range: 2048MB, type WB
>  >>>  >>  [    0.000000] Setting variable MTRR 1, base: 2048MB, range: 1024MB, type WB
>  >>>  >>  [    0.000000] range: 00000000c0000000 - 00000000d0000000
>  >>>  >>  [    0.000000] Setting variable MTRR 2, base: 3072MB, range: 256MB, type WB
>  >>>  >>  [    0.000000] hole: 00000000cc000000 - 00000000d0000000
>  >>>  >>  [    0.000000] Setting variable MTRR 3, base: 3264MB, range: 64MB, type UC
>  >>>  >>  [    0.000000] rangeX: 00000000d0000000 - 00000000d0000000
>  >>>  >>  [    0.000000] range0: 0000000100000000 - 0000000120000000
>  >>>  >>  [    0.000000] Setting variable MTRR 4, base: 4096MB, range: 512MB, type WB
>  >>>  >>  [    0.000000] range: 0000000120000000 - 0000000130000000
>  >>>  >>  [    0.000000] Setting variable MTRR 5, base: 4608MB, range: 256MB, type WB
>  >>>  >>  [    0.000000] hole: 000000012c000000 - 0000000130000000
>  >>>  >>  [    0.000000] Setting variable MTRR 6, base: 4800MB, range: 64MB, type UC
>  >>>  >>  [    0.000000] DONE variable MTRRs
>  >>>  >>  [    0.000000] x86 PAT enabled: cpu 0, old 0x7010600070106, new 0x7010600070106
>  >>>  >>  [    0.000000] After WB checking
>  >>>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000d0000
>  >>>  >>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000130000
>  >>>  >>  [    0.000000] After UC checking
>  >>>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cc000
>  >>>  >>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>  >>>  >>  [    0.000000] After sorting
>  >>>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cc000
>  >>>  >>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>  >>>  >>  [    0.000000] WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing 757MB of RAM.
>  >>>  >
>  >>>  > so
>  >>>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>  >>>  >>  [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>  >>>  >>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>  >>>  > ===>
>  >>>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cc000
>  >>>  >>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>  >>>  >
>  >>>  > please try
>  >>>
>  >>>  Now I have 2 warnings
>  >>>
>  >>>
>  >>>  > mtrr_chunk_size=512m mtrr_gran_size=128m
>  >>>
>  >>>  ...
>  >>>
>  >>>  [    0.000000] Command line: root=/dev/sdb1 ro debug vga=0x317 mtrr_chunk_size=512m mtrr_gran_size=128m 3
>  >>>
>  >>>
>  >>> [    0.000000] BIOS-provided physical RAM map:
>  >>>  [    0.000000]  BIOS-e820: 0000000000000000 - 000000000009cc00 (usable)
>  >>>  [    0.000000]  BIOS-e820: 000000000009cc00 - 00000000000a0000 (reserved)
>  >>>  [    0.000000]  BIOS-e820: 00000000000e4000 - 0000000000100000 (reserved)
>  >>>  [    0.000000]  BIOS-e820: 0000000000100000 - 00000000cf550000 (usable)
>  >>>  [    0.000000]  BIOS-e820: 00000000cf550000 - 00000000cf55e000 (ACPI data)
>  >>>  [    0.000000]  BIOS-e820: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
>  >>>  [    0.000000]  BIOS-e820: 00000000cf5e0000 - 00000000cf600000 (reserved)
>  >>>  [    0.000000]  BIOS-e820: 00000000fee00000 - 00000000fee01000 (reserved)
>  >>>  [    0.000000]  BIOS-e820: 00000000ffc00000 - 0000000100000000 (reserved)
>  >>>  [    0.000000]  BIOS-e820: 0000000100000000 - 000000012c000000 (usable)
>  >>>  [    0.000000] Entering add_active_range(0, 0, 156) 0 entries of 256 used
>  >>>  [    0.000000] Entering add_active_range(0, 256, 849232) 1 entries of 256 used
>  >>>  [    0.000000] Entering add_active_range(0, 1048576, 1228800) 2 entries of 256 used
>  >>>  [    0.000000] max_pfn_mapped = 1228800
>  >>>  [    0.000000] x86 PAT enabled: cpu 0, old 0x7040600070406, new 0x7010600070106
>  >>>  [    0.000000] After WB checking
>  >>>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 000000000012c000
>  >>>  [    0.000000] After UC checking
>  >>>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>  >>>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>  >>>  [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>  >>>  [    0.000000] After sorting
>  >>>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>  >>>  [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>  >>>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>  >>>  [    0.000000] range0: 0000000000000000 - 00000000c0000000
>  >>>  [    0.000000] Setting variable MTRR 0, base: 0MB, range: 2048MB, type WB
>  >>>  [    0.000000] Setting variable MTRR 1, base: 2048MB, range: 1024MB, type WB
>  >>>  [    0.000000] range: 00000000c0000000 - 00000000c8000000
>  >>>  [    0.000000] Setting variable MTRR 2, base: 3072MB, range: 128MB, type WB
>  >>>
>  >>> [    0.000000] rangeX: 00000000d0000000 - 00000000d0000000
>  >>>  [    0.000000] range0: 0000000100000000 - 0000000120000000
>  >>>  [    0.000000] Setting variable MTRR 3, base: 4096MB, range: 512MB, type WB
>  >>>  [    0.000000] range: 0000000120000000 - 0000000128000000
>  >>>  [    0.000000] Setting variable MTRR 4, base: 4608MB, range: 128MB, type WB
>  >>>
>  >>> [    0.000000] DONE variable MTRRs
>  >>>  [    0.000000] x86 PAT enabled: cpu 0, old 0x7010600070106, new 0x7010600070106
>  >>>  [    0.000000] After WB checking
>  >>>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000c8000
>  >>>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000128000
>  >>>
>  >>> [    0.000000] After UC checking
>  >>>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000c8000
>  >>>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000128000
>  >>>  [    0.000000] After sorting
>  >>>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000c8000
>  >>>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000128000
>  >>>  [    0.000000] WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing 64MB of RAM.
>  >>>  [    0.000000] ------------[ cut here ]------------
>  >>>  [    0.000000] WARNING: at arch/x86/kernel/cpu/mtrr/main.c:1206 mtrr_trim_uncached_memory+0x25b/0x35a()
>  >>>
>  >>> [    0.000000] Modules linked in:
>  >>>  [    0.000000] Pid: 0, comm: swapper Not tainted 2.6.25-06058-ga01e035-dirty #805
>  >>>  [    0.000000]
>  >>>  [    0.000000] Call Trace:
>  >>>  [    0.000000]  [<ffffffff8022d661>] warn_on_slowpath+0x51/0x78
>  >>>  [    0.000000]  [<ffffffff8067f0dc>] x86_get_mtrr_mem_range+0x241/0x25c
>  >>>  [    0.000000]  [<ffffffff8067d93e>] update_memory_range+0x95/0xb7
>  >>>  [    0.000000]  [<ffffffff8067fa51>] mtrr_trim_uncached_memory+0x25b/0x35a
>  >>>
>  >>> [    0.000000]  [<ffffffff8067ca43>] setup_arch+0x280/0x4a2
>  >>>  [    0.000000]  [<ffffffff80676aa6>] start_kernel+0x6e/0x308
>  >>>  [    0.000000]  [<ffffffff80676432>] x86_64_start_kernel+0x241/0x24a
>  >>>  [    0.000000]
>  >>>  [    0.000000] ---[ end trace ca143223eefdc828 ]---
>  >>>  [    0.000000] update e820 for mtrr -- end_pfn
>  >>>  [    0.000000] modified physical RAM map:
>  >>>  [    0.000000]  modified: 0000000000000000 - 000000000009cc00 (usable)
>  >>>  [    0.000000]  modified: 000000000009cc00 - 00000000000a0000 (reserved)
>  >>>  [    0.000000]  modified: 00000000000e4000 - 0000000000100000 (reserved)
>  >>>  [    0.000000]  modified: 0000000000100000 - 00000000cf550000 (usable)
>  >>>  [    0.000000]  modified: 00000000cf550000 - 00000000cf55e000 (ACPI data)
>  >>>  [    0.000000]  modified: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
>  >>>  [    0.000000]  modified: 00000000cf5e0000 - 00000000cf600000 (reserved)
>  >>>  [    0.000000]  modified: 00000000fee00000 - 00000000fee01000 (reserved)
>  >>>  [    0.000000]  modified: 00000000ffc00000 - 0000000100000000 (reserved)
>  >>>  [    0.000000]  modified: 0000000100000000 - 0000000128000000 (usable)
>  >>>  [    0.000000]  modified: 0000000128000000 - 000000012c000000 (reserved)
>  >>>
>  >>> [    0.000000] WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing 757MB of RAM.
>  >>>
>  >>> [    0.000000] ------------[ cut here ]------------
>  >>>  [    0.000000] WARNING: at arch/x86/kernel/cpu/mtrr/main.c:1227 mtrr_trim_uncached_memory+0x30c/0x35a()
>  >>>  [    0.000000] Modules linked in:
>  >>>  [    0.000000] Pid: 0, comm: swapper Not tainted 2.6.25-06058-ga01e035-dirty #805
>  >>>  [    0.000000]
>  >>>  [    0.000000] Call Trace:
>  >>>  [    0.000000]  [<ffffffff8022d661>] warn_on_slowpath+0x51/0x78
>  >>>  [    0.000000]  [<ffffffff8067f0dc>] x86_get_mtrr_mem_range+0x241/0x25c
>  >>>  [    0.000000]  [<ffffffff8067d93e>] update_memory_range+0x95/0xb7
>  >>>  [    0.000000]  [<ffffffff8067fb02>] mtrr_trim_uncached_memory+0x30c/0x35a
>  >>>  [    0.000000]  [<ffffffff8067ca43>] setup_arch+0x280/0x4a2
>  >>>  [    0.000000]  [<ffffffff80676aa6>] start_kernel+0x6e/0x308
>  >>>  [    0.000000]  [<ffffffff80676432>] x86_64_start_kernel+0x241/0x24a
>  >>>  [    0.000000]
>  >>>  [    0.000000] ---[ end trace ca143223eefdc828 ]---
>  >>>  [    0.000000] update e820 for mtrr -- holes
>  >>>  [    0.000000] modified physical RAM map:
>  >>>  [    0.000000]  modified: 0000000000000000 - 000000000009cc00 (usable)
>  >>>  [    0.000000]  modified: 000000000009cc00 - 00000000000a0000 (reserved)
>  >>>  [    0.000000]  modified: 00000000000e4000 - 0000000000100000 (reserved)
>  >>>  [    0.000000]  modified: 0000000000100000 - 00000000c8000000 (usable)
>  >>>  [    0.000000]  modified: 00000000c8000000 - 00000000cf550000 (reserved)
>  >>>  [    0.000000]  modified: 00000000cf550000 - 00000000cf55e000 (ACPI data)
>  >>>  [    0.000000]  modified: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
>  >>>  [    0.000000]  modified: 00000000cf5e0000 - 00000000cf600000 (reserved)
>  >>>  [    0.000000]  modified: 00000000fee00000 - 00000000fee01000 (reserved)
>  >>>  [    0.000000]  modified: 00000000ffc00000 - 000000012c000000 (reserved)
>  >>>
>  >>> [    0.000000] Entering add_active_range(0, 0, 156) 3 entries of 256 used
>  >>>  [    0.000000] Entering add_active_range(0, 256, 819200) 3 entries of 256 used
>  >> please try attached trim_holes_fix.patch..., it will fix the trim hole problem.
>  >
>  > Tested but warning is still there. I try to boot with mtrr_chunk_size / mtrr_gran_size and see what I get.
>  >
>  > ...
>  >
>  > [    0.000000] Linux version 2.6.25-06589-gc65a350-dirty (crazy@thor) (gcc version 4.3.0 (Frugalware Linux) ) #808 SMP PREEMPT Wed Apr 30 01:37:38 CEST 2008
>  > [    0.000000] Command line: root=/dev/sdb1 ro debug vga=0x317
>  > [    0.000000] BIOS-provided physical RAM map:
>  > [    0.000000]  BIOS-e820: 0000000000000000 - 000000000009cc00 (usable)
>  > [    0.000000]  BIOS-e820: 000000000009cc00 - 00000000000a0000 (reserved)
>  > [    0.000000]  BIOS-e820: 00000000000e4000 - 0000000000100000 (reserved)
>  > [    0.000000]  BIOS-e820: 0000000000100000 - 00000000cf550000 (usable)
>  > [    0.000000]  BIOS-e820: 00000000cf550000 - 00000000cf55e000 (ACPI data)
>  > [    0.000000]  BIOS-e820: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
>  > [    0.000000]  BIOS-e820: 00000000cf5e0000 - 00000000cf600000 (reserved)
>  > [    0.000000]  BIOS-e820: 00000000fee00000 - 00000000fee01000 (reserved)
>  > [    0.000000]  BIOS-e820: 00000000ffc00000 - 0000000100000000 (reserved)
>  > [    0.000000]  BIOS-e820: 0000000100000000 - 000000012c000000 (usable)
>  > [    0.000000] Entering add_active_range(0, 0, 156) 0 entries of 256 used
>  > [    0.000000] Entering add_active_range(0, 256, 849232) 1 entries of 256 used
>  > [    0.000000] Entering add_active_range(0, 1048576, 1228800) 2 entries of 256 used
>  > [    0.000000] max_pfn_mapped = 1228800
>  > [    0.000000] x86 PAT enabled: cpu 0, old 0x7040600070406, new 0x7010600070106
>  > [    0.000000] After WB checking
>  > [    0.000000] MTRR MAP PFN: 0000000000000000 - 000000000012c000
>  > [    0.000000] After UC checking
>  > [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>  > [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>  > [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>  > [    0.000000] After sorting
>  > [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>  > [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>  > [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>  > [    0.000000] range0: 0000000000000000 - 00000000c0000000
>  > [    0.000000] Setting variable MTRR 0, base: 0MB, range: 2048MB, type WB
>  > [    0.000000] Setting variable MTRR 1, base: 2048MB, range: 1024MB, type WB
>  > [    0.000000] range: 00000000c0000000 - 00000000d0000000
>  > [    0.000000] Setting variable MTRR 2, base: 3072MB, range: 256MB, type WB
>  > [    0.000000] hole: 00000000cc000000 - 00000000d0000000
>  > [    0.000000] Setting variable MTRR 3, base: 3264MB, range: 64MB, type UC
>  > [    0.000000] rangeX: 00000000d0000000 - 00000000d0000000
>  > [    0.000000] range0: 0000000100000000 - 0000000120000000
>  > [    0.000000] Setting variable MTRR 4, base: 4096MB, range: 512MB, type WB
>  > [    0.000000] range: 0000000120000000 - 0000000130000000
>  > [    0.000000] Setting variable MTRR 5, base: 4608MB, range: 256MB, type WB
>  > [    0.000000] hole: 000000012c000000 - 0000000130000000
>  > [    0.000000] Setting variable MTRR 6, base: 4800MB, range: 64MB, type UC
>  > [    0.000000] DONE variable MTRRs
>  > [    0.000000] x86 PAT enabled: cpu 0, old 0x7010600070106, new 0x7010600070106
>  > [    0.000000] After WB checking
>  > [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000d0000
>  > [    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000130000
>  > [    0.000000] After UC checking
>  > [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cc000
>  > [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>  > [    0.000000] After sorting
>  > [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cc000
>  > [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>  > [    0.000000] WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing 757MB of RAM.
>  > [    0.000000] ------------[ cut here ]------------
>  > [    0.000000] WARNING: at arch/x86/kernel/cpu/mtrr/main.c:1227 mtrr_trim_uncached_memory+0x313/0x361()
>  > [    0.000000] Modules linked in:
>  > [    0.000000] Pid: 0, comm: swapper Not tainted 2.6.25-06589-gc65a350-dirty #808
>  > [    0.000000]
>  > [    0.000000] Call Trace:
>  > [    0.000000]  [<ffffffff8022d699>] warn_on_slowpath+0x51/0x8c
>  > [    0.000000]  [<ffffffff806810b4>] x86_get_mtrr_mem_range+0x241/0x25c
>  > [    0.000000]  [<ffffffff8067f915>] update_memory_range+0x95/0xb7
>  > [    0.000000]  [<ffffffff80681ae1>] mtrr_trim_uncached_memory+0x313/0x361
>  > [    0.000000]  [<ffffffff8067ea16>] setup_arch+0x280/0x4a6
>  > [    0.000000]  [<ffffffff80678a3d>] start_kernel+0x6e/0x30d
>  > [    0.000000]  [<ffffffff80678432>] x86_64_start_kernel+0x241/0x24a
>  > [    0.000000]
>  > [    0.000000] ---[ end trace 4eaa2a86a8e2da22 ]---
>  > [    0.000000] update e820 for mtrr -- holes
>  > [    0.000000] modified physical RAM map:
>  > [    0.000000]  modified: 0000000000000000 - 000000000009cc00 (usable)
>  > [    0.000000]  modified: 000000000009cc00 - 00000000000a0000 (reserved)
>  > [    0.000000]  modified: 00000000000e4000 - 0000000000100000 (reserved)
>  > [    0.000000]  modified: 0000000000100000 - 00000000cc000000 (usable)
>  > [    0.000000]  modified: 00000000cc000000 - 00000000cf550000 (reserved)
>  > [    0.000000]  modified: 00000000cf550000 - 00000000cf55e000 (ACPI data)
>  > [    0.000000]  modified: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
>  > [    0.000000]  modified: 00000000cf5e0000 - 00000000cf600000 (reserved)
>  > [    0.000000]  modified: 00000000fee00000 - 00000000fee01000 (reserved)
>  > [    0.000000]  modified: 00000000ffc00000 - 000000012c000000 (reserved)
>  > [    0.000000] Entering add_active_range(0, 0, 156) 3 entries of 256 used
>  > [    0.000000] Entering add_active_range(0, 256, 835584) 3 entries of 256 used
>  > [    0.000000] max_pfn_mapped = 1228800
>  > [    0.000000] init_memory_mapping
>  >
>  > ...
>  >
>
>  with mtrr_chunk_size=256m mtrr_gran_size=128m I'm getting :
>
>  ...
>
>
>
>  [    0.000000] BIOS-provided physical RAM map:
>  [    0.000000]  BIOS-e820: 0000000000000000 - 000000000009cc00 (usable)
>  [    0.000000]  BIOS-e820: 000000000009cc00 - 00000000000a0000 (reserved)
>  [    0.000000]  BIOS-e820: 00000000000e4000 - 0000000000100000 (reserved)
>  [    0.000000]  BIOS-e820: 0000000000100000 - 00000000cf550000 (usable)
>  [    0.000000]  BIOS-e820: 00000000cf550000 - 00000000cf55e000 (ACPI data)
>  [    0.000000]  BIOS-e820: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
>  [    0.000000]  BIOS-e820: 00000000cf5e0000 - 00000000cf600000 (reserved)
>  [    0.000000]  BIOS-e820: 00000000fee00000 - 00000000fee01000 (reserved)
>  [    0.000000]  BIOS-e820: 00000000ffc00000 - 0000000100000000 (reserved)
>  [    0.000000]  BIOS-e820: 0000000100000000 - 000000012c000000 (usable)
>  [    0.000000] Entering add_active_range(0, 0, 156) 0 entries of 256 used
>  [    0.000000] Entering add_active_range(0, 256, 849232) 1 entries of 256 used
>  [    0.000000] Entering add_active_range(0, 1048576, 1228800) 2 entries of 256 used
>  [    0.000000] max_pfn_mapped = 1228800
>  [    0.000000] x86 PAT enabled: cpu 0, old 0x7040600070406, new 0x7010600070106
>  [    0.000000] After WB checking
>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 000000000012c000
>  [    0.000000] After UC checking
>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>  [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>  [    0.000000] After sorting
>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>  [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>  [    0.000000] range0: 0000000000000000 - 00000000c0000000
>  [    0.000000] Setting variable MTRR 0, base: 0MB, range: 2048MB, type WB
>  [    0.000000] Setting variable MTRR 1, base: 2048MB, range: 1024MB, type WB
>  [    0.000000] range: 00000000c0000000 - 00000000c8000000
>  [    0.000000] Setting variable MTRR 2, base: 3072MB, range: 128MB, type WB
>  [    0.000000] rangeX: 00000000d0000000 - 00000000d0000000
>  [    0.000000] range0: 0000000100000000 - 0000000120000000
>  [    0.000000] Setting variable MTRR 3, base: 4096MB, range: 512MB, type WB
>  [    0.000000] range: 0000000120000000 - 0000000128000000
>  [    0.000000] Setting variable MTRR 4, base: 4608MB, range: 128MB, type WB
>  [    0.000000] DONE variable MTRRs
>  [    0.000000] x86 PAT enabled: cpu 0, old 0x7010600070106, new 0x7010600070106
>  [    0.000000] After WB checking
>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000c8000
>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000128000
>  [    0.000000] After UC checking
>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000c8000
>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000128000
>  [    0.000000] After sorting
>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000c8000
>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000128000
>  [    0.000000] WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing 64MB of RAM.
>  [    0.000000] ------------[ cut here ]------------
>  [    0.000000] WARNING: at arch/x86/kernel/cpu/mtrr/main.c:1206 mtrr_trim_uncached_memory+0x25b/0x361()
>
> [    0.000000] Modules linked in:
>  [    0.000000] Pid: 0, comm: swapper Not tainted 2.6.25-06589-gc65a350-dirty #808
>  [    0.000000]
>  [    0.000000] Call Trace:
>  [    0.000000]  [<ffffffff8022d699>] warn_on_slowpath+0x51/0x8c
>  [    0.000000]  [<ffffffff806810b4>] x86_get_mtrr_mem_range+0x241/0x25c
>  [    0.000000]  [<ffffffff8067f915>] update_memory_range+0x95/0xb7
>  [    0.000000]  [<ffffffff80681a29>] mtrr_trim_uncached_memory+0x25b/0x361
>
> [    0.000000]  [<ffffffff8067ea16>] setup_arch+0x280/0x4a6
>  [    0.000000]  [<ffffffff80678a3d>] start_kernel+0x6e/0x30d
>  [    0.000000]  [<ffffffff80678432>] x86_64_start_kernel+0x241/0x24a
>  [    0.000000]
>  [    0.000000] ---[ end trace 4eaa2a86a8e2da22 ]---
>
> [    0.000000] update e820 for mtrr -- end_pfn
>  [    0.000000] modified physical RAM map:
>  [    0.000000]  modified: 0000000000000000 - 000000000009cc00 (usable)
>  [    0.000000]  modified: 000000000009cc00 - 00000000000a0000 (reserved)
>  [    0.000000]  modified: 00000000000e4000 - 0000000000100000 (reserved)
>  [    0.000000]  modified: 0000000000100000 - 00000000cf550000 (usable)
>  [    0.000000]  modified: 00000000cf550000 - 00000000cf55e000 (ACPI data)
>  [    0.000000]  modified: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
>  [    0.000000]  modified: 00000000cf5e0000 - 00000000cf600000 (reserved)
>  [    0.000000]  modified: 00000000fee00000 - 00000000fee01000 (reserved)
>  [    0.000000]  modified: 00000000ffc00000 - 0000000100000000 (reserved)
>  [    0.000000]  modified: 0000000100000000 - 0000000128000000 (usable)
>  [    0.000000]  modified: 0000000128000000 - 000000012c000000 (reserved)
>  [    0.000000] WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing 757MB of RAM.
>  [    0.000000] ------------[ cut here ]------------
>
> [    0.000000] WARNING: at arch/x86/kernel/cpu/mtrr/main.c:1227 mtrr_trim_uncached_memory+0x313/0x361()
>  [    0.000000] Modules linked in:
>  [    0.000000] Pid: 0, comm: swapper Tainted: G        W 2.6.25-06589-gc65a350-dirty #808
>
> [    0.000000]
>  [    0.000000] Call Trace:
>  [    0.000000]  [<ffffffff8022d699>] warn_on_slowpath+0x51/0x8c
>  [    0.000000]  [<ffffffff806810b4>] x86_get_mtrr_mem_range+0x241/0x25c
>  [    0.000000]  [<ffffffff8067f915>] update_memory_range+0x95/0xb7
>  [    0.000000]  [<ffffffff80681ae1>] mtrr_trim_uncached_memory+0x313/0x361
>  [    0.000000]  [<ffffffff8067ea16>] setup_arch+0x280/0x4a6
>  [    0.000000]  [<ffffffff80678a3d>] start_kernel+0x6e/0x30d
>  [    0.000000]  [<ffffffff80678432>] x86_64_start_kernel+0x241/0x24a
>  [    0.000000]
>  [    0.000000] ---[ end trace 4eaa2a86a8e2da22 ]---
>  [    0.000000] update e820 for mtrr -- holes
>  [    0.000000] modified physical RAM map:
>  [    0.000000]  modified: 0000000000000000 - 000000000009cc00 (usable)
>  [    0.000000]  modified: 000000000009cc00 - 00000000000a0000 (reserved)
>  [    0.000000]  modified: 00000000000e4000 - 0000000000100000 (reserved)
>
> [    0.000000]  modified: 0000000000100000 - 00000000c8000000 (usable)
>  [    0.000000]  modified: 00000000c8000000 - 00000000cf550000 (reserved)
>  [    0.000000]  modified: 00000000cf550000 - 00000000cf55e000 (ACPI data)
>  [    0.000000]  modified: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
>  [    0.000000]  modified: 00000000cf5e0000 - 00000000cf600000 (reserved)
>  [    0.000000]  modified: 00000000fee00000 - 00000000fee01000 (reserved)
>  [    0.000000]  modified: 00000000ffc00000 - 000000012c000000 (reserved)
>  [    0.000000] Entering add_active_range(0, 0, 156) 3 entries of 256 used
>  [    0.000000] Entering add_active_range(0, 256, 819200) 3 entries of 256 used
>
please check attached debug patch...

THanks

Yinghai Lu

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: trim_holes_fix.patch --]
[-- Type: text/x-patch; name=trim_holes_fix.patch, Size: 1469 bytes --]

Index: linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/main.c
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
@@ -1103,6 +1103,9 @@ static u64 __init real_trim_memory(unsig
 	trim_size = limit_pfn;
 	trim_size <<= PAGE_SHIFT;
 	trim_size -= trim_start;
+
+	printk(KERN_INFO "trimm e820 for MTRR MAP (UC): %016lx - %016lx\n", start_pfn, limit_pfn);
+
 	return update_memory_range(trim_start, trim_size, E820_RAM,
 				E820_RESERVED);
 }
@@ -1203,7 +1206,8 @@ int __init mtrr_trim_uncached_memory(uns
 			" all of memory, losing %lluMB of RAM.\n",
 			total_real_trim_size >> 20);
 
-		WARN_ON(1);
+		if (enable_mtrr_cleanup < 1)
+			WARN_ON(1);
 
 		printk(KERN_INFO "update e820 for mtrr -- end_pfn\n");
 		update_e820();
@@ -1214,7 +1218,7 @@ int __init mtrr_trim_uncached_memory(uns
 	if (range[0].start)
 		total_real_trim_size += real_trim_memory(0, range[0].start);
 
-	for (i = 0; i < nr_range - 1; i--) {
+	for (i = 0; i < nr_range - 1; i++) {
 		if (range[i].end + 1 < range[i+1].start)
 			total_real_trim_size += real_trim_memory(range[i].end + 1, range[i+1].start);
 	}
@@ -1224,7 +1228,8 @@ int __init mtrr_trim_uncached_memory(uns
 			" all of memory, losing %lluMB of RAM.\n",
 			total_real_trim_size >> 20);
 
-		WARN_ON(1);
+		if (enable_mtrr_cleanup < 1)
+			WARN_ON(1);
 
 		printk(KERN_INFO "update e820 for mtrr -- holes\n");
 		update_e820();

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH 1/2] x86: mtrr cleanup for converting continuous to discrete layout v7
  2008-04-30  0:38                             ` Yinghai Lu
@ 2008-04-30  1:02                               ` Gabriel C
  2008-04-30  3:00                                 ` Yinghai Lu
  0 siblings, 1 reply; 89+ messages in thread
From: Gabriel C @ 2008-04-30  1:02 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Andrew Morton, Ingo Molnar, H. Peter Anvin, Thomas Gleixner,
	Mika Fischer, linux-kernel@vger.kernel.org

Yinghai Lu wrote:
> On Tue, Apr 29, 2008 at 5:06 PM, Gabriel C <nix.or.die@googlemail.com> wrote:
>> Gabriel C wrote:
>>  > Yinghai Lu wrote:
>>  >> On Tue, Apr 29, 2008 at 1:25 PM, Gabriel C <nix.or.die@googlemail.com> wrote:
>>  >>> Yinghai Lu wrote:
>>  >>>  > On Tue, Apr 29, 2008 at 4:51 AM, Gabriel C <nix.or.die@googlemail.com> wrote:
>>  >>>  >> Yinghai Lu wrote:
>>  >>>  >>  > On Tue, Apr 29, 2008 at 3:30 AM, Yinghai Lu <yhlu.kernel@gmail.com> wrote:
>>  >>>  >>  >> On Tue, Apr 29, 2008 at 2:47 AM, Gabriel C <nix.or.die@googlemail.com> wrote:
>>  >>>  >>  >>  > Yinghai Lu wrote:
>>  >>>  >>  >>  >  > some BIOS like to use continus MTRR layout, and may X driver can not add
>>  >>>  >>  >>  >  > WB entries for graphical cards when 4g or more RAM installed.
>>  >>>  >>  >>  >  >
>>  >>>  >>  >>  >  > the patch will change MTRR to discrete.
>>  >>>  >>  >>  >  >
>>  >>>  >>  >>  >  > mtrr_chunk_size= could be used to have smaller continuous block to hold holes.
>>  >>>  >>  >>  >  > default is 256m, could be set according to size of graphics card memory.
>>  >>>  >>  >>  >  >
>>  >>>  >>  >>  >  > v2: fix -1 for UC checking
>>  >>>  >>  >>  >  > v3: default to disable, and need use enable_mtrr_cleanup to enable this feature
>>  >>>  >>  >>  >  >     skip the var state change warning.
>>  >>>  >>  >>  >  >     remove next_basek in range_to_mtrr()
>>  >>>  >>  >>  >  > v4: correct warning mask.
>>  >>>  >>  >>  >  > v5: CONFIG_MTRR_SANITIZER
>>  >>>  >>  >>  >  > v6: fix 1g, 2g, 512 aligment with extra hole
>>  >>>  >>  >>  >  > v7: gran_sizek to prevent running out of MTRRs.
>>  >>>  >>  >>  >  >
>>  >>>  >>  >>  >
>>  >>>  >>  >>  >  With this version ( and patch http://lkml.org/lkml/2008/4/29/97 ) applyed on latest linus git tree
>>  >>>  >>  >>  >  the box OOPS'es early.
>>  >>>  >>  >>  >
>>  >>>  >>  >>  >  Sorry I don't have time right now to write down the part of the OOPS I can see on monitor , I can try to find
>>  >>>  >>  >>  >  some time later.
>>  >>>  >>  >>  >
>>  >>>  >>  >>  >  In any way OOPS'es on __free_one_page+0x191/0x21e
>>  >>>  >>  >>
>>  >>>  >>  >>  thanks. found one problem with hole_basek ...
>>  >>>  >>  >>
>>  >>>  >>  >>  will send you v8, and hope it will be last version.
>>  >>>  >>  >
>>  >>>  >>  > please try v8, it should get rid of the 8m entry. it need patch
>>  >>>  >>  > http://lkml.org/lkml/2008/4/29/97 too.
>>  >>>  >>
>>  >>>  >>  Box does boot with v8 but now I get that warning you fixed in v2 again =):
>>  >>>  >>
>>  >>>  >>  ....
>>  >>>  >>  [    0.000000] Linux version 2.6.25-06058-ga01e035-dirty (crazy@thor) (gcc version 4.3.0 (Frugalware Linux) ) #805 SMP PREEMPT Tue Apr 29 13:04:49 CEST 2008
>>  >>>  >>  [    0.000000] Command line: root=/dev/sdb1 ro debug vga=0x317
>>  >>>  >>  [    0.000000] BIOS-provided physical RAM map:
>>  >>>  >>  [    0.000000]  BIOS-e820: 0000000000000000 - 000000000009cc00 (usable)
>>  >>>  >>  [    0.000000]  BIOS-e820: 000000000009cc00 - 00000000000a0000 (reserved)
>>  >>>  >>  [    0.000000]  BIOS-e820: 00000000000e4000 - 0000000000100000 (reserved)
>>  >>>  >>  [    0.000000]  BIOS-e820: 0000000000100000 - 00000000cf550000 (usable)
>>  >>>  >>  [    0.000000]  BIOS-e820: 00000000cf550000 - 00000000cf55e000 (ACPI data)
>>  >>>  >>  [    0.000000]  BIOS-e820: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
>>  >>>  >>  [    0.000000]  BIOS-e820: 00000000cf5e0000 - 00000000cf600000 (reserved)
>>  >>>  >>  [    0.000000]  BIOS-e820: 00000000fee00000 - 00000000fee01000 (reserved)
>>  >>>  >>  [    0.000000]  BIOS-e820: 00000000ffc00000 - 0000000100000000 (reserved)
>>  >>>  >>  [    0.000000]  BIOS-e820: 0000000100000000 - 000000012c000000 (usable)
>>  >>>  >>  [    0.000000] Entering add_active_range(0, 0, 156) 0 entries of 256 used
>>  >>>  >>  [    0.000000] Entering add_active_range(0, 256, 849232) 1 entries of 256 used
>>  >>>  >>  [    0.000000] Entering add_active_range(0, 1048576, 1228800) 2 entries of 256 used
>>  >>>  >>  [    0.000000] max_pfn_mapped = 1228800
>>  >>>  >>  [    0.000000] x86 PAT enabled: cpu 0, old 0x7040600070406, new 0x7010600070106
>>  >>>  >>  [    0.000000] After WB checking
>>  >>>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 000000000012c000
>>  >>>  >>  [    0.000000] After UC checking
>>  >>>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>>  >>>  >>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>>  >>>  >>  [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>>  >>>  >>  [    0.000000] After sorting
>>  >>>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>>  >>>  >>  [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>>  >>>  >>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>>  >>>  >>  [    0.000000] range0: 0000000000000000 - 00000000c0000000
>>  >>>  >>  [    0.000000] Setting variable MTRR 0, base: 0MB, range: 2048MB, type WB
>>  >>>  >>  [    0.000000] Setting variable MTRR 1, base: 2048MB, range: 1024MB, type WB
>>  >>>  >>  [    0.000000] range: 00000000c0000000 - 00000000d0000000
>>  >>>  >>  [    0.000000] Setting variable MTRR 2, base: 3072MB, range: 256MB, type WB
>>  >>>  >>  [    0.000000] hole: 00000000cc000000 - 00000000d0000000
>>  >>>  >>  [    0.000000] Setting variable MTRR 3, base: 3264MB, range: 64MB, type UC
>>  >>>  >>  [    0.000000] rangeX: 00000000d0000000 - 00000000d0000000
>>  >>>  >>  [    0.000000] range0: 0000000100000000 - 0000000120000000
>>  >>>  >>  [    0.000000] Setting variable MTRR 4, base: 4096MB, range: 512MB, type WB
>>  >>>  >>  [    0.000000] range: 0000000120000000 - 0000000130000000
>>  >>>  >>  [    0.000000] Setting variable MTRR 5, base: 4608MB, range: 256MB, type WB
>>  >>>  >>  [    0.000000] hole: 000000012c000000 - 0000000130000000
>>  >>>  >>  [    0.000000] Setting variable MTRR 6, base: 4800MB, range: 64MB, type UC
>>  >>>  >>  [    0.000000] DONE variable MTRRs
>>  >>>  >>  [    0.000000] x86 PAT enabled: cpu 0, old 0x7010600070106, new 0x7010600070106
>>  >>>  >>  [    0.000000] After WB checking
>>  >>>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000d0000
>>  >>>  >>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000130000
>>  >>>  >>  [    0.000000] After UC checking
>>  >>>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cc000
>>  >>>  >>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>>  >>>  >>  [    0.000000] After sorting
>>  >>>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cc000
>>  >>>  >>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>>  >>>  >>  [    0.000000] WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing 757MB of RAM.
>>  >>>  >
>>  >>>  > so
>>  >>>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>>  >>>  >>  [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>>  >>>  >>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>>  >>>  > ===>
>>  >>>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cc000
>>  >>>  >>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>>  >>>  >
>>  >>>  > please try
>>  >>>
>>  >>>  Now I have 2 warnings
>>  >>>
>>  >>>
>>  >>>  > mtrr_chunk_size=512m mtrr_gran_size=128m
>>  >>>
>>  >>>  ...
>>  >>>
>>  >>>  [    0.000000] Command line: root=/dev/sdb1 ro debug vga=0x317 mtrr_chunk_size=512m mtrr_gran_size=128m 3
>>  >>>
>>  >>>
>>  >>> [    0.000000] BIOS-provided physical RAM map:
>>  >>>  [    0.000000]  BIOS-e820: 0000000000000000 - 000000000009cc00 (usable)
>>  >>>  [    0.000000]  BIOS-e820: 000000000009cc00 - 00000000000a0000 (reserved)
>>  >>>  [    0.000000]  BIOS-e820: 00000000000e4000 - 0000000000100000 (reserved)
>>  >>>  [    0.000000]  BIOS-e820: 0000000000100000 - 00000000cf550000 (usable)
>>  >>>  [    0.000000]  BIOS-e820: 00000000cf550000 - 00000000cf55e000 (ACPI data)
>>  >>>  [    0.000000]  BIOS-e820: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
>>  >>>  [    0.000000]  BIOS-e820: 00000000cf5e0000 - 00000000cf600000 (reserved)
>>  >>>  [    0.000000]  BIOS-e820: 00000000fee00000 - 00000000fee01000 (reserved)
>>  >>>  [    0.000000]  BIOS-e820: 00000000ffc00000 - 0000000100000000 (reserved)
>>  >>>  [    0.000000]  BIOS-e820: 0000000100000000 - 000000012c000000 (usable)
>>  >>>  [    0.000000] Entering add_active_range(0, 0, 156) 0 entries of 256 used
>>  >>>  [    0.000000] Entering add_active_range(0, 256, 849232) 1 entries of 256 used
>>  >>>  [    0.000000] Entering add_active_range(0, 1048576, 1228800) 2 entries of 256 used
>>  >>>  [    0.000000] max_pfn_mapped = 1228800
>>  >>>  [    0.000000] x86 PAT enabled: cpu 0, old 0x7040600070406, new 0x7010600070106
>>  >>>  [    0.000000] After WB checking
>>  >>>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 000000000012c000
>>  >>>  [    0.000000] After UC checking
>>  >>>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>>  >>>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>>  >>>  [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>>  >>>  [    0.000000] After sorting
>>  >>>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>>  >>>  [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>>  >>>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>>  >>>  [    0.000000] range0: 0000000000000000 - 00000000c0000000
>>  >>>  [    0.000000] Setting variable MTRR 0, base: 0MB, range: 2048MB, type WB
>>  >>>  [    0.000000] Setting variable MTRR 1, base: 2048MB, range: 1024MB, type WB
>>  >>>  [    0.000000] range: 00000000c0000000 - 00000000c8000000
>>  >>>  [    0.000000] Setting variable MTRR 2, base: 3072MB, range: 128MB, type WB
>>  >>>
>>  >>> [    0.000000] rangeX: 00000000d0000000 - 00000000d0000000
>>  >>>  [    0.000000] range0: 0000000100000000 - 0000000120000000
>>  >>>  [    0.000000] Setting variable MTRR 3, base: 4096MB, range: 512MB, type WB
>>  >>>  [    0.000000] range: 0000000120000000 - 0000000128000000
>>  >>>  [    0.000000] Setting variable MTRR 4, base: 4608MB, range: 128MB, type WB
>>  >>>
>>  >>> [    0.000000] DONE variable MTRRs
>>  >>>  [    0.000000] x86 PAT enabled: cpu 0, old 0x7010600070106, new 0x7010600070106
>>  >>>  [    0.000000] After WB checking
>>  >>>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000c8000
>>  >>>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000128000
>>  >>>
>>  >>> [    0.000000] After UC checking
>>  >>>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000c8000
>>  >>>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000128000
>>  >>>  [    0.000000] After sorting
>>  >>>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000c8000
>>  >>>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000128000
>>  >>>  [    0.000000] WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing 64MB of RAM.
>>  >>>  [    0.000000] ------------[ cut here ]------------
>>  >>>  [    0.000000] WARNING: at arch/x86/kernel/cpu/mtrr/main.c:1206 mtrr_trim_uncached_memory+0x25b/0x35a()
>>  >>>
>>  >>> [    0.000000] Modules linked in:
>>  >>>  [    0.000000] Pid: 0, comm: swapper Not tainted 2.6.25-06058-ga01e035-dirty #805
>>  >>>  [    0.000000]
>>  >>>  [    0.000000] Call Trace:
>>  >>>  [    0.000000]  [<ffffffff8022d661>] warn_on_slowpath+0x51/0x78
>>  >>>  [    0.000000]  [<ffffffff8067f0dc>] x86_get_mtrr_mem_range+0x241/0x25c
>>  >>>  [    0.000000]  [<ffffffff8067d93e>] update_memory_range+0x95/0xb7
>>  >>>  [    0.000000]  [<ffffffff8067fa51>] mtrr_trim_uncached_memory+0x25b/0x35a
>>  >>>
>>  >>> [    0.000000]  [<ffffffff8067ca43>] setup_arch+0x280/0x4a2
>>  >>>  [    0.000000]  [<ffffffff80676aa6>] start_kernel+0x6e/0x308
>>  >>>  [    0.000000]  [<ffffffff80676432>] x86_64_start_kernel+0x241/0x24a
>>  >>>  [    0.000000]
>>  >>>  [    0.000000] ---[ end trace ca143223eefdc828 ]---
>>  >>>  [    0.000000] update e820 for mtrr -- end_pfn
>>  >>>  [    0.000000] modified physical RAM map:
>>  >>>  [    0.000000]  modified: 0000000000000000 - 000000000009cc00 (usable)
>>  >>>  [    0.000000]  modified: 000000000009cc00 - 00000000000a0000 (reserved)
>>  >>>  [    0.000000]  modified: 00000000000e4000 - 0000000000100000 (reserved)
>>  >>>  [    0.000000]  modified: 0000000000100000 - 00000000cf550000 (usable)
>>  >>>  [    0.000000]  modified: 00000000cf550000 - 00000000cf55e000 (ACPI data)
>>  >>>  [    0.000000]  modified: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
>>  >>>  [    0.000000]  modified: 00000000cf5e0000 - 00000000cf600000 (reserved)
>>  >>>  [    0.000000]  modified: 00000000fee00000 - 00000000fee01000 (reserved)
>>  >>>  [    0.000000]  modified: 00000000ffc00000 - 0000000100000000 (reserved)
>>  >>>  [    0.000000]  modified: 0000000100000000 - 0000000128000000 (usable)
>>  >>>  [    0.000000]  modified: 0000000128000000 - 000000012c000000 (reserved)
>>  >>>
>>  >>> [    0.000000] WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing 757MB of RAM.
>>  >>>
>>  >>> [    0.000000] ------------[ cut here ]------------
>>  >>>  [    0.000000] WARNING: at arch/x86/kernel/cpu/mtrr/main.c:1227 mtrr_trim_uncached_memory+0x30c/0x35a()
>>  >>>  [    0.000000] Modules linked in:
>>  >>>  [    0.000000] Pid: 0, comm: swapper Not tainted 2.6.25-06058-ga01e035-dirty #805
>>  >>>  [    0.000000]
>>  >>>  [    0.000000] Call Trace:
>>  >>>  [    0.000000]  [<ffffffff8022d661>] warn_on_slowpath+0x51/0x78
>>  >>>  [    0.000000]  [<ffffffff8067f0dc>] x86_get_mtrr_mem_range+0x241/0x25c
>>  >>>  [    0.000000]  [<ffffffff8067d93e>] update_memory_range+0x95/0xb7
>>  >>>  [    0.000000]  [<ffffffff8067fb02>] mtrr_trim_uncached_memory+0x30c/0x35a
>>  >>>  [    0.000000]  [<ffffffff8067ca43>] setup_arch+0x280/0x4a2
>>  >>>  [    0.000000]  [<ffffffff80676aa6>] start_kernel+0x6e/0x308
>>  >>>  [    0.000000]  [<ffffffff80676432>] x86_64_start_kernel+0x241/0x24a
>>  >>>  [    0.000000]
>>  >>>  [    0.000000] ---[ end trace ca143223eefdc828 ]---
>>  >>>  [    0.000000] update e820 for mtrr -- holes
>>  >>>  [    0.000000] modified physical RAM map:
>>  >>>  [    0.000000]  modified: 0000000000000000 - 000000000009cc00 (usable)
>>  >>>  [    0.000000]  modified: 000000000009cc00 - 00000000000a0000 (reserved)
>>  >>>  [    0.000000]  modified: 00000000000e4000 - 0000000000100000 (reserved)
>>  >>>  [    0.000000]  modified: 0000000000100000 - 00000000c8000000 (usable)
>>  >>>  [    0.000000]  modified: 00000000c8000000 - 00000000cf550000 (reserved)
>>  >>>  [    0.000000]  modified: 00000000cf550000 - 00000000cf55e000 (ACPI data)
>>  >>>  [    0.000000]  modified: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
>>  >>>  [    0.000000]  modified: 00000000cf5e0000 - 00000000cf600000 (reserved)
>>  >>>  [    0.000000]  modified: 00000000fee00000 - 00000000fee01000 (reserved)
>>  >>>  [    0.000000]  modified: 00000000ffc00000 - 000000012c000000 (reserved)
>>  >>>
>>  >>> [    0.000000] Entering add_active_range(0, 0, 156) 3 entries of 256 used
>>  >>>  [    0.000000] Entering add_active_range(0, 256, 819200) 3 entries of 256 used
>>  >> please try attached trim_holes_fix.patch..., it will fix the trim hole problem.
>>  >
>>  > Tested but warning is still there. I try to boot with mtrr_chunk_size / mtrr_gran_size and see what I get.
>>  >
>>  > ...
>>  >
>>  > [    0.000000] Linux version 2.6.25-06589-gc65a350-dirty (crazy@thor) (gcc version 4.3.0 (Frugalware Linux) ) #808 SMP PREEMPT Wed Apr 30 01:37:38 CEST 2008
>>  > [    0.000000] Command line: root=/dev/sdb1 ro debug vga=0x317
>>  > [    0.000000] BIOS-provided physical RAM map:
>>  > [    0.000000]  BIOS-e820: 0000000000000000 - 000000000009cc00 (usable)
>>  > [    0.000000]  BIOS-e820: 000000000009cc00 - 00000000000a0000 (reserved)
>>  > [    0.000000]  BIOS-e820: 00000000000e4000 - 0000000000100000 (reserved)
>>  > [    0.000000]  BIOS-e820: 0000000000100000 - 00000000cf550000 (usable)
>>  > [    0.000000]  BIOS-e820: 00000000cf550000 - 00000000cf55e000 (ACPI data)
>>  > [    0.000000]  BIOS-e820: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
>>  > [    0.000000]  BIOS-e820: 00000000cf5e0000 - 00000000cf600000 (reserved)
>>  > [    0.000000]  BIOS-e820: 00000000fee00000 - 00000000fee01000 (reserved)
>>  > [    0.000000]  BIOS-e820: 00000000ffc00000 - 0000000100000000 (reserved)
>>  > [    0.000000]  BIOS-e820: 0000000100000000 - 000000012c000000 (usable)
>>  > [    0.000000] Entering add_active_range(0, 0, 156) 0 entries of 256 used
>>  > [    0.000000] Entering add_active_range(0, 256, 849232) 1 entries of 256 used
>>  > [    0.000000] Entering add_active_range(0, 1048576, 1228800) 2 entries of 256 used
>>  > [    0.000000] max_pfn_mapped = 1228800
>>  > [    0.000000] x86 PAT enabled: cpu 0, old 0x7040600070406, new 0x7010600070106
>>  > [    0.000000] After WB checking
>>  > [    0.000000] MTRR MAP PFN: 0000000000000000 - 000000000012c000
>>  > [    0.000000] After UC checking
>>  > [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>>  > [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>>  > [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>>  > [    0.000000] After sorting
>>  > [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>>  > [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>>  > [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>>  > [    0.000000] range0: 0000000000000000 - 00000000c0000000
>>  > [    0.000000] Setting variable MTRR 0, base: 0MB, range: 2048MB, type WB
>>  > [    0.000000] Setting variable MTRR 1, base: 2048MB, range: 1024MB, type WB
>>  > [    0.000000] range: 00000000c0000000 - 00000000d0000000
>>  > [    0.000000] Setting variable MTRR 2, base: 3072MB, range: 256MB, type WB
>>  > [    0.000000] hole: 00000000cc000000 - 00000000d0000000
>>  > [    0.000000] Setting variable MTRR 3, base: 3264MB, range: 64MB, type UC
>>  > [    0.000000] rangeX: 00000000d0000000 - 00000000d0000000
>>  > [    0.000000] range0: 0000000100000000 - 0000000120000000
>>  > [    0.000000] Setting variable MTRR 4, base: 4096MB, range: 512MB, type WB
>>  > [    0.000000] range: 0000000120000000 - 0000000130000000
>>  > [    0.000000] Setting variable MTRR 5, base: 4608MB, range: 256MB, type WB
>>  > [    0.000000] hole: 000000012c000000 - 0000000130000000
>>  > [    0.000000] Setting variable MTRR 6, base: 4800MB, range: 64MB, type UC
>>  > [    0.000000] DONE variable MTRRs
>>  > [    0.000000] x86 PAT enabled: cpu 0, old 0x7010600070106, new 0x7010600070106
>>  > [    0.000000] After WB checking
>>  > [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000d0000
>>  > [    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000130000
>>  > [    0.000000] After UC checking
>>  > [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cc000
>>  > [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>>  > [    0.000000] After sorting
>>  > [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cc000
>>  > [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>>  > [    0.000000] WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing 757MB of RAM.
>>  > [    0.000000] ------------[ cut here ]------------
>>  > [    0.000000] WARNING: at arch/x86/kernel/cpu/mtrr/main.c:1227 mtrr_trim_uncached_memory+0x313/0x361()
>>  > [    0.000000] Modules linked in:
>>  > [    0.000000] Pid: 0, comm: swapper Not tainted 2.6.25-06589-gc65a350-dirty #808
>>  > [    0.000000]
>>  > [    0.000000] Call Trace:
>>  > [    0.000000]  [<ffffffff8022d699>] warn_on_slowpath+0x51/0x8c
>>  > [    0.000000]  [<ffffffff806810b4>] x86_get_mtrr_mem_range+0x241/0x25c
>>  > [    0.000000]  [<ffffffff8067f915>] update_memory_range+0x95/0xb7
>>  > [    0.000000]  [<ffffffff80681ae1>] mtrr_trim_uncached_memory+0x313/0x361
>>  > [    0.000000]  [<ffffffff8067ea16>] setup_arch+0x280/0x4a6
>>  > [    0.000000]  [<ffffffff80678a3d>] start_kernel+0x6e/0x30d
>>  > [    0.000000]  [<ffffffff80678432>] x86_64_start_kernel+0x241/0x24a
>>  > [    0.000000]
>>  > [    0.000000] ---[ end trace 4eaa2a86a8e2da22 ]---
>>  > [    0.000000] update e820 for mtrr -- holes
>>  > [    0.000000] modified physical RAM map:
>>  > [    0.000000]  modified: 0000000000000000 - 000000000009cc00 (usable)
>>  > [    0.000000]  modified: 000000000009cc00 - 00000000000a0000 (reserved)
>>  > [    0.000000]  modified: 00000000000e4000 - 0000000000100000 (reserved)
>>  > [    0.000000]  modified: 0000000000100000 - 00000000cc000000 (usable)
>>  > [    0.000000]  modified: 00000000cc000000 - 00000000cf550000 (reserved)
>>  > [    0.000000]  modified: 00000000cf550000 - 00000000cf55e000 (ACPI data)
>>  > [    0.000000]  modified: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
>>  > [    0.000000]  modified: 00000000cf5e0000 - 00000000cf600000 (reserved)
>>  > [    0.000000]  modified: 00000000fee00000 - 00000000fee01000 (reserved)
>>  > [    0.000000]  modified: 00000000ffc00000 - 000000012c000000 (reserved)
>>  > [    0.000000] Entering add_active_range(0, 0, 156) 3 entries of 256 used
>>  > [    0.000000] Entering add_active_range(0, 256, 835584) 3 entries of 256 used
>>  > [    0.000000] max_pfn_mapped = 1228800
>>  > [    0.000000] init_memory_mapping
>>  >
>>  > ...
>>  >
>>
>>  with mtrr_chunk_size=256m mtrr_gran_size=128m I'm getting :
>>
>>  ...
>>
>>
>>
>>  [    0.000000] BIOS-provided physical RAM map:
>>  [    0.000000]  BIOS-e820: 0000000000000000 - 000000000009cc00 (usable)
>>  [    0.000000]  BIOS-e820: 000000000009cc00 - 00000000000a0000 (reserved)
>>  [    0.000000]  BIOS-e820: 00000000000e4000 - 0000000000100000 (reserved)
>>  [    0.000000]  BIOS-e820: 0000000000100000 - 00000000cf550000 (usable)
>>  [    0.000000]  BIOS-e820: 00000000cf550000 - 00000000cf55e000 (ACPI data)
>>  [    0.000000]  BIOS-e820: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
>>  [    0.000000]  BIOS-e820: 00000000cf5e0000 - 00000000cf600000 (reserved)
>>  [    0.000000]  BIOS-e820: 00000000fee00000 - 00000000fee01000 (reserved)
>>  [    0.000000]  BIOS-e820: 00000000ffc00000 - 0000000100000000 (reserved)
>>  [    0.000000]  BIOS-e820: 0000000100000000 - 000000012c000000 (usable)
>>  [    0.000000] Entering add_active_range(0, 0, 156) 0 entries of 256 used
>>  [    0.000000] Entering add_active_range(0, 256, 849232) 1 entries of 256 used
>>  [    0.000000] Entering add_active_range(0, 1048576, 1228800) 2 entries of 256 used
>>  [    0.000000] max_pfn_mapped = 1228800
>>  [    0.000000] x86 PAT enabled: cpu 0, old 0x7040600070406, new 0x7010600070106
>>  [    0.000000] After WB checking
>>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 000000000012c000
>>  [    0.000000] After UC checking
>>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>>  [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>>  [    0.000000] After sorting
>>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>>  [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>>  [    0.000000] range0: 0000000000000000 - 00000000c0000000
>>  [    0.000000] Setting variable MTRR 0, base: 0MB, range: 2048MB, type WB
>>  [    0.000000] Setting variable MTRR 1, base: 2048MB, range: 1024MB, type WB
>>  [    0.000000] range: 00000000c0000000 - 00000000c8000000
>>  [    0.000000] Setting variable MTRR 2, base: 3072MB, range: 128MB, type WB
>>  [    0.000000] rangeX: 00000000d0000000 - 00000000d0000000
>>  [    0.000000] range0: 0000000100000000 - 0000000120000000
>>  [    0.000000] Setting variable MTRR 3, base: 4096MB, range: 512MB, type WB
>>  [    0.000000] range: 0000000120000000 - 0000000128000000
>>  [    0.000000] Setting variable MTRR 4, base: 4608MB, range: 128MB, type WB
>>  [    0.000000] DONE variable MTRRs
>>  [    0.000000] x86 PAT enabled: cpu 0, old 0x7010600070106, new 0x7010600070106
>>  [    0.000000] After WB checking
>>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000c8000
>>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000128000
>>  [    0.000000] After UC checking
>>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000c8000
>>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000128000
>>  [    0.000000] After sorting
>>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000c8000
>>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000128000
>>  [    0.000000] WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing 64MB of RAM.
>>  [    0.000000] ------------[ cut here ]------------
>>  [    0.000000] WARNING: at arch/x86/kernel/cpu/mtrr/main.c:1206 mtrr_trim_uncached_memory+0x25b/0x361()
>>
>> [    0.000000] Modules linked in:
>>  [    0.000000] Pid: 0, comm: swapper Not tainted 2.6.25-06589-gc65a350-dirty #808
>>  [    0.000000]
>>  [    0.000000] Call Trace:
>>  [    0.000000]  [<ffffffff8022d699>] warn_on_slowpath+0x51/0x8c
>>  [    0.000000]  [<ffffffff806810b4>] x86_get_mtrr_mem_range+0x241/0x25c
>>  [    0.000000]  [<ffffffff8067f915>] update_memory_range+0x95/0xb7
>>  [    0.000000]  [<ffffffff80681a29>] mtrr_trim_uncached_memory+0x25b/0x361
>>
>> [    0.000000]  [<ffffffff8067ea16>] setup_arch+0x280/0x4a6
>>  [    0.000000]  [<ffffffff80678a3d>] start_kernel+0x6e/0x30d
>>  [    0.000000]  [<ffffffff80678432>] x86_64_start_kernel+0x241/0x24a
>>  [    0.000000]
>>  [    0.000000] ---[ end trace 4eaa2a86a8e2da22 ]---
>>
>> [    0.000000] update e820 for mtrr -- end_pfn
>>  [    0.000000] modified physical RAM map:
>>  [    0.000000]  modified: 0000000000000000 - 000000000009cc00 (usable)
>>  [    0.000000]  modified: 000000000009cc00 - 00000000000a0000 (reserved)
>>  [    0.000000]  modified: 00000000000e4000 - 0000000000100000 (reserved)
>>  [    0.000000]  modified: 0000000000100000 - 00000000cf550000 (usable)
>>  [    0.000000]  modified: 00000000cf550000 - 00000000cf55e000 (ACPI data)
>>  [    0.000000]  modified: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
>>  [    0.000000]  modified: 00000000cf5e0000 - 00000000cf600000 (reserved)
>>  [    0.000000]  modified: 00000000fee00000 - 00000000fee01000 (reserved)
>>  [    0.000000]  modified: 00000000ffc00000 - 0000000100000000 (reserved)
>>  [    0.000000]  modified: 0000000100000000 - 0000000128000000 (usable)
>>  [    0.000000]  modified: 0000000128000000 - 000000012c000000 (reserved)
>>  [    0.000000] WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing 757MB of RAM.
>>  [    0.000000] ------------[ cut here ]------------
>>
>> [    0.000000] WARNING: at arch/x86/kernel/cpu/mtrr/main.c:1227 mtrr_trim_uncached_memory+0x313/0x361()
>>  [    0.000000] Modules linked in:
>>  [    0.000000] Pid: 0, comm: swapper Tainted: G        W 2.6.25-06589-gc65a350-dirty #808
>>
>> [    0.000000]
>>  [    0.000000] Call Trace:
>>  [    0.000000]  [<ffffffff8022d699>] warn_on_slowpath+0x51/0x8c
>>  [    0.000000]  [<ffffffff806810b4>] x86_get_mtrr_mem_range+0x241/0x25c
>>  [    0.000000]  [<ffffffff8067f915>] update_memory_range+0x95/0xb7
>>  [    0.000000]  [<ffffffff80681ae1>] mtrr_trim_uncached_memory+0x313/0x361
>>  [    0.000000]  [<ffffffff8067ea16>] setup_arch+0x280/0x4a6
>>  [    0.000000]  [<ffffffff80678a3d>] start_kernel+0x6e/0x30d
>>  [    0.000000]  [<ffffffff80678432>] x86_64_start_kernel+0x241/0x24a
>>  [    0.000000]
>>  [    0.000000] ---[ end trace 4eaa2a86a8e2da22 ]---
>>  [    0.000000] update e820 for mtrr -- holes
>>  [    0.000000] modified physical RAM map:
>>  [    0.000000]  modified: 0000000000000000 - 000000000009cc00 (usable)
>>  [    0.000000]  modified: 000000000009cc00 - 00000000000a0000 (reserved)
>>  [    0.000000]  modified: 00000000000e4000 - 0000000000100000 (reserved)
>>
>> [    0.000000]  modified: 0000000000100000 - 00000000c8000000 (usable)
>>  [    0.000000]  modified: 00000000c8000000 - 00000000cf550000 (reserved)
>>  [    0.000000]  modified: 00000000cf550000 - 00000000cf55e000 (ACPI data)
>>  [    0.000000]  modified: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
>>  [    0.000000]  modified: 00000000cf5e0000 - 00000000cf600000 (reserved)
>>  [    0.000000]  modified: 00000000fee00000 - 00000000fee01000 (reserved)
>>  [    0.000000]  modified: 00000000ffc00000 - 000000012c000000 (reserved)
>>  [    0.000000] Entering add_active_range(0, 0, 156) 3 entries of 256 used
>>  [    0.000000] Entering add_active_range(0, 256, 819200) 3 entries of 256 used
>>
> please check attached debug patch...

Here the output :

...

[    0.000000] Linux version 2.6.25-06598-g33ae0cd-dirty (crazy@thor) (gcc version 4.3.0 (Frugalware Linux) ) #814 SMP PREEMPT Wed Apr 30 02:47:52 CEST 2008
[    0.000000] Command line: root=/dev/sdb1 ro debug vga=0x317 mtrr_chunk_size=256m mtrr_gran_size=128m
[    0.000000] BIOS-provided physical RAM map:
[    0.000000]  BIOS-e820: 0000000000000000 - 000000000009cc00 (usable)
[    0.000000]  BIOS-e820: 000000000009cc00 - 00000000000a0000 (reserved)
[    0.000000]  BIOS-e820: 00000000000e4000 - 0000000000100000 (reserved)
[    0.000000]  BIOS-e820: 0000000000100000 - 00000000cf550000 (usable)
[    0.000000]  BIOS-e820: 00000000cf550000 - 00000000cf55e000 (ACPI data)
[    0.000000]  BIOS-e820: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
[    0.000000]  BIOS-e820: 00000000cf5e0000 - 00000000cf600000 (reserved)
[    0.000000]  BIOS-e820: 00000000fee00000 - 00000000fee01000 (reserved)
[    0.000000]  BIOS-e820: 00000000ffc00000 - 0000000100000000 (reserved)
[    0.000000]  BIOS-e820: 0000000100000000 - 000000012c000000 (usable)
[    0.000000] Entering add_active_range(0, 0, 156) 0 entries of 256 used
[    0.000000] Entering add_active_range(0, 256, 849232) 1 entries of 256 used
[    0.000000] Entering add_active_range(0, 1048576, 1228800) 2 entries of 256 used
[    0.000000] max_pfn_mapped = 1228800
[    0.000000] x86 PAT enabled: cpu 0, old 0x7040600070406, new 0x7010600070106
[    0.000000] After WB checking
[    0.000000] MTRR MAP PFN: 0000000000000000 - 000000000012c000
[    0.000000] After UC checking
[    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
[    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
[    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
[    0.000000] After sorting
[    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
[    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
[    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
[    0.000000] range0: 0000000000000000 - 00000000c0000000
[    0.000000] Setting variable MTRR 0, base: 0MB, range: 2048MB, type WB
[    0.000000] Setting variable MTRR 1, base: 2048MB, range: 1024MB, type WB
[    0.000000] range: 00000000c0000000 - 00000000c8000000
[    0.000000] Setting variable MTRR 2, base: 3072MB, range: 128MB, type WB
[    0.000000] rangeX: 00000000d0000000 - 00000000d0000000
[    0.000000] range0: 0000000100000000 - 0000000120000000
[    0.000000] Setting variable MTRR 3, base: 4096MB, range: 512MB, type WB
[    0.000000] range: 0000000120000000 - 0000000128000000
[    0.000000] Setting variable MTRR 4, base: 4608MB, range: 128MB, type WB
[    0.000000] DONE variable MTRRs
[    0.000000] x86 PAT enabled: cpu 0, old 0x7010600070106, new 0x7010600070106
[    0.000000] After WB checking
[    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000c8000
[    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000128000
[    0.000000] After UC checking
[    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000c8000
[    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000128000
[    0.000000] After sorting
[    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000c8000
[    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000128000
[    0.000000] trimm e820 for MTRR MAP (UC): 0000000000128000 - 000000000012c000
[    0.000000] WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing 64MB of RAM.
[    0.000000] update e820 for mtrr -- end_pfn
[    0.000000] modified physical RAM map:
[    0.000000]  modified: 0000000000000000 - 000000000009cc00 (usable)
[    0.000000]  modified: 000000000009cc00 - 00000000000a0000 (reserved)
[    0.000000]  modified: 00000000000e4000 - 0000000000100000 (reserved)
[    0.000000]  modified: 0000000000100000 - 00000000cf550000 (usable)
[    0.000000]  modified: 00000000cf550000 - 00000000cf55e000 (ACPI data)
[    0.000000]  modified: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
[    0.000000]  modified: 00000000cf5e0000 - 00000000cf600000 (reserved)
[    0.000000]  modified: 00000000fee00000 - 00000000fee01000 (reserved)
[    0.000000]  modified: 00000000ffc00000 - 0000000100000000 (reserved)
[    0.000000]  modified: 0000000100000000 - 0000000128000000 (usable)
[    0.000000]  modified: 0000000128000000 - 000000012c000000 (reserved)
[    0.000000] trimm e820 for MTRR MAP (UC): 00000000000c8000 - 0000000000100000
[    0.000000] WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing 757MB of RAM.
[    0.000000] update e820 for mtrr -- holes
[    0.000000] modified physical RAM map:
[    0.000000]  modified: 0000000000000000 - 000000000009cc00 (usable)
[    0.000000]  modified: 000000000009cc00 - 00000000000a0000 (reserved)
[    0.000000]  modified: 00000000000e4000 - 0000000000100000 (reserved)
[    0.000000]  modified: 0000000000100000 - 00000000c8000000 (usable)
[    0.000000]  modified: 00000000c8000000 - 00000000cf550000 (reserved)
[    0.000000]  modified: 00000000cf550000 - 00000000cf55e000 (ACPI data)
[    0.000000]  modified: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
[    0.000000]  modified: 00000000cf5e0000 - 00000000cf600000 (reserved)
[    0.000000]  modified: 00000000fee00000 - 00000000fee01000 (reserved)
[    0.000000]  modified: 00000000ffc00000 - 000000012c000000 (reserved)
[    0.000000] Entering add_active_range(0, 0, 156) 3 entries of 256 used
[    0.000000] Entering add_active_range(0, 256, 819200) 3 entries of 256 used
[    0.000000] max_pfn_mapped = 1228800
[    0.000000] init_memory_mapping

...

( this kernel is still not build with your second cleanup patch will do so in next run )

Also let me know if you want me to boot with any other mtrr_chunk_size / mtrr_gran_size values.


> 
> THanks
> 
> Yinghai Lu
> 


Gabriel

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH] x86: mtrr cleanup for converting continuous to discrete layout v5
  2008-04-29 22:54                   ` Thomas Gleixner
@ 2008-04-30  1:16                     ` Eric W. Biederman
  2008-04-30  9:57                       ` Alan Cox
  0 siblings, 1 reply; 89+ messages in thread
From: Eric W. Biederman @ 2008-04-30  1:16 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Yinghai Lu, Andrew Morton, Ingo Molnar, H. Peter Anvin, Gabriel C,
	linux-kernel@vger.kernel.org, Mika Fischer

Thomas Gleixner <tglx@linutronix.de> writes:

> On Tue, 29 Apr 2008, Eric W. Biederman wrote:
>> Think of SMM mode is a lightweight hypervisor that we can't get rid
>> of, if you want to understand the worst case.
>> 
>> In theory SMM mode is completely unnecessary as soon as we enable
>> ACPI.  In practice ACPI appears to frequently trap into SMM mode.
>
> SMM does more than that. It emulates legacy hardware and fixes
> chip(set) bugs as well. Disabling it just makes your box stop
> working. There are certain types of systems where essential safety
> nets rely on SMIs (you can deep-fry P4s by disabling SMIs).

There is truth in that but it is over dramatic.  P4s don't deep fry
they almost always turn off before they overheat (you make take
physical damage to your motherboard though).


The best definition I have heard of SMM mode is: smack the stupid OS
that isn't doing what it should be doing at runtime mode.

It is the way board designers and BIOS writers can work around what
they perceive as broken OS code, that keeps them from doing what
they need to do.  Getting them to give up SMM mode even though
technically possible is requesting they give up a degree of control
and thus a major social engineering challenge for anyone who wishes
to achieve it.


So any time we tread on territory that could mess up SMM mode
we need to be careful, especially as we can not turn it off to
diagnose problems.  The interactions can be hard to root cause.


Replacing overlapping MTRRs with a non overlapping set to allow
X to set a WB region as YH is doing appears safe and reasonable, and
worth doing. 

Going one step farther and reducing some of the WB memory to UC
so we can free up an MTRR for video and to accelerate X is a
bit chancy and something I don't feel comfortable with enabling
by default.  Especially as we have a better long term fix on
the way.

This problem is hitting enough people and the odds of something
really bad happening when you take a 100x or 1000x slowdown in 
SMM are pretty low so I do think it is useful to have a kernel
option that rounds down the amount of memory you have converts WB
memory to UC to accelerate X.

Hopefully by this point we are all now reminded how this can
interact with SMM mode (although no one has ever seen a bad
interaction) and how interacting with SMM mode can be a problem.

Eric

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH 1/2] x86: mtrr cleanup for converting continuous to discrete layout v7
  2008-04-30  1:02                               ` Gabriel C
@ 2008-04-30  3:00                                 ` Yinghai Lu
  2008-04-30  3:29                                   ` Yinghai Lu
  0 siblings, 1 reply; 89+ messages in thread
From: Yinghai Lu @ 2008-04-30  3:00 UTC (permalink / raw)
  To: Gabriel C
  Cc: Andrew Morton, Ingo Molnar, H. Peter Anvin, Thomas Gleixner,
	Mika Fischer, linux-kernel@vger.kernel.org

[-- Attachment #1: Type: text/plain, Size: 36277 bytes --]

On Tue, Apr 29, 2008 at 6:02 PM, Gabriel C <nix.or.die@googlemail.com> wrote:
>
> Yinghai Lu wrote:
>  > On Tue, Apr 29, 2008 at 5:06 PM, Gabriel C <nix.or.die@googlemail.com> wrote:
>  >> Gabriel C wrote:
>  >>  > Yinghai Lu wrote:
>  >>  >> On Tue, Apr 29, 2008 at 1:25 PM, Gabriel C <nix.or.die@googlemail.com> wrote:
>  >>  >>> Yinghai Lu wrote:
>  >>  >>>  > On Tue, Apr 29, 2008 at 4:51 AM, Gabriel C <nix.or.die@googlemail.com> wrote:
>  >>  >>>  >> Yinghai Lu wrote:
>  >>  >>>  >>  > On Tue, Apr 29, 2008 at 3:30 AM, Yinghai Lu <yhlu.kernel@gmail.com> wrote:
>  >>  >>>  >>  >> On Tue, Apr 29, 2008 at 2:47 AM, Gabriel C <nix.or.die@googlemail.com> wrote:
>  >>  >>>  >>  >>  > Yinghai Lu wrote:
>  >>  >>>  >>  >>  >  > some BIOS like to use continus MTRR layout, and may X driver can not add
>  >>  >>>  >>  >>  >  > WB entries for graphical cards when 4g or more RAM installed.
>  >>  >>>  >>  >>  >  >
>  >>  >>>  >>  >>  >  > the patch will change MTRR to discrete.
>  >>  >>>  >>  >>  >  >
>  >>  >>>  >>  >>  >  > mtrr_chunk_size= could be used to have smaller continuous block to hold holes.
>  >>  >>>  >>  >>  >  > default is 256m, could be set according to size of graphics card memory.
>  >>  >>>  >>  >>  >  >
>  >>  >>>  >>  >>  >  > v2: fix -1 for UC checking
>  >>  >>>  >>  >>  >  > v3: default to disable, and need use enable_mtrr_cleanup to enable this feature
>  >>  >>>  >>  >>  >  >     skip the var state change warning.
>  >>  >>>  >>  >>  >  >     remove next_basek in range_to_mtrr()
>  >>  >>>  >>  >>  >  > v4: correct warning mask.
>  >>  >>>  >>  >>  >  > v5: CONFIG_MTRR_SANITIZER
>  >>  >>>  >>  >>  >  > v6: fix 1g, 2g, 512 aligment with extra hole
>  >>  >>>  >>  >>  >  > v7: gran_sizek to prevent running out of MTRRs.
>  >>  >>>  >>  >>  >  >
>  >>  >>>  >>  >>  >
>  >>  >>>  >>  >>  >  With this version ( and patch http://lkml.org/lkml/2008/4/29/97 ) applyed on latest linus git tree
>  >>  >>>  >>  >>  >  the box OOPS'es early.
>  >>  >>>  >>  >>  >
>  >>  >>>  >>  >>  >  Sorry I don't have time right now to write down the part of the OOPS I can see on monitor , I can try to find
>  >>  >>>  >>  >>  >  some time later.
>  >>  >>>  >>  >>  >
>  >>  >>>  >>  >>  >  In any way OOPS'es on __free_one_page+0x191/0x21e
>  >>  >>>  >>  >>
>  >>  >>>  >>  >>  thanks. found one problem with hole_basek ...
>  >>  >>>  >>  >>
>  >>  >>>  >>  >>  will send you v8, and hope it will be last version.
>  >>  >>>  >>  >
>  >>  >>>  >>  > please try v8, it should get rid of the 8m entry. it need patch
>  >>  >>>  >>  > http://lkml.org/lkml/2008/4/29/97 too.
>  >>  >>>  >>
>  >>  >>>  >>  Box does boot with v8 but now I get that warning you fixed in v2 again =):
>  >>  >>>  >>
>  >>  >>>  >>  ....
>  >>  >>>  >>  [    0.000000] Linux version 2.6.25-06058-ga01e035-dirty (crazy@thor) (gcc version 4.3.0 (Frugalware Linux) ) #805 SMP PREEMPT Tue Apr 29 13:04:49 CEST 2008
>  >>  >>>  >>  [    0.000000] Command line: root=/dev/sdb1 ro debug vga=0x317
>  >>  >>>  >>  [    0.000000] BIOS-provided physical RAM map:
>  >>  >>>  >>  [    0.000000]  BIOS-e820: 0000000000000000 - 000000000009cc00 (usable)
>  >>  >>>  >>  [    0.000000]  BIOS-e820: 000000000009cc00 - 00000000000a0000 (reserved)
>  >>  >>>  >>  [    0.000000]  BIOS-e820: 00000000000e4000 - 0000000000100000 (reserved)
>  >>  >>>  >>  [    0.000000]  BIOS-e820: 0000000000100000 - 00000000cf550000 (usable)
>  >>  >>>  >>  [    0.000000]  BIOS-e820: 00000000cf550000 - 00000000cf55e000 (ACPI data)
>  >>  >>>  >>  [    0.000000]  BIOS-e820: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
>  >>  >>>  >>  [    0.000000]  BIOS-e820: 00000000cf5e0000 - 00000000cf600000 (reserved)
>  >>  >>>  >>  [    0.000000]  BIOS-e820: 00000000fee00000 - 00000000fee01000 (reserved)
>  >>  >>>  >>  [    0.000000]  BIOS-e820: 00000000ffc00000 - 0000000100000000 (reserved)
>  >>  >>>  >>  [    0.000000]  BIOS-e820: 0000000100000000 - 000000012c000000 (usable)
>  >>  >>>  >>  [    0.000000] Entering add_active_range(0, 0, 156) 0 entries of 256 used
>  >>  >>>  >>  [    0.000000] Entering add_active_range(0, 256, 849232) 1 entries of 256 used
>  >>  >>>  >>  [    0.000000] Entering add_active_range(0, 1048576, 1228800) 2 entries of 256 used
>  >>  >>>  >>  [    0.000000] max_pfn_mapped = 1228800
>  >>  >>>  >>  [    0.000000] x86 PAT enabled: cpu 0, old 0x7040600070406, new 0x7010600070106
>  >>  >>>  >>  [    0.000000] After WB checking
>  >>  >>>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 000000000012c000
>  >>  >>>  >>  [    0.000000] After UC checking
>  >>  >>>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>  >>  >>>  >>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>  >>  >>>  >>  [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>  >>  >>>  >>  [    0.000000] After sorting
>  >>  >>>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>  >>  >>>  >>  [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>  >>  >>>  >>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>  >>  >>>  >>  [    0.000000] range0: 0000000000000000 - 00000000c0000000
>  >>  >>>  >>  [    0.000000] Setting variable MTRR 0, base: 0MB, range: 2048MB, type WB
>  >>  >>>  >>  [    0.000000] Setting variable MTRR 1, base: 2048MB, range: 1024MB, type WB
>  >>  >>>  >>  [    0.000000] range: 00000000c0000000 - 00000000d0000000
>  >>  >>>  >>  [    0.000000] Setting variable MTRR 2, base: 3072MB, range: 256MB, type WB
>  >>  >>>  >>  [    0.000000] hole: 00000000cc000000 - 00000000d0000000
>  >>  >>>  >>  [    0.000000] Setting variable MTRR 3, base: 3264MB, range: 64MB, type UC
>  >>  >>>  >>  [    0.000000] rangeX: 00000000d0000000 - 00000000d0000000
>  >>  >>>  >>  [    0.000000] range0: 0000000100000000 - 0000000120000000
>  >>  >>>  >>  [    0.000000] Setting variable MTRR 4, base: 4096MB, range: 512MB, type WB
>  >>  >>>  >>  [    0.000000] range: 0000000120000000 - 0000000130000000
>  >>  >>>  >>  [    0.000000] Setting variable MTRR 5, base: 4608MB, range: 256MB, type WB
>  >>  >>>  >>  [    0.000000] hole: 000000012c000000 - 0000000130000000
>  >>  >>>  >>  [    0.000000] Setting variable MTRR 6, base: 4800MB, range: 64MB, type UC
>  >>  >>>  >>  [    0.000000] DONE variable MTRRs
>  >>  >>>  >>  [    0.000000] x86 PAT enabled: cpu 0, old 0x7010600070106, new 0x7010600070106
>  >>  >>>  >>  [    0.000000] After WB checking
>  >>  >>>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000d0000
>  >>  >>>  >>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000130000
>  >>  >>>  >>  [    0.000000] After UC checking
>  >>  >>>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cc000
>  >>  >>>  >>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>  >>  >>>  >>  [    0.000000] After sorting
>  >>  >>>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cc000
>  >>  >>>  >>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>  >>  >>>  >>  [    0.000000] WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing 757MB of RAM.
>  >>  >>>  >
>  >>  >>>  > so
>  >>  >>>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>  >>  >>>  >>  [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>  >>  >>>  >>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>  >>  >>>  > ===>
>  >>  >>>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cc000
>  >>  >>>  >>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>  >>  >>>  >
>  >>  >>>  > please try
>  >>  >>>
>  >>  >>>  Now I have 2 warnings
>  >>  >>>
>  >>  >>>
>  >>  >>>  > mtrr_chunk_size=512m mtrr_gran_size=128m
>  >>  >>>
>  >>  >>>  ...
>  >>  >>>
>  >>  >>>  [    0.000000] Command line: root=/dev/sdb1 ro debug vga=0x317 mtrr_chunk_size=512m mtrr_gran_size=128m 3
>  >>  >>>
>  >>  >>>
>  >>  >>> [    0.000000] BIOS-provided physical RAM map:
>  >>  >>>  [    0.000000]  BIOS-e820: 0000000000000000 - 000000000009cc00 (usable)
>  >>  >>>  [    0.000000]  BIOS-e820: 000000000009cc00 - 00000000000a0000 (reserved)
>  >>  >>>  [    0.000000]  BIOS-e820: 00000000000e4000 - 0000000000100000 (reserved)
>  >>  >>>  [    0.000000]  BIOS-e820: 0000000000100000 - 00000000cf550000 (usable)
>  >>  >>>  [    0.000000]  BIOS-e820: 00000000cf550000 - 00000000cf55e000 (ACPI data)
>  >>  >>>  [    0.000000]  BIOS-e820: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
>  >>  >>>  [    0.000000]  BIOS-e820: 00000000cf5e0000 - 00000000cf600000 (reserved)
>  >>  >>>  [    0.000000]  BIOS-e820: 00000000fee00000 - 00000000fee01000 (reserved)
>  >>  >>>  [    0.000000]  BIOS-e820: 00000000ffc00000 - 0000000100000000 (reserved)
>  >>  >>>  [    0.000000]  BIOS-e820: 0000000100000000 - 000000012c000000 (usable)
>  >>  >>>  [    0.000000] Entering add_active_range(0, 0, 156) 0 entries of 256 used
>  >>  >>>  [    0.000000] Entering add_active_range(0, 256, 849232) 1 entries of 256 used
>  >>  >>>  [    0.000000] Entering add_active_range(0, 1048576, 1228800) 2 entries of 256 used
>  >>  >>>  [    0.000000] max_pfn_mapped = 1228800
>  >>  >>>  [    0.000000] x86 PAT enabled: cpu 0, old 0x7040600070406, new 0x7010600070106
>  >>  >>>  [    0.000000] After WB checking
>  >>  >>>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 000000000012c000
>  >>  >>>  [    0.000000] After UC checking
>  >>  >>>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>  >>  >>>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>  >>  >>>  [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>  >>  >>>  [    0.000000] After sorting
>  >>  >>>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>  >>  >>>  [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>  >>  >>>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>  >>  >>>  [    0.000000] range0: 0000000000000000 - 00000000c0000000
>  >>  >>>  [    0.000000] Setting variable MTRR 0, base: 0MB, range: 2048MB, type WB
>  >>  >>>  [    0.000000] Setting variable MTRR 1, base: 2048MB, range: 1024MB, type WB
>  >>  >>>  [    0.000000] range: 00000000c0000000 - 00000000c8000000
>  >>  >>>  [    0.000000] Setting variable MTRR 2, base: 3072MB, range: 128MB, type WB
>  >>  >>>
>  >>  >>> [    0.000000] rangeX: 00000000d0000000 - 00000000d0000000
>  >>  >>>  [    0.000000] range0: 0000000100000000 - 0000000120000000
>  >>  >>>  [    0.000000] Setting variable MTRR 3, base: 4096MB, range: 512MB, type WB
>  >>  >>>  [    0.000000] range: 0000000120000000 - 0000000128000000
>  >>  >>>  [    0.000000] Setting variable MTRR 4, base: 4608MB, range: 128MB, type WB
>  >>  >>>
>  >>  >>> [    0.000000] DONE variable MTRRs
>  >>  >>>  [    0.000000] x86 PAT enabled: cpu 0, old 0x7010600070106, new 0x7010600070106
>  >>  >>>  [    0.000000] After WB checking
>  >>  >>>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000c8000
>  >>  >>>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000128000
>  >>  >>>
>  >>  >>> [    0.000000] After UC checking
>  >>  >>>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000c8000
>  >>  >>>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000128000
>  >>  >>>  [    0.000000] After sorting
>  >>  >>>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000c8000
>  >>  >>>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000128000
>  >>  >>>  [    0.000000] WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing 64MB of RAM.
>  >>  >>>  [    0.000000] ------------[ cut here ]------------
>  >>  >>>  [    0.000000] WARNING: at arch/x86/kernel/cpu/mtrr/main.c:1206 mtrr_trim_uncached_memory+0x25b/0x35a()
>  >>  >>>
>  >>  >>> [    0.000000] Modules linked in:
>  >>  >>>  [    0.000000] Pid: 0, comm: swapper Not tainted 2.6.25-06058-ga01e035-dirty #805
>  >>  >>>  [    0.000000]
>  >>  >>>  [    0.000000] Call Trace:
>  >>  >>>  [    0.000000]  [<ffffffff8022d661>] warn_on_slowpath+0x51/0x78
>  >>  >>>  [    0.000000]  [<ffffffff8067f0dc>] x86_get_mtrr_mem_range+0x241/0x25c
>  >>  >>>  [    0.000000]  [<ffffffff8067d93e>] update_memory_range+0x95/0xb7
>  >>  >>>  [    0.000000]  [<ffffffff8067fa51>] mtrr_trim_uncached_memory+0x25b/0x35a
>  >>  >>>
>  >>  >>> [    0.000000]  [<ffffffff8067ca43>] setup_arch+0x280/0x4a2
>  >>  >>>  [    0.000000]  [<ffffffff80676aa6>] start_kernel+0x6e/0x308
>  >>  >>>  [    0.000000]  [<ffffffff80676432>] x86_64_start_kernel+0x241/0x24a
>  >>  >>>  [    0.000000]
>  >>  >>>  [    0.000000] ---[ end trace ca143223eefdc828 ]---
>  >>  >>>  [    0.000000] update e820 for mtrr -- end_pfn
>  >>  >>>  [    0.000000] modified physical RAM map:
>  >>  >>>  [    0.000000]  modified: 0000000000000000 - 000000000009cc00 (usable)
>  >>  >>>  [    0.000000]  modified: 000000000009cc00 - 00000000000a0000 (reserved)
>  >>  >>>  [    0.000000]  modified: 00000000000e4000 - 0000000000100000 (reserved)
>  >>  >>>  [    0.000000]  modified: 0000000000100000 - 00000000cf550000 (usable)
>  >>  >>>  [    0.000000]  modified: 00000000cf550000 - 00000000cf55e000 (ACPI data)
>  >>  >>>  [    0.000000]  modified: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
>  >>  >>>  [    0.000000]  modified: 00000000cf5e0000 - 00000000cf600000 (reserved)
>  >>  >>>  [    0.000000]  modified: 00000000fee00000 - 00000000fee01000 (reserved)
>  >>  >>>  [    0.000000]  modified: 00000000ffc00000 - 0000000100000000 (reserved)
>  >>  >>>  [    0.000000]  modified: 0000000100000000 - 0000000128000000 (usable)
>  >>  >>>  [    0.000000]  modified: 0000000128000000 - 000000012c000000 (reserved)
>  >>  >>>
>  >>  >>> [    0.000000] WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing 757MB of RAM.
>  >>  >>>
>  >>  >>> [    0.000000] ------------[ cut here ]------------
>  >>  >>>  [    0.000000] WARNING: at arch/x86/kernel/cpu/mtrr/main.c:1227 mtrr_trim_uncached_memory+0x30c/0x35a()
>  >>  >>>  [    0.000000] Modules linked in:
>  >>  >>>  [    0.000000] Pid: 0, comm: swapper Not tainted 2.6.25-06058-ga01e035-dirty #805
>  >>  >>>  [    0.000000]
>  >>  >>>  [    0.000000] Call Trace:
>  >>  >>>  [    0.000000]  [<ffffffff8022d661>] warn_on_slowpath+0x51/0x78
>  >>  >>>  [    0.000000]  [<ffffffff8067f0dc>] x86_get_mtrr_mem_range+0x241/0x25c
>  >>  >>>  [    0.000000]  [<ffffffff8067d93e>] update_memory_range+0x95/0xb7
>  >>  >>>  [    0.000000]  [<ffffffff8067fb02>] mtrr_trim_uncached_memory+0x30c/0x35a
>  >>  >>>  [    0.000000]  [<ffffffff8067ca43>] setup_arch+0x280/0x4a2
>  >>  >>>  [    0.000000]  [<ffffffff80676aa6>] start_kernel+0x6e/0x308
>  >>  >>>  [    0.000000]  [<ffffffff80676432>] x86_64_start_kernel+0x241/0x24a
>  >>  >>>  [    0.000000]
>  >>  >>>  [    0.000000] ---[ end trace ca143223eefdc828 ]---
>  >>  >>>  [    0.000000] update e820 for mtrr -- holes
>  >>  >>>  [    0.000000] modified physical RAM map:
>  >>  >>>  [    0.000000]  modified: 0000000000000000 - 000000000009cc00 (usable)
>  >>  >>>  [    0.000000]  modified: 000000000009cc00 - 00000000000a0000 (reserved)
>  >>  >>>  [    0.000000]  modified: 00000000000e4000 - 0000000000100000 (reserved)
>  >>  >>>  [    0.000000]  modified: 0000000000100000 - 00000000c8000000 (usable)
>  >>  >>>  [    0.000000]  modified: 00000000c8000000 - 00000000cf550000 (reserved)
>  >>  >>>  [    0.000000]  modified: 00000000cf550000 - 00000000cf55e000 (ACPI data)
>  >>  >>>  [    0.000000]  modified: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
>  >>  >>>  [    0.000000]  modified: 00000000cf5e0000 - 00000000cf600000 (reserved)
>  >>  >>>  [    0.000000]  modified: 00000000fee00000 - 00000000fee01000 (reserved)
>  >>  >>>  [    0.000000]  modified: 00000000ffc00000 - 000000012c000000 (reserved)
>  >>  >>>
>  >>  >>> [    0.000000] Entering add_active_range(0, 0, 156) 3 entries of 256 used
>  >>  >>>  [    0.000000] Entering add_active_range(0, 256, 819200) 3 entries of 256 used
>  >>  >> please try attached trim_holes_fix.patch..., it will fix the trim hole problem.
>  >>  >
>  >>  > Tested but warning is still there. I try to boot with mtrr_chunk_size / mtrr_gran_size and see what I get.
>  >>  >
>  >>  > ...
>  >>  >
>  >>  > [    0.000000] Linux version 2.6.25-06589-gc65a350-dirty (crazy@thor) (gcc version 4.3.0 (Frugalware Linux) ) #808 SMP PREEMPT Wed Apr 30 01:37:38 CEST 2008
>  >>  > [    0.000000] Command line: root=/dev/sdb1 ro debug vga=0x317
>  >>  > [    0.000000] BIOS-provided physical RAM map:
>  >>  > [    0.000000]  BIOS-e820: 0000000000000000 - 000000000009cc00 (usable)
>  >>  > [    0.000000]  BIOS-e820: 000000000009cc00 - 00000000000a0000 (reserved)
>  >>  > [    0.000000]  BIOS-e820: 00000000000e4000 - 0000000000100000 (reserved)
>  >>  > [    0.000000]  BIOS-e820: 0000000000100000 - 00000000cf550000 (usable)
>  >>  > [    0.000000]  BIOS-e820: 00000000cf550000 - 00000000cf55e000 (ACPI data)
>  >>  > [    0.000000]  BIOS-e820: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
>  >>  > [    0.000000]  BIOS-e820: 00000000cf5e0000 - 00000000cf600000 (reserved)
>  >>  > [    0.000000]  BIOS-e820: 00000000fee00000 - 00000000fee01000 (reserved)
>  >>  > [    0.000000]  BIOS-e820: 00000000ffc00000 - 0000000100000000 (reserved)
>  >>  > [    0.000000]  BIOS-e820: 0000000100000000 - 000000012c000000 (usable)
>  >>  > [    0.000000] Entering add_active_range(0, 0, 156) 0 entries of 256 used
>  >>  > [    0.000000] Entering add_active_range(0, 256, 849232) 1 entries of 256 used
>  >>  > [    0.000000] Entering add_active_range(0, 1048576, 1228800) 2 entries of 256 used
>  >>  > [    0.000000] max_pfn_mapped = 1228800
>  >>  > [    0.000000] x86 PAT enabled: cpu 0, old 0x7040600070406, new 0x7010600070106
>  >>  > [    0.000000] After WB checking
>  >>  > [    0.000000] MTRR MAP PFN: 0000000000000000 - 000000000012c000
>  >>  > [    0.000000] After UC checking
>  >>  > [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>  >>  > [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>  >>  > [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>  >>  > [    0.000000] After sorting
>  >>  > [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>  >>  > [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>  >>  > [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>  >>  > [    0.000000] range0: 0000000000000000 - 00000000c0000000
>  >>  > [    0.000000] Setting variable MTRR 0, base: 0MB, range: 2048MB, type WB
>  >>  > [    0.000000] Setting variable MTRR 1, base: 2048MB, range: 1024MB, type WB
>  >>  > [    0.000000] range: 00000000c0000000 - 00000000d0000000
>  >>  > [    0.000000] Setting variable MTRR 2, base: 3072MB, range: 256MB, type WB
>  >>  > [    0.000000] hole: 00000000cc000000 - 00000000d0000000
>  >>  > [    0.000000] Setting variable MTRR 3, base: 3264MB, range: 64MB, type UC
>  >>  > [    0.000000] rangeX: 00000000d0000000 - 00000000d0000000
>  >>  > [    0.000000] range0: 0000000100000000 - 0000000120000000
>  >>  > [    0.000000] Setting variable MTRR 4, base: 4096MB, range: 512MB, type WB
>  >>  > [    0.000000] range: 0000000120000000 - 0000000130000000
>  >>  > [    0.000000] Setting variable MTRR 5, base: 4608MB, range: 256MB, type WB
>  >>  > [    0.000000] hole: 000000012c000000 - 0000000130000000
>  >>  > [    0.000000] Setting variable MTRR 6, base: 4800MB, range: 64MB, type UC
>  >>  > [    0.000000] DONE variable MTRRs
>  >>  > [    0.000000] x86 PAT enabled: cpu 0, old 0x7010600070106, new 0x7010600070106
>  >>  > [    0.000000] After WB checking
>  >>  > [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000d0000
>  >>  > [    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000130000
>  >>  > [    0.000000] After UC checking
>  >>  > [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cc000
>  >>  > [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>  >>  > [    0.000000] After sorting
>  >>  > [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cc000
>  >>  > [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>  >>  > [    0.000000] WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing 757MB of RAM.
>  >>  > [    0.000000] ------------[ cut here ]------------
>  >>  > [    0.000000] WARNING: at arch/x86/kernel/cpu/mtrr/main.c:1227 mtrr_trim_uncached_memory+0x313/0x361()
>  >>  > [    0.000000] Modules linked in:
>  >>  > [    0.000000] Pid: 0, comm: swapper Not tainted 2.6.25-06589-gc65a350-dirty #808
>  >>  > [    0.000000]
>  >>  > [    0.000000] Call Trace:
>  >>  > [    0.000000]  [<ffffffff8022d699>] warn_on_slowpath+0x51/0x8c
>  >>  > [    0.000000]  [<ffffffff806810b4>] x86_get_mtrr_mem_range+0x241/0x25c
>  >>  > [    0.000000]  [<ffffffff8067f915>] update_memory_range+0x95/0xb7
>  >>  > [    0.000000]  [<ffffffff80681ae1>] mtrr_trim_uncached_memory+0x313/0x361
>  >>  > [    0.000000]  [<ffffffff8067ea16>] setup_arch+0x280/0x4a6
>  >>  > [    0.000000]  [<ffffffff80678a3d>] start_kernel+0x6e/0x30d
>  >>  > [    0.000000]  [<ffffffff80678432>] x86_64_start_kernel+0x241/0x24a
>  >>  > [    0.000000]
>  >>  > [    0.000000] ---[ end trace 4eaa2a86a8e2da22 ]---
>  >>  > [    0.000000] update e820 for mtrr -- holes
>  >>  > [    0.000000] modified physical RAM map:
>  >>  > [    0.000000]  modified: 0000000000000000 - 000000000009cc00 (usable)
>  >>  > [    0.000000]  modified: 000000000009cc00 - 00000000000a0000 (reserved)
>  >>  > [    0.000000]  modified: 00000000000e4000 - 0000000000100000 (reserved)
>  >>  > [    0.000000]  modified: 0000000000100000 - 00000000cc000000 (usable)
>  >>  > [    0.000000]  modified: 00000000cc000000 - 00000000cf550000 (reserved)
>  >>  > [    0.000000]  modified: 00000000cf550000 - 00000000cf55e000 (ACPI data)
>  >>  > [    0.000000]  modified: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
>  >>  > [    0.000000]  modified: 00000000cf5e0000 - 00000000cf600000 (reserved)
>  >>  > [    0.000000]  modified: 00000000fee00000 - 00000000fee01000 (reserved)
>  >>  > [    0.000000]  modified: 00000000ffc00000 - 000000012c000000 (reserved)
>  >>  > [    0.000000] Entering add_active_range(0, 0, 156) 3 entries of 256 used
>  >>  > [    0.000000] Entering add_active_range(0, 256, 835584) 3 entries of 256 used
>  >>  > [    0.000000] max_pfn_mapped = 1228800
>  >>  > [    0.000000] init_memory_mapping
>  >>  >
>  >>  > ...
>  >>  >
>  >>
>  >>  with mtrr_chunk_size=256m mtrr_gran_size=128m I'm getting :
>  >>
>  >>  ...
>  >>
>  >>
>  >>
>  >>  [    0.000000] BIOS-provided physical RAM map:
>  >>  [    0.000000]  BIOS-e820: 0000000000000000 - 000000000009cc00 (usable)
>  >>  [    0.000000]  BIOS-e820: 000000000009cc00 - 00000000000a0000 (reserved)
>  >>  [    0.000000]  BIOS-e820: 00000000000e4000 - 0000000000100000 (reserved)
>  >>  [    0.000000]  BIOS-e820: 0000000000100000 - 00000000cf550000 (usable)
>  >>  [    0.000000]  BIOS-e820: 00000000cf550000 - 00000000cf55e000 (ACPI data)
>  >>  [    0.000000]  BIOS-e820: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
>  >>  [    0.000000]  BIOS-e820: 00000000cf5e0000 - 00000000cf600000 (reserved)
>  >>  [    0.000000]  BIOS-e820: 00000000fee00000 - 00000000fee01000 (reserved)
>  >>  [    0.000000]  BIOS-e820: 00000000ffc00000 - 0000000100000000 (reserved)
>  >>  [    0.000000]  BIOS-e820: 0000000100000000 - 000000012c000000 (usable)
>  >>  [    0.000000] Entering add_active_range(0, 0, 156) 0 entries of 256 used
>  >>  [    0.000000] Entering add_active_range(0, 256, 849232) 1 entries of 256 used
>  >>  [    0.000000] Entering add_active_range(0, 1048576, 1228800) 2 entries of 256 used
>  >>  [    0.000000] max_pfn_mapped = 1228800
>  >>  [    0.000000] x86 PAT enabled: cpu 0, old 0x7040600070406, new 0x7010600070106
>  >>  [    0.000000] After WB checking
>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 000000000012c000
>  >>  [    0.000000] After UC checking
>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>  >>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>  >>  [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>  >>  [    0.000000] After sorting
>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>  >>  [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>  >>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>  >>  [    0.000000] range0: 0000000000000000 - 00000000c0000000
>  >>  [    0.000000] Setting variable MTRR 0, base: 0MB, range: 2048MB, type WB
>  >>  [    0.000000] Setting variable MTRR 1, base: 2048MB, range: 1024MB, type WB
>  >>  [    0.000000] range: 00000000c0000000 - 00000000c8000000
>  >>  [    0.000000] Setting variable MTRR 2, base: 3072MB, range: 128MB, type WB
>  >>  [    0.000000] rangeX: 00000000d0000000 - 00000000d0000000
>  >>  [    0.000000] range0: 0000000100000000 - 0000000120000000
>  >>  [    0.000000] Setting variable MTRR 3, base: 4096MB, range: 512MB, type WB
>  >>  [    0.000000] range: 0000000120000000 - 0000000128000000
>  >>  [    0.000000] Setting variable MTRR 4, base: 4608MB, range: 128MB, type WB
>  >>  [    0.000000] DONE variable MTRRs
>  >>  [    0.000000] x86 PAT enabled: cpu 0, old 0x7010600070106, new 0x7010600070106
>  >>  [    0.000000] After WB checking
>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000c8000
>  >>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000128000
>  >>  [    0.000000] After UC checking
>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000c8000
>  >>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000128000
>  >>  [    0.000000] After sorting
>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000c8000
>  >>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000128000
>  >>  [    0.000000] WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing 64MB of RAM.
>  >>  [    0.000000] ------------[ cut here ]------------
>  >>  [    0.000000] WARNING: at arch/x86/kernel/cpu/mtrr/main.c:1206 mtrr_trim_uncached_memory+0x25b/0x361()
>  >>
>  >> [    0.000000] Modules linked in:
>  >>  [    0.000000] Pid: 0, comm: swapper Not tainted 2.6.25-06589-gc65a350-dirty #808
>  >>  [    0.000000]
>  >>  [    0.000000] Call Trace:
>  >>  [    0.000000]  [<ffffffff8022d699>] warn_on_slowpath+0x51/0x8c
>  >>  [    0.000000]  [<ffffffff806810b4>] x86_get_mtrr_mem_range+0x241/0x25c
>  >>  [    0.000000]  [<ffffffff8067f915>] update_memory_range+0x95/0xb7
>  >>  [    0.000000]  [<ffffffff80681a29>] mtrr_trim_uncached_memory+0x25b/0x361
>  >>
>  >> [    0.000000]  [<ffffffff8067ea16>] setup_arch+0x280/0x4a6
>  >>  [    0.000000]  [<ffffffff80678a3d>] start_kernel+0x6e/0x30d
>  >>  [    0.000000]  [<ffffffff80678432>] x86_64_start_kernel+0x241/0x24a
>  >>  [    0.000000]
>  >>  [    0.000000] ---[ end trace 4eaa2a86a8e2da22 ]---
>  >>
>  >> [    0.000000] update e820 for mtrr -- end_pfn
>  >>  [    0.000000] modified physical RAM map:
>  >>  [    0.000000]  modified: 0000000000000000 - 000000000009cc00 (usable)
>  >>  [    0.000000]  modified: 000000000009cc00 - 00000000000a0000 (reserved)
>  >>  [    0.000000]  modified: 00000000000e4000 - 0000000000100000 (reserved)
>  >>  [    0.000000]  modified: 0000000000100000 - 00000000cf550000 (usable)
>  >>  [    0.000000]  modified: 00000000cf550000 - 00000000cf55e000 (ACPI data)
>  >>  [    0.000000]  modified: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
>  >>  [    0.000000]  modified: 00000000cf5e0000 - 00000000cf600000 (reserved)
>  >>  [    0.000000]  modified: 00000000fee00000 - 00000000fee01000 (reserved)
>  >>  [    0.000000]  modified: 00000000ffc00000 - 0000000100000000 (reserved)
>  >>  [    0.000000]  modified: 0000000100000000 - 0000000128000000 (usable)
>  >>  [    0.000000]  modified: 0000000128000000 - 000000012c000000 (reserved)
>  >>  [    0.000000] WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing 757MB of RAM.
>  >>  [    0.000000] ------------[ cut here ]------------
>  >>
>  >> [    0.000000] WARNING: at arch/x86/kernel/cpu/mtrr/main.c:1227 mtrr_trim_uncached_memory+0x313/0x361()
>  >>  [    0.000000] Modules linked in:
>  >>  [    0.000000] Pid: 0, comm: swapper Tainted: G        W 2.6.25-06589-gc65a350-dirty #808
>  >>
>  >> [    0.000000]
>  >>  [    0.000000] Call Trace:
>  >>  [    0.000000]  [<ffffffff8022d699>] warn_on_slowpath+0x51/0x8c
>  >>  [    0.000000]  [<ffffffff806810b4>] x86_get_mtrr_mem_range+0x241/0x25c
>  >>  [    0.000000]  [<ffffffff8067f915>] update_memory_range+0x95/0xb7
>  >>  [    0.000000]  [<ffffffff80681ae1>] mtrr_trim_uncached_memory+0x313/0x361
>  >>  [    0.000000]  [<ffffffff8067ea16>] setup_arch+0x280/0x4a6
>  >>  [    0.000000]  [<ffffffff80678a3d>] start_kernel+0x6e/0x30d
>  >>  [    0.000000]  [<ffffffff80678432>] x86_64_start_kernel+0x241/0x24a
>  >>  [    0.000000]
>  >>  [    0.000000] ---[ end trace 4eaa2a86a8e2da22 ]---
>  >>  [    0.000000] update e820 for mtrr -- holes
>  >>  [    0.000000] modified physical RAM map:
>  >>  [    0.000000]  modified: 0000000000000000 - 000000000009cc00 (usable)
>  >>  [    0.000000]  modified: 000000000009cc00 - 00000000000a0000 (reserved)
>  >>  [    0.000000]  modified: 00000000000e4000 - 0000000000100000 (reserved)
>  >>
>  >> [    0.000000]  modified: 0000000000100000 - 00000000c8000000 (usable)
>  >>  [    0.000000]  modified: 00000000c8000000 - 00000000cf550000 (reserved)
>  >>  [    0.000000]  modified: 00000000cf550000 - 00000000cf55e000 (ACPI data)
>  >>  [    0.000000]  modified: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
>  >>  [    0.000000]  modified: 00000000cf5e0000 - 00000000cf600000 (reserved)
>  >>  [    0.000000]  modified: 00000000fee00000 - 00000000fee01000 (reserved)
>  >>  [    0.000000]  modified: 00000000ffc00000 - 000000012c000000 (reserved)
>  >>  [    0.000000] Entering add_active_range(0, 0, 156) 3 entries of 256 used
>  >>  [    0.000000] Entering add_active_range(0, 256, 819200) 3 entries of 256 used
>  >>
>  > please check attached debug patch...
>
>  Here the output :
>
>  ...
>
>  [    0.000000] Linux version 2.6.25-06598-g33ae0cd-dirty (crazy@thor) (gcc version 4.3.0 (Frugalware Linux) ) #814 SMP PREEMPT Wed Apr 30 02:47:52 CEST 2008
>  [    0.000000] Command line: root=/dev/sdb1 ro debug vga=0x317 mtrr_chunk_size=256m mtrr_gran_size=128m
>
>
> [    0.000000] BIOS-provided physical RAM map:
>  [    0.000000]  BIOS-e820: 0000000000000000 - 000000000009cc00 (usable)
>  [    0.000000]  BIOS-e820: 000000000009cc00 - 00000000000a0000 (reserved)
>  [    0.000000]  BIOS-e820: 00000000000e4000 - 0000000000100000 (reserved)
>  [    0.000000]  BIOS-e820: 0000000000100000 - 00000000cf550000 (usable)
>  [    0.000000]  BIOS-e820: 00000000cf550000 - 00000000cf55e000 (ACPI data)
>  [    0.000000]  BIOS-e820: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
>  [    0.000000]  BIOS-e820: 00000000cf5e0000 - 00000000cf600000 (reserved)
>  [    0.000000]  BIOS-e820: 00000000fee00000 - 00000000fee01000 (reserved)
>  [    0.000000]  BIOS-e820: 00000000ffc00000 - 0000000100000000 (reserved)
>  [    0.000000]  BIOS-e820: 0000000100000000 - 000000012c000000 (usable)
>  [    0.000000] Entering add_active_range(0, 0, 156) 0 entries of 256 used
>  [    0.000000] Entering add_active_range(0, 256, 849232) 1 entries of 256 used
>  [    0.000000] Entering add_active_range(0, 1048576, 1228800) 2 entries of 256 used
>  [    0.000000] max_pfn_mapped = 1228800
>  [    0.000000] x86 PAT enabled: cpu 0, old 0x7040600070406, new 0x7010600070106
>  [    0.000000] After WB checking
>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 000000000012c000
>  [    0.000000] After UC checking
>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>  [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>  [    0.000000] After sorting
>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>  [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>  [    0.000000] range0: 0000000000000000 - 00000000c0000000
>  [    0.000000] Setting variable MTRR 0, base: 0MB, range: 2048MB, type WB
>  [    0.000000] Setting variable MTRR 1, base: 2048MB, range: 1024MB, type WB
>  [    0.000000] range: 00000000c0000000 - 00000000c8000000
>  [    0.000000] Setting variable MTRR 2, base: 3072MB, range: 128MB, type WB
>  [    0.000000] rangeX: 00000000d0000000 - 00000000d0000000
>  [    0.000000] range0: 0000000100000000 - 0000000120000000
>  [    0.000000] Setting variable MTRR 3, base: 4096MB, range: 512MB, type WB
>  [    0.000000] range: 0000000120000000 - 0000000128000000
>  [    0.000000] Setting variable MTRR 4, base: 4608MB, range: 128MB, type WB
>  [    0.000000] DONE variable MTRRs
>  [    0.000000] x86 PAT enabled: cpu 0, old 0x7010600070106, new 0x7010600070106
>  [    0.000000] After WB checking
>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000c8000
>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000128000
>  [    0.000000] After UC checking
>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000c8000
>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000128000
>  [    0.000000] After sorting
>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000c8000
>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000128000
>  [    0.000000] trimm e820 for MTRR MAP (UC): 0000000000128000 - 000000000012c000
>
> [    0.000000] WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing 64MB of RAM.
>
> [    0.000000] update e820 for mtrr -- end_pfn
>  [    0.000000] modified physical RAM map:
>  [    0.000000]  modified: 0000000000000000 - 000000000009cc00 (usable)
>  [    0.000000]  modified: 000000000009cc00 - 00000000000a0000 (reserved)
>  [    0.000000]  modified: 00000000000e4000 - 0000000000100000 (reserved)
>  [    0.000000]  modified: 0000000000100000 - 00000000cf550000 (usable)
>  [    0.000000]  modified: 00000000cf550000 - 00000000cf55e000 (ACPI data)
>  [    0.000000]  modified: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
>  [    0.000000]  modified: 00000000cf5e0000 - 00000000cf600000 (reserved)
>  [    0.000000]  modified: 00000000fee00000 - 00000000fee01000 (reserved)
>  [    0.000000]  modified: 00000000ffc00000 - 0000000100000000 (reserved)
>  [    0.000000]  modified: 0000000100000000 - 0000000128000000 (usable)
>  [    0.000000]  modified: 0000000128000000 - 000000012c000000 (reserved)
>  [    0.000000] trimm e820 for MTRR MAP (UC): 00000000000c8000 - 0000000000100000
>
> [    0.000000] WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing 757MB of RAM.
>
> [    0.000000] update e820 for mtrr -- holes
>  [    0.000000] modified physical RAM map:
>  [    0.000000]  modified: 0000000000000000 - 000000000009cc00 (usable)
>  [    0.000000]  modified: 000000000009cc00 - 00000000000a0000 (reserved)
>  [    0.000000]  modified: 00000000000e4000 - 0000000000100000 (reserved)
>  [    0.000000]  modified: 0000000000100000 - 00000000c8000000 (usable)
>  [    0.000000]  modified: 00000000c8000000 - 00000000cf550000 (reserved)
>  [    0.000000]  modified: 00000000cf550000 - 00000000cf55e000 (ACPI data)
>  [    0.000000]  modified: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
>  [    0.000000]  modified: 00000000cf5e0000 - 00000000cf600000 (reserved)
>  [    0.000000]  modified: 00000000fee00000 - 00000000fee01000 (reserved)
>  [    0.000000]  modified: 00000000ffc00000 - 000000012c000000 (reserved)
>  [    0.000000] Entering add_active_range(0, 0, 156) 3 entries of 256 used
>  [    0.000000] Entering add_active_range(0, 256, 819200) 3 entries of 256 used
>
> [    0.000000] max_pfn_mapped = 1228800
>  [    0.000000] init_memory_mapping
>
>  ...
>
>  ( this kernel is still not build with your second cleanup patch will do so in next run )
>
>  Also let me know if you want me to boot with any other mtrr_chunk_size / mtrr_gran_size values.
>
i can not find problem with the code, wonder why it trim out
0000000100000000 - 0000000128000000

please try the attached debug patch in addtion to

http://people.redhat.com/mingo/x86.git/README

YH

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: trim_holes_fix.patch --]
[-- Type: text/x-patch; name=trim_holes_fix.patch, Size: 3206 bytes --]

Index: linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/main.c
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
@@ -1103,6 +1103,9 @@ static u64 __init real_trim_memory(unsig
 	trim_size = limit_pfn;
 	trim_size <<= PAGE_SHIFT;
 	trim_size -= trim_start;
+
+	printk(KERN_INFO "trim e820 with MTRR MAP (UC): %016lx - %016lx\n", start_pfn, limit_pfn);
+
 	return update_memory_range(trim_start, trim_size, E820_RAM,
 				E820_RESERVED);
 }
@@ -1124,7 +1127,6 @@ int __init mtrr_trim_uncached_memory(uns
 	struct res_range range[RANGE_NUM];
 	int nr_range;
 	u64 total_real_trim_size;
-	int changed;
 
 	/* extra one for all 0 */
 	int num[MTRR_NUM_TYPES + 1];
@@ -1189,49 +1191,35 @@ int __init mtrr_trim_uncached_memory(uns
 	}
 	nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0);
 
-	changed = 0;
-	total_real_trim_size = 0;
-
-	/* check the top at first */
-	i = nr_range - 1;
-	if (range[i].end + 1 < end_pfn) {
-			total_real_trim_size += real_trim_memory(range[i].end + 1, end_pfn);
-	}
-
-	if (total_real_trim_size) {
-		printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover"
-			" all of memory, losing %lluMB of RAM.\n",
-			total_real_trim_size >> 20);
-
-		WARN_ON(1);
-
-		printk(KERN_INFO "update e820 for mtrr -- end_pfn\n");
-		update_e820();
-		changed = 1;
-	}
-
 	total_real_trim_size = 0;
+	/* check the head */
 	if (range[0].start)
 		total_real_trim_size += real_trim_memory(0, range[0].start);
-
-	for (i = 0; i < nr_range - 1; i--) {
+	/* check the holes */
+	for (i = 0; i < nr_range - 1; i++) {
 		if (range[i].end + 1 < range[i+1].start)
 			total_real_trim_size += real_trim_memory(range[i].end + 1, range[i+1].start);
 	}
+	/* check the top */
+	i = nr_range - 1;
+	if (range[i].end + 1 < end_pfn)
+		total_real_trim_size += real_trim_memory(range[i].end + 1, end_pfn);
 
 	if (total_real_trim_size) {
 		printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover"
 			" all of memory, losing %lluMB of RAM.\n",
 			total_real_trim_size >> 20);
 
-		WARN_ON(1);
+		if (enable_mtrr_cleanup < 1)
+			WARN_ON(1);
 
-		printk(KERN_INFO "update e820 for mtrr -- holes\n");
+		printk(KERN_INFO "update e820 for mtrr\n");
 		update_e820();
-		changed = 1;
+
+		return 1;
 	}
 
-	return changed;
+	return 0;
 }
 
 /**
Index: linux-2.6/arch/x86/kernel/e820_64.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/e820_64.c
+++ linux-2.6/arch/x86/kernel/e820_64.c
@@ -846,6 +846,8 @@ u64 __init update_memory_range(u64 start
 		if (ei->addr >= start && ei->size <= size) {
 			ei->type = new_type;
 			real_updated_size += ei->size;
+			printk(KERN_INFO "modified physical RAM map - whole cover with %d:\n", i);
+			e820_print_map("modified");
 			continue;
 		}
 		/* partially covered */
@@ -856,6 +858,8 @@ u64 __init update_memory_range(u64 start
 		add_memory_region(final_start, final_end - final_start,
 					 new_type);
 		real_updated_size += final_end - final_start;
+		printk(KERN_INFO "modified physical RAM map - add one with %d:\n", i);
+		e820_print_map("modified");
 	}
 	return real_updated_size;
 }

^ permalink raw reply	[flat|nested] 89+ messages in thread

* [PATCH] x86: fix trimming e820 with MTRR holes. - fix
  2008-04-29  8:59           ` [PATCH 2/2] x86: fix trimming e820 with MTRR holes Yinghai Lu
  2008-04-29 11:35             ` Ingo Molnar
@ 2008-04-30  3:25             ` Yinghai Lu
  2008-04-30 12:09               ` Ingo Molnar
  1 sibling, 1 reply; 89+ messages in thread
From: Yinghai Lu @ 2008-04-30  3:25 UTC (permalink / raw)
  To: Andrew Morton, Ingo Molnar, H. Peter Anvin, Thomas Gleixner,
	Gabriel C, Mika Fischer
  Cc: linux-kernel@vger.kernel.org


v2: process hole then end_pfn
    fix update_memory_range with whole cover comparing

Signed-off-by: Yinghai Lu <yinghai.lu@gmail.com>

Index: linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/main.c
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
@@ -1098,11 +1098,12 @@ int __init amd_special_default_mtrr(void
 static u64 __init real_trim_memory(unsigned long start_pfn, unsigned long limit_pfn)
 {
 	u64 trim_start, trim_size;
-	trim_start =  start_pfn;
+	trim_start = start_pfn;
 	trim_start <<= PAGE_SHIFT;
 	trim_size = limit_pfn;
 	trim_size <<= PAGE_SHIFT;
 	trim_size -= trim_start;
+
 	return update_memory_range(trim_start, trim_size, E820_RAM,
 				E820_RESERVED);
 }
@@ -1124,7 +1125,6 @@ int __init mtrr_trim_uncached_memory(uns
 	struct res_range range[RANGE_NUM];
 	int nr_range;
 	u64 total_real_trim_size;
-	int changed;
 
 	/* extra one for all 0 */
 	int num[MTRR_NUM_TYPES + 1];
@@ -1189,49 +1189,35 @@ int __init mtrr_trim_uncached_memory(uns
 	}
 	nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0);
 
-	changed = 0;
-	total_real_trim_size = 0;
-
-	/* check the top at first */
-	i = nr_range - 1;
-	if (range[i].end + 1 < end_pfn) {
-			total_real_trim_size += real_trim_memory(range[i].end + 1, end_pfn);
-	}
-
-	if (total_real_trim_size) {
-		printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover"
-			" all of memory, losing %lluMB of RAM.\n",
-			total_real_trim_size >> 20);
-
-		WARN_ON(1);
-
-		printk(KERN_INFO "update e820 for mtrr -- end_pfn\n");
-		update_e820();
-		changed = 1;
-	}
-
 	total_real_trim_size = 0;
+	/* check the head */
 	if (range[0].start)
 		total_real_trim_size += real_trim_memory(0, range[0].start);
-
-	for (i = 0; i < nr_range - 1; i--) {
+	/* check the holes */
+	for (i = 0; i < nr_range - 1; i++) {
 		if (range[i].end + 1 < range[i+1].start)
 			total_real_trim_size += real_trim_memory(range[i].end + 1, range[i+1].start);
 	}
+	/* check the top */
+	i = nr_range - 1;
+	if (range[i].end + 1 < end_pfn)
+		total_real_trim_size += real_trim_memory(range[i].end + 1, end_pfn);
 
 	if (total_real_trim_size) {
 		printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover"
 			" all of memory, losing %lluMB of RAM.\n",
 			total_real_trim_size >> 20);
 
-		WARN_ON(1);
+		if (enable_mtrr_cleanup < 1)
+			WARN_ON(1);
 
-		printk(KERN_INFO "update e820 for mtrr -- holes\n");
+		printk(KERN_INFO "update e820 for mtrr\n");
 		update_e820();
-		changed = 1;
+
+		return 1;
 	}
 
-	return changed;
+	return 0;
 }
 
 /**
Index: linux-2.6/arch/x86/kernel/e820_64.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/e820_64.c
+++ linux-2.6/arch/x86/kernel/e820_64.c
@@ -843,7 +843,8 @@ u64 __init update_memory_range(u64 start
 		if (ei->type != old_type)
 			continue;
 		/* totally covered? */
-		if (ei->addr >= start && ei->size <= size) {
+		if (ei->addr >= start &&
+		    (ei->addr + ei->size) <= (start + size)) {
 			ei->type = new_type;
 			real_updated_size += ei->size;
 			continue;
Index: linux-2.6/arch/x86/kernel/e820_32.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/e820_32.c
+++ linux-2.6/arch/x86/kernel/e820_32.c
@@ -797,7 +797,8 @@ u64 __init update_memory_range(u64 start
 		if (ei->type != old_type)
 			continue;
 		/* totally covered? */
-		if (ei->addr >= start && ei->size <= size) {
+		if (ei->addr >= start &&
+		    (ei->addr + ei->size) <= (start + size)) {
 			ei->type = new_type;
 			real_updated_size += ei->size;
 			continue;

^ permalink raw reply	[flat|nested] 89+ messages in thread

* [PATCH] x86: mtrr cleanup for converting continuous to discrete layout v8 - fix
  2008-04-29 10:52           ` [PATCH 1/2] x86: mtrr cleanup for converting continuous to discrete layout v8 Yinghai Lu
  2008-04-29 13:07             ` Ingo Molnar
  2008-04-29 20:46             ` Randy Dunlap
@ 2008-04-30  3:25             ` Yinghai Lu
  2008-04-30 12:09               ` Ingo Molnar
  2008-05-01  8:00               ` [PATCH] x86: mtrr cleanup for converting continuous to discrete - auto detect Yinghai Lu
  2 siblings, 2 replies; 89+ messages in thread
From: Yinghai Lu @ 2008-04-30  3:25 UTC (permalink / raw)
  To: Andrew Morton, Ingo Molnar, H. Peter Anvin, Thomas Gleixner,
	Gabriel C, Mika Fischer
  Cc: linux-kernel@vger.kernel.org


v9: address format change requests by Ingo
    more case handling in range_to_var_with_hole

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

Index: linux-2.6/arch/x86/Kconfig
===================================================================
--- linux-2.6.orig/arch/x86/Kconfig
+++ linux-2.6/arch/x86/Kconfig
@@ -1092,13 +1092,12 @@ config MTRR_SANITIZER
 	  If unsure, say Y.
 
 config MTRR_SANITIZER_ENABLE_DEFAULT
-	def_bool y
-	prompt "Enable MTRR cleanup by default"
+	int "MTRR cleanup enable value (0-1)"
+	range 0 1
+	default "0"
 	depends on MTRR_SANITIZER
 	help
-	  Enable mtrr cleanup by default
-
-	  If unsure, say Y.
+	  Enable mtrr cleanup default value
 
 config X86_PAT
 	bool
Index: linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/main.c
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
@@ -611,17 +611,9 @@ static struct sysdev_driver mtrr_sysdev_
 };
 
 #ifdef CONFIG_MTRR_SANITIZER
-
-#ifdef CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT
-static int enable_mtrr_cleanup __initdata = 1;
-#else
-static int enable_mtrr_cleanup __initdata;
-#endif
-
+static int enable_mtrr_cleanup __initdata = CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT;
 #else
-
 static int enable_mtrr_cleanup __initdata = -1;
-
 #endif
 
 static int __init disable_mtrr_cleanup_setup(char *str)
@@ -640,6 +632,7 @@ static int __init enable_mtrr_cleanup_se
 }
 early_param("enble_mtrr_cleanup", enable_mtrr_cleanup_setup);
 
+/* should be related to MTRR_VAR_RANGES nums */
 #define RANGE_NUM 256
 
 struct res_range {
@@ -647,13 +640,27 @@ struct res_range {
 	unsigned long end;
 };
 
-static int __init add_range(struct res_range *range, int nr_range, unsigned long start,
-			      unsigned long end, int merge)
+static int __init
+add_range(struct res_range *range, int nr_range, unsigned long start,
+			      unsigned long end)
 {
-	int i;
+	/* out of slots */
+	if (nr_range >= RANGE_NUM)
+		return nr_range;
 
-	if (!merge)
-		goto addit;
+	range[nr_range].start = start;
+	range[nr_range].end = end;
+
+	nr_range++;
+
+	return nr_range;
+}
+
+static int __init
+add_range_with_merge(struct res_range *range, int nr_range, unsigned long start,
+			      unsigned long end)
+{
+	int i;
 
 	/* try to merge it with old one */
 	for (i = 0; i < nr_range; i++) {
@@ -676,24 +683,14 @@ static int __init add_range(struct res_r
 		return nr_range;
 	}
 
-addit:
 	/* need to add that */
-	if (nr_range >= RANGE_NUM)
-		return nr_range;
-
-	range[nr_range].start = start;
-	range[nr_range].end = end;
-
-	nr_range++;
-
-	return nr_range;
-
+	return add_range(range, nr_range, start, end);
 }
-static void __init subtract_range(struct res_range *range, unsigned long start,
-				unsigned long end)
+
+static void __init
+subtract_range(struct res_range *range, unsigned long start, unsigned long end)
 {
-	int i;
-	int j;
+	int i, j;
 
 	for (j = 0; j < RANGE_NUM; j++) {
 		if (!range[j].end)
@@ -747,46 +744,47 @@ static int __init cmp_range(const void *
 }
 
 struct var_mtrr_state {
-	unsigned long range_startk, range_sizek;
-	unsigned long chunk_sizek;
-	unsigned long gran_sizek;
-	unsigned int reg;
-	unsigned address_bits;
+	unsigned long	range_startk;
+	unsigned long	range_sizek;
+	unsigned long	chunk_sizek;
+	unsigned long	gran_sizek;
+	unsigned int	reg;
+	unsigned int	address_bits;
 };
 
-static void __init set_var_mtrr(
-	unsigned int reg, unsigned long basek, unsigned long sizek,
-	unsigned char type, unsigned address_bits)
+static void __init
+set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
+		unsigned char type, unsigned address_bits)
 {
 	u32 base_lo, base_hi, mask_lo, mask_hi;
-	unsigned address_mask_high;
+	u64 base, mask;
 
 	if (!sizek) {
 		fill_mtrr_var_range(reg, 0, 0, 0, 0);
 		return;
 	}
 
-	address_mask_high = ((1u << (address_bits - 32u)) - 1u);
+	mask = (1ULL << address_bits) - 1;
+	mask &= ~((((u64)sizek) << 10) - 1);
 
-	base_hi = basek >> 22;
-	base_lo  = basek << 10;
+	base  = ((u64)basek) << 10;
 
-	if (sizek < 4*1024*1024) {
-		mask_hi = address_mask_high;
-		mask_lo = ~((sizek << 10) - 1);
-	} else {
-		mask_hi = address_mask_high & (~((sizek >> 22) - 1));
-		mask_lo = 0;
-	}
+	base |= type;
+	mask |= 0x800;
+
+	base_lo = base & ((1ULL<<32) - 1);
+	base_hi = base >> 32;
+
+	mask_lo = mask & ((1ULL<<32) - 1);
+	mask_hi = mask >> 32;
 
-	base_lo |= type;
-	mask_lo |= 0x800;
 	fill_mtrr_var_range(reg, base_lo, base_hi, mask_lo, mask_hi);
 }
 
-static unsigned int __init range_to_mtrr(unsigned int reg,
-	unsigned long range_startk, unsigned long range_sizek,
-	unsigned char type, unsigned address_bits)
+static unsigned int __init
+range_to_mtrr(unsigned int reg, unsigned long range_startk,
+	      unsigned long range_sizek, unsigned char type,
+	      unsigned address_bits)
 {
 	if (!range_sizek || (reg >= num_var_ranges))
 		return reg;
@@ -794,6 +792,7 @@ static unsigned int __init range_to_mtrr
 	while (range_sizek) {
 		unsigned long max_align, align;
 		unsigned long sizek;
+
 		/* Compute the maximum size I can make a range */
 		if (range_startk)
 			max_align = ffs(range_startk) - 1;
@@ -818,7 +817,8 @@ static unsigned int __init range_to_mtrr
 	return reg;
 }
 
-static void __init range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek)
+static void __init
+range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek)
 {
 	unsigned long hole_basek, hole_sizek;
 	unsigned long range0_basek, range0_sizek;
@@ -848,23 +848,31 @@ static void __init range_to_mtrr_with_ho
 	/* try to append some small hole */
 	range0_basek = state->range_startk;
 	range0_sizek = ALIGN(state->range_sizek, chunk_sizek);
-	if ((range0_sizek == state->range_sizek) ||
-	    ((range0_basek + range0_sizek - chunk_sizek > basek) && basek)) {
+	if (range0_sizek == state->range_sizek) {
 			printk(KERN_INFO "rangeX: %016lx - %016lx\n", range0_basek<<10, (range0_basek + state->range_sizek)<<10);
 			state->reg = range_to_mtrr(state->reg, range0_basek,
 				state->range_sizek, MTRR_TYPE_WRBACK, state->address_bits);
 		return;
+	} else if (basek) {
+	    while (range0_basek + range0_sizek - chunk_sizek > basek) {
+		range0_sizek -= chunk_sizek;
+		if (!range0_sizek)
+			break;
+	    }
 	}
 
 
-	range0_sizek -= chunk_sizek;
+	if (range0_sizek > chunk_sizek)
+		range0_sizek -= chunk_sizek;
 	printk(KERN_INFO "range0: %016lx - %016lx\n", range0_basek<<10, (range0_basek + range0_sizek)<<10);
 	state->reg = range_to_mtrr(state->reg, range0_basek,
 			range0_sizek, MTRR_TYPE_WRBACK, state->address_bits);
 
 	range_basek = range0_basek + range0_sizek;
 	range_sizek = chunk_sizek;
-	if (range_sizek - (state->range_sizek - range0_sizek) < (chunk_sizek >> 1)) {
+
+	if ((range_sizek - (state->range_sizek - range0_sizek) < (chunk_sizek >> 1)) &&
+	    (range_basek + range_sizek <= basek)) {
 		hole_sizek = range_sizek - (state->range_sizek - range0_sizek);
 		hole_basek = range_basek + range_sizek - hole_sizek;
 	} else
@@ -880,7 +888,9 @@ static void __init range_to_mtrr_with_ho
 	}
 }
 
-static void __init set_var_mtrr_range(struct var_mtrr_state *state, unsigned long base_pfn, unsigned long size_pfn)
+static void __init
+set_var_mtrr_range(struct var_mtrr_state *state, unsigned long base_pfn,
+		   unsigned long size_pfn)
 {
 	unsigned long basek, sizek;
 
@@ -921,7 +931,7 @@ static int __init parse_mtrr_chunk_size_
 early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt);
 
 /* granity of mtrr of block */
-static u64 mtrr_gran_size __initdata = (64ULL<<20);
+static u64 mtrr_gran_size __initdata = (1ULL<<20);
 
 static int __init parse_mtrr_gran_size_opt(char *p)
 {
@@ -932,17 +942,19 @@ static int __init parse_mtrr_gran_size_o
 }
 early_param("mtrr_gran_size", parse_mtrr_gran_size_opt);
 
-static void __init x86_setup_var_mtrrs(struct res_range *range, int nr_range, unsigned address_bits)
+static void __init
+x86_setup_var_mtrrs(struct res_range *range, int nr_range,
+		    unsigned address_bits)
 {
 	struct var_mtrr_state var_state;
 	int i;
 
-	var_state.range_startk = 0;
-	var_state.range_sizek = 0;
-	var_state.reg = 0;
-	var_state.address_bits = address_bits;
-	var_state.chunk_sizek = mtrr_chunk_size >> 10;
-	var_state.gran_sizek = mtrr_gran_size >> 10;
+	var_state.range_startk	= 0;
+	var_state.range_sizek	= 0;
+	var_state.reg		= 0;
+	var_state.address_bits	= address_bits;
+	var_state.chunk_sizek	= mtrr_chunk_size >> 10;
+	var_state.gran_sizek	= mtrr_gran_size >> 10;
 
 	/* Write the range etc */
 	for (i = 0; i < nr_range; i++)
@@ -952,11 +964,16 @@ static void __init x86_setup_var_mtrrs(s
 	range_to_mtrr_with_hole(&var_state, 0);
 	printk(KERN_INFO "DONE variable MTRRs\n");
 	/* Clear out the extra MTRR's */
-	while (var_state.reg < num_var_ranges)
-		set_var_mtrr(var_state.reg++, 0, 0, 0, var_state.address_bits);
+	while (var_state.reg < num_var_ranges) {
+		set_var_mtrr(var_state.reg, 0, 0, 0, var_state.address_bits);
+		var_state.reg++;
+	}
 }
 
-static int __init x86_get_mtrr_mem_range(struct res_range *range, int nr_range, unsigned long extra_remove_base, unsigned long extra_remove_size)
+static int __init
+x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
+		       unsigned long extra_remove_base,
+		       unsigned long extra_remove_size)
 {
 	unsigned long i, base, size;
 	mtrr_type type;
@@ -965,7 +982,7 @@ static int __init x86_get_mtrr_mem_range
 		mtrr_if->get(i, &base, &size, &type);
 		if (type != MTRR_TYPE_WRBACK)
 			continue;
-		nr_range = add_range(range, nr_range, base, base + size - 1, 1);
+		nr_range = add_range_with_merge(range, nr_range, base, base + size - 1);
 	}
 	printk(KERN_INFO "After WB checking\n");
 	for (i = 0; i < nr_range; i++)
@@ -1005,11 +1022,11 @@ static int __init x86_get_mtrr_mem_range
 
 static int __init mtrr_cleanup(unsigned address_bits)
 {
+	unsigned long extra_remove_base, extra_remove_size;
 	unsigned long i, base, size, def, dummy;
-	mtrr_type type;
 	struct res_range range[RANGE_NUM];
+	mtrr_type type;
 	int nr_range;
-	unsigned long extra_remove_base, extra_remove_size;
 
 	/* extra one for all 0 */
 	int num[MTRR_NUM_TYPES + 1];
@@ -1053,7 +1070,6 @@ static int __init mtrr_cleanup(unsigned 
 	x86_setup_var_mtrrs(range, nr_range, address_bits);
 
 	return 1;
-
 }
 
 static int disable_mtrr_trim;

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH 1/2] x86: mtrr cleanup for converting continuous to discrete layout v7
  2008-04-30  3:00                                 ` Yinghai Lu
@ 2008-04-30  3:29                                   ` Yinghai Lu
  2008-04-30  4:12                                     ` Gabriel C
  0 siblings, 1 reply; 89+ messages in thread
From: Yinghai Lu @ 2008-04-30  3:29 UTC (permalink / raw)
  To: Gabriel C
  Cc: Andrew Morton, Ingo Molnar, H. Peter Anvin, Thomas Gleixner,
	Mika Fischer, linux-kernel@vger.kernel.org

please try the patches

from
http://lkml.org/lkml/2008/4/29/754
http://lkml.org/lkml/2008/4/29/753

in addtion to

http://people.redhat.com/mingo/x86.git/README
( it has v8 already)

and try with mtrr_gran_size=128m

Thanks

YH

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH 1/2] x86: mtrr cleanup for converting continuous to discrete layout v7
  2008-04-30  3:29                                   ` Yinghai Lu
@ 2008-04-30  4:12                                     ` Gabriel C
  2008-04-30  4:25                                       ` Yinghai Lu
  0 siblings, 1 reply; 89+ messages in thread
From: Gabriel C @ 2008-04-30  4:12 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Andrew Morton, Ingo Molnar, H. Peter Anvin, Thomas Gleixner,
	Mika Fischer, linux-kernel@vger.kernel.org

Yinghai Lu wrote:
> please try the patches
> 
> from
> http://lkml.org/lkml/2008/4/29/754
> http://lkml.org/lkml/2008/4/29/753
> 
> in addtion to
> 
> http://people.redhat.com/mingo/x86.git/README
> ( it has v8 already)
> 
> and try with mtrr_gran_size=128m

Without any value I get :

...

[    0.000000] Linux version 2.6.25-x86-latest.git-06598-g6a2c2ff-dirty (crazy@thor) (gcc version 4.3.0 (Frugalware Linux) ) #1 SMP PREEMPT Wed Apr 30 05:51:39 CEST 2008
[    0.000000] Command line: root=/dev/sdb1 ro debug vga=0x317
[    0.000000] BIOS-provided physical RAM map:
[    0.000000]  BIOS-e820: 0000000000000000 - 000000000009cc00 (usable)
[    0.000000]  BIOS-e820: 000000000009cc00 - 00000000000a0000 (reserved)
[    0.000000]  BIOS-e820: 00000000000e4000 - 0000000000100000 (reserved)
[    0.000000]  BIOS-e820: 0000000000100000 - 00000000cf550000 (usable)
[    0.000000]  BIOS-e820: 00000000cf550000 - 00000000cf55e000 (ACPI data)
[    0.000000]  BIOS-e820: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
[    0.000000]  BIOS-e820: 00000000cf5e0000 - 00000000cf600000 (reserved)
[    0.000000]  BIOS-e820: 00000000fee00000 - 00000000fee01000 (reserved)
[    0.000000]  BIOS-e820: 00000000ffc00000 - 0000000100000000 (reserved)
[    0.000000]  BIOS-e820: 0000000100000000 - 000000012c000000 (usable)
[    0.000000] Entering add_active_range(0, 0, 156) 0 entries of 256 used
[    0.000000] Entering add_active_range(0, 256, 849232) 1 entries of 256 used
[    0.000000] Entering add_active_range(0, 1048576, 1228800) 2 entries of 256 used
[    0.000000] max_pfn_mapped = 1228800
[    0.000000] x86 PAT enabled: cpu 0, old 0x7040600070406, new 0x7010600070106
[    0.000000] After WB checking
[    0.000000] MTRR MAP PFN: 0000000000000000 - 000000000012c000
[    0.000000] After UC checking
[    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
[    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
[    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
[    0.000000] After sorting
[    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
[    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
[    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
[    0.000000] range0: 0000000000000000 - 00000000c0000000
[    0.000000] Setting variable MTRR 0, base: 0MB, range: 2048MB, type WB
[    0.000000] Setting variable MTRR 1, base: 2048MB, range: 1024MB, type WB
[    0.000000] range: 00000000c0000000 - 00000000cf600000
[    0.000000] Setting variable MTRR 2, base: 3072MB, range: 128MB, type WB
[    0.000000] Setting variable MTRR 3, base: 3200MB, range: 64MB, type WB
[    0.000000] Setting variable MTRR 4, base: 3264MB, range: 32MB, type WB
[    0.000000] Setting variable MTRR 5, base: 3296MB, range: 16MB, type WB
[    0.000000] Setting variable MTRR 6, base: 3312MB, range: 4MB, type WB
[    0.000000] Setting variable MTRR 7, base: 3316MB, range: 2MB, type WB
[    0.000000] range0: 00000000cf800000 - 00000000df800000
[    0.000000] range: 00000000df800000 - 00000000d0000000
[    0.000000] DONE variable MTRRs
[    0.000000] x86 PAT enabled: cpu 0, old 0x7010600070106, new 0x7010600070106
[    0.000000] After WB checking
[    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
[    0.000000] After UC checking
[    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
[    0.000000] After sorting
[    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
[    0.000000] WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing 704MB of RAM.
[    0.000000] update e820 for mtrr
[    0.000000] modified physical RAM map:
[    0.000000]  modified: 0000000000000000 - 000000000009cc00 (usable)
[    0.000000]  modified: 000000000009cc00 - 00000000000a0000 (reserved)
[    0.000000]  modified: 00000000000e4000 - 0000000000100000 (reserved)
[    0.000000]  modified: 0000000000100000 - 00000000cf550000 (usable)
[    0.000000]  modified: 00000000cf550000 - 00000000cf55e000 (ACPI data)
[    0.000000]  modified: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
[    0.000000]  modified: 00000000cf5e0000 - 00000000cf600000 (reserved)
[    0.000000]  modified: 00000000fee00000 - 00000000fee01000 (reserved)
[    0.000000]  modified: 00000000ffc00000 - 000000012c000000 (reserved)
[    0.000000] Entering add_active_range(0, 0, 156) 3 entries of 256 used
[    0.000000] Entering add_active_range(0, 256, 849232) 3 entries of 256 used
[    0.000000] max_pfn_mapped = 1228800
[    0.000000] init_memory_mapping                                                                                                                          

...

[   20.984343] [drm] Initialized i915 1.6.0 20060119 on minor 0
[   21.450368] mtrr: no more MTRRs available

...

with mtrr_gran_size=128m I get :

...

[    0.000000] Linux version 2.6.25-x86-latest.git-06598-g6a2c2ff-dirty (crazy@thor) (gcc version 4.3.0 (Frugalware Linux) ) #1 SMP PREEMPT Wed Apr 30 05:51:39 CEST 2008
[    0.000000] Command line: root=/dev/sdb1 ro debug vga=0x317 mtrr_gran_size=128m
[    0.000000] BIOS-provided physical RAM map:
[    0.000000]  BIOS-e820: 0000000000000000 - 000000000009cc00 (usable)
[    0.000000]  BIOS-e820: 000000000009cc00 - 00000000000a0000 (reserved)
[    0.000000]  BIOS-e820: 00000000000e4000 - 0000000000100000 (reserved)
[    0.000000]  BIOS-e820: 0000000000100000 - 00000000cf550000 (usable)
[    0.000000]  BIOS-e820: 00000000cf550000 - 00000000cf55e000 (ACPI data)
[    0.000000]  BIOS-e820: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
[    0.000000]  BIOS-e820: 00000000cf5e0000 - 00000000cf600000 (reserved)
[    0.000000]  BIOS-e820: 00000000fee00000 - 00000000fee01000 (reserved)
[    0.000000]  BIOS-e820: 00000000ffc00000 - 0000000100000000 (reserved)
[    0.000000]  BIOS-e820: 0000000100000000 - 000000012c000000 (usable)
[    0.000000] Entering add_active_range(0, 0, 156) 0 entries of 256 used
[    0.000000] Entering add_active_range(0, 256, 849232) 1 entries of 256 used
[    0.000000] Entering add_active_range(0, 1048576, 1228800) 2 entries of 256 used
[    0.000000] max_pfn_mapped = 1228800
[    0.000000] x86 PAT enabled: cpu 0, old 0x7040600070406, new 0x7010600070106
[    0.000000] After WB checking
[    0.000000] MTRR MAP PFN: 0000000000000000 - 000000000012c000
[    0.000000] After UC checking
[    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
[    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
[    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
[    0.000000] After sorting
[    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
[    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
[    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
[    0.000000] range0: 0000000000000000 - 00000000c0000000
[    0.000000] Setting variable MTRR 0, base: 0MB, range: 2048MB, type WB
[    0.000000] Setting variable MTRR 1, base: 2048MB, range: 1024MB, type WB
[    0.000000] range: 00000000c0000000 - 00000000c8000000
[    0.000000] Setting variable MTRR 2, base: 3072MB, range: 128MB, type WB
[    0.000000] rangeX: 00000000d0000000 - 00000000d0000000
[    0.000000] range0: 0000000100000000 - 0000000120000000
[    0.000000] Setting variable MTRR 3, base: 4096MB, range: 512MB, type WB
[    0.000000] range: 0000000120000000 - 0000000128000000
[    0.000000] Setting variable MTRR 4, base: 4608MB, range: 128MB, type WB
[    0.000000] DONE variable MTRRs
[    0.000000] x86 PAT enabled: cpu 0, old 0x7010600070106, new 0x7010600070106
[    0.000000] After WB checking
[    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000c8000
[    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000128000
[    0.000000] After UC checking
[    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000c8000
[    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000128000
[    0.000000] After sorting
[    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000c8000
[    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000128000
[    0.000000] WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing 181MB of RAM.
[    0.000000] update e820 for mtrr
[    0.000000] modified physical RAM map:
[    0.000000]  modified: 0000000000000000 - 000000000009cc00 (usable)
[    0.000000]  modified: 000000000009cc00 - 00000000000a0000 (reserved)
[    0.000000]  modified: 00000000000e4000 - 0000000000100000 (reserved)
[    0.000000]  modified: 0000000000100000 - 00000000c8000000 (usable)
[    0.000000]  modified: 00000000c8000000 - 00000000cf550000 (reserved)
[    0.000000]  modified: 00000000cf550000 - 00000000cf55e000 (ACPI data)
[    0.000000]  modified: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
[    0.000000]  modified: 00000000cf5e0000 - 00000000cf600000 (reserved)
[    0.000000]  modified: 00000000fee00000 - 00000000fee01000 (reserved)
[    0.000000]  modified: 00000000ffc00000 - 0000000100000000 (reserved)
[    0.000000]  modified: 0000000100000000 - 0000000128000000 (usable)
[    0.000000]  modified: 0000000128000000 - 000000012c000000 (reserved)
[    0.000000] Entering add_active_range(0, 0, 156) 3 entries of 256 used
[    0.000000] Entering add_active_range(0, 256, 819200) 3 entries of 256 used
[    0.000000] Entering add_active_range(0, 1048576, 1212416) 3 entries of 256 used
[    0.000000] max_pfn_mapped = 1228800
[    0.000000] init_memory_mapping

...

I will try tomorrow some more boot options but now I need some sleep ;)

                                                                                                                         
> 
> Thanks
> 
> YH
> 


Gabriel

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH 1/2] x86: mtrr cleanup for converting continuous to discrete layout v7
  2008-04-30  4:12                                     ` Gabriel C
@ 2008-04-30  4:25                                       ` Yinghai Lu
  2008-04-30 12:04                                         ` Gabriel C
  0 siblings, 1 reply; 89+ messages in thread
From: Yinghai Lu @ 2008-04-30  4:25 UTC (permalink / raw)
  To: Gabriel C
  Cc: Andrew Morton, Ingo Molnar, H. Peter Anvin, Thomas Gleixner,
	Mika Fischer, linux-kernel@vger.kernel.org

On Tue, Apr 29, 2008 at 9:12 PM, Gabriel C <nix.or.die@googlemail.com> wrote:
>
> Yinghai Lu wrote:
>  > please try the patches
>  >
>  > from
>  > http://lkml.org/lkml/2008/4/29/754
>  > http://lkml.org/lkml/2008/4/29/753
>  >
>  > in addtion to
>  >
>  > http://people.redhat.com/mingo/x86.git/README
>  > ( it has v8 already)
>  >
>  > and try with mtrr_gran_size=128m
>
>  Without any value I get :
>
>  ...
>
>  [    0.000000] Linux version 2.6.25-x86-latest.git-06598-g6a2c2ff-dirty (crazy@thor) (gcc version 4.3.0 (Frugalware Linux) ) #1 SMP PREEMPT Wed Apr 30 05:51:39 CEST 2008
>
>
> [    0.000000] Command line: root=/dev/sdb1 ro debug vga=0x317
>  [    0.000000] BIOS-provided physical RAM map:
>  [    0.000000]  BIOS-e820: 0000000000000000 - 000000000009cc00 (usable)
>  [    0.000000]  BIOS-e820: 000000000009cc00 - 00000000000a0000 (reserved)
>  [    0.000000]  BIOS-e820: 00000000000e4000 - 0000000000100000 (reserved)
>  [    0.000000]  BIOS-e820: 0000000000100000 - 00000000cf550000 (usable)
>  [    0.000000]  BIOS-e820: 00000000cf550000 - 00000000cf55e000 (ACPI data)
>  [    0.000000]  BIOS-e820: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
>  [    0.000000]  BIOS-e820: 00000000cf5e0000 - 00000000cf600000 (reserved)
>  [    0.000000]  BIOS-e820: 00000000fee00000 - 00000000fee01000 (reserved)
>  [    0.000000]  BIOS-e820: 00000000ffc00000 - 0000000100000000 (reserved)
>  [    0.000000]  BIOS-e820: 0000000100000000 - 000000012c000000 (usable)
>  [    0.000000] Entering add_active_range(0, 0, 156) 0 entries of 256 used
>  [    0.000000] Entering add_active_range(0, 256, 849232) 1 entries of 256 used
>  [    0.000000] Entering add_active_range(0, 1048576, 1228800) 2 entries of 256 used
>  [    0.000000] max_pfn_mapped = 1228800
>  [    0.000000] x86 PAT enabled: cpu 0, old 0x7040600070406, new 0x7010600070106
>  [    0.000000] After WB checking
>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 000000000012c000
>  [    0.000000] After UC checking
>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>  [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>  [    0.000000] After sorting
>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>  [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>  [    0.000000] range0: 0000000000000000 - 00000000c0000000
>  [    0.000000] Setting variable MTRR 0, base: 0MB, range: 2048MB, type WB
>  [    0.000000] Setting variable MTRR 1, base: 2048MB, range: 1024MB, type WB
>  [    0.000000] range: 00000000c0000000 - 00000000cf600000
>
> [    0.000000] Setting variable MTRR 2, base: 3072MB, range: 128MB, type WB
>  [    0.000000] Setting variable MTRR 3, base: 3200MB, range: 64MB, type WB
>  [    0.000000] Setting variable MTRR 4, base: 3264MB, range: 32MB, type WB
>  [    0.000000] Setting variable MTRR 5, base: 3296MB, range: 16MB, type WB
>  [    0.000000] Setting variable MTRR 6, base: 3312MB, range: 4MB, type WB
>  [    0.000000] Setting variable MTRR 7, base: 3316MB, range: 2MB, type WB
>  [    0.000000] range0: 00000000cf800000 - 00000000df800000
>  [    0.000000] range: 00000000df800000 - 00000000d0000000
>
> [    0.000000] DONE variable MTRRs
>  [    0.000000] x86 PAT enabled: cpu 0, old 0x7010600070106, new 0x7010600070106
>  [    0.000000] After WB checking
>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>
> [    0.000000] After UC checking
>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>
> [    0.000000] After sorting
>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>  [    0.000000] WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing 704MB of RAM.
>
> [    0.000000] update e820 for mtrr
>
> [    0.000000] modified physical RAM map:
>  [    0.000000]  modified: 0000000000000000 - 000000000009cc00 (usable)
>  [    0.000000]  modified: 000000000009cc00 - 00000000000a0000 (reserved)
>  [    0.000000]  modified: 00000000000e4000 - 0000000000100000 (reserved)
>  [    0.000000]  modified: 0000000000100000 - 00000000cf550000 (usable)
>  [    0.000000]  modified: 00000000cf550000 - 00000000cf55e000 (ACPI data)
>  [    0.000000]  modified: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
>  [    0.000000]  modified: 00000000cf5e0000 - 00000000cf600000 (reserved)
>  [    0.000000]  modified: 00000000fee00000 - 00000000fee01000 (reserved)
>
> [    0.000000]  modified: 00000000ffc00000 - 000000012c000000 (reserved)
>  [    0.000000] Entering add_active_range(0, 0, 156) 3 entries of 256 used
>  [    0.000000] Entering add_active_range(0, 256, 849232) 3 entries of 256 used
>
> [    0.000000] max_pfn_mapped = 1228800
>  [    0.000000] init_memory_mapping
>
>  ...
>
>  [   20.984343] [drm] Initialized i915 1.6.0 20060119 on minor 0
>  [   21.450368] mtrr: no more MTRRs available
>
>  ...
>
>  with mtrr_gran_size=128m I get :
>
>  ...
>
>  [    0.000000] Linux version 2.6.25-x86-latest.git-06598-g6a2c2ff-dirty (crazy@thor) (gcc version 4.3.0 (Frugalware Linux) ) #1 SMP PREEMPT Wed Apr 30 05:51:39 CEST 2008
>  [    0.000000] Command line: root=/dev/sdb1 ro debug vga=0x317 mtrr_gran_size=128m
>
>
> [    0.000000] BIOS-provided physical RAM map:
>  [    0.000000]  BIOS-e820: 0000000000000000 - 000000000009cc00 (usable)
>  [    0.000000]  BIOS-e820: 000000000009cc00 - 00000000000a0000 (reserved)
>  [    0.000000]  BIOS-e820: 00000000000e4000 - 0000000000100000 (reserved)
>  [    0.000000]  BIOS-e820: 0000000000100000 - 00000000cf550000 (usable)
>  [    0.000000]  BIOS-e820: 00000000cf550000 - 00000000cf55e000 (ACPI data)
>  [    0.000000]  BIOS-e820: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
>  [    0.000000]  BIOS-e820: 00000000cf5e0000 - 00000000cf600000 (reserved)
>  [    0.000000]  BIOS-e820: 00000000fee00000 - 00000000fee01000 (reserved)
>  [    0.000000]  BIOS-e820: 00000000ffc00000 - 0000000100000000 (reserved)
>  [    0.000000]  BIOS-e820: 0000000100000000 - 000000012c000000 (usable)
>  [    0.000000] Entering add_active_range(0, 0, 156) 0 entries of 256 used
>  [    0.000000] Entering add_active_range(0, 256, 849232) 1 entries of 256 used
>  [    0.000000] Entering add_active_range(0, 1048576, 1228800) 2 entries of 256 used
>  [    0.000000] max_pfn_mapped = 1228800
>  [    0.000000] x86 PAT enabled: cpu 0, old 0x7040600070406, new 0x7010600070106
>  [    0.000000] After WB checking
>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 000000000012c000
>  [    0.000000] After UC checking
>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>  [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>  [    0.000000] After sorting
>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>  [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>  [    0.000000] range0: 0000000000000000 - 00000000c0000000
>  [    0.000000] Setting variable MTRR 0, base: 0MB, range: 2048MB, type WB
>  [    0.000000] Setting variable MTRR 1, base: 2048MB, range: 1024MB, type WB
>  [    0.000000] range: 00000000c0000000 - 00000000c8000000
>  [    0.000000] Setting variable MTRR 2, base: 3072MB, range: 128MB, type WB
>  [    0.000000] rangeX: 00000000d0000000 - 00000000d0000000
>  [    0.000000] range0: 0000000100000000 - 0000000120000000
>  [    0.000000] Setting variable MTRR 3, base: 4096MB, range: 512MB, type WB
>  [    0.000000] range: 0000000120000000 - 0000000128000000
>  [    0.000000] Setting variable MTRR 4, base: 4608MB, range: 128MB, type WB
>  [    0.000000] DONE variable MTRRs
>  [    0.000000] x86 PAT enabled: cpu 0, old 0x7010600070106, new 0x7010600070106
>  [    0.000000] After WB checking
>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000c8000
>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000128000
>  [    0.000000] After UC checking
>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000c8000
>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000128000
>  [    0.000000] After sorting
>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000c8000
>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000128000
>  [    0.000000] WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing 181MB of RAM.
>
> [    0.000000] update e820 for mtrr
>
> [    0.000000] modified physical RAM map:
>  [    0.000000]  modified: 0000000000000000 - 000000000009cc00 (usable)
>  [    0.000000]  modified: 000000000009cc00 - 00000000000a0000 (reserved)
>  [    0.000000]  modified: 00000000000e4000 - 0000000000100000 (reserved)
>  [    0.000000]  modified: 0000000000100000 - 00000000c8000000 (usable)
>  [    0.000000]  modified: 00000000c8000000 - 00000000cf550000 (reserved)
>  [    0.000000]  modified: 00000000cf550000 - 00000000cf55e000 (ACPI data)
>  [    0.000000]  modified: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
>  [    0.000000]  modified: 00000000cf5e0000 - 00000000cf600000 (reserved)
>  [    0.000000]  modified: 00000000fee00000 - 00000000fee01000 (reserved)
>
> [    0.000000]  modified: 00000000ffc00000 - 0000000100000000 (reserved)
>  [    0.000000]  modified: 0000000100000000 - 0000000128000000 (usable)
>  [    0.000000]  modified: 0000000128000000 - 000000012c000000 (reserved)
>
> [    0.000000] Entering add_active_range(0, 0, 156) 3 entries of 256 used
>  [    0.000000] Entering add_active_range(0, 256, 819200) 3 entries of 256 used
>  [    0.000000] Entering add_active_range(0, 1048576, 1212416) 3 entries of 256 used
>
> [    0.000000] max_pfn_mapped = 1228800
>  [    0.000000] init_memory_mapping
>
>  ...
>
>  I will try tomorrow some more boot options but now I need some sleep ;)

thanks.  let's try different mtrr_chunk_size/mtrr_gran_size to get
back more ram.
under mtrr_gran_size=128m, does the your X server work well..., fast or slow?

YH
YH

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH] x86: mtrr cleanup for converting continuous to discrete layout v5
  2008-04-30  1:16                     ` Eric W. Biederman
@ 2008-04-30  9:57                       ` Alan Cox
  0 siblings, 0 replies; 89+ messages in thread
From: Alan Cox @ 2008-04-30  9:57 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Thomas Gleixner, Yinghai Lu, Andrew Morton, Ingo Molnar,
	H. Peter Anvin, Gabriel C, linux-kernel@vger.kernel.org,
	Mika Fischer

> The best definition I have heard of SMM mode is: smack the stupid OS
> that isn't doing what it should be doing at runtime mode.

Its also used for all sorts of ugly horrible hacks - like some boards
with broken latches on timers where the firmware uses SMM to emulate the
hardware and spins until the low byte flips before loading the time into
the RTC.

Thankfully on the newest cpus SMM mode is a bit more elegantly designed,
the original is a weird not quite real mode with extra bugs while nowdays
its more of a virtual machine

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH 1/2] x86: mtrr cleanup for converting continuous to discrete layout v7
  2008-04-30  4:25                                       ` Yinghai Lu
@ 2008-04-30 12:04                                         ` Gabriel C
  2008-04-30 16:26                                           ` Yinghai Lu
  0 siblings, 1 reply; 89+ messages in thread
From: Gabriel C @ 2008-04-30 12:04 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Andrew Morton, Ingo Molnar, H. Peter Anvin, Thomas Gleixner,
	Mika Fischer, linux-kernel@vger.kernel.org

Yinghai Lu wrote:
> On Tue, Apr 29, 2008 at 9:12 PM, Gabriel C <nix.or.die@googlemail.com> wrote:
>> Yinghai Lu wrote:
>>  > please try the patches
>>  >
>>  > from
>>  > http://lkml.org/lkml/2008/4/29/754
>>  > http://lkml.org/lkml/2008/4/29/753
>>  >
>>  > in addtion to
>>  >
>>  > http://people.redhat.com/mingo/x86.git/README
>>  > ( it has v8 already)
>>  >
>>  > and try with mtrr_gran_size=128m
>>
>>  Without any value I get :
>>
>>  ...
>>
>>  [    0.000000] Linux version 2.6.25-x86-latest.git-06598-g6a2c2ff-dirty (crazy@thor) (gcc version 4.3.0 (Frugalware Linux) ) #1 SMP PREEMPT Wed Apr 30 05:51:39 CEST 2008
>>
>>
>> [    0.000000] Command line: root=/dev/sdb1 ro debug vga=0x317
>>  [    0.000000] BIOS-provided physical RAM map:
>>  [    0.000000]  BIOS-e820: 0000000000000000 - 000000000009cc00 (usable)
>>  [    0.000000]  BIOS-e820: 000000000009cc00 - 00000000000a0000 (reserved)
>>  [    0.000000]  BIOS-e820: 00000000000e4000 - 0000000000100000 (reserved)
>>  [    0.000000]  BIOS-e820: 0000000000100000 - 00000000cf550000 (usable)
>>  [    0.000000]  BIOS-e820: 00000000cf550000 - 00000000cf55e000 (ACPI data)
>>  [    0.000000]  BIOS-e820: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
>>  [    0.000000]  BIOS-e820: 00000000cf5e0000 - 00000000cf600000 (reserved)
>>  [    0.000000]  BIOS-e820: 00000000fee00000 - 00000000fee01000 (reserved)
>>  [    0.000000]  BIOS-e820: 00000000ffc00000 - 0000000100000000 (reserved)
>>  [    0.000000]  BIOS-e820: 0000000100000000 - 000000012c000000 (usable)
>>  [    0.000000] Entering add_active_range(0, 0, 156) 0 entries of 256 used
>>  [    0.000000] Entering add_active_range(0, 256, 849232) 1 entries of 256 used
>>  [    0.000000] Entering add_active_range(0, 1048576, 1228800) 2 entries of 256 used
>>  [    0.000000] max_pfn_mapped = 1228800
>>  [    0.000000] x86 PAT enabled: cpu 0, old 0x7040600070406, new 0x7010600070106
>>  [    0.000000] After WB checking
>>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 000000000012c000
>>  [    0.000000] After UC checking
>>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>>  [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>>  [    0.000000] After sorting
>>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>>  [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>>  [    0.000000] range0: 0000000000000000 - 00000000c0000000
>>  [    0.000000] Setting variable MTRR 0, base: 0MB, range: 2048MB, type WB
>>  [    0.000000] Setting variable MTRR 1, base: 2048MB, range: 1024MB, type WB
>>  [    0.000000] range: 00000000c0000000 - 00000000cf600000
>>
>> [    0.000000] Setting variable MTRR 2, base: 3072MB, range: 128MB, type WB
>>  [    0.000000] Setting variable MTRR 3, base: 3200MB, range: 64MB, type WB
>>  [    0.000000] Setting variable MTRR 4, base: 3264MB, range: 32MB, type WB
>>  [    0.000000] Setting variable MTRR 5, base: 3296MB, range: 16MB, type WB
>>  [    0.000000] Setting variable MTRR 6, base: 3312MB, range: 4MB, type WB
>>  [    0.000000] Setting variable MTRR 7, base: 3316MB, range: 2MB, type WB
>>  [    0.000000] range0: 00000000cf800000 - 00000000df800000
>>  [    0.000000] range: 00000000df800000 - 00000000d0000000
>>
>> [    0.000000] DONE variable MTRRs
>>  [    0.000000] x86 PAT enabled: cpu 0, old 0x7010600070106, new 0x7010600070106
>>  [    0.000000] After WB checking
>>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>>
>> [    0.000000] After UC checking
>>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>>
>> [    0.000000] After sorting
>>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>>  [    0.000000] WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing 704MB of RAM.
>>
>> [    0.000000] update e820 for mtrr
>>
>> [    0.000000] modified physical RAM map:
>>  [    0.000000]  modified: 0000000000000000 - 000000000009cc00 (usable)
>>  [    0.000000]  modified: 000000000009cc00 - 00000000000a0000 (reserved)
>>  [    0.000000]  modified: 00000000000e4000 - 0000000000100000 (reserved)
>>  [    0.000000]  modified: 0000000000100000 - 00000000cf550000 (usable)
>>  [    0.000000]  modified: 00000000cf550000 - 00000000cf55e000 (ACPI data)
>>  [    0.000000]  modified: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
>>  [    0.000000]  modified: 00000000cf5e0000 - 00000000cf600000 (reserved)
>>  [    0.000000]  modified: 00000000fee00000 - 00000000fee01000 (reserved)
>>
>> [    0.000000]  modified: 00000000ffc00000 - 000000012c000000 (reserved)
>>  [    0.000000] Entering add_active_range(0, 0, 156) 3 entries of 256 used
>>  [    0.000000] Entering add_active_range(0, 256, 849232) 3 entries of 256 used
>>
>> [    0.000000] max_pfn_mapped = 1228800
>>  [    0.000000] init_memory_mapping
>>
>>  ...
>>
>>  [   20.984343] [drm] Initialized i915 1.6.0 20060119 on minor 0
>>  [   21.450368] mtrr: no more MTRRs available
>>
>>  ...
>>
>>  with mtrr_gran_size=128m I get :
>>
>>  ...
>>
>>  [    0.000000] Linux version 2.6.25-x86-latest.git-06598-g6a2c2ff-dirty (crazy@thor) (gcc version 4.3.0 (Frugalware Linux) ) #1 SMP PREEMPT Wed Apr 30 05:51:39 CEST 2008
>>  [    0.000000] Command line: root=/dev/sdb1 ro debug vga=0x317 mtrr_gran_size=128m
>>
>>
>> [    0.000000] BIOS-provided physical RAM map:
>>  [    0.000000]  BIOS-e820: 0000000000000000 - 000000000009cc00 (usable)
>>  [    0.000000]  BIOS-e820: 000000000009cc00 - 00000000000a0000 (reserved)
>>  [    0.000000]  BIOS-e820: 00000000000e4000 - 0000000000100000 (reserved)
>>  [    0.000000]  BIOS-e820: 0000000000100000 - 00000000cf550000 (usable)
>>  [    0.000000]  BIOS-e820: 00000000cf550000 - 00000000cf55e000 (ACPI data)
>>  [    0.000000]  BIOS-e820: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
>>  [    0.000000]  BIOS-e820: 00000000cf5e0000 - 00000000cf600000 (reserved)
>>  [    0.000000]  BIOS-e820: 00000000fee00000 - 00000000fee01000 (reserved)
>>  [    0.000000]  BIOS-e820: 00000000ffc00000 - 0000000100000000 (reserved)
>>  [    0.000000]  BIOS-e820: 0000000100000000 - 000000012c000000 (usable)
>>  [    0.000000] Entering add_active_range(0, 0, 156) 0 entries of 256 used
>>  [    0.000000] Entering add_active_range(0, 256, 849232) 1 entries of 256 used
>>  [    0.000000] Entering add_active_range(0, 1048576, 1228800) 2 entries of 256 used
>>  [    0.000000] max_pfn_mapped = 1228800
>>  [    0.000000] x86 PAT enabled: cpu 0, old 0x7040600070406, new 0x7010600070106
>>  [    0.000000] After WB checking
>>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 000000000012c000
>>  [    0.000000] After UC checking
>>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>>  [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>>  [    0.000000] After sorting
>>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>>  [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>>  [    0.000000] range0: 0000000000000000 - 00000000c0000000
>>  [    0.000000] Setting variable MTRR 0, base: 0MB, range: 2048MB, type WB
>>  [    0.000000] Setting variable MTRR 1, base: 2048MB, range: 1024MB, type WB
>>  [    0.000000] range: 00000000c0000000 - 00000000c8000000
>>  [    0.000000] Setting variable MTRR 2, base: 3072MB, range: 128MB, type WB
>>  [    0.000000] rangeX: 00000000d0000000 - 00000000d0000000
>>  [    0.000000] range0: 0000000100000000 - 0000000120000000
>>  [    0.000000] Setting variable MTRR 3, base: 4096MB, range: 512MB, type WB
>>  [    0.000000] range: 0000000120000000 - 0000000128000000
>>  [    0.000000] Setting variable MTRR 4, base: 4608MB, range: 128MB, type WB
>>  [    0.000000] DONE variable MTRRs
>>  [    0.000000] x86 PAT enabled: cpu 0, old 0x7010600070106, new 0x7010600070106
>>  [    0.000000] After WB checking
>>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000c8000
>>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000128000
>>  [    0.000000] After UC checking
>>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000c8000
>>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000128000
>>  [    0.000000] After sorting
>>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000c8000
>>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000128000
>>  [    0.000000] WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing 181MB of RAM.
>>
>> [    0.000000] update e820 for mtrr
>>
>> [    0.000000] modified physical RAM map:
>>  [    0.000000]  modified: 0000000000000000 - 000000000009cc00 (usable)
>>  [    0.000000]  modified: 000000000009cc00 - 00000000000a0000 (reserved)
>>  [    0.000000]  modified: 00000000000e4000 - 0000000000100000 (reserved)
>>  [    0.000000]  modified: 0000000000100000 - 00000000c8000000 (usable)
>>  [    0.000000]  modified: 00000000c8000000 - 00000000cf550000 (reserved)
>>  [    0.000000]  modified: 00000000cf550000 - 00000000cf55e000 (ACPI data)
>>  [    0.000000]  modified: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
>>  [    0.000000]  modified: 00000000cf5e0000 - 00000000cf600000 (reserved)
>>  [    0.000000]  modified: 00000000fee00000 - 00000000fee01000 (reserved)
>>
>> [    0.000000]  modified: 00000000ffc00000 - 0000000100000000 (reserved)
>>  [    0.000000]  modified: 0000000100000000 - 0000000128000000 (usable)
>>  [    0.000000]  modified: 0000000128000000 - 000000012c000000 (reserved)
>>
>> [    0.000000] Entering add_active_range(0, 0, 156) 3 entries of 256 used
>>  [    0.000000] Entering add_active_range(0, 256, 819200) 3 entries of 256 used
>>  [    0.000000] Entering add_active_range(0, 1048576, 1212416) 3 entries of 256 used
>>
>> [    0.000000] max_pfn_mapped = 1228800
>>  [    0.000000] init_memory_mapping
>>
>>  ...
>>
>>  I will try tomorrow some more boot options but now I need some sleep ;)
> 
> thanks.  let's try different mtrr_chunk_size/mtrr_gran_size to get
> back more ram.
> under mtrr_gran_size=128m, does the your X server work well..., fast or slow?

Yes X is fine and fast , it is even fine ( slower from my felling ) when I lose the 704MB.
In general with x86-latest.git tree things seems faster on that box , maybe there are some other bug fixes too.

I've tested some mtrr_chunk_size/mtrr_gran_size combos, dmesg's are uploaded there :

http://frugalware.org/~crazy/dmesg/mtrr/

Also setting lower values on mtrr_gran_size seems to give more RAM back , 
mtrr_chunk_size 256/512 eats 704 MB and 128 doesn't seems to do something ?

Other things I noticed ( probably you could add a note about in kernel-parameter.txt or some doc file ):

Setting mtrr_gran_size to high , on my box >=512m hangs the box on boot , 
setting it to low , on my box <=8m , will cause X to die with such a message :

xf86MapVidMem: Could not mmap framebuffer (0xd0000000,0x10000000) (Invalid argument)

If you want I can test such values for mtrr_chunk_size too , just let me know.

To be honest I'm even fine when losing 700 - 800 MB as long X and everything else does work. 
The other alternative for me for that problem without your patches will be to buy new ram ( 2 x 1G ) 
and then I lose near 2,3G compared to now or live with broken X until xorg-server will support and 
*work fine* with PAT ( most probably not that soon ).

Gabriel 

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH] x86: fix trimming e820 with MTRR holes. - fix
  2008-04-30  3:25             ` [PATCH] x86: fix trimming e820 with MTRR holes. - fix Yinghai Lu
@ 2008-04-30 12:09               ` Ingo Molnar
  0 siblings, 0 replies; 89+ messages in thread
From: Ingo Molnar @ 2008-04-30 12:09 UTC (permalink / raw)
  To: yhlu.kernel
  Cc: Andrew Morton, H. Peter Anvin, Thomas Gleixner, Gabriel C,
	Mika Fischer, linux-kernel@vger.kernel.org


* Yinghai Lu <yhlu.kernel.send@gmail.com> wrote:

> v2: process hole then end_pfn
>     fix update_memory_range with whole cover comparing

thanks, applied.

	Ingo

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH] x86: mtrr cleanup for converting continuous to discrete layout v8 - fix
  2008-04-30  3:25             ` [PATCH] x86: mtrr cleanup for converting continuous to discrete layout v8 - fix Yinghai Lu
@ 2008-04-30 12:09               ` Ingo Molnar
  2008-05-01  8:00               ` [PATCH] x86: mtrr cleanup for converting continuous to discrete - auto detect Yinghai Lu
  1 sibling, 0 replies; 89+ messages in thread
From: Ingo Molnar @ 2008-04-30 12:09 UTC (permalink / raw)
  To: yhlu.kernel
  Cc: Andrew Morton, H. Peter Anvin, Thomas Gleixner, Gabriel C,
	Mika Fischer, linux-kernel@vger.kernel.org


* Yinghai Lu <yhlu.kernel.send@gmail.com> wrote:

> v9: address format change requests by Ingo
>     more case handling in range_to_var_with_hole

thanks, applied.

	Ingo

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH 1/2] x86: mtrr cleanup for converting continuous to discrete layout v7
  2008-04-30 12:04                                         ` Gabriel C
@ 2008-04-30 16:26                                           ` Yinghai Lu
  0 siblings, 0 replies; 89+ messages in thread
From: Yinghai Lu @ 2008-04-30 16:26 UTC (permalink / raw)
  To: Gabriel C
  Cc: Andrew Morton, Ingo Molnar, H. Peter Anvin, Thomas Gleixner,
	Mika Fischer, linux-kernel@vger.kernel.org

On Wed, Apr 30, 2008 at 5:04 AM, Gabriel C <nix.or.die@googlemail.com> wrote:
>
> Yinghai Lu wrote:
>  > On Tue, Apr 29, 2008 at 9:12 PM, Gabriel C <nix.or.die@googlemail.com> wrote:
>  >> Yinghai Lu wrote:
>  >>  > please try the patches
>  >>  >
>  >>  > from
>  >>  > http://lkml.org/lkml/2008/4/29/754
>  >>  > http://lkml.org/lkml/2008/4/29/753
>  >>  >
>  >>  > in addtion to
>  >>  >
>  >>  > http://people.redhat.com/mingo/x86.git/README
>  >>  > ( it has v8 already)
>  >>  >
>  >>  > and try with mtrr_gran_size=128m
>  >>
>  >>  Without any value I get :
>  >>
>  >>  ...
>  >>
>  >>  [    0.000000] Linux version 2.6.25-x86-latest.git-06598-g6a2c2ff-dirty (crazy@thor) (gcc version 4.3.0 (Frugalware Linux) ) #1 SMP PREEMPT Wed Apr 30 05:51:39 CEST 2008
>  >>
>  >>
>  >> [    0.000000] Command line: root=/dev/sdb1 ro debug vga=0x317
>  >>  [    0.000000] BIOS-provided physical RAM map:
>  >>  [    0.000000]  BIOS-e820: 0000000000000000 - 000000000009cc00 (usable)
>  >>  [    0.000000]  BIOS-e820: 000000000009cc00 - 00000000000a0000 (reserved)
>  >>  [    0.000000]  BIOS-e820: 00000000000e4000 - 0000000000100000 (reserved)
>  >>  [    0.000000]  BIOS-e820: 0000000000100000 - 00000000cf550000 (usable)
>  >>  [    0.000000]  BIOS-e820: 00000000cf550000 - 00000000cf55e000 (ACPI data)
>  >>  [    0.000000]  BIOS-e820: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
>  >>  [    0.000000]  BIOS-e820: 00000000cf5e0000 - 00000000cf600000 (reserved)
>  >>  [    0.000000]  BIOS-e820: 00000000fee00000 - 00000000fee01000 (reserved)
>  >>  [    0.000000]  BIOS-e820: 00000000ffc00000 - 0000000100000000 (reserved)
>  >>  [    0.000000]  BIOS-e820: 0000000100000000 - 000000012c000000 (usable)
>  >>  [    0.000000] Entering add_active_range(0, 0, 156) 0 entries of 256 used
>  >>  [    0.000000] Entering add_active_range(0, 256, 849232) 1 entries of 256 used
>  >>  [    0.000000] Entering add_active_range(0, 1048576, 1228800) 2 entries of 256 used
>  >>  [    0.000000] max_pfn_mapped = 1228800
>  >>  [    0.000000] x86 PAT enabled: cpu 0, old 0x7040600070406, new 0x7010600070106
>  >>  [    0.000000] After WB checking
>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 000000000012c000
>  >>  [    0.000000] After UC checking
>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>  >>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>  >>  [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>  >>  [    0.000000] After sorting
>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>  >>  [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>  >>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>  >>  [    0.000000] range0: 0000000000000000 - 00000000c0000000
>  >>  [    0.000000] Setting variable MTRR 0, base: 0MB, range: 2048MB, type WB
>  >>  [    0.000000] Setting variable MTRR 1, base: 2048MB, range: 1024MB, type WB
>  >>  [    0.000000] range: 00000000c0000000 - 00000000cf600000
>  >>
>  >> [    0.000000] Setting variable MTRR 2, base: 3072MB, range: 128MB, type WB
>  >>  [    0.000000] Setting variable MTRR 3, base: 3200MB, range: 64MB, type WB
>  >>  [    0.000000] Setting variable MTRR 4, base: 3264MB, range: 32MB, type WB
>  >>  [    0.000000] Setting variable MTRR 5, base: 3296MB, range: 16MB, type WB
>  >>  [    0.000000] Setting variable MTRR 6, base: 3312MB, range: 4MB, type WB
>  >>  [    0.000000] Setting variable MTRR 7, base: 3316MB, range: 2MB, type WB
>  >>  [    0.000000] range0: 00000000cf800000 - 00000000df800000
>  >>  [    0.000000] range: 00000000df800000 - 00000000d0000000
>  >>
>  >> [    0.000000] DONE variable MTRRs
>  >>  [    0.000000] x86 PAT enabled: cpu 0, old 0x7010600070106, new 0x7010600070106
>  >>  [    0.000000] After WB checking
>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>  >>
>  >> [    0.000000] After UC checking
>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>  >>
>  >> [    0.000000] After sorting
>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>  >>  [    0.000000] WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing 704MB of RAM.
>  >>
>  >> [    0.000000] update e820 for mtrr
>  >>
>  >> [    0.000000] modified physical RAM map:
>  >>  [    0.000000]  modified: 0000000000000000 - 000000000009cc00 (usable)
>  >>  [    0.000000]  modified: 000000000009cc00 - 00000000000a0000 (reserved)
>  >>  [    0.000000]  modified: 00000000000e4000 - 0000000000100000 (reserved)
>  >>  [    0.000000]  modified: 0000000000100000 - 00000000cf550000 (usable)
>  >>  [    0.000000]  modified: 00000000cf550000 - 00000000cf55e000 (ACPI data)
>  >>  [    0.000000]  modified: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
>  >>  [    0.000000]  modified: 00000000cf5e0000 - 00000000cf600000 (reserved)
>  >>  [    0.000000]  modified: 00000000fee00000 - 00000000fee01000 (reserved)
>  >>
>  >> [    0.000000]  modified: 00000000ffc00000 - 000000012c000000 (reserved)
>  >>  [    0.000000] Entering add_active_range(0, 0, 156) 3 entries of 256 used
>  >>  [    0.000000] Entering add_active_range(0, 256, 849232) 3 entries of 256 used
>  >>
>  >> [    0.000000] max_pfn_mapped = 1228800
>  >>  [    0.000000] init_memory_mapping
>  >>
>  >>  ...
>  >>
>  >>  [   20.984343] [drm] Initialized i915 1.6.0 20060119 on minor 0
>  >>  [   21.450368] mtrr: no more MTRRs available
>  >>
>  >>  ...
>  >>
>  >>  with mtrr_gran_size=128m I get :
>  >>
>  >>  ...
>  >>
>  >>  [    0.000000] Linux version 2.6.25-x86-latest.git-06598-g6a2c2ff-dirty (crazy@thor) (gcc version 4.3.0 (Frugalware Linux) ) #1 SMP PREEMPT Wed Apr 30 05:51:39 CEST 2008
>  >>  [    0.000000] Command line: root=/dev/sdb1 ro debug vga=0x317 mtrr_gran_size=128m
>  >>
>  >>
>  >> [    0.000000] BIOS-provided physical RAM map:
>  >>  [    0.000000]  BIOS-e820: 0000000000000000 - 000000000009cc00 (usable)
>  >>  [    0.000000]  BIOS-e820: 000000000009cc00 - 00000000000a0000 (reserved)
>  >>  [    0.000000]  BIOS-e820: 00000000000e4000 - 0000000000100000 (reserved)
>  >>  [    0.000000]  BIOS-e820: 0000000000100000 - 00000000cf550000 (usable)
>  >>  [    0.000000]  BIOS-e820: 00000000cf550000 - 00000000cf55e000 (ACPI data)
>  >>  [    0.000000]  BIOS-e820: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
>  >>  [    0.000000]  BIOS-e820: 00000000cf5e0000 - 00000000cf600000 (reserved)
>  >>  [    0.000000]  BIOS-e820: 00000000fee00000 - 00000000fee01000 (reserved)
>  >>  [    0.000000]  BIOS-e820: 00000000ffc00000 - 0000000100000000 (reserved)
>  >>  [    0.000000]  BIOS-e820: 0000000100000000 - 000000012c000000 (usable)
>  >>  [    0.000000] Entering add_active_range(0, 0, 156) 0 entries of 256 used
>  >>  [    0.000000] Entering add_active_range(0, 256, 849232) 1 entries of 256 used
>  >>  [    0.000000] Entering add_active_range(0, 1048576, 1228800) 2 entries of 256 used
>  >>  [    0.000000] max_pfn_mapped = 1228800
>  >>  [    0.000000] x86 PAT enabled: cpu 0, old 0x7040600070406, new 0x7010600070106
>  >>  [    0.000000] After WB checking
>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 000000000012c000
>  >>  [    0.000000] After UC checking
>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>  >>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>  >>  [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>  >>  [    0.000000] After sorting
>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
>  >>  [    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
>  >>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
>  >>  [    0.000000] range0: 0000000000000000 - 00000000c0000000
>  >>  [    0.000000] Setting variable MTRR 0, base: 0MB, range: 2048MB, type WB
>  >>  [    0.000000] Setting variable MTRR 1, base: 2048MB, range: 1024MB, type WB
>  >>  [    0.000000] range: 00000000c0000000 - 00000000c8000000
>  >>  [    0.000000] Setting variable MTRR 2, base: 3072MB, range: 128MB, type WB
>  >>  [    0.000000] rangeX: 00000000d0000000 - 00000000d0000000
>  >>  [    0.000000] range0: 0000000100000000 - 0000000120000000
>  >>  [    0.000000] Setting variable MTRR 3, base: 4096MB, range: 512MB, type WB
>  >>  [    0.000000] range: 0000000120000000 - 0000000128000000
>  >>  [    0.000000] Setting variable MTRR 4, base: 4608MB, range: 128MB, type WB
>  >>  [    0.000000] DONE variable MTRRs
>  >>  [    0.000000] x86 PAT enabled: cpu 0, old 0x7010600070106, new 0x7010600070106
>  >>  [    0.000000] After WB checking
>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000c8000
>  >>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000128000
>  >>  [    0.000000] After UC checking
>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000c8000
>  >>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000128000
>  >>  [    0.000000] After sorting
>  >>  [    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000c8000
>  >>  [    0.000000] MTRR MAP PFN: 0000000000100000 - 0000000000128000
>  >>  [    0.000000] WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing 181MB of RAM.
>  >>
>  >> [    0.000000] update e820 for mtrr
>  >>
>  >> [    0.000000] modified physical RAM map:
>  >>  [    0.000000]  modified: 0000000000000000 - 000000000009cc00 (usable)
>  >>  [    0.000000]  modified: 000000000009cc00 - 00000000000a0000 (reserved)
>  >>  [    0.000000]  modified: 00000000000e4000 - 0000000000100000 (reserved)
>  >>  [    0.000000]  modified: 0000000000100000 - 00000000c8000000 (usable)
>  >>  [    0.000000]  modified: 00000000c8000000 - 00000000cf550000 (reserved)
>  >>  [    0.000000]  modified: 00000000cf550000 - 00000000cf55e000 (ACPI data)
>  >>  [    0.000000]  modified: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
>  >>  [    0.000000]  modified: 00000000cf5e0000 - 00000000cf600000 (reserved)
>  >>  [    0.000000]  modified: 00000000fee00000 - 00000000fee01000 (reserved)
>  >>
>  >> [    0.000000]  modified: 00000000ffc00000 - 0000000100000000 (reserved)
>  >>  [    0.000000]  modified: 0000000100000000 - 0000000128000000 (usable)
>  >>  [    0.000000]  modified: 0000000128000000 - 000000012c000000 (reserved)
>  >>
>  >> [    0.000000] Entering add_active_range(0, 0, 156) 3 entries of 256 used
>  >>  [    0.000000] Entering add_active_range(0, 256, 819200) 3 entries of 256 used
>  >>  [    0.000000] Entering add_active_range(0, 1048576, 1212416) 3 entries of 256 used
>  >>
>  >> [    0.000000] max_pfn_mapped = 1228800
>  >>  [    0.000000] init_memory_mapping
>  >>
>  >>  ...
>  >>
>  >>  I will try tomorrow some more boot options but now I need some sleep ;)
>  >
>  > thanks.  let's try different mtrr_chunk_size/mtrr_gran_size to get
>  > back more ram.
>  > under mtrr_gran_size=128m, does the your X server work well..., fast or slow?
>
>  Yes X is fine and fast , it is even fine ( slower from my felling ) when I lose the 704MB.
>  In general with x86-latest.git tree things seems faster on that box , maybe there are some other bug fixes too.
>
>  I've tested some mtrr_chunk_size/mtrr_gran_size combos, dmesg's are uploaded there :
>
>  http://frugalware.org/~crazy/dmesg/mtrr/
>
>  Also setting lower values on mtrr_gran_size seems to give more RAM back ,
>  mtrr_chunk_size 256/512 eats 704 MB and 128 doesn't seems to do something ?
>
>  Other things I noticed ( probably you could add a note about in kernel-parameter.txt or some doc file ):
>
>  Setting mtrr_gran_size to high , on my box >=512m hangs the box on boot ,
>  setting it to low , on my box <=8m , will cause X to die with such a message :
>
>  xf86MapVidMem: Could not mmap framebuffer (0xd0000000,0x10000000) (Invalid argument)
>
>  If you want I can test such values for mtrr_chunk_size too , just let me know.
>
>  To be honest I'm even fine when losing 700 - 800 MB as long X and everything else does work.
>  The other alternative for me for that problem without your patches will be to buy new ram ( 2 x 1G )
>  and then I lose near 2,3G compared to now or live with broken X until xorg-server will support and
>  *work fine* with PAT ( most probably not that soon ).

great.  I am still working on
1. auto detect optimal chunk_size, gran_size
2. get back all RAM, at least only lose 1 or 2 M

YH

^ permalink raw reply	[flat|nested] 89+ messages in thread

* [PATCH] x86: mtrr cleanup for converting continuous to discrete - auto detect
  2008-04-30  3:25             ` [PATCH] x86: mtrr cleanup for converting continuous to discrete layout v8 - fix Yinghai Lu
  2008-04-30 12:09               ` Ingo Molnar
@ 2008-05-01  8:00               ` Yinghai Lu
  2008-05-01 11:45                 ` Gabriel C
                                   ` (3 more replies)
  1 sibling, 4 replies; 89+ messages in thread
From: Yinghai Lu @ 2008-05-01  8:00 UTC (permalink / raw)
  To: Andrew Morton, Ingo Molnar, H. Peter Anvin, Thomas Gleixner,
	Gabriel C, Mika Fischer
  Cc: linux-kernel@vger.kernel.org


loop mtrr chunk_size and gran_size from 1M to 2G to find out optimal value.

so user don't need to add mtrr_chunk_size and mtrr_gran_size, 

if optimal value is not found, print out all list to help select less optimal
value.

add mtrr_spare_reg_nr= so user could set 2 instead of 1, if the card need more entries.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

Index: linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/main.c
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
@@ -610,28 +610,6 @@ static struct sysdev_driver mtrr_sysdev_
 	.resume		= mtrr_restore,
 };
 
-#ifdef CONFIG_MTRR_SANITIZER
-static int enable_mtrr_cleanup __initdata = CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT;
-#else
-static int enable_mtrr_cleanup __initdata = -1;
-#endif
-
-static int __init disable_mtrr_cleanup_setup(char *str)
-{
-	if (enable_mtrr_cleanup != -1)
-		enable_mtrr_cleanup = 0;
-	return 0;
-}
-early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);
-
-static int __init enable_mtrr_cleanup_setup(char *str)
-{
-	if (enable_mtrr_cleanup != -1)
-		enable_mtrr_cleanup = 1;
-	return 0;
-}
-early_param("enble_mtrr_cleanup", enable_mtrr_cleanup_setup);
-
 /* should be related to MTRR_VAR_RANGES nums */
 #define RANGE_NUM 256
 
@@ -702,13 +680,15 @@ subtract_range(struct res_range *range, 
 			continue;
 		}
 
-		if (start <= range[j].start && end < range[j].end && range[j].start < end + 1) {
+		if (start <= range[j].start && end < range[j].end &&
+		    range[j].start < end + 1) {
 			range[j].start = end + 1;
 			continue;
 		}
 
 
-		if (start > range[j].start && end >= range[j].end && range[j].end > start - 1) {
+		if (start > range[j].start && end >= range[j].end &&
+		    range[j].end > start - 1) {
 			range[j].end = start - 1;
 			continue;
 		}
@@ -743,18 +723,119 @@ static int __init cmp_range(const void *
 	return start1 - start2;
 }
 
+struct var_mtrr_range_state {
+	unsigned long base_pfn;
+	unsigned long size_pfn;
+	mtrr_type type;
+};
+
+struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
+
+static int __init
+x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
+		       unsigned long extra_remove_base,
+		       unsigned long extra_remove_size)
+{
+	unsigned long i, base, size;
+	mtrr_type type;
+
+	for (i = 0; i < num_var_ranges; i++) {
+		type = range_state[i].type;
+		if (type != MTRR_TYPE_WRBACK)
+			continue;
+		base = range_state[i].base_pfn;
+		size = range_state[i].size_pfn;
+		nr_range = add_range_with_merge(range, nr_range, base,
+						base + size - 1);
+	}
+	printk(KERN_DEBUG "After WB checking\n");
+	for (i = 0; i < nr_range; i++)
+		printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
+				 range[i].start, range[i].end + 1);
+
+	/* take out UC ranges */
+	for (i = 0; i < num_var_ranges; i++) {
+		type = range_state[i].type;
+		if (type != MTRR_TYPE_UNCACHABLE)
+			continue;
+		size = range_state[i].size_pfn;
+		if (!size)
+			continue;
+		base = range_state[i].base_pfn;
+		subtract_range(range, base, base + size - 1);
+	}
+	if (extra_remove_size)
+		subtract_range(range, extra_remove_base,
+				 extra_remove_base + extra_remove_size  - 1);
+
+	/* get new range num */
+	nr_range = 0;
+	for (i = 0; i < RANGE_NUM; i++) {
+		if (!range[i].end)
+			continue;
+		nr_range++;
+	}
+	printk(KERN_DEBUG "After UC checking\n");
+	for (i = 0; i < nr_range; i++)
+		printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
+			 range[i].start, range[i].end + 1);
+
+	/* sort the ranges */
+	sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
+	printk(KERN_DEBUG "After sorting\n");
+	for (i = 0; i < nr_range; i++)
+		printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
+				 range[i].start, range[i].end + 1);
+
+	return nr_range;
+}
+
+static struct res_range __initdata range[RANGE_NUM];
+
+#ifdef CONFIG_MTRR_SANITIZER
+
+static unsigned long __init sum_ranges(struct res_range *range, int nr_range)
+{
+	unsigned long sum;
+	int i;
+
+	sum = 0;
+	for (i = 0; i < nr_range; i++)
+		sum += range[i].end + 1 - range[i].start;
+
+	return sum;
+}
+
+static int enable_mtrr_cleanup __initdata =
+				 CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT;
+
+static int __init disable_mtrr_cleanup_setup(char *str)
+{
+	if (enable_mtrr_cleanup != -1)
+		enable_mtrr_cleanup = 0;
+	return 0;
+}
+early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);
+
+static int __init enable_mtrr_cleanup_setup(char *str)
+{
+	if (enable_mtrr_cleanup != -1)
+		enable_mtrr_cleanup = 1;
+	return 0;
+}
+early_param("enble_mtrr_cleanup", enable_mtrr_cleanup_setup);
+
 struct var_mtrr_state {
 	unsigned long	range_startk;
 	unsigned long	range_sizek;
 	unsigned long	chunk_sizek;
 	unsigned long	gran_sizek;
 	unsigned int	reg;
-	unsigned int	address_bits;
 };
 
 static void __init
 set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
-		unsigned char type, unsigned address_bits)
+		unsigned char type, unsigned int address_bits)
 {
 	u32 base_lo, base_hi, mask_lo, mask_hi;
 	u64 base, mask;
@@ -781,10 +862,34 @@ set_var_mtrr(unsigned int reg, unsigned 
 	fill_mtrr_var_range(reg, base_lo, base_hi, mask_lo, mask_hi);
 }
 
+static void __init
+save_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
+		unsigned char type)
+{
+	range_state[reg].base_pfn = basek >> (PAGE_SHIFT - 10);
+	range_state[reg].size_pfn = sizek >> (PAGE_SHIFT - 10);
+	range_state[reg].type = type;
+}
+
+static void __init
+set_var_mtrr_all(unsigned int address_bits)
+{
+	unsigned long basek, sizek;
+	unsigned char type;
+	unsigned int reg;
+
+	for (reg = 0; reg < num_var_ranges; reg++) {
+		basek = range_state[reg].base_pfn << (PAGE_SHIFT - 10);
+		sizek = range_state[reg].size_pfn << (PAGE_SHIFT - 10);
+		type = range_state[reg].type;
+
+		set_var_mtrr(reg, basek, sizek, type, address_bits);
+	}
+}
+
 static unsigned int __init
 range_to_mtrr(unsigned int reg, unsigned long range_startk,
-	      unsigned long range_sizek, unsigned char type,
-	      unsigned address_bits)
+	      unsigned long range_sizek, unsigned char type)
 {
 	if (!range_sizek || (reg >= num_var_ranges))
 		return reg;
@@ -803,12 +908,12 @@ range_to_mtrr(unsigned int reg, unsigned
 			align = max_align;
 
 		sizek = 1 << align;
-		printk(KERN_INFO "Setting variable MTRR %d, base: %ldMB, range: %ldMB, type %s\n",
+		printk(KERN_DEBUG "Setting variable MTRR %d, base: %ldMB, range: %ldMB, type %s\n",
 			reg, range_startk >> 10, sizek >> 10,
 			(type == MTRR_TYPE_UNCACHABLE)?"UC":
 			    ((type == MTRR_TYPE_WRBACK)?"WB":"Other")
 			);
-		set_var_mtrr(reg++, range_startk, sizek, type, address_bits);
+		save_var_mtrr(reg++, range_startk, sizek, type);
 		range_startk += sizek;
 		range_sizek -= sizek;
 		if (reg >= num_var_ranges)
@@ -817,10 +922,12 @@ range_to_mtrr(unsigned int reg, unsigned
 	return reg;
 }
 
-static void __init
-range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek)
+static unsigned __init
+range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek,
+			unsigned long sizek)
 {
 	unsigned long hole_basek, hole_sizek;
+	unsigned long second_basek, second_sizek;
 	unsigned long range0_basek, range0_sizek;
 	unsigned long range_basek, range_sizek;
 	unsigned long chunk_sizek;
@@ -828,64 +935,86 @@ range_to_mtrr_with_hole(struct var_mtrr_
 
 	hole_basek = 0;
 	hole_sizek = 0;
+	second_basek = 0;
+	second_sizek = 0;
 	chunk_sizek = state->chunk_sizek;
 	gran_sizek = state->gran_sizek;
 
 	/* align with gran size, prevent small block used up MTRRs */
 	range_basek = ALIGN(state->range_startk, gran_sizek);
 	if ((range_basek > basek) && basek)
-		return;
-	range_sizek = ALIGN(state->range_sizek - (range_basek - state->range_startk), gran_sizek);
+		return second_sizek;
+	state->range_sizek -= (range_basek - state->range_startk);
+	range_sizek = ALIGN(state->range_sizek, gran_sizek);
 
-	while (range_basek + range_sizek > (state->range_startk + state->range_sizek)) {
+	while (range_sizek > state->range_sizek) {
 		range_sizek -= gran_sizek;
 		if (!range_sizek)
-			return;
+			return second_sizek;
 	}
-	state->range_startk = range_basek;
 	state->range_sizek = range_sizek;
 
 	/* try to append some small hole */
 	range0_basek = state->range_startk;
 	range0_sizek = ALIGN(state->range_sizek, chunk_sizek);
 	if (range0_sizek == state->range_sizek) {
-			printk(KERN_INFO "rangeX: %016lx - %016lx\n", range0_basek<<10, (range0_basek + state->range_sizek)<<10);
-			state->reg = range_to_mtrr(state->reg, range0_basek,
-				state->range_sizek, MTRR_TYPE_WRBACK, state->address_bits);
-		return;
-	} else if (basek) {
-	    while (range0_basek + range0_sizek - chunk_sizek > basek) {
+		printk(KERN_DEBUG "rangeX: %016lx - %016lx\n", range0_basek<<10,
+				(range0_basek + state->range_sizek)<<10);
+		state->reg = range_to_mtrr(state->reg, range0_basek,
+				state->range_sizek, MTRR_TYPE_WRBACK);
+		return second_sizek;
+	}
+
+	range0_sizek -= chunk_sizek;
+	if (range0_sizek && sizek) {
+	    while (range0_basek + range0_sizek > (basek + sizek)) {
 		range0_sizek -= chunk_sizek;
 		if (!range0_sizek)
 			break;
 	    }
 	}
 
+	if (range0_sizek) {
+		printk(KERN_DEBUG "range0: %016lx - %016lx\n", range0_basek<<10,
+				(range0_basek + range0_sizek)<<10);
+		state->reg = range_to_mtrr(state->reg, range0_basek,
+				range0_sizek, MTRR_TYPE_WRBACK);
 
-	if (range0_sizek > chunk_sizek)
-		range0_sizek -= chunk_sizek;
-	printk(KERN_INFO "range0: %016lx - %016lx\n", range0_basek<<10, (range0_basek + range0_sizek)<<10);
-	state->reg = range_to_mtrr(state->reg, range0_basek,
-			range0_sizek, MTRR_TYPE_WRBACK, state->address_bits);
+	}
 
 	range_basek = range0_basek + range0_sizek;
 	range_sizek = chunk_sizek;
 
-	if ((range_sizek - (state->range_sizek - range0_sizek) < (chunk_sizek >> 1)) &&
-	    (range_basek + range_sizek <= basek)) {
-		hole_sizek = range_sizek - (state->range_sizek - range0_sizek);
+	if (range_basek + range_sizek > basek &&
+	    range_basek + range_sizek <= (basek + sizek)) {
+		second_basek = basek;
+		second_sizek = range_basek + range_sizek - basek;
+	}
+
+	if (range_sizek - (state->range_sizek - range0_sizek) - second_sizek
+	     < (chunk_sizek >> 1)) {
+		hole_sizek = range_sizek - (state->range_sizek - range0_sizek)
+				 - second_sizek;
 		hole_basek = range_basek + range_sizek - hole_sizek;
-	} else
+	} else {
 		range_sizek = state->range_sizek - range0_sizek;
+		second_basek = 0;
+		second_sizek = 0;
+	}
 
-	printk(KERN_INFO "range: %016lx - %016lx\n", range_basek<<10, (range_basek + range_sizek)<<10);
-	state->reg = range_to_mtrr(state->reg, range_basek,
-			range_sizek, MTRR_TYPE_WRBACK, state->address_bits);
+	printk(KERN_DEBUG "range: %016lx - %016lx\n", range_basek<<10,
+			 (range_basek + range_sizek)<<10);
+	state->reg = range_to_mtrr(state->reg, range_basek, range_sizek,
+					 MTRR_TYPE_WRBACK);
 	if (hole_sizek) {
-		printk(KERN_INFO "hole: %016lx - %016lx\n", hole_basek<<10, (hole_basek + hole_sizek)<<10);
-		state->reg = range_to_mtrr(state->reg, hole_basek,
-				hole_sizek, MTRR_TYPE_UNCACHABLE, state->address_bits);
+		printk(KERN_DEBUG "hole: %016lx - %016lx\n", hole_basek<<10,
+				 (hole_basek + hole_sizek)<<10);
+		state->reg = range_to_mtrr(state->reg, hole_basek, hole_sizek,
+						 MTRR_TYPE_UNCACHABLE);
+
 	}
+
+	return second_sizek;
 }
 
 static void __init
@@ -893,6 +1022,7 @@ set_var_mtrr_range(struct var_mtrr_state
 		   unsigned long size_pfn)
 {
 	unsigned long basek, sizek;
+	unsigned long second_sizek = 0;
 
 	if (state->reg >= num_var_ranges)
 		return;
@@ -901,21 +1031,19 @@ set_var_mtrr_range(struct var_mtrr_state
 	sizek = size_pfn << (PAGE_SHIFT - 10);
 
 	/* See if I can merge with the last range */
-	if ((basek <= 1024) || (state->range_startk + state->range_sizek == basek)) {
+	if ((basek <= 1024) ||
+	    (state->range_startk + state->range_sizek == basek)) {
 		unsigned long endk = basek + sizek;
 		state->range_sizek = endk - state->range_startk;
 		return;
 	}
 	/* Write the range mtrrs */
-	if (state->range_sizek != 0) {
-		range_to_mtrr_with_hole(state, basek);
+	if (state->range_sizek != 0)
+		second_sizek = range_to_mtrr_with_hole(state, basek, sizek);
 
-		state->range_startk = 0;
-		state->range_sizek = 0;
-	}
 	/* Allocate an msr */
-	state->range_startk = basek;
-	state->range_sizek  = sizek;
+	state->range_startk = basek + second_sizek;
+	state->range_sizek  = sizek - second_sizek;
 }
 
 /* mininum size of mtrr block that can take hole */
@@ -931,7 +1059,7 @@ static int __init parse_mtrr_chunk_size_
 early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt);
 
 /* granity of mtrr of block */
-static u64 mtrr_gran_size __initdata = (1ULL<<20);
+static u64 mtrr_gran_size __initdata;
 
 static int __init parse_mtrr_gran_size_opt(char *p)
 {
@@ -942,91 +1070,78 @@ static int __init parse_mtrr_gran_size_o
 }
 early_param("mtrr_gran_size", parse_mtrr_gran_size_opt);
 
-static void __init
+static int nr_mtrr_spare_reg __initdata =
+				 CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT;
+
+static int __init parse_mtrr_spare_reg(char *arg)
+{
+	if (arg)
+		nr_mtrr_spare_reg = simple_strtoul(arg, NULL, 0);
+	return 0;
+}
+
+early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg);
+
+static int __init
 x86_setup_var_mtrrs(struct res_range *range, int nr_range,
-		    unsigned address_bits)
+		    u64 chunk_size, u64 gran_size)
 {
 	struct var_mtrr_state var_state;
 	int i;
+	int num_reg;
 
 	var_state.range_startk	= 0;
 	var_state.range_sizek	= 0;
 	var_state.reg		= 0;
-	var_state.address_bits	= address_bits;
-	var_state.chunk_sizek	= mtrr_chunk_size >> 10;
-	var_state.gran_sizek	= mtrr_gran_size >> 10;
+	var_state.chunk_sizek	= chunk_size >> 10;
+	var_state.gran_sizek	= gran_size >> 10;
+
+	memset(range_state, 0, sizeof(range_state));
 
 	/* Write the range etc */
 	for (i = 0; i < nr_range; i++)
-		set_var_mtrr_range(&var_state, range[i].start, range[i].end - range[i].start + 1);
+		set_var_mtrr_range(&var_state, range[i].start,
+				   range[i].end - range[i].start + 1);
 
 	/* Write the last range */
-	range_to_mtrr_with_hole(&var_state, 0);
-	printk(KERN_INFO "DONE variable MTRRs\n");
+	if (var_state.range_sizek != 0)
+		range_to_mtrr_with_hole(&var_state, 0, 0);
+	printk(KERN_DEBUG "DONE variable MTRRs\n");
+
+	num_reg = var_state.reg;
 	/* Clear out the extra MTRR's */
 	while (var_state.reg < num_var_ranges) {
-		set_var_mtrr(var_state.reg, 0, 0, 0, var_state.address_bits);
+		save_var_mtrr(var_state.reg, 0, 0, 0);
 		var_state.reg++;
 	}
-}
-
-static int __init
-x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
-		       unsigned long extra_remove_base,
-		       unsigned long extra_remove_size)
-{
-	unsigned long i, base, size;
-	mtrr_type type;
-
-	for (i = 0; i < num_var_ranges; i++) {
-		mtrr_if->get(i, &base, &size, &type);
-		if (type != MTRR_TYPE_WRBACK)
-			continue;
-		nr_range = add_range_with_merge(range, nr_range, base, base + size - 1);
-	}
-	printk(KERN_INFO "After WB checking\n");
-	for (i = 0; i < nr_range; i++)
-		printk(KERN_INFO "MTRR MAP PFN: %016lx - %016lx\n", range[i].start, range[i].end + 1);
 
-	/* take out UC ranges */
-	for (i = 0; i < num_var_ranges; i++) {
-		mtrr_if->get(i, &base, &size, &type);
-		if (type != MTRR_TYPE_UNCACHABLE)
-			continue;
-		if (!size)
-			continue;
-		subtract_range(range, base, base + size - 1);
-	}
-	if (extra_remove_size)
-		subtract_range(range, extra_remove_base,  extra_remove_base + extra_remove_size  - 1);
+	return num_reg;
+}
 
-	/* get new range num */
-	nr_range = 0;
-	for (i = 0; i < RANGE_NUM; i++) {
-		if (!range[i].end)
-			continue;
-		nr_range++;
-	}
-	printk(KERN_INFO "After UC checking\n");
-	for (i = 0; i < nr_range; i++)
-		printk(KERN_INFO "MTRR MAP PFN: %016lx - %016lx\n", range[i].start, range[i].end + 1);
+struct mtrr_cleanup_result {
+	unsigned long gran_sizek;
+	unsigned long chunk_sizek;
+	unsigned long lose_cover_sizek;
+	unsigned int num_reg;
+	int bad;
+};
 
-	/* sort the ranges */
-	sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
-	printk(KERN_INFO "After sorting\n");
-	for (i = 0; i < nr_range; i++)
-		printk(KERN_INFO "MTRR MAP PFN: %016lx - %016lx\n", range[i].start, range[i].end + 1);
+/* 2G, 1G, ..., 1M, so need 12 items and half maxtrix need 13*6 */
+#define NUM_RESULT 78
 
-	return nr_range;
-}
+static struct mtrr_cleanup_result __initdata result[NUM_RESULT];
+static struct res_range __initdata range_new[RANGE_NUM];
 
 static int __init mtrr_cleanup(unsigned address_bits)
 {
 	unsigned long extra_remove_base, extra_remove_size;
 	unsigned long i, base, size, def, dummy;
-	struct res_range range[RANGE_NUM];
 	mtrr_type type;
-	int nr_range;
+	int nr_range, nr_range_new;
+	u64 chunk_size, gran_size;
+	unsigned long range_sums, range_sums_new;
+	long min_loss_pfn;
+	int index;
 
 	/* extra one for all 0 */
 	int num[MTRR_NUM_TYPES + 1];
@@ -1038,10 +1153,20 @@ static int __init mtrr_cleanup(unsigned 
 	if (def != MTRR_TYPE_UNCACHABLE)
 		return 0;
 
+	/* get it and store it aside */
+	memset(range_state, 0, sizeof(range_state));
+	for (i = 0; i < num_var_ranges; i++) {
+		mtrr_if->get(i, &base, &size, &type);
+		range_state[i].base_pfn = base;
+		range_state[i].size_pfn = size;
+		range_state[i].type = type;
+	}
+
 	/* check entries number */
 	memset(num, 0, sizeof(num));
 	for (i = 0; i < num_var_ranges; i++) {
-		mtrr_if->get(i, &base, &size, &type);
+		type = range_state[i].type;
+		size = range_state[i].size_pfn;
 		if (type >= MTRR_NUM_TYPES)
 			continue;
 		if (!size)
@@ -1064,13 +1189,141 @@ static int __init mtrr_cleanup(unsigned 
 		extra_remove_base = 1 << (32 - PAGE_SHIFT);
 		extra_remove_size = (mtrr_tom2>>PAGE_SHIFT) - extra_remove_base;
 	}
-	nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base, extra_remove_size);
+	nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base,
+					  extra_remove_size);
+	range_sums = sum_ranges(range, nr_range);
+	printk(KERN_INFO "total RAM coverred: %ldM\n",
+			 range_sums >> (20 - PAGE_SHIFT));
+
+	if (mtrr_chunk_size && mtrr_gran_size) {
+		int num_reg;
+
+		/* convert ranges to var ranges state */
+		num_reg = x86_setup_var_mtrrs(range, nr_range, mtrr_chunk_size,
+						 mtrr_gran_size);
+
+		/* we got new setting in range_state, check it */
+		memset(range_new, 0, sizeof(range_new));
+		nr_range_new = x86_get_mtrr_mem_range(range_new, 0,
+					 extra_remove_base, extra_remove_size);
+		range_sums_new = sum_ranges(range_new, nr_range_new);
+
+		i = 0;
+		result[i].chunk_sizek = mtrr_chunk_size >> 10;
+		result[i].gran_sizek = mtrr_gran_size >> 10;
+		result[i].num_reg = num_reg;
+		if (range_sums < range_sums_new) {
+			result[i].lose_cover_sizek = (range_sums_new - range_sums) << (PAGE_SHIFT - 10);
+			result[i].bad = 1;
+		} else
+			result[i].lose_cover_sizek = (range_sums - range_sums_new) << (PAGE_SHIFT - 10);
+
+		printk(KERN_INFO " %sgran_size: %ldM  \tchunk_size: %ldM  \t",
+			 result[i].bad?" BAD ":"", result[i].gran_sizek >> 10,
+			 result[i].chunk_sizek >> 10);
+		printk(KERN_CONT "num_reg: %d  \tlose cover RAM: %s%ldM \n",
+			 result[i].num_reg, result[i].bad?"-":"",
+			 result[i].lose_cover_sizek >> 10);
+		if (result[i].bad) {
+			printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, mtrr not changed\n");
+			return 0;
+		}
 
-	/* convert ranges to var ranges state */
-	x86_setup_var_mtrrs(range, nr_range, address_bits);
+		set_var_mtrr_all(address_bits);
+		return 1;
+	}
 
-	return 1;
+	i = 0;
+	min_loss_pfn = -1UL;
+	index = -1;
+	memset(result, 0, sizeof(result));
+	if (nr_mtrr_spare_reg >= num_var_ranges)
+		nr_mtrr_spare_reg = num_var_ranges - 1;
+	for (gran_size = (1ULL<<20); gran_size < (1ULL<<32); gran_size <<= 1) {
+		for (chunk_size = gran_size; chunk_size < (1ULL<<32);
+		     chunk_size <<= 1) {
+			int num_reg;
+
+			printk(KERN_INFO "\ngran_size: %lldM   chunk_size_size: %lldM\n", gran_size >> 20, chunk_size >> 20);
+			if (i >= NUM_RESULT)
+				continue;
+
+			/* convert ranges to var ranges state */
+			num_reg = x86_setup_var_mtrrs(range, nr_range,
+							 chunk_size, gran_size);
+
+			/* we got new setting in range_state, check it */
+			memset(range_new, 0, sizeof(range_new));
+			nr_range_new = x86_get_mtrr_mem_range(range_new, 0,
+					 extra_remove_base, extra_remove_size);
+			range_sums_new = sum_ranges(range_new, nr_range_new);
+
+			result[i].chunk_sizek = chunk_size >> 10;
+			result[i].gran_sizek = gran_size >> 10;
+			result[i].num_reg = num_reg;
+			if (range_sums < range_sums_new) {
+				result[i].lose_cover_sizek = (range_sums_new - range_sums) << (PAGE_SHIFT - 10);
+				result[i].bad = 1;
+			} else
+				result[i].lose_cover_sizek = (range_sums - range_sums_new) << (PAGE_SHIFT - 10);
+
+			/* need to leave 2 entries for pci device for WB ?*/
+			if ((num_reg < num_var_ranges + 1 - nr_mtrr_spare_reg) && !result[i].bad) {
+				if (range_sums - range_sums_new < min_loss_pfn) {
+					min_loss_pfn = range_sums - range_sums_new;
+					index = i;
+					if (!min_loss_pfn)
+						break;
+				}
+			}
+			i++;
+		}
+		if (!min_loss_pfn)
+			break;
+	}
+
+	if (index != -1 && !min_loss_pfn) {
+		printk(KERN_INFO "Found optimal setting for mtrr clean up\n");
+		i = index;
+		printk(KERN_INFO "gran_size: %ldM  \tchunk_size: %ldM  \t",
+				result[i].gran_sizek >> 10,
+				result[i].chunk_sizek >> 10);
+		printk(KERN_CONT "num_reg: %d  \tlose cover RAM: %ldM \n",
+				result[i].num_reg,
+				result[i].lose_cover_sizek >> 10);
+		/* convert ranges to var ranges state */
+		chunk_size = result[index].chunk_sizek;
+		chunk_size <<= 10;
+		gran_size = result[index].gran_sizek;
+		gran_size <<= 10;
+		x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size);
+		set_var_mtrr_all(address_bits);
+
+		return 1;
+	}
+
+	for (i = 0; i < NUM_RESULT; i++) {
+		printk(KERN_INFO "%sgran_size: %ldM  \tchunk_size: %ldM  \t",
+			 result[i].bad?"*BAD* ":" ", result[i].gran_sizek >> 10,
+			 result[i].chunk_sizek >> 10);
+		printk(KERN_CONT "num_reg: %d  \tlose cover RAM: %s%ldM \n",
+			 result[i].num_reg, result[i].bad?"-":"",
+			 result[i].lose_cover_sizek >> 10);
+	}
+
+	printk(KERN_INFO "mtrr_cleanup: can not find optimal value\n");
+	printk(KERN_INFO "please specify mtrr_gran_size/mtrr_chunk_size\n");
+
+	return 0;
 }
+#else
+static int __init mtrr_cleanup(unsigned address_bits)
+{
+	return 0;
+}
+#endif
+
+static int __initdata changed_by_mtrr_cleanup;
 
 static int disable_mtrr_trim;
 
@@ -1111,7 +1364,8 @@ int __init amd_special_default_mtrr(void
 	return 0;
 }
 
-static u64 __init real_trim_memory(unsigned long start_pfn, unsigned long limit_pfn)
+static u64 __init real_trim_memory(unsigned long start_pfn,
+				   unsigned long limit_pfn)
 {
 	u64 trim_start, trim_size;
 	trim_start = start_pfn;
@@ -1138,9 +1392,8 @@ int __init mtrr_trim_uncached_memory(uns
 {
 	unsigned long i, base, size, highest_pfn = 0, def, dummy;
 	mtrr_type type;
-	struct res_range range[RANGE_NUM];
 	int nr_range;
-	u64 total_real_trim_size;
+	u64 total_trim_size;
 
 	/* extra one for all 0 */
 	int num[MTRR_NUM_TYPES + 1];
@@ -1155,11 +1408,22 @@ int __init mtrr_trim_uncached_memory(uns
 	if (def != MTRR_TYPE_UNCACHABLE)
 		return 0;
 
-	/* Find highest cached pfn */
+	/* get it and store it aside */
+	memset(range_state, 0, sizeof(range_state));
 	for (i = 0; i < num_var_ranges; i++) {
 		mtrr_if->get(i, &base, &size, &type);
+		range_state[i].base_pfn = base;
+		range_state[i].size_pfn = size;
+		range_state[i].type = type;
+	}
+
+	/* Find highest cached pfn */
+	for (i = 0; i < num_var_ranges; i++) {
+		type = range_state[i].type;
 		if (type != MTRR_TYPE_WRBACK)
 			continue;
+		base = range_state[i].base_pfn;
+		size = range_state[i].size_pfn;
 		if (highest_pfn < base + size)
 			highest_pfn = base + size;
 	}
@@ -1177,9 +1441,10 @@ int __init mtrr_trim_uncached_memory(uns
 	/* check entries number */
 	memset(num, 0, sizeof(num));
 	for (i = 0; i < num_var_ranges; i++) {
-		mtrr_if->get(i, &base, &size, &type);
+		type = range_state[i].type;
 		if (type >= MTRR_NUM_TYPES)
 			continue;
+		size = range_state[i].size_pfn;
 		if (!size)
 			type = MTRR_NUM_TYPES;
 		num[type]++;
@@ -1205,26 +1470,28 @@ int __init mtrr_trim_uncached_memory(uns
 	}
 	nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0);
 
-	total_real_trim_size = 0;
+	total_trim_size = 0;
 	/* check the head */
 	if (range[0].start)
-		total_real_trim_size += real_trim_memory(0, range[0].start);
+		total_trim_size += real_trim_memory(0, range[0].start);
 	/* check the holes */
 	for (i = 0; i < nr_range - 1; i++) {
 		if (range[i].end + 1 < range[i+1].start)
-			total_real_trim_size += real_trim_memory(range[i].end + 1, range[i+1].start);
+			total_trim_size += real_trim_memory(range[i].end + 1,
+							    range[i+1].start);
 	}
 	/* check the top */
 	i = nr_range - 1;
 	if (range[i].end + 1 < end_pfn)
-		total_real_trim_size += real_trim_memory(range[i].end + 1, end_pfn);
+		total_trim_size += real_trim_memory(range[i].end + 1,
+							 end_pfn);
 
-	if (total_real_trim_size) {
+	if (total_trim_size) {
 		printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover"
 			" all of memory, losing %lluMB of RAM.\n",
-			total_real_trim_size >> 20);
+			total_trim_size >> 20);
 
-		if (enable_mtrr_cleanup < 1)
+		if (!changed_by_mtrr_cleanup)
 			WARN_ON(1);
 
 		printk(KERN_INFO "update e820 for mtrr\n");
@@ -1314,8 +1581,10 @@ void __init mtrr_bp_init(void)
 		if (use_intel()) {
 			get_mtrr_state();
 
-			if (mtrr_cleanup(phys_addr))
+			if (mtrr_cleanup(phys_addr)) {
+				changed_by_mtrr_cleanup = 1;
 				mtrr_if->set_all();
+			}
 
 		}
 	}
@@ -1355,7 +1624,7 @@ static int __init mtrr_init_finialize(vo
 	if (!mtrr_if)
 		return 0;
 	if (use_intel()) {
-		if (enable_mtrr_cleanup < 1)
+		if (!changed_by_mtrr_cleanup)
 			mtrr_state_warn();
 	} else {
 		/* The CPUs haven't MTRR and seem to not support SMP. They have
Index: linux-2.6/Documentation/kernel-parameters.txt
===================================================================
--- linux-2.6.orig/Documentation/kernel-parameters.txt
+++ linux-2.6/Documentation/kernel-parameters.txt
@@ -613,9 +613,17 @@ and is between 256 and 4096 characters. 
 			that could hold holes aka. UC entries.
 
 	mtrr_gran_size=nn[KMG] [X86]
-			used for mtrr cleanup. It is granity of mtrr block.
+			used for mtrr cleanup. It is granularity of mtrr block.
+			default is 1.
 			Big value could prevent small alignment use up MTRRs.
 
+	mtrr_spare_reg_nr=n [X86]
+			Format: <integer>
+			range: 0,7 : spare reg number
+			default : 1
+			used for mtrr cleanup. It is spare mtrr entries number.
+			set to 2 or more if your graphical card need more.
+
 	disable_mtrr_trim [X86, Intel and AMD only]
 			By default the kernel will trim any uncacheable
 			memory out of your available memory pool based on
Index: linux-2.6/arch/x86/Kconfig
===================================================================
--- linux-2.6.orig/arch/x86/Kconfig
+++ linux-2.6/arch/x86/Kconfig
@@ -1099,6 +1099,15 @@ config MTRR_SANITIZER_ENABLE_DEFAULT
 	help
 	  Enable mtrr cleanup default value
 
+config MTRR_SANITIZER_SPARE_REG_NR_DEFAULT
+	int "MTRR cleanup spare reg num (0-7)"
+	range 0 7
+	default "1"
+	depends on MTRR_SANITIZER
+	help
+	  mtrr cleanup spare entries default, it can be changed via
+	  mtrr_spare_reg_nr=
+
 config X86_PAT
 	bool
 	prompt "x86 PAT support"

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH] x86: mtrr cleanup for converting continuous to discrete - auto detect
  2008-05-01  8:00               ` [PATCH] x86: mtrr cleanup for converting continuous to discrete - auto detect Yinghai Lu
@ 2008-05-01 11:45                 ` Gabriel C
  2008-05-02  0:06                   ` Yinghai Lu
  2008-05-01 12:09                 ` Mika Fischer
                                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 89+ messages in thread
From: Gabriel C @ 2008-05-01 11:45 UTC (permalink / raw)
  To: yhlu.kernel
  Cc: Andrew Morton, Ingo Molnar, H. Peter Anvin, Thomas Gleixner,
	Mika Fischer, linux-kernel@vger.kernel.org

Yinghai Lu wrote:
> loop mtrr chunk_size and gran_size from 1M to 2G to find out optimal value.
> 
> so user don't need to add mtrr_chunk_size and mtrr_gran_size, 
> 
> if optimal value is not found, print out all list to help select less optimal
> value.
> 
> add mtrr_spare_reg_nr= so user could set 2 instead of 1, if the card need more entries.

WOW :)

With this patch all is working fine , no RAM is lost , X is fast , 
so far everything else seems to work fine. \o/

I will test on 32bit tomorrow and stress the box later on today to be sure everything works fine.

There is my dmesg , meminfo , mtrr output with this patch on top x86-latest :

http://frugalware.org/~crazy/mtrr_x86-latest/

Yinghai thx for your work on that :)
 
> 
> Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

Tested-by: Gabriel Craciunescu <nix.or.die@googlemail.com>

> 
> Index: linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/main.c
> +++ linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
> @@ -610,28 +610,6 @@ static struct sysdev_driver mtrr_sysdev_
>  	.resume		= mtrr_restore,
>  };
>  
> -#ifdef CONFIG_MTRR_SANITIZER
> -static int enable_mtrr_cleanup __initdata = CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT;
> -#else
> -static int enable_mtrr_cleanup __initdata = -1;
> -#endif
> -
> -static int __init disable_mtrr_cleanup_setup(char *str)
> -{
> -	if (enable_mtrr_cleanup != -1)
> -		enable_mtrr_cleanup = 0;
> -	return 0;
> -}
> -early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);
> -
> -static int __init enable_mtrr_cleanup_setup(char *str)
> -{
> -	if (enable_mtrr_cleanup != -1)
> -		enable_mtrr_cleanup = 1;
> -	return 0;
> -}
> -early_param("enble_mtrr_cleanup", enable_mtrr_cleanup_setup);
> -
>  /* should be related to MTRR_VAR_RANGES nums */
>  #define RANGE_NUM 256
>  
> @@ -702,13 +680,15 @@ subtract_range(struct res_range *range, 
>  			continue;
>  		}
>  
> -		if (start <= range[j].start && end < range[j].end && range[j].start < end + 1) {
> +		if (start <= range[j].start && end < range[j].end &&
> +		    range[j].start < end + 1) {
>  			range[j].start = end + 1;
>  			continue;
>  		}
>  
>  
> -		if (start > range[j].start && end >= range[j].end && range[j].end > start - 1) {
> +		if (start > range[j].start && end >= range[j].end &&
> +		    range[j].end > start - 1) {
>  			range[j].end = start - 1;
>  			continue;
>  		}
> @@ -743,18 +723,119 @@ static int __init cmp_range(const void *
>  	return start1 - start2;
>  }
>  
> +struct var_mtrr_range_state {
> +	unsigned long base_pfn;
> +	unsigned long size_pfn;
> +	mtrr_type type;
> +};
> +
> +struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
> +
> +static int __init
> +x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
> +		       unsigned long extra_remove_base,
> +		       unsigned long extra_remove_size)
> +{
> +	unsigned long i, base, size;
> +	mtrr_type type;
> +
> +	for (i = 0; i < num_var_ranges; i++) {
> +		type = range_state[i].type;
> +		if (type != MTRR_TYPE_WRBACK)
> +			continue;
> +		base = range_state[i].base_pfn;
> +		size = range_state[i].size_pfn;
> +		nr_range = add_range_with_merge(range, nr_range, base,
> +						base + size - 1);
> +	}
> +	printk(KERN_DEBUG "After WB checking\n");
> +	for (i = 0; i < nr_range; i++)
> +		printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
> +				 range[i].start, range[i].end + 1);
> +
> +	/* take out UC ranges */
> +	for (i = 0; i < num_var_ranges; i++) {
> +		type = range_state[i].type;
> +		if (type != MTRR_TYPE_UNCACHABLE)
> +			continue;
> +		size = range_state[i].size_pfn;
> +		if (!size)
> +			continue;
> +		base = range_state[i].base_pfn;
> +		subtract_range(range, base, base + size - 1);
> +	}
> +	if (extra_remove_size)
> +		subtract_range(range, extra_remove_base,
> +				 extra_remove_base + extra_remove_size  - 1);
> +
> +	/* get new range num */
> +	nr_range = 0;
> +	for (i = 0; i < RANGE_NUM; i++) {
> +		if (!range[i].end)
> +			continue;
> +		nr_range++;
> +	}
> +	printk(KERN_DEBUG "After UC checking\n");
> +	for (i = 0; i < nr_range; i++)
> +		printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
> +			 range[i].start, range[i].end + 1);
> +
> +	/* sort the ranges */
> +	sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
> +	printk(KERN_DEBUG "After sorting\n");
> +	for (i = 0; i < nr_range; i++)
> +		printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
> +				 range[i].start, range[i].end + 1);
> +
> +	return nr_range;
> +}
> +
> +static struct res_range __initdata range[RANGE_NUM];
> +
> +#ifdef CONFIG_MTRR_SANITIZER
> +
> +static unsigned long __init sum_ranges(struct res_range *range, int nr_range)
> +{
> +	unsigned long sum;
> +	int i;
> +
> +	sum = 0;
> +	for (i = 0; i < nr_range; i++)
> +		sum += range[i].end + 1 - range[i].start;
> +
> +	return sum;
> +}
> +
> +static int enable_mtrr_cleanup __initdata =
> +				 CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT;
> +
> +static int __init disable_mtrr_cleanup_setup(char *str)
> +{
> +	if (enable_mtrr_cleanup != -1)
> +		enable_mtrr_cleanup = 0;
> +	return 0;
> +}
> +early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);
> +
> +static int __init enable_mtrr_cleanup_setup(char *str)
> +{
> +	if (enable_mtrr_cleanup != -1)
> +		enable_mtrr_cleanup = 1;
> +	return 0;
> +}
> +early_param("enble_mtrr_cleanup", enable_mtrr_cleanup_setup);
> +
>  struct var_mtrr_state {
>  	unsigned long	range_startk;
>  	unsigned long	range_sizek;
>  	unsigned long	chunk_sizek;
>  	unsigned long	gran_sizek;
>  	unsigned int	reg;
> -	unsigned int	address_bits;
>  };
>  
>  static void __init
>  set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
> -		unsigned char type, unsigned address_bits)
> +		unsigned char type, unsigned int address_bits)
>  {
>  	u32 base_lo, base_hi, mask_lo, mask_hi;
>  	u64 base, mask;
> @@ -781,10 +862,34 @@ set_var_mtrr(unsigned int reg, unsigned 
>  	fill_mtrr_var_range(reg, base_lo, base_hi, mask_lo, mask_hi);
>  }
>  
> +static void __init
> +save_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
> +		unsigned char type)
> +{
> +	range_state[reg].base_pfn = basek >> (PAGE_SHIFT - 10);
> +	range_state[reg].size_pfn = sizek >> (PAGE_SHIFT - 10);
> +	range_state[reg].type = type;
> +}
> +
> +static void __init
> +set_var_mtrr_all(unsigned int address_bits)
> +{
> +	unsigned long basek, sizek;
> +	unsigned char type;
> +	unsigned int reg;
> +
> +	for (reg = 0; reg < num_var_ranges; reg++) {
> +		basek = range_state[reg].base_pfn << (PAGE_SHIFT - 10);
> +		sizek = range_state[reg].size_pfn << (PAGE_SHIFT - 10);
> +		type = range_state[reg].type;
> +
> +		set_var_mtrr(reg, basek, sizek, type, address_bits);
> +	}
> +}
> +
>  static unsigned int __init
>  range_to_mtrr(unsigned int reg, unsigned long range_startk,
> -	      unsigned long range_sizek, unsigned char type,
> -	      unsigned address_bits)
> +	      unsigned long range_sizek, unsigned char type)
>  {
>  	if (!range_sizek || (reg >= num_var_ranges))
>  		return reg;
> @@ -803,12 +908,12 @@ range_to_mtrr(unsigned int reg, unsigned
>  			align = max_align;
>  
>  		sizek = 1 << align;
> -		printk(KERN_INFO "Setting variable MTRR %d, base: %ldMB, range: %ldMB, type %s\n",
> +		printk(KERN_DEBUG "Setting variable MTRR %d, base: %ldMB, range: %ldMB, type %s\n",
>  			reg, range_startk >> 10, sizek >> 10,
>  			(type == MTRR_TYPE_UNCACHABLE)?"UC":
>  			    ((type == MTRR_TYPE_WRBACK)?"WB":"Other")
>  			);
> -		set_var_mtrr(reg++, range_startk, sizek, type, address_bits);
> +		save_var_mtrr(reg++, range_startk, sizek, type);
>  		range_startk += sizek;
>  		range_sizek -= sizek;
>  		if (reg >= num_var_ranges)
> @@ -817,10 +922,12 @@ range_to_mtrr(unsigned int reg, unsigned
>  	return reg;
>  }
>  
> -static void __init
> -range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek)
> +static unsigned __init
> +range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek,
> +			unsigned long sizek)
>  {
>  	unsigned long hole_basek, hole_sizek;
> +	unsigned long second_basek, second_sizek;
>  	unsigned long range0_basek, range0_sizek;
>  	unsigned long range_basek, range_sizek;
>  	unsigned long chunk_sizek;
> @@ -828,64 +935,86 @@ range_to_mtrr_with_hole(struct var_mtrr_
>  
>  	hole_basek = 0;
>  	hole_sizek = 0;
> +	second_basek = 0;
> +	second_sizek = 0;
>  	chunk_sizek = state->chunk_sizek;
>  	gran_sizek = state->gran_sizek;
>  
>  	/* align with gran size, prevent small block used up MTRRs */
>  	range_basek = ALIGN(state->range_startk, gran_sizek);
>  	if ((range_basek > basek) && basek)
> -		return;
> -	range_sizek = ALIGN(state->range_sizek - (range_basek - state->range_startk), gran_sizek);
> +		return second_sizek;
> +	state->range_sizek -= (range_basek - state->range_startk);
> +	range_sizek = ALIGN(state->range_sizek, gran_sizek);
>  
> -	while (range_basek + range_sizek > (state->range_startk + state->range_sizek)) {
> +	while (range_sizek > state->range_sizek) {
>  		range_sizek -= gran_sizek;
>  		if (!range_sizek)
> -			return;
> +			return second_sizek;
>  	}
> -	state->range_startk = range_basek;
>  	state->range_sizek = range_sizek;
>  
>  	/* try to append some small hole */
>  	range0_basek = state->range_startk;
>  	range0_sizek = ALIGN(state->range_sizek, chunk_sizek);
>  	if (range0_sizek == state->range_sizek) {
> -			printk(KERN_INFO "rangeX: %016lx - %016lx\n", range0_basek<<10, (range0_basek + state->range_sizek)<<10);
> -			state->reg = range_to_mtrr(state->reg, range0_basek,
> -				state->range_sizek, MTRR_TYPE_WRBACK, state->address_bits);
> -		return;
> -	} else if (basek) {
> -	    while (range0_basek + range0_sizek - chunk_sizek > basek) {
> +		printk(KERN_DEBUG "rangeX: %016lx - %016lx\n", range0_basek<<10,
> +				(range0_basek + state->range_sizek)<<10);
> +		state->reg = range_to_mtrr(state->reg, range0_basek,
> +				state->range_sizek, MTRR_TYPE_WRBACK);
> +		return second_sizek;
> +	}
> +
> +	range0_sizek -= chunk_sizek;
> +	if (range0_sizek && sizek) {
> +	    while (range0_basek + range0_sizek > (basek + sizek)) {
>  		range0_sizek -= chunk_sizek;
>  		if (!range0_sizek)
>  			break;
>  	    }
>  	}
>  
> +	if (range0_sizek) {
> +		printk(KERN_DEBUG "range0: %016lx - %016lx\n", range0_basek<<10,
> +				(range0_basek + range0_sizek)<<10);
> +		state->reg = range_to_mtrr(state->reg, range0_basek,
> +				range0_sizek, MTRR_TYPE_WRBACK);
>  
> -	if (range0_sizek > chunk_sizek)
> -		range0_sizek -= chunk_sizek;
> -	printk(KERN_INFO "range0: %016lx - %016lx\n", range0_basek<<10, (range0_basek + range0_sizek)<<10);
> -	state->reg = range_to_mtrr(state->reg, range0_basek,
> -			range0_sizek, MTRR_TYPE_WRBACK, state->address_bits);
> +	}
>  
>  	range_basek = range0_basek + range0_sizek;
>  	range_sizek = chunk_sizek;
>  
> -	if ((range_sizek - (state->range_sizek - range0_sizek) < (chunk_sizek >> 1)) &&
> -	    (range_basek + range_sizek <= basek)) {
> -		hole_sizek = range_sizek - (state->range_sizek - range0_sizek);
> +	if (range_basek + range_sizek > basek &&
> +	    range_basek + range_sizek <= (basek + sizek)) {
> +		second_basek = basek;
> +		second_sizek = range_basek + range_sizek - basek;
> +	}
> +
> +	if (range_sizek - (state->range_sizek - range0_sizek) - second_sizek
> +	     < (chunk_sizek >> 1)) {
> +		hole_sizek = range_sizek - (state->range_sizek - range0_sizek)
> +				 - second_sizek;
>  		hole_basek = range_basek + range_sizek - hole_sizek;
> -	} else
> +	} else {
>  		range_sizek = state->range_sizek - range0_sizek;
> +		second_basek = 0;
> +		second_sizek = 0;
> +	}
>  
> -	printk(KERN_INFO "range: %016lx - %016lx\n", range_basek<<10, (range_basek + range_sizek)<<10);
> -	state->reg = range_to_mtrr(state->reg, range_basek,
> -			range_sizek, MTRR_TYPE_WRBACK, state->address_bits);
> +	printk(KERN_DEBUG "range: %016lx - %016lx\n", range_basek<<10,
> +			 (range_basek + range_sizek)<<10);
> +	state->reg = range_to_mtrr(state->reg, range_basek, range_sizek,
> +					 MTRR_TYPE_WRBACK);
>  	if (hole_sizek) {
> -		printk(KERN_INFO "hole: %016lx - %016lx\n", hole_basek<<10, (hole_basek + hole_sizek)<<10);
> -		state->reg = range_to_mtrr(state->reg, hole_basek,
> -				hole_sizek, MTRR_TYPE_UNCACHABLE, state->address_bits);
> +		printk(KERN_DEBUG "hole: %016lx - %016lx\n", hole_basek<<10,
> +				 (hole_basek + hole_sizek)<<10);
> +		state->reg = range_to_mtrr(state->reg, hole_basek, hole_sizek,
> +						 MTRR_TYPE_UNCACHABLE);
> +
>  	}
> +
> +	return second_sizek;
>  }
>  
>  static void __init
> @@ -893,6 +1022,7 @@ set_var_mtrr_range(struct var_mtrr_state
>  		   unsigned long size_pfn)
>  {
>  	unsigned long basek, sizek;
> +	unsigned long second_sizek = 0;
>  
>  	if (state->reg >= num_var_ranges)
>  		return;
> @@ -901,21 +1031,19 @@ set_var_mtrr_range(struct var_mtrr_state
>  	sizek = size_pfn << (PAGE_SHIFT - 10);
>  
>  	/* See if I can merge with the last range */
> -	if ((basek <= 1024) || (state->range_startk + state->range_sizek == basek)) {
> +	if ((basek <= 1024) ||
> +	    (state->range_startk + state->range_sizek == basek)) {
>  		unsigned long endk = basek + sizek;
>  		state->range_sizek = endk - state->range_startk;
>  		return;
>  	}
>  	/* Write the range mtrrs */
> -	if (state->range_sizek != 0) {
> -		range_to_mtrr_with_hole(state, basek);
> +	if (state->range_sizek != 0)
> +		second_sizek = range_to_mtrr_with_hole(state, basek, sizek);
>  
> -		state->range_startk = 0;
> -		state->range_sizek = 0;
> -	}
>  	/* Allocate an msr */
> -	state->range_startk = basek;
> -	state->range_sizek  = sizek;
> +	state->range_startk = basek + second_sizek;
> +	state->range_sizek  = sizek - second_sizek;
>  }
>  
>  /* mininum size of mtrr block that can take hole */
> @@ -931,7 +1059,7 @@ static int __init parse_mtrr_chunk_size_
>  early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt);
>  
>  /* granity of mtrr of block */
> -static u64 mtrr_gran_size __initdata = (1ULL<<20);
> +static u64 mtrr_gran_size __initdata;
>  
>  static int __init parse_mtrr_gran_size_opt(char *p)
>  {
> @@ -942,91 +1070,78 @@ static int __init parse_mtrr_gran_size_o
>  }
>  early_param("mtrr_gran_size", parse_mtrr_gran_size_opt);
>  
> -static void __init
> +static int nr_mtrr_spare_reg __initdata =
> +				 CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT;
> +
> +static int __init parse_mtrr_spare_reg(char *arg)
> +{
> +	if (arg)
> +		nr_mtrr_spare_reg = simple_strtoul(arg, NULL, 0);
> +	return 0;
> +}
> +
> +early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg);
> +
> +static int __init
>  x86_setup_var_mtrrs(struct res_range *range, int nr_range,
> -		    unsigned address_bits)
> +		    u64 chunk_size, u64 gran_size)
>  {
>  	struct var_mtrr_state var_state;
>  	int i;
> +	int num_reg;
>  
>  	var_state.range_startk	= 0;
>  	var_state.range_sizek	= 0;
>  	var_state.reg		= 0;
> -	var_state.address_bits	= address_bits;
> -	var_state.chunk_sizek	= mtrr_chunk_size >> 10;
> -	var_state.gran_sizek	= mtrr_gran_size >> 10;
> +	var_state.chunk_sizek	= chunk_size >> 10;
> +	var_state.gran_sizek	= gran_size >> 10;
> +
> +	memset(range_state, 0, sizeof(range_state));
>  
>  	/* Write the range etc */
>  	for (i = 0; i < nr_range; i++)
> -		set_var_mtrr_range(&var_state, range[i].start, range[i].end - range[i].start + 1);
> +		set_var_mtrr_range(&var_state, range[i].start,
> +				   range[i].end - range[i].start + 1);
>  
>  	/* Write the last range */
> -	range_to_mtrr_with_hole(&var_state, 0);
> -	printk(KERN_INFO "DONE variable MTRRs\n");
> +	if (var_state.range_sizek != 0)
> +		range_to_mtrr_with_hole(&var_state, 0, 0);
> +	printk(KERN_DEBUG "DONE variable MTRRs\n");
> +
> +	num_reg = var_state.reg;
>  	/* Clear out the extra MTRR's */
>  	while (var_state.reg < num_var_ranges) {
> -		set_var_mtrr(var_state.reg, 0, 0, 0, var_state.address_bits);
> +		save_var_mtrr(var_state.reg, 0, 0, 0);
>  		var_state.reg++;
>  	}
> -}
> -
> -static int __init
> -x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
> -		       unsigned long extra_remove_base,
> -		       unsigned long extra_remove_size)
> -{
> -	unsigned long i, base, size;
> -	mtrr_type type;
> -
> -	for (i = 0; i < num_var_ranges; i++) {
> -		mtrr_if->get(i, &base, &size, &type);
> -		if (type != MTRR_TYPE_WRBACK)
> -			continue;
> -		nr_range = add_range_with_merge(range, nr_range, base, base + size - 1);
> -	}
> -	printk(KERN_INFO "After WB checking\n");
> -	for (i = 0; i < nr_range; i++)
> -		printk(KERN_INFO "MTRR MAP PFN: %016lx - %016lx\n", range[i].start, range[i].end + 1);
>  
> -	/* take out UC ranges */
> -	for (i = 0; i < num_var_ranges; i++) {
> -		mtrr_if->get(i, &base, &size, &type);
> -		if (type != MTRR_TYPE_UNCACHABLE)
> -			continue;
> -		if (!size)
> -			continue;
> -		subtract_range(range, base, base + size - 1);
> -	}
> -	if (extra_remove_size)
> -		subtract_range(range, extra_remove_base,  extra_remove_base + extra_remove_size  - 1);
> +	return num_reg;
> +}
>  
> -	/* get new range num */
> -	nr_range = 0;
> -	for (i = 0; i < RANGE_NUM; i++) {
> -		if (!range[i].end)
> -			continue;
> -		nr_range++;
> -	}
> -	printk(KERN_INFO "After UC checking\n");
> -	for (i = 0; i < nr_range; i++)
> -		printk(KERN_INFO "MTRR MAP PFN: %016lx - %016lx\n", range[i].start, range[i].end + 1);
> +struct mtrr_cleanup_result {
> +	unsigned long gran_sizek;
> +	unsigned long chunk_sizek;
> +	unsigned long lose_cover_sizek;
> +	unsigned int num_reg;
> +	int bad;
> +};
>  
> -	/* sort the ranges */
> -	sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
> -	printk(KERN_INFO "After sorting\n");
> -	for (i = 0; i < nr_range; i++)
> -		printk(KERN_INFO "MTRR MAP PFN: %016lx - %016lx\n", range[i].start, range[i].end + 1);
> +/* 2G, 1G, ..., 1M, so need 12 items and half maxtrix need 13*6 */
> +#define NUM_RESULT 78
>  
> -	return nr_range;
> -}
> +static struct mtrr_cleanup_result __initdata result[NUM_RESULT];
> +static struct res_range __initdata range_new[RANGE_NUM];
>  
>  static int __init mtrr_cleanup(unsigned address_bits)
>  {
>  	unsigned long extra_remove_base, extra_remove_size;
>  	unsigned long i, base, size, def, dummy;
> -	struct res_range range[RANGE_NUM];
>  	mtrr_type type;
> -	int nr_range;
> +	int nr_range, nr_range_new;
> +	u64 chunk_size, gran_size;
> +	unsigned long range_sums, range_sums_new;
> +	long min_loss_pfn;
> +	int index;
>  
>  	/* extra one for all 0 */
>  	int num[MTRR_NUM_TYPES + 1];
> @@ -1038,10 +1153,20 @@ static int __init mtrr_cleanup(unsigned 
>  	if (def != MTRR_TYPE_UNCACHABLE)
>  		return 0;
>  
> +	/* get it and store it aside */
> +	memset(range_state, 0, sizeof(range_state));
> +	for (i = 0; i < num_var_ranges; i++) {
> +		mtrr_if->get(i, &base, &size, &type);
> +		range_state[i].base_pfn = base;
> +		range_state[i].size_pfn = size;
> +		range_state[i].type = type;
> +	}
> +
>  	/* check entries number */
>  	memset(num, 0, sizeof(num));
>  	for (i = 0; i < num_var_ranges; i++) {
> -		mtrr_if->get(i, &base, &size, &type);
> +		type = range_state[i].type;
> +		size = range_state[i].size_pfn;
>  		if (type >= MTRR_NUM_TYPES)
>  			continue;
>  		if (!size)
> @@ -1064,13 +1189,141 @@ static int __init mtrr_cleanup(unsigned 
>  		extra_remove_base = 1 << (32 - PAGE_SHIFT);
>  		extra_remove_size = (mtrr_tom2>>PAGE_SHIFT) - extra_remove_base;
>  	}
> -	nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base, extra_remove_size);
> +	nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base,
> +					  extra_remove_size);
> +	range_sums = sum_ranges(range, nr_range);
> +	printk(KERN_INFO "total RAM coverred: %ldM\n",
> +			 range_sums >> (20 - PAGE_SHIFT));
> +
> +	if (mtrr_chunk_size && mtrr_gran_size) {
> +		int num_reg;
> +
> +		/* convert ranges to var ranges state */
> +		num_reg = x86_setup_var_mtrrs(range, nr_range, mtrr_chunk_size,
> +						 mtrr_gran_size);
> +
> +		/* we got new setting in range_state, check it */
> +		memset(range_new, 0, sizeof(range_new));
> +		nr_range_new = x86_get_mtrr_mem_range(range_new, 0,
> +					 extra_remove_base, extra_remove_size);
> +		range_sums_new = sum_ranges(range_new, nr_range_new);
> +
> +		i = 0;
> +		result[i].chunk_sizek = mtrr_chunk_size >> 10;
> +		result[i].gran_sizek = mtrr_gran_size >> 10;
> +		result[i].num_reg = num_reg;
> +		if (range_sums < range_sums_new) {
> +			result[i].lose_cover_sizek = (range_sums_new - range_sums) << (PAGE_SHIFT - 10);
> +			result[i].bad = 1;
> +		} else
> +			result[i].lose_cover_sizek = (range_sums - range_sums_new) << (PAGE_SHIFT - 10);
> +
> +		printk(KERN_INFO " %sgran_size: %ldM  \tchunk_size: %ldM  \t",
> +			 result[i].bad?" BAD ":"", result[i].gran_sizek >> 10,
> +			 result[i].chunk_sizek >> 10);
> +		printk(KERN_CONT "num_reg: %d  \tlose cover RAM: %s%ldM \n",
> +			 result[i].num_reg, result[i].bad?"-":"",
> +			 result[i].lose_cover_sizek >> 10);
> +		if (result[i].bad) {
> +			printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, mtrr not changed\n");
> +			return 0;
> +		}
>  
> -	/* convert ranges to var ranges state */
> -	x86_setup_var_mtrrs(range, nr_range, address_bits);
> +		set_var_mtrr_all(address_bits);
> +		return 1;
> +	}
>  
> -	return 1;
> +	i = 0;
> +	min_loss_pfn = -1UL;
> +	index = -1;
> +	memset(result, 0, sizeof(result));
> +	if (nr_mtrr_spare_reg >= num_var_ranges)
> +		nr_mtrr_spare_reg = num_var_ranges - 1;
> +	for (gran_size = (1ULL<<20); gran_size < (1ULL<<32); gran_size <<= 1) {
> +		for (chunk_size = gran_size; chunk_size < (1ULL<<32);
> +		     chunk_size <<= 1) {
> +			int num_reg;
> +
> +			printk(KERN_INFO "\ngran_size: %lldM   chunk_size_size: %lldM\n", gran_size >> 20, chunk_size >> 20);
> +			if (i >= NUM_RESULT)
> +				continue;
> +
> +			/* convert ranges to var ranges state */
> +			num_reg = x86_setup_var_mtrrs(range, nr_range,
> +							 chunk_size, gran_size);
> +
> +			/* we got new setting in range_state, check it */
> +			memset(range_new, 0, sizeof(range_new));
> +			nr_range_new = x86_get_mtrr_mem_range(range_new, 0,
> +					 extra_remove_base, extra_remove_size);
> +			range_sums_new = sum_ranges(range_new, nr_range_new);
> +
> +			result[i].chunk_sizek = chunk_size >> 10;
> +			result[i].gran_sizek = gran_size >> 10;
> +			result[i].num_reg = num_reg;
> +			if (range_sums < range_sums_new) {
> +				result[i].lose_cover_sizek = (range_sums_new - range_sums) << (PAGE_SHIFT - 10);
> +				result[i].bad = 1;
> +			} else
> +				result[i].lose_cover_sizek = (range_sums - range_sums_new) << (PAGE_SHIFT - 10);
> +
> +			/* need to leave 2 entries for pci device for WB ?*/
> +			if ((num_reg < num_var_ranges + 1 - nr_mtrr_spare_reg) && !result[i].bad) {
> +				if (range_sums - range_sums_new < min_loss_pfn) {
> +					min_loss_pfn = range_sums - range_sums_new;
> +					index = i;
> +					if (!min_loss_pfn)
> +						break;
> +				}
> +			}
> +			i++;
> +		}
> +		if (!min_loss_pfn)
> +			break;
> +	}
> +
> +	if (index != -1 && !min_loss_pfn) {
> +		printk(KERN_INFO "Found optimal setting for mtrr clean up\n");
> +		i = index;
> +		printk(KERN_INFO "gran_size: %ldM  \tchunk_size: %ldM  \t",
> +				result[i].gran_sizek >> 10,
> +				result[i].chunk_sizek >> 10);
> +		printk(KERN_CONT "num_reg: %d  \tlose cover RAM: %ldM \n",
> +				result[i].num_reg,
> +				result[i].lose_cover_sizek >> 10);
> +		/* convert ranges to var ranges state */
> +		chunk_size = result[index].chunk_sizek;
> +		chunk_size <<= 10;
> +		gran_size = result[index].gran_sizek;
> +		gran_size <<= 10;
> +		x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size);
> +		set_var_mtrr_all(address_bits);
> +
> +		return 1;
> +	}
> +
> +	for (i = 0; i < NUM_RESULT; i++) {
> +		printk(KERN_INFO "%sgran_size: %ldM  \tchunk_size: %ldM  \t",
> +			 result[i].bad?"*BAD* ":" ", result[i].gran_sizek >> 10,
> +			 result[i].chunk_sizek >> 10);
> +		printk(KERN_CONT "num_reg: %d  \tlose cover RAM: %s%ldM \n",
> +			 result[i].num_reg, result[i].bad?"-":"",
> +			 result[i].lose_cover_sizek >> 10);
> +	}
> +
> +	printk(KERN_INFO "mtrr_cleanup: can not find optimal value\n");
> +	printk(KERN_INFO "please specify mtrr_gran_size/mtrr_chunk_size\n");
> +
> +	return 0;
>  }
> +#else
> +static int __init mtrr_cleanup(unsigned address_bits)
> +{
> +	return 0;
> +}
> +#endif
> +
> +static int __initdata changed_by_mtrr_cleanup;
>  
>  static int disable_mtrr_trim;
>  
> @@ -1111,7 +1364,8 @@ int __init amd_special_default_mtrr(void
>  	return 0;
>  }
>  
> -static u64 __init real_trim_memory(unsigned long start_pfn, unsigned long limit_pfn)
> +static u64 __init real_trim_memory(unsigned long start_pfn,
> +				   unsigned long limit_pfn)
>  {
>  	u64 trim_start, trim_size;
>  	trim_start = start_pfn;
> @@ -1138,9 +1392,8 @@ int __init mtrr_trim_uncached_memory(uns
>  {
>  	unsigned long i, base, size, highest_pfn = 0, def, dummy;
>  	mtrr_type type;
> -	struct res_range range[RANGE_NUM];
>  	int nr_range;
> -	u64 total_real_trim_size;
> +	u64 total_trim_size;
>  
>  	/* extra one for all 0 */
>  	int num[MTRR_NUM_TYPES + 1];
> @@ -1155,11 +1408,22 @@ int __init mtrr_trim_uncached_memory(uns
>  	if (def != MTRR_TYPE_UNCACHABLE)
>  		return 0;
>  
> -	/* Find highest cached pfn */
> +	/* get it and store it aside */
> +	memset(range_state, 0, sizeof(range_state));
>  	for (i = 0; i < num_var_ranges; i++) {
>  		mtrr_if->get(i, &base, &size, &type);
> +		range_state[i].base_pfn = base;
> +		range_state[i].size_pfn = size;
> +		range_state[i].type = type;
> +	}
> +
> +	/* Find highest cached pfn */
> +	for (i = 0; i < num_var_ranges; i++) {
> +		type = range_state[i].type;
>  		if (type != MTRR_TYPE_WRBACK)
>  			continue;
> +		base = range_state[i].base_pfn;
> +		size = range_state[i].size_pfn;
>  		if (highest_pfn < base + size)
>  			highest_pfn = base + size;
>  	}
> @@ -1177,9 +1441,10 @@ int __init mtrr_trim_uncached_memory(uns
>  	/* check entries number */
>  	memset(num, 0, sizeof(num));
>  	for (i = 0; i < num_var_ranges; i++) {
> -		mtrr_if->get(i, &base, &size, &type);
> +		type = range_state[i].type;
>  		if (type >= MTRR_NUM_TYPES)
>  			continue;
> +		size = range_state[i].size_pfn;
>  		if (!size)
>  			type = MTRR_NUM_TYPES;
>  		num[type]++;
> @@ -1205,26 +1470,28 @@ int __init mtrr_trim_uncached_memory(uns
>  	}
>  	nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0);
>  
> -	total_real_trim_size = 0;
> +	total_trim_size = 0;
>  	/* check the head */
>  	if (range[0].start)
> -		total_real_trim_size += real_trim_memory(0, range[0].start);
> +		total_trim_size += real_trim_memory(0, range[0].start);
>  	/* check the holes */
>  	for (i = 0; i < nr_range - 1; i++) {
>  		if (range[i].end + 1 < range[i+1].start)
> -			total_real_trim_size += real_trim_memory(range[i].end + 1, range[i+1].start);
> +			total_trim_size += real_trim_memory(range[i].end + 1,
> +							    range[i+1].start);
>  	}
>  	/* check the top */
>  	i = nr_range - 1;
>  	if (range[i].end + 1 < end_pfn)
> -		total_real_trim_size += real_trim_memory(range[i].end + 1, end_pfn);
> +		total_trim_size += real_trim_memory(range[i].end + 1,
> +							 end_pfn);
>  
> -	if (total_real_trim_size) {
> +	if (total_trim_size) {
>  		printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover"
>  			" all of memory, losing %lluMB of RAM.\n",
> -			total_real_trim_size >> 20);
> +			total_trim_size >> 20);
>  
> -		if (enable_mtrr_cleanup < 1)
> +		if (!changed_by_mtrr_cleanup)
>  			WARN_ON(1);
>  
>  		printk(KERN_INFO "update e820 for mtrr\n");
> @@ -1314,8 +1581,10 @@ void __init mtrr_bp_init(void)
>  		if (use_intel()) {
>  			get_mtrr_state();
>  
> -			if (mtrr_cleanup(phys_addr))
> +			if (mtrr_cleanup(phys_addr)) {
> +				changed_by_mtrr_cleanup = 1;
>  				mtrr_if->set_all();
> +			}
>  
>  		}
>  	}
> @@ -1355,7 +1624,7 @@ static int __init mtrr_init_finialize(vo
>  	if (!mtrr_if)
>  		return 0;
>  	if (use_intel()) {
> -		if (enable_mtrr_cleanup < 1)
> +		if (!changed_by_mtrr_cleanup)
>  			mtrr_state_warn();
>  	} else {
>  		/* The CPUs haven't MTRR and seem to not support SMP. They have
> Index: linux-2.6/Documentation/kernel-parameters.txt
> ===================================================================
> --- linux-2.6.orig/Documentation/kernel-parameters.txt
> +++ linux-2.6/Documentation/kernel-parameters.txt
> @@ -613,9 +613,17 @@ and is between 256 and 4096 characters. 
>  			that could hold holes aka. UC entries.
>  
>  	mtrr_gran_size=nn[KMG] [X86]
> -			used for mtrr cleanup. It is granity of mtrr block.
> +			used for mtrr cleanup. It is granularity of mtrr block.
> +			default is 1.
>  			Big value could prevent small alignment use up MTRRs.
>  
> +	mtrr_spare_reg_nr=n [X86]
> +			Format: <integer>
> +			range: 0,7 : spare reg number
> +			default : 1
> +			used for mtrr cleanup. It is spare mtrr entries number.
> +			set to 2 or more if your graphical card need more.
> +
>  	disable_mtrr_trim [X86, Intel and AMD only]
>  			By default the kernel will trim any uncacheable
>  			memory out of your available memory pool based on
> Index: linux-2.6/arch/x86/Kconfig
> ===================================================================
> --- linux-2.6.orig/arch/x86/Kconfig
> +++ linux-2.6/arch/x86/Kconfig
> @@ -1099,6 +1099,15 @@ config MTRR_SANITIZER_ENABLE_DEFAULT
>  	help
>  	  Enable mtrr cleanup default value
>  
> +config MTRR_SANITIZER_SPARE_REG_NR_DEFAULT
> +	int "MTRR cleanup spare reg num (0-7)"
> +	range 0 7
> +	default "1"
> +	depends on MTRR_SANITIZER
> +	help
> +	  mtrr cleanup spare entries default, it can be changed via
> +	  mtrr_spare_reg_nr=
> +
>  config X86_PAT
>  	bool
>  	prompt "x86 PAT support"
> 

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH] x86: mtrr cleanup for converting continuous to discrete - auto detect
  2008-05-01  8:00               ` [PATCH] x86: mtrr cleanup for converting continuous to discrete - auto detect Yinghai Lu
  2008-05-01 11:45                 ` Gabriel C
@ 2008-05-01 12:09                 ` Mika Fischer
  2008-05-01 16:35                   ` Yinghai Lu
  2008-05-01 15:09                 ` Randy Dunlap
  2008-05-01 18:57                 ` [PATCH] x86: mtrr cleanup for converting continuous to discrete - auto detect v2 Yinghai Lu
  3 siblings, 1 reply; 89+ messages in thread
From: Mika Fischer @ 2008-05-01 12:09 UTC (permalink / raw)
  To: yhlu.kernel
  Cc: Andrew Morton, Ingo Molnar, H. Peter Anvin, Thomas Gleixner,
	Gabriel C, linux-kernel@vger.kernel.org

[-- Attachment #1: Type: text/plain, Size: 1305 bytes --]

Yinghai Lu schrieb:
> loop mtrr chunk_size and gran_size from 1M to 2G to find out optimal value.
> 
> so user don't need to add mtrr_chunk_size and mtrr_gran_size, 
> 
> if optimal value is not found, print out all list to help select less optimal
> value.
> 
> add mtrr_spare_reg_nr= so user could set 2 instead of 1, if the card need more entries.

On my system x86-latest + this patch and using no boot options gives me
this /proc/mtrr:
reg00: base=0x00000000 (   0MB), size=2048MB: write-back, count=1
reg01: base=0x80000000 (2048MB), size= 512MB: write-back, count=1
reg02: base=0xa0000000 (2560MB), size= 256MB: write-back, count=1
reg03: base=0xb0000000 (2816MB), size= 256MB: write-back, count=1
reg04: base=0xbf700000 (3063MB), size=   1MB: uncachable, count=1
reg05: base=0xbf800000 (3064MB), size=   8MB: uncachable, count=1
reg06: base=0x100000000 (4096MB), size=1024MB: write-back, count=1

Which is OK. It could probably collapse reg01-reg03 into one but that's
a minor issue (for me at least, there are probably cases where
collapsing them might save the user from having to specify the
mtrr_spare_reg_nr boot option).

In any case it works fine here. dmesg is attached.

Let me know if there's anything else I should test!

And thanks a lot for all your work, Yinghai! :)

Regards,
 Mika

[-- Attachment #2: dmesg-2.6.25-x86-latest.git-06791-gd269f9f-dirty.txt --]
[-- Type: text/plain, Size: 31404 bytes --]

Linux version 2.6.25-x86-latest.git-06791-gd269f9f-dirty (mika@arthur) (gcc version 4.2.3 (Ubuntu 4.2.3-2ubuntu7)) #6 SMP Thu May 1 13:54:27 CEST 2008
BIOS-provided physical RAM map:
 BIOS-e820: 0000000000000000 - 000000000009f800 (usable)
 BIOS-e820: 000000000009f800 - 00000000000a0000 (reserved)
 BIOS-e820: 00000000000dc000 - 0000000000100000 (reserved)
 BIOS-e820: 0000000000100000 - 00000000bf6d0000 (usable)
 BIOS-e820: 00000000bf6d0000 - 00000000bf6e3000 (ACPI NVS)
 BIOS-e820: 00000000bf6e3000 - 00000000c0000000 (reserved)
 BIOS-e820: 00000000e0000000 - 00000000f0000000 (reserved)
 BIOS-e820: 00000000fec00000 - 00000000fec10000 (reserved)
 BIOS-e820: 00000000fed00000 - 00000000fed00400 (reserved)
 BIOS-e820: 00000000fed14000 - 00000000fed1a000 (reserved)
 BIOS-e820: 00000000fed1c000 - 00000000fed90000 (reserved)
 BIOS-e820: 00000000fee00000 - 00000000fee01000 (reserved)
 BIOS-e820: 00000000ff000000 - 0000000100000000 (reserved)
 BIOS-e820: 0000000100000000 - 0000000140000000 (usable)
x86 PAT enabled: cpu 0, old 0x7040600070406, new 0x7010600070106
After WB checking
MTRR MAP PFN: 0000000000000000 - 0000000000140000
After UC checking
MTRR MAP PFN: 0000000000000000 - 00000000000bf700
MTRR MAP PFN: 0000000000100000 - 0000000000140000
After sorting
MTRR MAP PFN: 0000000000000000 - 00000000000bf700
MTRR MAP PFN: 0000000000100000 - 0000000000140000
total RAM coverred: 4087M

gran_size: 1M   chunk_size_size: 1M
rangeX: 0000000000000000 - 00000000bf700000
Setting variable MTRR 0, base: 0MB, range: 2048MB, type WB
Setting variable MTRR 1, base: 2048MB, range: 512MB, type WB
Setting variable MTRR 2, base: 2560MB, range: 256MB, type WB
Setting variable MTRR 3, base: 2816MB, range: 128MB, type WB
Setting variable MTRR 4, base: 2944MB, range: 64MB, type WB
Setting variable MTRR 5, base: 3008MB, range: 32MB, type WB
Setting variable MTRR 6, base: 3040MB, range: 16MB, type WB
Setting variable MTRR 7, base: 3056MB, range: 4MB, type WB
rangeX: 0000000000000000 - 0000000040000000
DONE variable MTRRs
After WB checking
MTRR MAP PFN: 0000000000000000 - 00000000000bf400
After UC checking
MTRR MAP PFN: 0000000000000000 - 00000000000bf400
After sorting
MTRR MAP PFN: 0000000000000000 - 00000000000bf400

gran_size: 1M   chunk_size_size: 2M
range0: 0000000000000000 - 00000000bf600000
Setting variable MTRR 0, base: 0MB, range: 2048MB, type WB
Setting variable MTRR 1, base: 2048MB, range: 512MB, type WB
Setting variable MTRR 2, base: 2560MB, range: 256MB, type WB
Setting variable MTRR 3, base: 2816MB, range: 128MB, type WB
Setting variable MTRR 4, base: 2944MB, range: 64MB, type WB
Setting variable MTRR 5, base: 3008MB, range: 32MB, type WB
Setting variable MTRR 6, base: 3040MB, range: 16MB, type WB
Setting variable MTRR 7, base: 3056MB, range: 4MB, type WB
range: 00000000bf600000 - 00000000bf700000
rangeX: 0000000000000000 - 0000000040000000
DONE variable MTRRs
After WB checking
MTRR MAP PFN: 0000000000000000 - 00000000000bf400
After UC checking
MTRR MAP PFN: 0000000000000000 - 00000000000bf400
After sorting
MTRR MAP PFN: 0000000000000000 - 00000000000bf400

gran_size: 1M   chunk_size_size: 4M
range0: 0000000000000000 - 00000000bf400000
Setting variable MTRR 0, base: 0MB, range: 2048MB, type WB
Setting variable MTRR 1, base: 2048MB, range: 512MB, type WB
Setting variable MTRR 2, base: 2560MB, range: 256MB, type WB
Setting variable MTRR 3, base: 2816MB, range: 128MB, type WB
Setting variable MTRR 4, base: 2944MB, range: 64MB, type WB
Setting variable MTRR 5, base: 3008MB, range: 32MB, type WB
Setting variable MTRR 6, base: 3040MB, range: 16MB, type WB
Setting variable MTRR 7, base: 3056MB, range: 4MB, type WB
range: 00000000bf400000 - 00000000bf800000
hole: 00000000bf700000 - 00000000bf800000
rangeX: 0000000000000000 - 0000000040000000
DONE variable MTRRs
After WB checking
MTRR MAP PFN: 0000000000000000 - 00000000000bf400
After UC checking
MTRR MAP PFN: 0000000000000000 - 00000000000bf400
After sorting
MTRR MAP PFN: 0000000000000000 - 00000000000bf400

gran_size: 1M   chunk_size_size: 8M
range0: 0000000000000000 - 00000000bf000000
Setting variable MTRR 0, base: 0MB, range: 2048MB, type WB
Setting variable MTRR 1, base: 2048MB, range: 512MB, type WB
Setting variable MTRR 2, base: 2560MB, range: 256MB, type WB
Setting variable MTRR 3, base: 2816MB, range: 128MB, type WB
Setting variable MTRR 4, base: 2944MB, range: 64MB, type WB
Setting variable MTRR 5, base: 3008MB, range: 32MB, type WB
Setting variable MTRR 6, base: 3040MB, range: 16MB, type WB
range: 00000000bf000000 - 00000000bf800000
Setting variable MTRR 7, base: 3056MB, range: 8MB, type WB
hole: 00000000bf700000 - 00000000bf800000
rangeX: 0000000000000000 - 0000000040000000
DONE variable MTRRs
After WB checking
MTRR MAP PFN: 0000000000000000 - 00000000000bf800
After UC checking
MTRR MAP PFN: 0000000000000000 - 00000000000bf800
After sorting
MTRR MAP PFN: 0000000000000000 - 00000000000bf800

gran_size: 1M   chunk_size_size: 16M
range0: 0000000000000000 - 00000000bf000000
Setting variable MTRR 0, base: 0MB, range: 2048MB, type WB
Setting variable MTRR 1, base: 2048MB, range: 512MB, type WB
Setting variable MTRR 2, base: 2560MB, range: 256MB, type WB
Setting variable MTRR 3, base: 2816MB, range: 128MB, type WB
Setting variable MTRR 4, base: 2944MB, range: 64MB, type WB
Setting variable MTRR 5, base: 3008MB, range: 32MB, type WB
Setting variable MTRR 6, base: 3040MB, range: 16MB, type WB
range: 00000000bf000000 - 00000000bf700000
Setting variable MTRR 7, base: 3056MB, range: 4MB, type WB
rangeX: 0000000000000000 - 0000000040000000
DONE variable MTRRs
After WB checking
MTRR MAP PFN: 0000000000000000 - 00000000000bf400
After UC checking
MTRR MAP PFN: 0000000000000000 - 00000000000bf400
After sorting
MTRR MAP PFN: 0000000000000000 - 00000000000bf400

gran_size: 1M   chunk_size_size: 32M
range0: 0000000000000000 - 00000000be000000
Setting variable MTRR 0, base: 0MB, range: 2048MB, type WB
Setting variable MTRR 1, base: 2048MB, range: 512MB, type WB
Setting variable MTRR 2, base: 2560MB, range: 256MB, type WB
Setting variable MTRR 3, base: 2816MB, range: 128MB, type WB
Setting variable MTRR 4, base: 2944MB, range: 64MB, type WB
Setting variable MTRR 5, base: 3008MB, range: 32MB, type WB
range: 00000000be000000 - 00000000c0000000
Setting variable MTRR 6, base: 3040MB, range: 32MB, type WB
hole: 00000000bf700000 - 00000000c0000000
Setting variable MTRR 7, base: 3063MB, range: 1MB, type UC
rangeX: 0000000000000000 - 0000000040000000
DONE variable MTRRs
After WB checking
MTRR MAP PFN: 0000000000000000 - 00000000000c0000
After UC checking
MTRR MAP PFN: 0000000000000000 - 00000000000bf700
MTRR MAP PFN: 00000000000bf800 - 00000000000c0000
After sorting
MTRR MAP PFN: 0000000000000000 - 00000000000bf700
MTRR MAP PFN: 00000000000bf800 - 00000000000c0000

gran_size: 1M   chunk_size_size: 64M
range0: 0000000000000000 - 00000000bc000000
Setting variable MTRR 0, base: 0MB, range: 2048MB, type WB
Setting variable MTRR 1, base: 2048MB, range: 512MB, type WB
Setting variable MTRR 2, base: 2560MB, range: 256MB, type WB
Setting variable MTRR 3, base: 2816MB, range: 128MB, type WB
Setting variable MTRR 4, base: 2944MB, range: 64MB, type WB
range: 00000000bc000000 - 00000000c0000000
Setting variable MTRR 5, base: 3008MB, range: 64MB, type WB
hole: 00000000bf700000 - 00000000c0000000
Setting variable MTRR 6, base: 3063MB, range: 1MB, type UC
Setting variable MTRR 7, base: 3064MB, range: 8MB, type UC
rangeX: 0000000000000000 - 0000000040000000
DONE variable MTRRs
After WB checking
MTRR MAP PFN: 0000000000000000 - 00000000000c0000
After UC checking
MTRR MAP PFN: 0000000000000000 - 00000000000bf700
After sorting
MTRR MAP PFN: 0000000000000000 - 00000000000bf700

gran_size: 1M   chunk_size_size: 128M
range0: 0000000000000000 - 00000000b8000000
Setting variable MTRR 0, base: 0MB, range: 2048MB, type WB
Setting variable MTRR 1, base: 2048MB, range: 512MB, type WB
Setting variable MTRR 2, base: 2560MB, range: 256MB, type WB
Setting variable MTRR 3, base: 2816MB, range: 128MB, type WB
range: 00000000b8000000 - 00000000c0000000
Setting variable MTRR 4, base: 2944MB, range: 128MB, type WB
hole: 00000000bf700000 - 00000000c0000000
Setting variable MTRR 5, base: 3063MB, range: 1MB, type UC
Setting variable MTRR 6, base: 3064MB, range: 8MB, type UC
rangeX: 0000000000000000 - 0000000040000000
Setting variable MTRR 7, base: 4096MB, range: 1024MB, type WB
DONE variable MTRRs
After WB checking
MTRR MAP PFN: 0000000000000000 - 00000000000c0000
MTRR MAP PFN: 0000000000100000 - 0000000000140000
After UC checking
MTRR MAP PFN: 0000000000000000 - 00000000000bf700
MTRR MAP PFN: 0000000000100000 - 0000000000140000
After sorting
MTRR MAP PFN: 0000000000000000 - 00000000000bf700
MTRR MAP PFN: 0000000000100000 - 0000000000140000

gran_size: 1M   chunk_size_size: 256M
range0: 0000000000000000 - 00000000b0000000
Setting variable MTRR 0, base: 0MB, range: 2048MB, type WB
Setting variable MTRR 1, base: 2048MB, range: 512MB, type WB
Setting variable MTRR 2, base: 2560MB, range: 256MB, type WB
range: 00000000b0000000 - 00000000c0000000
Setting variable MTRR 3, base: 2816MB, range: 256MB, type WB
hole: 00000000bf700000 - 00000000c0000000
Setting variable MTRR 4, base: 3063MB, range: 1MB, type UC
Setting variable MTRR 5, base: 3064MB, range: 8MB, type UC
rangeX: 0000000000000000 - 0000000040000000
Setting variable MTRR 6, base: 4096MB, range: 1024MB, type WB
DONE variable MTRRs
After WB checking
MTRR MAP PFN: 0000000000000000 - 00000000000c0000
MTRR MAP PFN: 0000000000100000 - 0000000000140000
After UC checking
MTRR MAP PFN: 0000000000000000 - 00000000000bf700
MTRR MAP PFN: 0000000000100000 - 0000000000140000
After sorting
MTRR MAP PFN: 0000000000000000 - 00000000000bf700
MTRR MAP PFN: 0000000000100000 - 0000000000140000
Found optimal setting for mtrr clean up
gran_size: 1M  	chunk_size: 256M  	num_reg: 7  	lose cover RAM: 0M 
range0: 0000000000000000 - 00000000b0000000
Setting variable MTRR 0, base: 0MB, range: 2048MB, type WB
Setting variable MTRR 1, base: 2048MB, range: 512MB, type WB
Setting variable MTRR 2, base: 2560MB, range: 256MB, type WB
range: 00000000b0000000 - 00000000c0000000
Setting variable MTRR 3, base: 2816MB, range: 256MB, type WB
hole: 00000000bf700000 - 00000000c0000000
Setting variable MTRR 4, base: 3063MB, range: 1MB, type UC
Setting variable MTRR 5, base: 3064MB, range: 8MB, type UC
rangeX: 0000000000000000 - 0000000040000000
Setting variable MTRR 6, base: 4096MB, range: 1024MB, type WB
DONE variable MTRRs
x86 PAT enabled: cpu 0, old 0x7010600070106, new 0x7010600070106
After WB checking
MTRR MAP PFN: 0000000000000000 - 00000000000c0000
MTRR MAP PFN: 0000000000100000 - 0000000000140000
After UC checking
MTRR MAP PFN: 0000000000000000 - 00000000000bf700
MTRR MAP PFN: 0000000000100000 - 0000000000140000
After sorting
MTRR MAP PFN: 0000000000000000 - 00000000000bf700
MTRR MAP PFN: 0000000000100000 - 0000000000140000
Warning only 4GB will be used.
Use a HIGHMEM64G enabled kernel.
3200MB HIGHMEM available.
896MB LOWMEM available.
found SMP MP-table at [c00f6e10] 000f6e10
Entering add_active_range(0, 0, 1048576) 0 entries of 256 used
Zone PFN ranges:
  DMA             0 ->     4096
  Normal       4096 ->   229376
  HighMem    229376 ->  1048576
Movable zone start PFN for each node
early_node_map[1] active PFN ranges
    0:        0 ->  1048576
On node 0 totalpages: 1048576
  DMA zone: 32 pages used for memmap
  DMA zone: 0 pages reserved
  DMA zone: 4064 pages, LIFO batch:0
  Normal zone: 1760 pages used for memmap
  Normal zone: 223520 pages, LIFO batch:31
  HighMem zone: 6400 pages used for memmap
  HighMem zone: 812800 pages, LIFO batch:31
  Movable zone: 0 pages used for memmap
DMI present.
Using APIC driver default
ACPI: RSDP 000F6DE0, 0024 (r2 PTLTD )
ACPI: XSDT BF6D7236, 00AC (r1 SECCSD LH43STAR  6040000  LTP        0)
ACPI: FACP BF6DFB37, 00F4 (r3 INTEL  CRESTLNE  6040000 ALAN        1)
ACPI: DSDT BF6D9266, 685D (r2 INTEL  CRESTLNE  6040000 INTL 20050624)
ACPI: FACS BF6E2FC0, 0040
ACPI: APIC BF6DFC2B, 0068 (r1 INTEL  CRESTLNE  6040000 LOHR       5A)
ACPI: HPET BF6DFC93, 0038 (r1 INTEL  CRESTLNE  6040000 LOHR       5A)
ACPI: MCFG BF6DFCCB, 003C (r1 INTEL  CRESTLNE  6040000 LOHR       5A)
ACPI: TCPA BF6DFD07, 0032 (r1 Intel   CRESTLN  6040000          5A52)
ACPI: TMOR BF6DFD39, 0026 (r1 PTLTD            6040000 PTL         3)
ACPI: APIC BF6DFD5F, 0068 (r1 PTLTD  	 APIC    6040000  LTP        0)
ACPI: HPET BF6DFDC7, 0038 (r1 PTLTD  HPETTBL   6040000  LTP        1)
ACPI: BOOT BF6DFDFF, 0028 (r1 PTLTD  $SBFTBL$  6040000  LTP        1)
ACPI: ASF! BF6DFE27, 0063 (r16   CETP     CETP  6040000 PTL         1)
ACPI: SLIC BF6DFE8A, 0176 (r1 SECCSD LH43STAR  6040000  LTP        0)
ACPI: SSDT BF6D8C17, 064F (r1 SataRe  SataPri     1000 INTL 20050624)
ACPI: SSDT BF6D8585, 0692 (r1 SataRe  SataSec     1000 INTL 20050624)
ACPI: SSDT BF6D854C, 0039 (r1 BrtRef  DD01BRT     1000 INTL 20050624)
ACPI: SSDT BF6D786E, 025F (r1  PmRef  Cpu0Tst     3000 INTL 20050624)
ACPI: SSDT BF6D77C8, 00A6 (r1  PmRef  Cpu1Tst     3000 INTL 20050624)
ACPI: SSDT BF6D72E2, 04E6 (r1  PmRef    CpuPm     3000 INTL 20050624)
ACPI: BIOS bug: multiple APIC/MADT found, using 0
ACPI: If "acpi_apic_instance=2" works better, notify linux-acpi@vger.kernel.org
ACPI: DMI detected: Samsung
ACPI: PM-Timer IO Port: 0x1008
ACPI: Local APIC address 0xfee00000
ACPI: LAPIC (acpi_id[0x00] lapic_id[0x00] enabled)
BIOS bug, APIC version is 0 for CPU#0! fixing up to 0x10. (tell your hw vendor)
ACPI: LAPIC (acpi_id[0x01] lapic_id[0x01] enabled)
BIOS bug, APIC version is 0 for CPU#0! fixing up to 0x10. (tell your hw vendor)
ACPI: LAPIC_NMI (acpi_id[0x00] high edge lint[0x1])
ACPI: LAPIC_NMI (acpi_id[0x01] high edge lint[0x1])
ACPI: IOAPIC (id[0x01] address[0xfec00000] gsi_base[0])
IOAPIC[0]: apic_id 1 already used, trying 2
IOAPIC[0]: apic_id 2, version 32, address 0xfec00000, GSI 0-23
ACPI: INT_SRC_OVR (bus 0 bus_irq 0 global_irq 2 dfl dfl)
ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 9 high level)
ACPI: IRQ0 used by override.
ACPI: IRQ2 used by override.
ACPI: IRQ9 used by override.
Enabling APIC mode:  Flat.  Using 1 I/O APICs
ACPI: HPET id: 0x8086a201 base: 0xfed00000
Using ACPI (MADT) for SMP configuration information
Allocating PCI resources starting at c2000000 (gap: c0000000:20000000)
SMP: Allowing 2 CPUs, 0 hotplug CPUs
PERCPU: Allocating 31232 bytes of per cpu data
NR_CPUS: 32, nr_cpu_ids: 2
Built 1 zonelists in Zone order, mobility grouping on.  Total pages: 1040384
Kernel command line: root=/dev/hda6 init=/bin/bash
mapped APIC to ffffb000 (fee00000)
mapped IOAPIC to ffffa000 (fec00000)
Enabling fast FPU save and restore... done.
Enabling unmasked SIMD FPU exception support... done.
Initializing CPU#0
PID hash table entries: 4096 (order: 12, 16384 bytes)
Extended CMOS year: 2000
Detected 1995.177 MHz processor.
Console: colour VGA+ 80x25
console [tty0] enabled
Dentry cache hash table entries: 131072 (order: 7, 524288 bytes)
Inode-cache hash table entries: 65536 (order: 6, 262144 bytes)
Memory: 3095596k/4194304k available (3079k kernel code, 39520k reserved, 1707k data, 244k init, 2218816k highmem)
virtual kernel memory layout:
    fixmap  : 0xffe18000 - 0xfffff000   (1948 kB)
    pkmap   : 0xff800000 - 0xffc00000   (4096 kB)
    vmalloc : 0xf8800000 - 0xff7fe000   ( 111 MB)
    lowmem  : 0xc0000000 - 0xf8000000   ( 896 MB)
      .init : 0xc05b3000 - 0xc05f0000   ( 244 kB)
      .data : 0xc0401f9f - 0xc05acf68   (1707 kB)
      .text : 0xc0100000 - 0xc0401f9f   (3079 kB)
Checking if this processor honours the WP bit even in supervisor mode...Ok.
CPA: page pool initialized 1 of 1 pages preallocated
SLUB: Genslabs=12, HWalign=64, Order=0-3, MinObjects=0, CPUs=2, Nodes=1
hpet clockevent registered
Calibrating delay using timer specific routine.. 3995.16 BogoMIPS (lpj=7990332)
Mount-cache hash table entries: 512
CPU: L1 I cache: 32K, L1 D cache: 32K
CPU: L2 cache: 2048K
CPU: Physical Processor ID: 0
CPU: Processor Core ID: 0
Intel machine check architecture supported.
Intel machine check reporting enabled on CPU#0.
using mwait in idle threads.
Checking 'hlt' instruction... OK.
ACPI: Core revision 20070126
Parsing all Control Methods:
Table [DSDT](id 0001) - 932 Objects with 77 Devices 247 Methods 37 Regions
Parsing all Control Methods:
Table [SSDT](id 0002) - 10 Objects with 3 Devices 4 Methods 0 Regions
Parsing all Control Methods:
Table [SSDT](id 0003) - 10 Objects with 3 Devices 4 Methods 0 Regions
Parsing all Control Methods:
Table [SSDT](id 0004) - 0 Objects with 0 Devices 0 Methods 0 Regions
Parsing all Control Methods:
Table [SSDT](id 0005) - 7 Objects with 0 Devices 3 Methods 0 Regions
Parsing all Control Methods:
Table [SSDT](id 0006) - 4 Objects with 0 Devices 3 Methods 0 Regions
Parsing all Control Methods:
Table [SSDT](id 0007) - 13 Objects with 0 Devices 4 Methods 0 Regions
 tbxface-0598 [02] tb_load_namespace     : ACPI Tables successfully acquired
evxfevnt-0091 [02] enable                : Transition to ACPI mode successful
ENABLING IO-APIC IRQs
..TIMER: vector=0x31 apic1=0 pin1=2 apic2=-1 pin2=-1
CPU0: Intel(R) Core(TM)2 Duo CPU     T7250  @ 2.00GHz stepping 0d
Booting processor 1/1 ip 6000
Initializing CPU#1
Calibrating delay using timer specific routine.. 3990.03 BogoMIPS (lpj=7980072)
CPU: L1 I cache: 32K, L1 D cache: 32K
CPU: L2 cache: 2048K
CPU: Physical Processor ID: 0
CPU: Processor Core ID: 1
Intel machine check architecture supported.
Intel machine check reporting enabled on CPU#1.
x86 PAT enabled: cpu 1, old 0x7010600070106, new 0x7010600070106
CPU1: Intel(R) Core(TM)2 Duo CPU     T7250  @ 2.00GHz stepping 0d
checking TSC synchronization [CPU#0 -> CPU#1]: passed.
Brought up 2 CPUs
Total of 2 processors activated (7985.20 BogoMIPS).
net_namespace: 332 bytes
NET: Registered protocol family 16
No dock devices found.
ACPI: bus type pci registered
PCI: MCFG configuration 0: base e0000000 segment 0 buses 0 - 255
PCI: MCFG area at e0000000 reserved in E820
PCI: Using MMCONFIG for extended config space
PCI: Using configuration type 1 for base access
Setting up standard PCI resources
evgpeblk-0956 [04] ev_create_gpe_block   : GPE 00 to 1F [_GPE] 4 regs on int 0x9
evgpeblk-1052 [03] ev_initialize_gpe_bloc: Found 3 Wake, Enabled 4 Runtime GPEs in this block
ACPI: EC: Look up EC in DSDT
Completing Region/Field/Buffer/Package initialization:........................................................................
Initialized 35/37 Regions 0/0 Fields 24/24 Buffers 13/21 Packages (985 nodes)
Initializing Device/Processor/Thermal objects by executing _INI methods:<5>ACPI: BIOS _OSI(Linux) query ignored via DMI
.......
Executed 7 _INI methods requiring 0 _STA executions (examined 89 objects)
ACPI: Interpreter enabled
ACPI: (supports S0 S3 S5)
ACPI: Using IOAPIC for interrupt routing
ACPI: EC: non-query interrupt received, switching to interrupt mode
ACPI: EC: GPE = 0x17, I/O: command/status = 0x66, data = 0x62
ACPI: EC: driver started in interrupt mode
ACPI: PCI Root Bridge [PCI0] (0000:00)
pci 0000:00:1f.0: quirk: region 1000-107f claimed by ICH6 ACPI/GPIO/TCO
pci 0000:00:1f.0: quirk: region 1180-11bf claimed by ICH6 GPIO
PCI: Transparent bridge - 0000:00:1e.0
bus 00 -> node 0
ACPI: PCI Interrupt Routing Table [\_SB_.PCI0._PRT]
ACPI: PCI Interrupt Routing Table [\_SB_.PCI0.RP01._PRT]
ACPI: PCI Interrupt Routing Table [\_SB_.PCI0.RP02._PRT]
ACPI: PCI Interrupt Routing Table [\_SB_.PCI0.PCIB._PRT]
ACPI: PCI Interrupt Link [LNKA] (IRQs 11) *10
ACPI: PCI Interrupt Link [LNKB] (IRQs *5)
ACPI: PCI Interrupt Link [LNKC] (IRQs *10)
ACPI: PCI Interrupt Link [LNKD] (IRQs *5)
ACPI: PCI Interrupt Link [LNKE] (IRQs *11)
ACPI: PCI Interrupt Link [LNKF] (IRQs *10)
ACPI: PCI Interrupt Link [LNKG] (IRQs *5)
ACPI: PCI Interrupt Link [LNKH] (IRQs *5)
ACPI: Power Resource [FN00] (off)
Linux Plug and Play Support v0.97 (c) Adam Belay
pnp: PnP ACPI init
ACPI: bus type pnp registered
pnp: PnP ACPI: found 10 devices
ACPI: ACPI bus type pnp unregistered
SCSI subsystem initialized
libata version 3.00 loaded.
usbcore: registered new interface driver usbfs
usbcore: registered new interface driver hub
usbcore: registered new device driver usb
PCI: Using ACPI for IRQ routing
hpet0: at MMIO 0xfed00000, IRQs 2, 8, 0
hpet0: 3 64-bit timers, 14318180 Hz
system 00:01: iomem range 0xfed1c000-0xfed1ffff could not be reserved
system 00:01: iomem range 0xfed14000-0xfed17fff could not be reserved
system 00:01: iomem range 0xfed18000-0xfed18fff could not be reserved
system 00:01: iomem range 0xfed19000-0xfed19fff could not be reserved
system 00:01: iomem range 0xe0000000-0xefffffff could not be reserved
system 00:01: iomem range 0xfed20000-0xfed3ffff could not be reserved
system 00:01: iomem range 0xfed40000-0xfed44fff could not be reserved
system 00:01: iomem range 0xfed45000-0xfed8ffff could not be reserved
system 00:04: iomem range 0xfed00000-0xfed003ff could not be reserved
system 00:06: ioport range 0x680-0x69f has been reserved
system 00:06: ioport range 0x6b0-0x6ff has been reserved
system 00:06: ioport range 0x800-0x80f has been reserved
system 00:06: ioport range 0x900-0x90f has been reserved
system 00:06: ioport range 0x1000-0x107f has been reserved
system 00:06: ioport range 0x1180-0x11bf has been reserved
system 00:06: ioport range 0x1640-0x164f has been reserved
system 00:06: ioport range 0xfe00-0xfe00 has been reserved
PCI: Bridge: 0000:00:1c.0
  IO window: disabled.
  MEM window: 0xf0300000-0xf03fffff
  PREFETCH window: disabled.
PCI: Bridge: 0000:00:1c.1
  IO window: 2000-2fff
  MEM window: 0xf0200000-0xf02fffff
  PREFETCH window: disabled.
PCI: Bus 5, cardbus bridge: 0000:04:09.0
  IO window: 0x00003000-0x000030ff
  IO window: 0x00003400-0x000034ff
  PREFETCH window: 0xc8000000-0xcbffffff
  MEM window: 0xcc000000-0xcfffffff
PCI: Bridge: 0000:00:1e.0
  IO window: 3000-3fff
  MEM window: 0xf0400000-0xf04fffff
  PREFETCH window: 0x00000000c2000000-0x00000000c5ffffff
ACPI: PCI Interrupt 0000:00:1c.0[A] -> GSI 17 (level, low) -> IRQ 17
PCI: Setting latency timer of device 0000:00:1c.0 to 64
ACPI: PCI Interrupt 0000:00:1c.1[B] -> GSI 16 (level, low) -> IRQ 16
PCI: Setting latency timer of device 0000:00:1c.1 to 64
PCI: Setting latency timer of device 0000:00:1e.0 to 64
ACPI: PCI Interrupt 0000:04:09.0[A] -> GSI 20 (level, low) -> IRQ 20
NET: Registered protocol family 2
IP route cache hash table entries: 32768 (order: 5, 131072 bytes)
TCP established hash table entries: 131072 (order: 8, 1048576 bytes)
TCP bind hash table entries: 65536 (order: 7, 524288 bytes)
TCP: Hash tables configured (established 131072 bind 65536)
TCP reno registered
NET: Registered protocol family 1
Simple Boot Flag at 0x36 set to 0x1
Machine check exception polling timer started.
IA-32 Microcode Update Driver: v1.14a <tigran@aivazian.fsnet.co.uk>
highmem bounce pool size: 64 pages
Total HugeTLB memory allocated, 0
Installing knfsd (copyright (C) 1996 okir@monad.swb.de).
msgmni has been set to 1714 for ipc namespace c0564fc8
io scheduler noop registered
io scheduler anticipatory registered
io scheduler deadline registered
io scheduler cfq registered (default)
pci 0000:00:02.0: Boot video device
Real Time Clock Driver v1.12ac
hpet_resources: 0xfed00000 is busy
Linux agpgart interface v0.103
agpgart: Detected an Intel 965GM Chipset.
agpgart: Detected 7676K stolen memory.
agpgart: AGP aperture is 256M @ 0xd0000000
Serial: 8250/16550 driver $Revision: 1.90 $ 4 ports, IRQ sharing disabled
Switched to high resolution mode on CPU 1
Switched to high resolution mode on CPU 0
floppy0: no floppy controllers found
brd: module loaded
loop: module loaded
Intel(R) PRO/1000 Network Driver - version 7.3.20-k2
Copyright (c) 1999-2006 Intel Corporation.
e1000e: Intel(R) PRO/1000 Network Driver - 0.2.1
e1000e: Copyright (c) 1999-2008 Intel Corporation.
e100: Intel(R) PRO/100 Network Driver, 3.5.23-k4-NAPI
e100: Copyright(c) 1999-2006 Intel Corporation
Uniform Multi-Platform E-IDE driver
ide: Assuming 33MHz system bus speed for PIO modes; override with idebus=xx
Probing IDE interface ide0...
hda: SAMSUNG HM160HI, ATA DISK drive
ide0 at 0x1f0-0x1f7,0x3f6 on irq 14
hda: max request size: 512KiB
hda: 312581808 sectors (160041 MB) w/8192KiB Cache, CHS=19457/255/63
hda: cache flushes supported
 hda: hda1 hda2 < hda5 hda6 hda7 >
3ware Storage Controller device driver for Linux v1.26.02.002.
Driver 'sd' needs updating - please use bus_type methods
Driver 'sr' needs updating - please use bus_type methods
ata_piix 0000:00:1f.2: version 2.12
ACPI: PCI Interrupt 0000:00:1f.2[B] -> GSI 19 (level, low) -> IRQ 19
ata_piix 0000:00:1f.2: MAP [ P0 P2 IDE IDE ]
PCI: Unable to reserve I/O region #1:8@1f0 for device 0000:00:1f.2
ata_piix 0000:00:1f.2: failed to request/iomap BARs for port 0 (errno=-16)
PCI: Unable to reserve I/O region #3:8@170 for device 0000:00:1f.2
ata_piix 0000:00:1f.2: failed to request/iomap BARs for port 1 (errno=-16)
ata_piix 0000:00:1f.2: no available native port
Fusion MPT base driver 3.04.06
Copyright (c) 1999-2007 LSI Corporation
Fusion MPT SPI Host driver 3.04.06
ACPI: PCI Interrupt 0000:04:09.1[B] -> GSI 21 (level, low) -> IRQ 21
ohci1394: fw-host0: OHCI-1394 1.0 (PCI): IRQ=[21]  MMIO=[f0401000-f04017ff]  Max Packet=[2048]  IR/IT contexts=[4/4]
ieee1394: raw1394: /dev/raw1394 device initialized
usbmon: debugfs is not available
ACPI: PCI Interrupt 0000:00:1a.7[C] -> GSI 18 (level, low) -> IRQ 18
PCI: Setting latency timer of device 0000:00:1a.7 to 64
ehci_hcd 0000:00:1a.7: EHCI Host Controller
ehci_hcd 0000:00:1a.7: new USB bus registered, assigned bus number 1
ehci_hcd 0000:00:1a.7: debug port 1
PCI: cache line size of 32 is not supported by device 0000:00:1a.7
ehci_hcd 0000:00:1a.7: irq 18, io mem 0xf0704000
ehci_hcd 0000:00:1a.7: USB 2.0 started, EHCI 1.00, driver 10 Dec 2004
usb usb1: configuration #1 chosen from 1 choice
hub 1-0:1.0: USB hub found
hub 1-0:1.0: 4 ports detected
ACPI: PCI Interrupt 0000:00:1d.7[A] -> GSI 23 (level, low) -> IRQ 23
PCI: Setting latency timer of device 0000:00:1d.7 to 64
ehci_hcd 0000:00:1d.7: EHCI Host Controller
ehci_hcd 0000:00:1d.7: new USB bus registered, assigned bus number 2
ehci_hcd 0000:00:1d.7: debug port 1
PCI: cache line size of 32 is not supported by device 0000:00:1d.7
ehci_hcd 0000:00:1d.7: irq 23, io mem 0xf0704400
ehci_hcd 0000:00:1d.7: USB 2.0 started, EHCI 1.00, driver 10 Dec 2004
usb usb2: configuration #1 chosen from 1 choice
hub 2-0:1.0: USB hub found
hub 2-0:1.0: 6 ports detected
ohci_hcd: 2006 August 04 USB 1.1 'Open' Host Controller (OHCI) Driver
USB Universal Host Controller Interface driver v3.0
ACPI: PCI Interrupt 0000:00:1a.0[A] -> GSI 16 (level, low) -> IRQ 16
PCI: Setting latency timer of device 0000:00:1a.0 to 64
uhci_hcd 0000:00:1a.0: UHCI Host Controller
uhci_hcd 0000:00:1a.0: new USB bus registered, assigned bus number 3
uhci_hcd 0000:00:1a.0: irq 16, io base 0x00001820
usb usb3: configuration #1 chosen from 1 choice
hub 3-0:1.0: USB hub found
hub 3-0:1.0: 2 ports detected
ACPI: PCI Interrupt 0000:00:1a.1[B] -> GSI 21 (level, low) -> IRQ 21
PCI: Setting latency timer of device 0000:00:1a.1 to 64
uhci_hcd 0000:00:1a.1: UHCI Host Controller
uhci_hcd 0000:00:1a.1: new USB bus registered, assigned bus number 4
uhci_hcd 0000:00:1a.1: irq 21, io base 0x00001840
usb usb4: configuration #1 chosen from 1 choice
hub 4-0:1.0: USB hub found
hub 4-0:1.0: 2 ports detected
usb 1-2: new high speed USB device using ehci_hcd and address 2
ACPI: PCI Interrupt 0000:00:1d.0[A] -> GSI 23 (level, low) -> IRQ 23
PCI: Setting latency timer of device 0000:00:1d.0 to 64
uhci_hcd 0000:00:1d.0: UHCI Host Controller
uhci_hcd 0000:00:1d.0: new USB bus registered, assigned bus number 5
uhci_hcd 0000:00:1d.0: irq 23, io base 0x00001860
usb usb5: configuration #1 chosen from 1 choice
hub 5-0:1.0: USB hub found
hub 5-0:1.0: 2 ports detected
usb 1-2: configuration #1 chosen from 1 choice
ACPI: PCI Interrupt 0000:00:1d.1[B] -> GSI 19 (level, low) -> IRQ 19
PCI: Setting latency timer of device 0000:00:1d.1 to 64
uhci_hcd 0000:00:1d.1: UHCI Host Controller
uhci_hcd 0000:00:1d.1: new USB bus registered, assigned bus number 6
uhci_hcd 0000:00:1d.1: irq 19, io base 0x00001880
usb usb6: configuration #1 chosen from 1 choice
hub 6-0:1.0: USB hub found
hub 6-0:1.0: 2 ports detected
ACPI: PCI Interrupt 0000:00:1d.2[C] -> GSI 18 (level, low) -> IRQ 18
PCI: Setting latency timer of device 0000:00:1d.2 to 64
uhci_hcd 0000:00:1d.2: UHCI Host Controller
uhci_hcd 0000:00:1d.2: new USB bus registered, assigned bus number 7
uhci_hcd 0000:00:1d.2: irq 18, io base 0x000018a0
usb usb7: configuration #1 chosen from 1 choice
hub 7-0:1.0: USB hub found
hub 7-0:1.0: 2 ports detected
usb 2-2: new high speed USB device using ehci_hcd and address 2
usb 2-2: configuration #1 chosen from 1 choice
hub 2-2:1.0: USB hub found
hub 2-2:1.0: 4 ports detected
hub 2-0:1.0: unable to enumerate USB device on port 5
ieee1394: Host added: ID:BUS[0-00:1023]  GUID[0000f0410107774d]
usb 7-1: new full speed USB device using uhci_hcd and address 2
usb 7-1: configuration #1 chosen from 1 choice
usb 2-2.2: new high speed USB device using ehci_hcd and address 4
usb 2-2.2: configuration #1 chosen from 1 choice
hub 2-2.2:1.0: USB hub found
hub 2-2.2:1.0: 4 ports detected
usb 2-2.2.1: new low speed USB device using ehci_hcd and address 5
usb 2-2.2.1: configuration #1 chosen from 1 choice
usb 2-2.2.2: new low speed USB device using ehci_hcd and address 6
usb 2-2.2.2: configuration #1 chosen from 1 choice
usbcore: registered new interface driver usblp
Initializing USB Mass Storage driver...
usbcore: registered new interface driver usb-storage
USB Mass Storage support registered.
PNP: PS/2 Controller [PNP0303:PS2K,PNP0f13:PS2M] at 0x60,0x64 irq 1,12
i8042.c: Detected active multiplexing controller, rev 1.1.
serio: i8042 KBD port at 0x60,0x64 irq 1
serio: i8042 AUX0 port at 0x60,0x64 irq 12
serio: i8042 AUX1 port at 0x60,0x64 irq 12
serio: i8042 AUX2 port at 0x60,0x64 irq 12
serio: i8042 AUX3 port at 0x60,0x64 irq 12
mice: PS/2 mouse device common for all mice
device-mapper: ioctl: 4.13.0-ioctl (2007-10-18) initialised: dm-devel@redhat.com
cpuidle: using governor ladder
cpuidle: using governor menu
input: HID 046a:0021 as /class/input/input0
input: USB HID v1.11 Keyboard [HID 046a:0021] on usb-0000:00:1d.7-2.2.1
input: HID 046a:0021 as /class/input/input1
input: USB HID v1.11 Device [HID 046a:0021] on usb-0000:00:1d.7-2.2.1
input: Logitech USB-PS/2 Optical Mouse as /class/input/input2
input: USB HID v1.10 Mouse [Logitech USB-PS/2 Optical Mouse] on usb-0000:00:1d.7-2.2.2
usbcore: registered new interface driver usbhid
usbhid: v2.6:USB HID core driver
oprofile: using NMI interrupt.
TCP cubic registered
NET: Registered protocol family 10
IPv6 over IPv4 tunneling driver
NET: Registered protocol family 17
RPC: Registered udp transport module.
RPC: Registered tcp transport module.
Using IPI No-Shortcut mode
Synaptics Touchpad, model: 1, fw: 6.2, id: 0x25a0b1, caps: 0xa04713/0x200000
input: SynPS/2 Synaptics TouchPad as /class/input/input3
input: AT Translated Set 2 keyboard as /class/input/input4
EXT3-fs: INFO: recovery required on readonly filesystem.
EXT3-fs: write access will be enabled during recovery.
kjournald starting.  Commit interval 5 seconds
EXT3-fs: recovery complete.
EXT3-fs: mounted filesystem with ordered data mode.
VFS: Mounted root (ext3 filesystem) readonly.
Freeing unused kernel memory: 244k freed
EXT3 FS on hda6, internal journal

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH] x86: mtrr cleanup for converting continuous to discrete - auto detect
  2008-05-01  8:00               ` [PATCH] x86: mtrr cleanup for converting continuous to discrete - auto detect Yinghai Lu
  2008-05-01 11:45                 ` Gabriel C
  2008-05-01 12:09                 ` Mika Fischer
@ 2008-05-01 15:09                 ` Randy Dunlap
  2008-05-01 16:38                   ` Yinghai Lu
  2008-05-01 18:57                 ` [PATCH] x86: mtrr cleanup for converting continuous to discrete - auto detect v2 Yinghai Lu
  3 siblings, 1 reply; 89+ messages in thread
From: Randy Dunlap @ 2008-05-01 15:09 UTC (permalink / raw)
  To: yhlu.kernel
  Cc: Yinghai Lu, Andrew Morton, Ingo Molnar, H. Peter Anvin,
	Thomas Gleixner, Gabriel C, Mika Fischer,
	linux-kernel@vger.kernel.org

On Thu, 1 May 2008 01:00:34 -0700 Yinghai Lu wrote:

> 
> loop mtrr chunk_size and gran_size from 1M to 2G to find out optimal value.

What size step (increment) is used for these loops?

> so user don't need to add mtrr_chunk_size and mtrr_gran_size, 
> 
> if optimal value is not found, print out all list to help select less optimal
> value.
> 
> add mtrr_spare_reg_nr= so user could set 2 instead of 1, if the card need more entries.
> 
> Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
> 

> Index: linux-2.6/Documentation/kernel-parameters.txt
> ===================================================================
> --- linux-2.6.orig/Documentation/kernel-parameters.txt
> +++ linux-2.6/Documentation/kernel-parameters.txt
> @@ -613,9 +613,17 @@ and is between 256 and 4096 characters. 
>  			that could hold holes aka. UC entries.
>  
>  	mtrr_gran_size=nn[KMG] [X86]
> -			used for mtrr cleanup. It is granity of mtrr block.
> +			used for mtrr cleanup. It is granularity of mtrr block.
> +			default is 1.

                        Default

>  			Big value could prevent small alignment use up MTRRs.

			Large value could prevent small alignment from
			using up MTRRs.

>  
> +	mtrr_spare_reg_nr=n [X86]
> +			Format: <integer>
> +			range: 0,7 : spare reg number
> +			default : 1
> +			used for mtrr cleanup. It is spare mtrr entries number.

			Used

> +			set to 2 or more if your graphical card need more.

			Set                                     needs more.

> +
>  	disable_mtrr_trim [X86, Intel and AMD only]
>  			By default the kernel will trim any uncacheable
>  			memory out of your available memory pool based on


---
~Randy

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH] x86: mtrr cleanup for converting continuous to discrete - auto detect
  2008-05-01 12:09                 ` Mika Fischer
@ 2008-05-01 16:35                   ` Yinghai Lu
  2008-05-01 16:59                     ` Mika Fischer
  0 siblings, 1 reply; 89+ messages in thread
From: Yinghai Lu @ 2008-05-01 16:35 UTC (permalink / raw)
  To: Mika Fischer
  Cc: Andrew Morton, Ingo Molnar, H. Peter Anvin, Thomas Gleixner,
	Gabriel C, linux-kernel@vger.kernel.org

On Thu, May 1, 2008 at 5:09 AM, Mika Fischer <mika.fischer@zoopnet.de> wrote:
> Yinghai Lu schrieb:
>
> > loop mtrr chunk_size and gran_size from 1M to 2G to find out optimal value.
>  >
>  > so user don't need to add mtrr_chunk_size and mtrr_gran_size,
>  >
>  > if optimal value is not found, print out all list to help select less optimal
>  > value.
>  >
>  > add mtrr_spare_reg_nr= so user could set 2 instead of 1, if the card need more entries.
>
>  On my system x86-latest + this patch and using no boot options gives me
>  this /proc/mtrr:
>  reg00: base=0x00000000 (   0MB), size=2048MB: write-back, count=1
>  reg01: base=0x80000000 (2048MB), size= 512MB: write-back, count=1
>  reg02: base=0xa0000000 (2560MB), size= 256MB: write-back, count=1
>  reg03: base=0xb0000000 (2816MB), size= 256MB: write-back, count=1
>  reg04: base=0xbf700000 (3063MB), size=   1MB: uncachable, count=1
>  reg05: base=0xbf800000 (3064MB), size=   8MB: uncachable, count=1
>  reg06: base=0x100000000 (4096MB), size=1024MB: write-back, count=1
>
>  Which is OK. It could probably collapse reg01-reg03 into one but that's
>  a minor issue (for me at least, there are probably cases where
>  collapsing them might save the user from having to specify the
>  mtrr_spare_reg_nr boot option).

yes. please try mtrr_spare_reg_nr=3 or etc.

YH

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH] x86: mtrr cleanup for converting continuous to discrete - auto detect
  2008-05-01 15:09                 ` Randy Dunlap
@ 2008-05-01 16:38                   ` Yinghai Lu
  0 siblings, 0 replies; 89+ messages in thread
From: Yinghai Lu @ 2008-05-01 16:38 UTC (permalink / raw)
  To: Randy Dunlap, Ingo Molnar
  Cc: Yinghai Lu, Andrew Morton, H. Peter Anvin, Thomas Gleixner,
	Gabriel C, Mika Fischer, linux-kernel@vger.kernel.org

On Thu, May 1, 2008 at 8:09 AM, Randy Dunlap <randy.dunlap@oracle.com> wrote:
> On Thu, 1 May 2008 01:00:34 -0700 Yinghai Lu wrote:
>
>  >
>  > loop mtrr chunk_size and gran_size from 1M to 2G to find out optimal value.
>
>  What size step (increment) is used for these loops?
>
>
>  > so user don't need to add mtrr_chunk_size and mtrr_gran_size,
>  >
>  > if optimal value is not found, print out all list to help select less optimal
>  > value.
>  >
>  > add mtrr_spare_reg_nr= so user could set 2 instead of 1, if the card need more entries.
>  >
>  > Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
>  >
>
>
> > Index: linux-2.6/Documentation/kernel-parameters.txt
>  > ===================================================================
>  > --- linux-2.6.orig/Documentation/kernel-parameters.txt
>  > +++ linux-2.6/Documentation/kernel-parameters.txt
>  > @@ -613,9 +613,17 @@ and is between 256 and 4096 characters.
>  >                       that could hold holes aka. UC entries.
>  >
>  >       mtrr_gran_size=nn[KMG] [X86]
>  > -                     used for mtrr cleanup. It is granity of mtrr block.
>  > +                     used for mtrr cleanup. It is granularity of mtrr block.
>  > +                     default is 1.
>
>                         Default
>
>
>  >                       Big value could prevent small alignment use up MTRRs.
>
>                         Large value could prevent small alignment from
>                         using up MTRRs.
>
>
>  >
>  > +     mtrr_spare_reg_nr=n [X86]
>  > +                     Format: <integer>
>  > +                     range: 0,7 : spare reg number
>  > +                     default : 1
>  > +                     used for mtrr cleanup. It is spare mtrr entries number.
>
>                         Used
>
>
>  > +                     set to 2 or more if your graphical card need more.
>
>                         Set                                     needs more.
>
>
>  > +
>  >       disable_mtrr_trim [X86, Intel and AMD only]
>  >                       By default the kernel will trim any uncacheable
>  >                       memory out of your available memory pool based on
>

Ingo, can you change that directly in the patch?
or need me send another updated patch?

YH

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH] x86: mtrr cleanup for converting continuous to discrete - auto detect
  2008-05-01 16:35                   ` Yinghai Lu
@ 2008-05-01 16:59                     ` Mika Fischer
  2008-05-01 17:40                       ` Yinghai Lu
  0 siblings, 1 reply; 89+ messages in thread
From: Mika Fischer @ 2008-05-01 16:59 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Andrew Morton, Ingo Molnar, H. Peter Anvin, Thomas Gleixner,
	Gabriel C, linux-kernel@vger.kernel.org

Yinghai Lu schrieb:
> On Thu, May 1, 2008 at 5:09 AM, Mika Fischer <mika.fischer@zoopnet.de> wrote:
>> Yinghai Lu schrieb:
>>
>>> loop mtrr chunk_size and gran_size from 1M to 2G to find out optimal value.
>>  >
>>  > so user don't need to add mtrr_chunk_size and mtrr_gran_size,
>>  >
>>  > if optimal value is not found, print out all list to help select less optimal
>>  > value.
>>  >
>>  > add mtrr_spare_reg_nr= so user could set 2 instead of 1, if the card need more entries.
>>
>>  On my system x86-latest + this patch and using no boot options gives me
>>  this /proc/mtrr:
>>  reg00: base=0x00000000 (   0MB), size=2048MB: write-back, count=1
>>  reg01: base=0x80000000 (2048MB), size= 512MB: write-back, count=1
>>  reg02: base=0xa0000000 (2560MB), size= 256MB: write-back, count=1
>>  reg03: base=0xb0000000 (2816MB), size= 256MB: write-back, count=1
>>  reg04: base=0xbf700000 (3063MB), size=   1MB: uncachable, count=1
>>  reg05: base=0xbf800000 (3064MB), size=   8MB: uncachable, count=1
>>  reg06: base=0x100000000 (4096MB), size=1024MB: write-back, count=1
>>
>>  Which is OK. It could probably collapse reg01-reg03 into one but that's
>>  a minor issue (for me at least, there are probably cases where
>>  collapsing them might save the user from having to specify the
>>  mtrr_spare_reg_nr boot option).
> 
> yes. please try mtrr_spare_reg_nr=3 or etc.

Sure this works. But that was my point exactly. It should be possible to
figure out the better configuration automatically so that I *don't* have
to specify mtrr_spare_reg_nr=3.

Or in other words: If there are multiple equivalent configurations that
don't lose any RAM(!), the one with the most free MTRR regs should be
preferred.

AFAICT you loop over the chunk size and stop when you have found a
configuration that leaves the number of free MTRR registers requested
(default 1).

This will almost always result in a configuration where you have
*exactly* the number of requested free regs available, even if a more
efficient configuration was possible.

What I'm suggesting is, that - in the case where no RAM is lost at this
point - the loop should continue to try and free up more registers, as
long as no RAM is lost.

I.e. even if in my case chunk_size=256M gives adequate results and
leaves me with 1 free reg, since I don't lose any RAM at this point the
loop should continue as long as I do not lose any RAM. That way it would
find the ideal chunk_size (1g) automatically.

But again, this is non-critical. But I think it might help a few users
who need more than 1 free reg, because they probably will have no idea
about the kernel option...

Regards,
 Mika

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH] x86: mtrr cleanup for converting continuous to discrete - auto detect
  2008-05-01 16:59                     ` Mika Fischer
@ 2008-05-01 17:40                       ` Yinghai Lu
  0 siblings, 0 replies; 89+ messages in thread
From: Yinghai Lu @ 2008-05-01 17:40 UTC (permalink / raw)
  To: Mika Fischer
  Cc: Andrew Morton, Ingo Molnar, H. Peter Anvin, Thomas Gleixner,
	Gabriel C, linux-kernel@vger.kernel.org

On Thu, May 1, 2008 at 9:59 AM, Mika Fischer <mika.fischer@zoopnet.de> wrote:
> Yinghai Lu schrieb:
>
>
> > On Thu, May 1, 2008 at 5:09 AM, Mika Fischer <mika.fischer@zoopnet.de> wrote:
>  >> Yinghai Lu schrieb:
>  >>
>  >>> loop mtrr chunk_size and gran_size from 1M to 2G to find out optimal value.
>  >>  >
>  >>  > so user don't need to add mtrr_chunk_size and mtrr_gran_size,
>  >>  >
>  >>  > if optimal value is not found, print out all list to help select less optimal
>  >>  > value.
>  >>  >
>  >>  > add mtrr_spare_reg_nr= so user could set 2 instead of 1, if the card need more entries.
>  >>
>  >>  On my system x86-latest + this patch and using no boot options gives me
>  >>  this /proc/mtrr:
>  >>  reg00: base=0x00000000 (   0MB), size=2048MB: write-back, count=1
>  >>  reg01: base=0x80000000 (2048MB), size= 512MB: write-back, count=1
>  >>  reg02: base=0xa0000000 (2560MB), size= 256MB: write-back, count=1
>  >>  reg03: base=0xb0000000 (2816MB), size= 256MB: write-back, count=1
>  >>  reg04: base=0xbf700000 (3063MB), size=   1MB: uncachable, count=1
>  >>  reg05: base=0xbf800000 (3064MB), size=   8MB: uncachable, count=1
>  >>  reg06: base=0x100000000 (4096MB), size=1024MB: write-back, count=1
>  >>
>  >>  Which is OK. It could probably collapse reg01-reg03 into one but that's
>  >>  a minor issue (for me at least, there are probably cases where
>  >>  collapsing them might save the user from having to specify the
>  >>  mtrr_spare_reg_nr boot option).
>  >
>  > yes. please try mtrr_spare_reg_nr=3 or etc.
>
>  Sure this works. But that was my point exactly. It should be possible to
>  figure out the better configuration automatically so that I *don't* have
>  to specify mtrr_spare_reg_nr=3.
>
>  Or in other words: If there are multiple equivalent configurations that
>  don't lose any RAM(!), the one with the most free MTRR regs should be
>  preferred.
>
>  AFAICT you loop over the chunk size and stop when you have found a
>  configuration that leaves the number of free MTRR registers requested
>  (default 1).
>
>  This will almost always result in a configuration where you have
>  *exactly* the number of requested free regs available, even if a more
>  efficient configuration was possible.

OK, will send another version out.

YH

^ permalink raw reply	[flat|nested] 89+ messages in thread

* [PATCH] x86: mtrr cleanup for converting continuous to discrete - auto detect v2
  2008-05-01  8:00               ` [PATCH] x86: mtrr cleanup for converting continuous to discrete - auto detect Yinghai Lu
                                   ` (2 preceding siblings ...)
  2008-05-01 15:09                 ` Randy Dunlap
@ 2008-05-01 18:57                 ` Yinghai Lu
  2008-05-01 19:42                   ` H. Peter Anvin
  2008-05-02  0:52                   ` [PATCH] x86: mtrr cleanup for converting continuous to discrete - auto detect v3 Yinghai Lu
  3 siblings, 2 replies; 89+ messages in thread
From: Yinghai Lu @ 2008-05-01 18:57 UTC (permalink / raw)
  To: Andrew Morton, Ingo Molnar, H. Peter Anvin, Thomas Gleixner,
	Gabriel C, Mika Fischer
  Cc: linux-kernel@vger.kernel.org


loop mtrr chunk_size and gran_size from 1M to 2G to find out optimal value.

so user don't need to add mtrr_chunk_size and mtrr_gran_size

if optimal value is not found, print out all list to help select less optimal
value.

add mtrr_spare_reg_nr= so user could set 2 instead of 1, if the card need more entries.

v2: find the one with more spare entries
      if the specify mtrr_chunk_size and mtrr_gran_size if not good, will try to find one 

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

Index: linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/main.c
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
@@ -610,28 +610,6 @@ static struct sysdev_driver mtrr_sysdev_
 	.resume		= mtrr_restore,
 };
 
-#ifdef CONFIG_MTRR_SANITIZER
-static int enable_mtrr_cleanup __initdata = CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT;
-#else
-static int enable_mtrr_cleanup __initdata = -1;
-#endif
-
-static int __init disable_mtrr_cleanup_setup(char *str)
-{
-	if (enable_mtrr_cleanup != -1)
-		enable_mtrr_cleanup = 0;
-	return 0;
-}
-early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);
-
-static int __init enable_mtrr_cleanup_setup(char *str)
-{
-	if (enable_mtrr_cleanup != -1)
-		enable_mtrr_cleanup = 1;
-	return 0;
-}
-early_param("enble_mtrr_cleanup", enable_mtrr_cleanup_setup);
-
 /* should be related to MTRR_VAR_RANGES nums */
 #define RANGE_NUM 256
 
@@ -702,13 +680,15 @@ subtract_range(struct res_range *range, 
 			continue;
 		}
 
-		if (start <= range[j].start && end < range[j].end && range[j].start < end + 1) {
+		if (start <= range[j].start && end < range[j].end &&
+		    range[j].start < end + 1) {
 			range[j].start = end + 1;
 			continue;
 		}
 
 
-		if (start > range[j].start && end >= range[j].end && range[j].end > start - 1) {
+		if (start > range[j].start && end >= range[j].end &&
+		    range[j].end > start - 1) {
 			range[j].end = start - 1;
 			continue;
 		}
@@ -743,18 +723,119 @@ static int __init cmp_range(const void *
 	return start1 - start2;
 }
 
+struct var_mtrr_range_state {
+	unsigned long base_pfn;
+	unsigned long size_pfn;
+	mtrr_type type;
+};
+
+struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
+
+static int __init
+x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
+		       unsigned long extra_remove_base,
+		       unsigned long extra_remove_size)
+{
+	unsigned long i, base, size;
+	mtrr_type type;
+
+	for (i = 0; i < num_var_ranges; i++) {
+		type = range_state[i].type;
+		if (type != MTRR_TYPE_WRBACK)
+			continue;
+		base = range_state[i].base_pfn;
+		size = range_state[i].size_pfn;
+		nr_range = add_range_with_merge(range, nr_range, base,
+						base + size - 1);
+	}
+	printk(KERN_DEBUG "After WB checking\n");
+	for (i = 0; i < nr_range; i++)
+		printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
+				 range[i].start, range[i].end + 1);
+
+	/* take out UC ranges */
+	for (i = 0; i < num_var_ranges; i++) {
+		type = range_state[i].type;
+		if (type != MTRR_TYPE_UNCACHABLE)
+			continue;
+		size = range_state[i].size_pfn;
+		if (!size)
+			continue;
+		base = range_state[i].base_pfn;
+		subtract_range(range, base, base + size - 1);
+	}
+	if (extra_remove_size)
+		subtract_range(range, extra_remove_base,
+				 extra_remove_base + extra_remove_size  - 1);
+
+	/* get new range num */
+	nr_range = 0;
+	for (i = 0; i < RANGE_NUM; i++) {
+		if (!range[i].end)
+			continue;
+		nr_range++;
+	}
+	printk(KERN_DEBUG "After UC checking\n");
+	for (i = 0; i < nr_range; i++)
+		printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
+			 range[i].start, range[i].end + 1);
+
+	/* sort the ranges */
+	sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
+	printk(KERN_DEBUG "After sorting\n");
+	for (i = 0; i < nr_range; i++)
+		printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
+				 range[i].start, range[i].end + 1);
+
+	return nr_range;
+}
+
+static struct res_range __initdata range[RANGE_NUM];
+
+#ifdef CONFIG_MTRR_SANITIZER
+
+static unsigned long __init sum_ranges(struct res_range *range, int nr_range)
+{
+	unsigned long sum;
+	int i;
+
+	sum = 0;
+	for (i = 0; i < nr_range; i++)
+		sum += range[i].end + 1 - range[i].start;
+
+	return sum;
+}
+
+static int enable_mtrr_cleanup __initdata =
+				 CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT;
+
+static int __init disable_mtrr_cleanup_setup(char *str)
+{
+	if (enable_mtrr_cleanup != -1)
+		enable_mtrr_cleanup = 0;
+	return 0;
+}
+early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);
+
+static int __init enable_mtrr_cleanup_setup(char *str)
+{
+	if (enable_mtrr_cleanup != -1)
+		enable_mtrr_cleanup = 1;
+	return 0;
+}
+early_param("enble_mtrr_cleanup", enable_mtrr_cleanup_setup);
+
 struct var_mtrr_state {
 	unsigned long	range_startk;
 	unsigned long	range_sizek;
 	unsigned long	chunk_sizek;
 	unsigned long	gran_sizek;
 	unsigned int	reg;
-	unsigned int	address_bits;
 };
 
 static void __init
 set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
-		unsigned char type, unsigned address_bits)
+		unsigned char type, unsigned int address_bits)
 {
 	u32 base_lo, base_hi, mask_lo, mask_hi;
 	u64 base, mask;
@@ -781,10 +862,34 @@ set_var_mtrr(unsigned int reg, unsigned 
 	fill_mtrr_var_range(reg, base_lo, base_hi, mask_lo, mask_hi);
 }
 
+static void __init
+save_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
+		unsigned char type)
+{
+	range_state[reg].base_pfn = basek >> (PAGE_SHIFT - 10);
+	range_state[reg].size_pfn = sizek >> (PAGE_SHIFT - 10);
+	range_state[reg].type = type;
+}
+
+static void __init
+set_var_mtrr_all(unsigned int address_bits)
+{
+	unsigned long basek, sizek;
+	unsigned char type;
+	unsigned int reg;
+
+	for (reg = 0; reg < num_var_ranges; reg++) {
+		basek = range_state[reg].base_pfn << (PAGE_SHIFT - 10);
+		sizek = range_state[reg].size_pfn << (PAGE_SHIFT - 10);
+		type = range_state[reg].type;
+
+		set_var_mtrr(reg, basek, sizek, type, address_bits);
+	}
+}
+
 static unsigned int __init
 range_to_mtrr(unsigned int reg, unsigned long range_startk,
-	      unsigned long range_sizek, unsigned char type,
-	      unsigned address_bits)
+	      unsigned long range_sizek, unsigned char type)
 {
 	if (!range_sizek || (reg >= num_var_ranges))
 		return reg;
@@ -803,12 +908,12 @@ range_to_mtrr(unsigned int reg, unsigned
 			align = max_align;
 
 		sizek = 1 << align;
-		printk(KERN_INFO "Setting variable MTRR %d, base: %ldMB, range: %ldMB, type %s\n",
+		printk(KERN_DEBUG "Setting variable MTRR %d, base: %ldMB, range: %ldMB, type %s\n",
 			reg, range_startk >> 10, sizek >> 10,
 			(type == MTRR_TYPE_UNCACHABLE)?"UC":
 			    ((type == MTRR_TYPE_WRBACK)?"WB":"Other")
 			);
-		set_var_mtrr(reg++, range_startk, sizek, type, address_bits);
+		save_var_mtrr(reg++, range_startk, sizek, type);
 		range_startk += sizek;
 		range_sizek -= sizek;
 		if (reg >= num_var_ranges)
@@ -817,10 +922,12 @@ range_to_mtrr(unsigned int reg, unsigned
 	return reg;
 }
 
-static void __init
-range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek)
+static unsigned __init
+range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek,
+			unsigned long sizek)
 {
 	unsigned long hole_basek, hole_sizek;
+	unsigned long second_basek, second_sizek;
 	unsigned long range0_basek, range0_sizek;
 	unsigned long range_basek, range_sizek;
 	unsigned long chunk_sizek;
@@ -828,64 +935,86 @@ range_to_mtrr_with_hole(struct var_mtrr_
 
 	hole_basek = 0;
 	hole_sizek = 0;
+	second_basek = 0;
+	second_sizek = 0;
 	chunk_sizek = state->chunk_sizek;
 	gran_sizek = state->gran_sizek;
 
 	/* align with gran size, prevent small block used up MTRRs */
 	range_basek = ALIGN(state->range_startk, gran_sizek);
 	if ((range_basek > basek) && basek)
-		return;
-	range_sizek = ALIGN(state->range_sizek - (range_basek - state->range_startk), gran_sizek);
+		return second_sizek;
+	state->range_sizek -= (range_basek - state->range_startk);
+	range_sizek = ALIGN(state->range_sizek, gran_sizek);
 
-	while (range_basek + range_sizek > (state->range_startk + state->range_sizek)) {
+	while (range_sizek > state->range_sizek) {
 		range_sizek -= gran_sizek;
 		if (!range_sizek)
-			return;
+			return second_sizek;
 	}
-	state->range_startk = range_basek;
 	state->range_sizek = range_sizek;
 
 	/* try to append some small hole */
 	range0_basek = state->range_startk;
 	range0_sizek = ALIGN(state->range_sizek, chunk_sizek);
 	if (range0_sizek == state->range_sizek) {
-			printk(KERN_INFO "rangeX: %016lx - %016lx\n", range0_basek<<10, (range0_basek + state->range_sizek)<<10);
-			state->reg = range_to_mtrr(state->reg, range0_basek,
-				state->range_sizek, MTRR_TYPE_WRBACK, state->address_bits);
-		return;
-	} else if (basek) {
-	    while (range0_basek + range0_sizek - chunk_sizek > basek) {
+		printk(KERN_DEBUG "rangeX: %016lx - %016lx\n", range0_basek<<10,
+				(range0_basek + state->range_sizek)<<10);
+		state->reg = range_to_mtrr(state->reg, range0_basek,
+				state->range_sizek, MTRR_TYPE_WRBACK);
+		return second_sizek;
+	}
+
+	range0_sizek -= chunk_sizek;
+	if (range0_sizek && sizek) {
+	    while (range0_basek + range0_sizek > (basek + sizek)) {
 		range0_sizek -= chunk_sizek;
 		if (!range0_sizek)
 			break;
 	    }
 	}
 
+	if (range0_sizek) {
+		printk(KERN_DEBUG "range0: %016lx - %016lx\n", range0_basek<<10,
+				(range0_basek + range0_sizek)<<10);
+		state->reg = range_to_mtrr(state->reg, range0_basek,
+				range0_sizek, MTRR_TYPE_WRBACK);
 
-	if (range0_sizek > chunk_sizek)
-		range0_sizek -= chunk_sizek;
-	printk(KERN_INFO "range0: %016lx - %016lx\n", range0_basek<<10, (range0_basek + range0_sizek)<<10);
-	state->reg = range_to_mtrr(state->reg, range0_basek,
-			range0_sizek, MTRR_TYPE_WRBACK, state->address_bits);
+	}
 
 	range_basek = range0_basek + range0_sizek;
 	range_sizek = chunk_sizek;
 
-	if ((range_sizek - (state->range_sizek - range0_sizek) < (chunk_sizek >> 1)) &&
-	    (range_basek + range_sizek <= basek)) {
-		hole_sizek = range_sizek - (state->range_sizek - range0_sizek);
+	if (range_basek + range_sizek > basek &&
+	    range_basek + range_sizek <= (basek + sizek)) {
+		second_basek = basek;
+		second_sizek = range_basek + range_sizek - basek;
+	}
+
+	if (range_sizek - (state->range_sizek - range0_sizek) - second_sizek
+	     < (chunk_sizek >> 1)) {
+		hole_sizek = range_sizek - (state->range_sizek - range0_sizek)
+				 - second_sizek;
 		hole_basek = range_basek + range_sizek - hole_sizek;
-	} else
+	} else {
 		range_sizek = state->range_sizek - range0_sizek;
+		second_basek = 0;
+		second_sizek = 0;
+	}
 
-	printk(KERN_INFO "range: %016lx - %016lx\n", range_basek<<10, (range_basek + range_sizek)<<10);
-	state->reg = range_to_mtrr(state->reg, range_basek,
-			range_sizek, MTRR_TYPE_WRBACK, state->address_bits);
+	printk(KERN_DEBUG "range: %016lx - %016lx\n", range_basek<<10,
+			 (range_basek + range_sizek)<<10);
+	state->reg = range_to_mtrr(state->reg, range_basek, range_sizek,
+					 MTRR_TYPE_WRBACK);
 	if (hole_sizek) {
-		printk(KERN_INFO "hole: %016lx - %016lx\n", hole_basek<<10, (hole_basek + hole_sizek)<<10);
-		state->reg = range_to_mtrr(state->reg, hole_basek,
-				hole_sizek, MTRR_TYPE_UNCACHABLE, state->address_bits);
+		printk(KERN_DEBUG "hole: %016lx - %016lx\n", hole_basek<<10,
+				 (hole_basek + hole_sizek)<<10);
+		state->reg = range_to_mtrr(state->reg, hole_basek, hole_sizek,
+						 MTRR_TYPE_UNCACHABLE);
+
 	}
+
+	return second_sizek;
 }
 
 static void __init
@@ -893,6 +1022,7 @@ set_var_mtrr_range(struct var_mtrr_state
 		   unsigned long size_pfn)
 {
 	unsigned long basek, sizek;
+	unsigned long second_sizek = 0;
 
 	if (state->reg >= num_var_ranges)
 		return;
@@ -901,21 +1031,19 @@ set_var_mtrr_range(struct var_mtrr_state
 	sizek = size_pfn << (PAGE_SHIFT - 10);
 
 	/* See if I can merge with the last range */
-	if ((basek <= 1024) || (state->range_startk + state->range_sizek == basek)) {
+	if ((basek <= 1024) ||
+	    (state->range_startk + state->range_sizek == basek)) {
 		unsigned long endk = basek + sizek;
 		state->range_sizek = endk - state->range_startk;
 		return;
 	}
 	/* Write the range mtrrs */
-	if (state->range_sizek != 0) {
-		range_to_mtrr_with_hole(state, basek);
+	if (state->range_sizek != 0)
+		second_sizek = range_to_mtrr_with_hole(state, basek, sizek);
 
-		state->range_startk = 0;
-		state->range_sizek = 0;
-	}
 	/* Allocate an msr */
-	state->range_startk = basek;
-	state->range_sizek  = sizek;
+	state->range_startk = basek + second_sizek;
+	state->range_sizek  = sizek - second_sizek;
 }
 
 /* mininum size of mtrr block that can take hole */
@@ -931,7 +1059,7 @@ static int __init parse_mtrr_chunk_size_
 early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt);
 
 /* granity of mtrr of block */
-static u64 mtrr_gran_size __initdata = (1ULL<<20);
+static u64 mtrr_gran_size __initdata;
 
 static int __init parse_mtrr_gran_size_opt(char *p)
 {
@@ -942,91 +1070,79 @@ static int __init parse_mtrr_gran_size_o
 }
 early_param("mtrr_gran_size", parse_mtrr_gran_size_opt);
 
-static void __init
+static int nr_mtrr_spare_reg __initdata =
+				 CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT;
+
+static int __init parse_mtrr_spare_reg(char *arg)
+{
+	if (arg)
+		nr_mtrr_spare_reg = simple_strtoul(arg, NULL, 0);
+	return 0;
+}
+
+early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg);
+
+static int __init
 x86_setup_var_mtrrs(struct res_range *range, int nr_range,
-		    unsigned address_bits)
+		    u64 chunk_size, u64 gran_size)
 {
 	struct var_mtrr_state var_state;
 	int i;
+	int num_reg;
 
 	var_state.range_startk	= 0;
 	var_state.range_sizek	= 0;
 	var_state.reg		= 0;
-	var_state.address_bits	= address_bits;
-	var_state.chunk_sizek	= mtrr_chunk_size >> 10;
-	var_state.gran_sizek	= mtrr_gran_size >> 10;
+	var_state.chunk_sizek	= chunk_size >> 10;
+	var_state.gran_sizek	= gran_size >> 10;
+
+	memset(range_state, 0, sizeof(range_state));
 
 	/* Write the range etc */
 	for (i = 0; i < nr_range; i++)
-		set_var_mtrr_range(&var_state, range[i].start, range[i].end - range[i].start + 1);
+		set_var_mtrr_range(&var_state, range[i].start,
+				   range[i].end - range[i].start + 1);
 
 	/* Write the last range */
-	range_to_mtrr_with_hole(&var_state, 0);
-	printk(KERN_INFO "DONE variable MTRRs\n");
+	if (var_state.range_sizek != 0)
+		range_to_mtrr_with_hole(&var_state, 0, 0);
+	printk(KERN_DEBUG "DONE variable MTRRs\n");
+
+	num_reg = var_state.reg;
 	/* Clear out the extra MTRR's */
 	while (var_state.reg < num_var_ranges) {
-		set_var_mtrr(var_state.reg, 0, 0, 0, var_state.address_bits);
+		save_var_mtrr(var_state.reg, 0, 0, 0);
 		var_state.reg++;
 	}
-}
-
-static int __init
-x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
-		       unsigned long extra_remove_base,
-		       unsigned long extra_remove_size)
-{
-	unsigned long i, base, size;
-	mtrr_type type;
-
-	for (i = 0; i < num_var_ranges; i++) {
-		mtrr_if->get(i, &base, &size, &type);
-		if (type != MTRR_TYPE_WRBACK)
-			continue;
-		nr_range = add_range_with_merge(range, nr_range, base, base + size - 1);
-	}
-	printk(KERN_INFO "After WB checking\n");
-	for (i = 0; i < nr_range; i++)
-		printk(KERN_INFO "MTRR MAP PFN: %016lx - %016lx\n", range[i].start, range[i].end + 1);
 
-	/* take out UC ranges */
-	for (i = 0; i < num_var_ranges; i++) {
-		mtrr_if->get(i, &base, &size, &type);
-		if (type != MTRR_TYPE_UNCACHABLE)
-			continue;
-		if (!size)
-			continue;
-		subtract_range(range, base, base + size - 1);
-	}
-	if (extra_remove_size)
-		subtract_range(range, extra_remove_base,  extra_remove_base + extra_remove_size  - 1);
+	return num_reg;
+}
 
-	/* get new range num */
-	nr_range = 0;
-	for (i = 0; i < RANGE_NUM; i++) {
-		if (!range[i].end)
-			continue;
-		nr_range++;
-	}
-	printk(KERN_INFO "After UC checking\n");
-	for (i = 0; i < nr_range; i++)
-		printk(KERN_INFO "MTRR MAP PFN: %016lx - %016lx\n", range[i].start, range[i].end + 1);
+struct mtrr_cleanup_result {
+	unsigned long gran_sizek;
+	unsigned long chunk_sizek;
+	unsigned long lose_cover_sizek;
+	unsigned int num_reg;
+	int bad;
+};
 
-	/* sort the ranges */
-	sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
-	printk(KERN_INFO "After sorting\n");
-	for (i = 0; i < nr_range; i++)
-		printk(KERN_INFO "MTRR MAP PFN: %016lx - %016lx\n", range[i].start, range[i].end + 1);
+/* 2G, 1G, ..., 1M, so need 12 items and half maxtrix need 13*6 */
+#define NUM_RESULT 78
 
-	return nr_range;
-}
+static struct mtrr_cleanup_result __initdata result[NUM_RESULT];
+static struct res_range __initdata range_new[RANGE_NUM];
+static unsigned long __initdata min_loss_pfn[RANGE_NUM];
 
 static int __init mtrr_cleanup(unsigned address_bits)
 {
 	unsigned long extra_remove_base, extra_remove_size;
 	unsigned long i, base, size, def, dummy;
-	struct res_range range[RANGE_NUM];
 	mtrr_type type;
-	int nr_range;
+	int nr_range, nr_range_new;
+	u64 chunk_size, gran_size;
+	unsigned long range_sums, range_sums_new;
+	int index_good;
+	int num_reg_good;
 
 	/* extra one for all 0 */
 	int num[MTRR_NUM_TYPES + 1];
@@ -1038,10 +1154,20 @@ static int __init mtrr_cleanup(unsigned 
 	if (def != MTRR_TYPE_UNCACHABLE)
 		return 0;
 
+	/* get it and store it aside */
+	memset(range_state, 0, sizeof(range_state));
+	for (i = 0; i < num_var_ranges; i++) {
+		mtrr_if->get(i, &base, &size, &type);
+		range_state[i].base_pfn = base;
+		range_state[i].size_pfn = size;
+		range_state[i].type = type;
+	}
+
 	/* check entries number */
 	memset(num, 0, sizeof(num));
 	for (i = 0; i < num_var_ranges; i++) {
-		mtrr_if->get(i, &base, &size, &type);
+		type = range_state[i].type;
+		size = range_state[i].size_pfn;
 		if (type >= MTRR_NUM_TYPES)
 			continue;
 		if (!size)
@@ -1064,13 +1190,151 @@ static int __init mtrr_cleanup(unsigned 
 		extra_remove_base = 1 << (32 - PAGE_SHIFT);
 		extra_remove_size = (mtrr_tom2>>PAGE_SHIFT) - extra_remove_base;
 	}
-	nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base, extra_remove_size);
+	nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base,
+					  extra_remove_size);
+	range_sums = sum_ranges(range, nr_range);
+	printk(KERN_INFO "total RAM coverred: %ldM\n",
+			 range_sums >> (20 - PAGE_SHIFT));
+
+	if (mtrr_chunk_size && mtrr_gran_size) {
+		int num_reg;
+
+		/* convert ranges to var ranges state */
+		num_reg = x86_setup_var_mtrrs(range, nr_range, mtrr_chunk_size,
+						 mtrr_gran_size);
+
+		/* we got new setting in range_state, check it */
+		memset(range_new, 0, sizeof(range_new));
+		nr_range_new = x86_get_mtrr_mem_range(range_new, 0,
+					 extra_remove_base, extra_remove_size);
+		range_sums_new = sum_ranges(range_new, nr_range_new);
+
+		i = 0;
+		result[i].chunk_sizek = mtrr_chunk_size >> 10;
+		result[i].gran_sizek = mtrr_gran_size >> 10;
+		result[i].num_reg = num_reg;
+		if (range_sums < range_sums_new) {
+			result[i].lose_cover_sizek = (range_sums_new - range_sums) << (PAGE_SHIFT - 10);
+			result[i].bad = 1;
+		} else
+			result[i].lose_cover_sizek = (range_sums - range_sums_new) << (PAGE_SHIFT - 10);
+
+		printk(KERN_INFO " %sgran_size: %ldM  \tchunk_size: %ldM  \t",
+			 result[i].bad?" BAD ":"", result[i].gran_sizek >> 10,
+			 result[i].chunk_sizek >> 10);
+		printk(KERN_CONT "num_reg: %d  \tlose cover RAM: %s%ldM \n",
+			 result[i].num_reg, result[i].bad?"-":"",
+			 result[i].lose_cover_sizek >> 10);
+		if (!result[i].bad) {
+			set_var_mtrr_all(address_bits);
+			return 1;
+		}
+		printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, will find optimal one\n");
+		memset(result, 0, sizeof(result[0]));
+	}
+
+	i = 0;
+	memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn));
+	memset(result, 0, sizeof(result));
+	for (gran_size = (1ULL<<20); gran_size < (1ULL<<32); gran_size <<= 1) {
+		for (chunk_size = gran_size; chunk_size < (1ULL<<32);
+		     chunk_size <<= 1) {
+			int num_reg;
+
+			printk(KERN_INFO "\ngran_size: %lldM   chunk_size_size: %lldM\n", gran_size >> 20, chunk_size >> 20);
+			if (i >= NUM_RESULT)
+				continue;
+
+			/* convert ranges to var ranges state */
+			num_reg = x86_setup_var_mtrrs(range, nr_range,
+							 chunk_size, gran_size);
+
+			/* we got new setting in range_state, check it */
+			memset(range_new, 0, sizeof(range_new));
+			nr_range_new = x86_get_mtrr_mem_range(range_new, 0,
+					 extra_remove_base, extra_remove_size);
+			range_sums_new = sum_ranges(range_new, nr_range_new);
+
+			result[i].chunk_sizek = chunk_size >> 10;
+			result[i].gran_sizek = gran_size >> 10;
+			result[i].num_reg = num_reg;
+			if (range_sums < range_sums_new) {
+				result[i].lose_cover_sizek = (range_sums_new - range_sums) << (PAGE_SHIFT - 10);
+				result[i].bad = 1;
+			} else
+				result[i].lose_cover_sizek = (range_sums - range_sums_new) << (PAGE_SHIFT - 10);
+
+			if (!result[i].bad && (range_sums - range_sums_new <
+				 min_loss_pfn[num_reg]))
+					min_loss_pfn[num_reg] = range_sums - range_sums_new;
+			i++;
+		}
+	}
+
+	/* print out all */
+	for (i = 0; i < NUM_RESULT; i++) {
+		printk(KERN_INFO "%sgran_size: %ldM  \tchunk_size: %ldM  \t",
+			 result[i].bad?"*BAD* ":" ", result[i].gran_sizek >> 10,
+			 result[i].chunk_sizek >> 10);
+		printk(KERN_CONT "num_reg: %d  \tlose cover RAM: %s%ldM \n",
+			 result[i].num_reg, result[i].bad?"-":"",
+			 result[i].lose_cover_sizek >> 10);
+	}
+
+	/* try to find the optimal index */
+	if (nr_mtrr_spare_reg >= num_var_ranges)
+		nr_mtrr_spare_reg = num_var_ranges - 1;
+	num_reg_good = -1;
+	for (i = 1; i < num_var_ranges + 1 - nr_mtrr_spare_reg; i++) {
+		if (!min_loss_pfn[i]) {
+			num_reg_good = i;
+			break;
+		}
+	}
+
+	index_good = -1;
+	if (num_reg_good != -1) {
+		for (i = 0; i < NUM_RESULT; i++) {
+			if (!result[i].bad && result[i].num_reg == num_reg_good) {
+				index_good = i;
+				break;
+			}
+		}
+	}
 
-	/* convert ranges to var ranges state */
-	x86_setup_var_mtrrs(range, nr_range, address_bits);
+	if (index_good != -1) {
+		printk(KERN_INFO "Found optimal setting for mtrr clean up\n");
+		i = index_good;
+		printk(KERN_INFO "gran_size: %ldM  \tchunk_size: %ldM  \t",
+				result[i].gran_sizek >> 10,
+				result[i].chunk_sizek >> 10);
+		printk(KERN_CONT "num_reg: %d  \tlose cover RAM: %ldM \n",
+				result[i].num_reg,
+				result[i].lose_cover_sizek >> 10);
+		/* convert ranges to var ranges state */
+		chunk_size = result[i].chunk_sizek;
+		chunk_size <<= 10;
+		gran_size = result[i].gran_sizek;
+		gran_size <<= 10;
+		x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size);
+		set_var_mtrr_all(address_bits);
 
-	return 1;
+		return 1;
+	}
+
+	printk(KERN_INFO "mtrr_cleanup: can not find optimal value\n");
+	printk(KERN_INFO "please specify mtrr_gran_size/mtrr_chunk_size\n");
+
+	return 0;
+}
+#else
+static int __init mtrr_cleanup(unsigned address_bits)
+{
+	return 0;
 }
+#endif
+
+static int __initdata changed_by_mtrr_cleanup;
 
 static int disable_mtrr_trim;
 
@@ -1111,7 +1375,8 @@ int __init amd_special_default_mtrr(void
 	return 0;
 }
 
-static u64 __init real_trim_memory(unsigned long start_pfn, unsigned long limit_pfn)
+static u64 __init real_trim_memory(unsigned long start_pfn,
+				   unsigned long limit_pfn)
 {
 	u64 trim_start, trim_size;
 	trim_start = start_pfn;
@@ -1138,9 +1403,8 @@ int __init mtrr_trim_uncached_memory(uns
 {
 	unsigned long i, base, size, highest_pfn = 0, def, dummy;
 	mtrr_type type;
-	struct res_range range[RANGE_NUM];
 	int nr_range;
-	u64 total_real_trim_size;
+	u64 total_trim_size;
 
 	/* extra one for all 0 */
 	int num[MTRR_NUM_TYPES + 1];
@@ -1155,11 +1419,22 @@ int __init mtrr_trim_uncached_memory(uns
 	if (def != MTRR_TYPE_UNCACHABLE)
 		return 0;
 
-	/* Find highest cached pfn */
+	/* get it and store it aside */
+	memset(range_state, 0, sizeof(range_state));
 	for (i = 0; i < num_var_ranges; i++) {
 		mtrr_if->get(i, &base, &size, &type);
+		range_state[i].base_pfn = base;
+		range_state[i].size_pfn = size;
+		range_state[i].type = type;
+	}
+
+	/* Find highest cached pfn */
+	for (i = 0; i < num_var_ranges; i++) {
+		type = range_state[i].type;
 		if (type != MTRR_TYPE_WRBACK)
 			continue;
+		base = range_state[i].base_pfn;
+		size = range_state[i].size_pfn;
 		if (highest_pfn < base + size)
 			highest_pfn = base + size;
 	}
@@ -1177,9 +1452,10 @@ int __init mtrr_trim_uncached_memory(uns
 	/* check entries number */
 	memset(num, 0, sizeof(num));
 	for (i = 0; i < num_var_ranges; i++) {
-		mtrr_if->get(i, &base, &size, &type);
+		type = range_state[i].type;
 		if (type >= MTRR_NUM_TYPES)
 			continue;
+		size = range_state[i].size_pfn;
 		if (!size)
 			type = MTRR_NUM_TYPES;
 		num[type]++;
@@ -1205,26 +1481,28 @@ int __init mtrr_trim_uncached_memory(uns
 	}
 	nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0);
 
-	total_real_trim_size = 0;
+	total_trim_size = 0;
 	/* check the head */
 	if (range[0].start)
-		total_real_trim_size += real_trim_memory(0, range[0].start);
+		total_trim_size += real_trim_memory(0, range[0].start);
 	/* check the holes */
 	for (i = 0; i < nr_range - 1; i++) {
 		if (range[i].end + 1 < range[i+1].start)
-			total_real_trim_size += real_trim_memory(range[i].end + 1, range[i+1].start);
+			total_trim_size += real_trim_memory(range[i].end + 1,
+							    range[i+1].start);
 	}
 	/* check the top */
 	i = nr_range - 1;
 	if (range[i].end + 1 < end_pfn)
-		total_real_trim_size += real_trim_memory(range[i].end + 1, end_pfn);
+		total_trim_size += real_trim_memory(range[i].end + 1,
+							 end_pfn);
 
-	if (total_real_trim_size) {
+	if (total_trim_size) {
 		printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover"
 			" all of memory, losing %lluMB of RAM.\n",
-			total_real_trim_size >> 20);
+			total_trim_size >> 20);
 
-		if (enable_mtrr_cleanup < 1)
+		if (!changed_by_mtrr_cleanup)
 			WARN_ON(1);
 
 		printk(KERN_INFO "update e820 for mtrr\n");
@@ -1314,8 +1592,10 @@ void __init mtrr_bp_init(void)
 		if (use_intel()) {
 			get_mtrr_state();
 
-			if (mtrr_cleanup(phys_addr))
+			if (mtrr_cleanup(phys_addr)) {
+				changed_by_mtrr_cleanup = 1;
 				mtrr_if->set_all();
+			}
 
 		}
 	}
@@ -1355,7 +1635,7 @@ static int __init mtrr_init_finialize(vo
 	if (!mtrr_if)
 		return 0;
 	if (use_intel()) {
-		if (enable_mtrr_cleanup < 1)
+		if (!changed_by_mtrr_cleanup)
 			mtrr_state_warn();
 	} else {
 		/* The CPUs haven't MTRR and seem to not support SMP. They have
Index: linux-2.6/Documentation/kernel-parameters.txt
===================================================================
--- linux-2.6.orig/Documentation/kernel-parameters.txt
+++ linux-2.6/Documentation/kernel-parameters.txt
@@ -613,8 +613,17 @@ and is between 256 and 4096 characters. 
 			that could hold holes aka. UC entries.
 
 	mtrr_gran_size=nn[KMG] [X86]
-			used for mtrr cleanup. It is granity of mtrr block.
-			Big value could prevent small alignment use up MTRRs.
+			Used for mtrr cleanup. It is granularity of mtrr block.
+			Default is 1.
+			Large value could prevent small alignment from
+			using up MTRRs.
+
+	mtrr_spare_reg_nr=n [X86]
+			Format: <integer>
+			Range: 0,7 : spare reg number
+			Default : 1
+			Used for mtrr cleanup. It is spare mtrr entries number.
+			Set to 2 or more if your graphical card needs more.
 
 	disable_mtrr_trim [X86, Intel and AMD only]
 			By default the kernel will trim any uncacheable
Index: linux-2.6/arch/x86/Kconfig
===================================================================
--- linux-2.6.orig/arch/x86/Kconfig
+++ linux-2.6/arch/x86/Kconfig
@@ -1100,6 +1100,15 @@ config MTRR_SANITIZER_ENABLE_DEFAULT
 	help
 	  Enable mtrr cleanup default value
 
+config MTRR_SANITIZER_SPARE_REG_NR_DEFAULT
+	int "MTRR cleanup spare reg num (0-7)"
+	range 0 7
+	default "1"
+	depends on MTRR_SANITIZER
+	help
+	  mtrr cleanup spare entries default, it can be changed via
+	  mtrr_spare_reg_nr=
+
 config X86_PAT
 	bool
 	prompt "x86 PAT support"

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH] x86: mtrr cleanup for converting continuous to discrete - auto detect v2
  2008-05-01 18:57                 ` [PATCH] x86: mtrr cleanup for converting continuous to discrete - auto detect v2 Yinghai Lu
@ 2008-05-01 19:42                   ` H. Peter Anvin
  2008-05-01 21:02                     ` Yinghai Lu
  2008-05-02  0:52                   ` [PATCH] x86: mtrr cleanup for converting continuous to discrete - auto detect v3 Yinghai Lu
  1 sibling, 1 reply; 89+ messages in thread
From: H. Peter Anvin @ 2008-05-01 19:42 UTC (permalink / raw)
  To: yhlu.kernel
  Cc: Andrew Morton, Ingo Molnar, Thomas Gleixner, Gabriel C,
	Mika Fischer, linux-kernel@vger.kernel.org

Yinghai Lu wrote:
> loop mtrr chunk_size and gran_size from 1M to 2G to find out optimal value.

Why stopping at 2 GB?


^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH] x86: mtrr cleanup for converting continuous to discrete - auto detect v2
  2008-05-01 19:42                   ` H. Peter Anvin
@ 2008-05-01 21:02                     ` Yinghai Lu
  2008-05-01 21:10                       ` H. Peter Anvin
  0 siblings, 1 reply; 89+ messages in thread
From: Yinghai Lu @ 2008-05-01 21:02 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Andrew Morton, Ingo Molnar, Thomas Gleixner, Gabriel C,
	Mika Fischer, linux-kernel@vger.kernel.org

On Thu, May 1, 2008 at 12:42 PM, H. Peter Anvin <hpa@zytor.com> wrote:
> Yinghai Lu wrote:
>
> > loop mtrr chunk_size and gran_size from 1M to 2G to find out optimal
> value.
> >
>
>  Why stopping at 2 GB?

if you select 4g for chunk size, we don't need to convert that from
continuous to discrete to make X server driver happen.

actually the code could support any chunk_size...

for example: 16 g system
orginal:
0-4g WB
3.5g-4g UC
4g-8g WB
8g-16g WB
16g-16.5g WB

if you set chunk size to 16g, and gran size <= 512M
you will get
0-16g WB
3.5g-4g UC
16g-16.5g WB

YH

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH] x86: mtrr cleanup for converting continuous to discrete - auto detect v2
  2008-05-01 21:02                     ` Yinghai Lu
@ 2008-05-01 21:10                       ` H. Peter Anvin
  2008-05-01 21:20                         ` Yinghai Lu
  0 siblings, 1 reply; 89+ messages in thread
From: H. Peter Anvin @ 2008-05-01 21:10 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Andrew Morton, Ingo Molnar, Thomas Gleixner, Gabriel C,
	Mika Fischer, linux-kernel@vger.kernel.org

Yinghai Lu wrote:
> On Thu, May 1, 2008 at 12:42 PM, H. Peter Anvin <hpa@zytor.com> wrote:
>> Yinghai Lu wrote:
>>
>>> loop mtrr chunk_size and gran_size from 1M to 2G to find out optimal
>> value.
>>  Why stopping at 2 GB?
> 
> if you select 4g for chunk size, we don't need to convert that from
> continuous to discrete to make X server driver happen.
> 
> actually the code could support any chunk_size...
> 
> for example: 16 g system
> orginal:
> 0-4g WB
> 3.5g-4g UC
> 4g-8g WB
> 8g-16g WB
> 16g-16.5g WB
> 
> if you set chunk size to 16g, and gran size <= 512M
> you will get
> 0-16g WB
> 3.5g-4g UC
> 16g-16.5g WB
> 

Yes, 16 GB systems are already mainstream; 32 GB is common, so I don't 
see any reason to stop at 2 GB.  Instead, it should loop up to the 
physical address size.

	-hpa

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH] x86: mtrr cleanup for converting continuous to discrete - auto detect v2
  2008-05-01 21:10                       ` H. Peter Anvin
@ 2008-05-01 21:20                         ` Yinghai Lu
  2008-05-01 21:26                           ` H. Peter Anvin
  0 siblings, 1 reply; 89+ messages in thread
From: Yinghai Lu @ 2008-05-01 21:20 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Andrew Morton, Ingo Molnar, Thomas Gleixner, Gabriel C,
	Mika Fischer, linux-kernel@vger.kernel.org

On Thu, May 1, 2008 at 2:10 PM, H. Peter Anvin <hpa@zytor.com> wrote:
>
> Yinghai Lu wrote:
>
> > On Thu, May 1, 2008 at 12:42 PM, H. Peter Anvin <hpa@zytor.com> wrote:
> >
> > > Yinghai Lu wrote:
> > >
> > >
> > > > loop mtrr chunk_size and gran_size from 1M to 2G to find out optimal
> > > >
> > > value.
> > >  Why stopping at 2 GB?
> > >
> >
> > if you select 4g for chunk size, we don't need to convert that from
> > continuous to discrete to make X server driver happen.
> >
> > actually the code could support any chunk_size...
> >
> > for example: 16 g system
> > orginal:
> > 0-4g WB
> > 3.5g-4g UC
> > 4g-8g WB
> > 8g-16g WB
> > 16g-16.5g WB
> >
> > if you set chunk size to 16g, and gran size <= 512M
> > you will get
> > 0-16g WB
> > 3.5g-4g UC
> > 16g-16.5g WB
> >
> >
>
>  Yes, 16 GB systems are already mainstream; 32 GB is common, so I don't see
> any reason to stop at 2 GB.  Instead, it should loop up to the physical
> address size.

but our objective is that has 0xd0000000-0xe0000000 (WC) not to be
overlapping with other MTRR entries (UC)..

YH

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH] x86: mtrr cleanup for converting continuous to discrete - auto detect v2
  2008-05-01 21:20                         ` Yinghai Lu
@ 2008-05-01 21:26                           ` H. Peter Anvin
  2008-05-01 21:31                             ` Yinghai Lu
  0 siblings, 1 reply; 89+ messages in thread
From: H. Peter Anvin @ 2008-05-01 21:26 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Andrew Morton, Ingo Molnar, Thomas Gleixner, Gabriel C,
	Mika Fischer, linux-kernel@vger.kernel.org

Yinghai Lu wrote:
>>>
>>  Yes, 16 GB systems are already mainstream; 32 GB is common, so I don't see
>> any reason to stop at 2 GB.  Instead, it should loop up to the physical
>> address size.
> 
> but our objective is that has 0xd0000000-0xe0000000 (WC) not to be
> overlapping with other MTRR entries (UC)..
> 

So, pray tell, how comes this algorithm can come up with a non-solution 
to the problem presented to it?

Overall, I'm feeling there is something really completely wrong if this 
needs manual tunables of any sort.

	-hpa

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH] x86: mtrr cleanup for converting continuous to discrete - auto detect v2
  2008-05-01 21:26                           ` H. Peter Anvin
@ 2008-05-01 21:31                             ` Yinghai Lu
  2008-05-01 21:33                               ` H. Peter Anvin
  0 siblings, 1 reply; 89+ messages in thread
From: Yinghai Lu @ 2008-05-01 21:31 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Andrew Morton, Ingo Molnar, Thomas Gleixner, Gabriel C,
	Mika Fischer, linux-kernel@vger.kernel.org

On Thu, May 1, 2008 at 2:26 PM, H. Peter Anvin <hpa@zytor.com> wrote:
> Yinghai Lu wrote:
>
> >
> > >
> > > >
> > > >
> > >  Yes, 16 GB systems are already mainstream; 32 GB is common, so I don't
> see
> > > any reason to stop at 2 GB.  Instead, it should loop up to the physical
> > > address size.
> > >
> >
> > but our objective is that has 0xd0000000-0xe0000000 (WC) not to be
> > overlapping with other MTRR entries (UC)..
> >
> >
>
>  So, pray tell, how comes this algorithm can come up with a non-solution to
> the problem presented to it?
>
>  Overall, I'm feeling there is something really completely wrong if this
> needs manual tunables of any sort.

the problem is BIOS set MTRR like BIG WB - SOME SAMLL UC to spare mtrr regs.

but later X server driver want to set some range to WC. that already
is fallen in UC...

YH

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH] x86: mtrr cleanup for converting continuous to discrete - auto detect v2
  2008-05-01 21:31                             ` Yinghai Lu
@ 2008-05-01 21:33                               ` H. Peter Anvin
  2008-05-01 21:44                                 ` Yinghai Lu
  0 siblings, 1 reply; 89+ messages in thread
From: H. Peter Anvin @ 2008-05-01 21:33 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Andrew Morton, Ingo Molnar, Thomas Gleixner, Gabriel C,
	Mika Fischer, linux-kernel@vger.kernel.org

Yinghai Lu wrote:
>>  Overall, I'm feeling there is something really completely wrong if this
>> needs manual tunables of any sort.
> 
> the problem is BIOS set MTRR like BIG WB - SOME SAMLL UC to spare mtrr regs.
> 
> but later X server driver want to set some range to WC. that already
> is fallen in UC...

That's not the point.  I understand you want to flatten the layout.  The 
point is: why do you need manual tunables for the algorithm to do the 
right thing?

	-hpa

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH] x86: mtrr cleanup for converting continuous to discrete - auto detect v2
  2008-05-01 21:33                               ` H. Peter Anvin
@ 2008-05-01 21:44                                 ` Yinghai Lu
  2008-05-01 21:49                                   ` H. Peter Anvin
  0 siblings, 1 reply; 89+ messages in thread
From: Yinghai Lu @ 2008-05-01 21:44 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Andrew Morton, Ingo Molnar, Thomas Gleixner, Gabriel C,
	Mika Fischer, linux-kernel@vger.kernel.org

On Thu, May 1, 2008 at 2:33 PM, H. Peter Anvin <hpa@zytor.com> wrote:
> Yinghai Lu wrote:
>
> >
> > >  Overall, I'm feeling there is something really completely wrong if this
> > > needs manual tunables of any sort.
> > >
> >
> > the problem is BIOS set MTRR like BIG WB - SOME SAMLL UC to spare mtrr
> regs.
> >
> > but later X server driver want to set some range to WC. that already
> > is fallen in UC...
> >
>
>  That's not the point.  I understand you want to flatten the layout.  The
> point is: why do you need manual tunables for the algorithm to do the right
> thing?

optimal result is not losing covering for ranges that is originally
covered, and still keep as many of spare mtrr entries for X server
driver.
we only have 8 mtrrs, could lose some covering because of run out of mtrr regs.
So we need to search it according to chunk/gran with ram ranges that
is defined by old mtrr layout.

and if we can not find the optimal setting, user could select one
setting (chunk/gran size) to boot next time, but he will lose some
covering.
for some regions. later trim_mtrr will remove those range from e820 map

YH

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH] x86: mtrr cleanup for converting continuous to discrete - auto detect v2
  2008-05-01 21:44                                 ` Yinghai Lu
@ 2008-05-01 21:49                                   ` H. Peter Anvin
  2008-05-01 22:52                                     ` Yinghai Lu
  0 siblings, 1 reply; 89+ messages in thread
From: H. Peter Anvin @ 2008-05-01 21:49 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Andrew Morton, Ingo Molnar, Thomas Gleixner, Gabriel C,
	Mika Fischer, linux-kernel@vger.kernel.org

Yinghai Lu wrote:
>>>
>>  That's not the point.  I understand you want to flatten the layout.  The
>> point is: why do you need manual tunables for the algorithm to do the right
>> thing?
> 
> optimal result is not losing covering for ranges that is originally
> covered, and still keep as many of spare mtrr entries for X server
> driver.
> we only have 8 mtrrs, could lose some covering because of run out of mtrr regs.
> So we need to search it according to chunk/gran with ram ranges that
> is defined by old mtrr layout.

Yes.  You have a search space of less than 1000 possible combinations 
(64..20 bits), so it hardly is any reason to not search the entire 
universe of possibilities, even if by exhaustive search.

Now, if even that searching can't come up with the optimal solution (if 
one exists) in all cases, then the algorithm is broken.

> and if we can not find the optimal setting, user could select one
> setting (chunk/gran size) to boot next time, but he will lose some
> covering.
> for some regions. later trim_mtrr will remove those range from e820 map

Right, now we're talking policy, which obviously has to be entered by 
the user.

	-hpa


^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH] x86: mtrr cleanup for converting continuous to discrete - auto detect v2
  2008-05-01 21:49                                   ` H. Peter Anvin
@ 2008-05-01 22:52                                     ` Yinghai Lu
  2008-05-01 22:57                                       ` H. Peter Anvin
  0 siblings, 1 reply; 89+ messages in thread
From: Yinghai Lu @ 2008-05-01 22:52 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Andrew Morton, Ingo Molnar, Thomas Gleixner, Gabriel C,
	Mika Fischer, linux-kernel@vger.kernel.org

On Thu, May 1, 2008 at 2:49 PM, H. Peter Anvin <hpa@zytor.com> wrote:
> Yinghai Lu wrote:
>
> >
> > >
> > > >
> > > >
> > >  That's not the point.  I understand you want to flatten the layout.
> The
> > > point is: why do you need manual tunables for the algorithm to do the
> right
> > > thing?
> > >
> >
> > optimal result is not losing covering for ranges that is originally
> > covered, and still keep as many of spare mtrr entries for X server
> > driver.
> > we only have 8 mtrrs, could lose some covering because of run out of mtrr
> regs.
> > So we need to search it according to chunk/gran with ram ranges that
> > is defined by old mtrr layout.
> >
>
>  Yes.  You have a search space of less than 1000 possible combinations
> (64..20 bits), so it hardly is any reason to not search the entire universe
> of possibilities, even if by exhaustive search.

only search 78
2g, 1g, ...1m, and half matrix 13 * 6..

and don't need to search than 78.

also if we don't need to get the more spare regs than
mtrr_spare_reg_nr, we could search less... about 10 etc.

>
>  Now, if even that searching can't come up with the optimal solution (if one
> exists) in all cases, then the algorithm is broken.

because we only have 8 mtrrs, and user may specify mtrr_spare_reg_nr =
2 or 3 to get more entries for the graphics cards...
then can not the optimal setting without losing any covering.

so if the optimal is there (only need to search to 2g), it will catch it.

YH

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH] x86: mtrr cleanup for converting continuous to discrete - auto detect v2
  2008-05-01 22:52                                     ` Yinghai Lu
@ 2008-05-01 22:57                                       ` H. Peter Anvin
  2008-05-01 23:10                                         ` Yinghai Lu
  0 siblings, 1 reply; 89+ messages in thread
From: H. Peter Anvin @ 2008-05-01 22:57 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Andrew Morton, Ingo Molnar, Thomas Gleixner, Gabriel C,
	Mika Fischer, linux-kernel@vger.kernel.org

Yinghai Lu wrote:
> 
> only search 78
> 2g, 1g, ...1m, and half matrix 13 * 6..
> 
> and don't need to search than 78.
> 

Again, it's not clear to me why there is an inherent limit at 2 GB.

	-hpa

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH] x86: mtrr cleanup for converting continuous to discrete - auto detect v2
  2008-05-01 22:57                                       ` H. Peter Anvin
@ 2008-05-01 23:10                                         ` Yinghai Lu
  0 siblings, 0 replies; 89+ messages in thread
From: Yinghai Lu @ 2008-05-01 23:10 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Andrew Morton, Ingo Molnar, Thomas Gleixner, Gabriel C,
	Mika Fischer, linux-kernel@vger.kernel.org

On Thu, May 1, 2008 at 3:57 PM, H. Peter Anvin <hpa@zytor.com> wrote:
> Yinghai Lu wrote:
>
> >
> > only search 78
> > 2g, 1g, ...1m, and half matrix 13 * 6..
> >
> > and don't need to search than 78.
> >
> >
>
>  Again, it's not clear to me why there is an inherent limit at 2 GB.

above 2g, we will use 4g (chunk size, because if use 4g gran_size will
trim too much RAM). it will get the continous layout like
0-4g WB
3.5g-4G UC

instead of discrete that we want. (chunk_size=2g, gran_size=512M)
0-2g WB
2g-4g WB
3.5g-4g UC

YH

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH] x86: mtrr cleanup for converting continuous to discrete - auto detect
  2008-05-01 11:45                 ` Gabriel C
@ 2008-05-02  0:06                   ` Yinghai Lu
  2008-05-02  0:29                     ` Gabriel C
  0 siblings, 1 reply; 89+ messages in thread
From: Yinghai Lu @ 2008-05-02  0:06 UTC (permalink / raw)
  To: Gabriel C
  Cc: Andrew Morton, Ingo Molnar, H. Peter Anvin, Thomas Gleixner,
	Mika Fischer, linux-kernel@vger.kernel.org

On Thu, May 1, 2008 at 4:45 AM, Gabriel C <nix.or.die@googlemail.com> wrote:
> Yinghai Lu wrote:
>  > loop mtrr chunk_size and gran_size from 1M to 2G to find out optimal value.
>  >
>  > so user don't need to add mtrr_chunk_size and mtrr_gran_size,
>  >
>  > if optimal value is not found, print out all list to help select less optimal
>  > value.
>  >
>  > add mtrr_spare_reg_nr= so user could set 2 instead of 1, if the card need more entries.
>
>  WOW :)
>
>  With this patch all is working fine , no RAM is lost , X is fast ,
>  so far everything else seems to work fine. \o/
>
>  I will test on 32bit tomorrow and stress the box later on today to be sure everything works fine.
>
>  There is my dmesg , meminfo , mtrr output with this patch on top x86-latest :
>
>  http://frugalware.org/~crazy/mtrr_x86-latest/

while look at that you boot log, it seems there is one bug about hole
position. but I look that code, it should already be handled.

Can you send out boot msg and /proc/mtrr when using
disable_mtrr_cleanup command line?

Thanks

Yinghai Lu

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH] x86: mtrr cleanup for converting continuous to discrete - auto detect
  2008-05-02  0:06                   ` Yinghai Lu
@ 2008-05-02  0:29                     ` Gabriel C
  2008-05-02  0:35                       ` Yinghai Lu
  0 siblings, 1 reply; 89+ messages in thread
From: Gabriel C @ 2008-05-02  0:29 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Andrew Morton, Ingo Molnar, H. Peter Anvin, Thomas Gleixner,
	Mika Fischer, linux-kernel@vger.kernel.org

Yinghai Lu wrote:
> On Thu, May 1, 2008 at 4:45 AM, Gabriel C <nix.or.die@googlemail.com> wrote:
>> Yinghai Lu wrote:
>>  > loop mtrr chunk_size and gran_size from 1M to 2G to find out optimal value.
>>  >
>>  > so user don't need to add mtrr_chunk_size and mtrr_gran_size,
>>  >
>>  > if optimal value is not found, print out all list to help select less optimal
>>  > value.
>>  >
>>  > add mtrr_spare_reg_nr= so user could set 2 instead of 1, if the card need more entries.
>>
>>  WOW :)
>>
>>  With this patch all is working fine , no RAM is lost , X is fast ,
>>  so far everything else seems to work fine. \o/
>>
>>  I will test on 32bit tomorrow and stress the box later on today to be sure everything works fine.
>>
>>  There is my dmesg , meminfo , mtrr output with this patch on top x86-latest :
>>
>>  http://frugalware.org/~crazy/mtrr_x86-latest/
> 
> while look at that you boot log, it seems there is one bug about hole
> position. but I look that code, it should already be handled.
> 
> Can you send out boot msg and /proc/mtrr when using
> disable_mtrr_cleanup command line?

Sure , there it is :

http://frugalware.org/~crazy/mtrr_x86-latest/dmesg2
http://frugalware.org/~crazy/mtrr_x86-latest/proc_mtrr2

I'm still using this version of your patch , didn't got any time to update to v2.
If you want me to try v2 tell me , I have some free time in about 30 minutes.

> 
> Thanks
> 
> Yinghai Lu
> 

Gabriel

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH] x86: mtrr cleanup for converting continuous to discrete - auto detect
  2008-05-02  0:29                     ` Gabriel C
@ 2008-05-02  0:35                       ` Yinghai Lu
  2008-05-02  1:18                         ` Gabriel C
  0 siblings, 1 reply; 89+ messages in thread
From: Yinghai Lu @ 2008-05-02  0:35 UTC (permalink / raw)
  To: Gabriel C
  Cc: Andrew Morton, Ingo Molnar, H. Peter Anvin, Thomas Gleixner,
	Mika Fischer, linux-kernel@vger.kernel.org

On Thu, May 1, 2008 at 5:29 PM, Gabriel C <nix.or.die@googlemail.com> wrote:
>
> Yinghai Lu wrote:
>  > On Thu, May 1, 2008 at 4:45 AM, Gabriel C <nix.or.die@googlemail.com> wrote:
>  >> Yinghai Lu wrote:
>  >>  > loop mtrr chunk_size and gran_size from 1M to 2G to find out optimal value.
>  >>  >
>  >>  > so user don't need to add mtrr_chunk_size and mtrr_gran_size,
>  >>  >
>  >>  > if optimal value is not found, print out all list to help select less optimal
>  >>  > value.
>  >>  >
>  >>  > add mtrr_spare_reg_nr= so user could set 2 instead of 1, if the card need more entries.
>  >>
>  >>  WOW :)
>  >>
>  >>  With this patch all is working fine , no RAM is lost , X is fast ,
>  >>  so far everything else seems to work fine. \o/
>  >>
>  >>  I will test on 32bit tomorrow and stress the box later on today to be sure everything works fine.
>  >>
>  >>  There is my dmesg , meminfo , mtrr output with this patch on top x86-latest :
>  >>
>  >>  http://frugalware.org/~crazy/mtrr_x86-latest/
>  >
>  > while look at that you boot log, it seems there is one bug about hole
>  > position. but I look that code, it should already be handled.
>  >
>  > Can you send out boot msg and /proc/mtrr when using
>  > disable_mtrr_cleanup command line?
>
>  Sure , there it is :
>
>  http://frugalware.org/~crazy/mtrr_x86-latest/dmesg2
>  http://frugalware.org/~crazy/mtrr_x86-latest/proc_mtrr2
>
>  I'm still using this version of your patch , didn't got any time to update to v2.
>  If you want me to try v2 tell me , I have some free time in about 30 minutes.
>
original

reg00: base=0xd0000000 (3328MB), size= 256MB: uncachable, count=1
reg01: base=0xe0000000 (3584MB), size= 512MB: uncachable, count=1
reg02: base=0x00000000 (   0MB), size=4096MB: write-back, count=1
reg03: base=0x100000000 (4096MB), size= 512MB: write-back, count=1
reg04: base=0x120000000 (4608MB), size= 128MB: write-back, count=1
reg05: base=0x128000000 (4736MB), size=  64MB: write-back, count=1
reg06: base=0xcf600000 (3318MB), size=   2MB: uncachable, count=1

after clean up

reg00: base=0x00000000 (   0MB), size=2048MB: write-back, count=1
reg01: base=0x80000000 (2048MB), size=1024MB: write-back, count=1
reg02: base=0xc0000000 (3072MB), size= 256MB: write-back, count=1
reg03: base=0xcfe00000 (3326MB), size=   2MB: uncachable, count=1
reg04: base=0x100000000 (4096MB), size= 512MB: write-back, count=1
reg05: base=0x120000000 (4608MB), size= 256MB: write-back, count=1
reg06: base=0x12c000000 (4800MB), size=  64MB: uncachable, count=1

so the hole base is not right, it should be at 3318MB instead of 3326MB.
please hold to test v3 ...

Thanks

Yinghai Lu

^ permalink raw reply	[flat|nested] 89+ messages in thread

* [PATCH] x86: mtrr cleanup for converting continuous to discrete - auto detect v3
  2008-05-01 18:57                 ` [PATCH] x86: mtrr cleanup for converting continuous to discrete - auto detect v2 Yinghai Lu
  2008-05-01 19:42                   ` H. Peter Anvin
@ 2008-05-02  0:52                   ` Yinghai Lu
  2008-05-02  9:40                     ` [PATCH] x86: mtrr cleanup for converting continuous to discrete - auto detect v4 Yinghai Lu
  1 sibling, 1 reply; 89+ messages in thread
From: Yinghai Lu @ 2008-05-02  0:52 UTC (permalink / raw)
  To: Andrew Morton, Ingo Molnar, H. Peter Anvin, Thomas Gleixner,
	Gabriel C, Mika Fischer
  Cc: linux-kernel@vger.kernel.org


loop mtrr chunk_size and gran_size from 1M to 2G to find out optimal value.

so user don't need to add mtrr_chunk_size and mtrr_gran_size

if optimal value is not found, print out all list to help select less optimal
value.

add mtrr_spare_reg_nr= so user could set 2 instead of 1, if the card need more entries.

v2: find the one with more spare entries
v3: fix hole_basek offset

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

Index: linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/main.c
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
@@ -610,28 +610,6 @@ static struct sysdev_driver mtrr_sysdev_
 	.resume		= mtrr_restore,
 };
 
-#ifdef CONFIG_MTRR_SANITIZER
-static int enable_mtrr_cleanup __initdata = CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT;
-#else
-static int enable_mtrr_cleanup __initdata = -1;
-#endif
-
-static int __init disable_mtrr_cleanup_setup(char *str)
-{
-	if (enable_mtrr_cleanup != -1)
-		enable_mtrr_cleanup = 0;
-	return 0;
-}
-early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);
-
-static int __init enable_mtrr_cleanup_setup(char *str)
-{
-	if (enable_mtrr_cleanup != -1)
-		enable_mtrr_cleanup = 1;
-	return 0;
-}
-early_param("enble_mtrr_cleanup", enable_mtrr_cleanup_setup);
-
 /* should be related to MTRR_VAR_RANGES nums */
 #define RANGE_NUM 256
 
@@ -702,13 +680,15 @@ subtract_range(struct res_range *range, 
 			continue;
 		}
 
-		if (start <= range[j].start && end < range[j].end && range[j].start < end + 1) {
+		if (start <= range[j].start && end < range[j].end &&
+		    range[j].start < end + 1) {
 			range[j].start = end + 1;
 			continue;
 		}
 
 
-		if (start > range[j].start && end >= range[j].end && range[j].end > start - 1) {
+		if (start > range[j].start && end >= range[j].end &&
+		    range[j].end > start - 1) {
 			range[j].end = start - 1;
 			continue;
 		}
@@ -743,18 +723,119 @@ static int __init cmp_range(const void *
 	return start1 - start2;
 }
 
+struct var_mtrr_range_state {
+	unsigned long base_pfn;
+	unsigned long size_pfn;
+	mtrr_type type;
+};
+
+struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
+
+static int __init
+x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
+		       unsigned long extra_remove_base,
+		       unsigned long extra_remove_size)
+{
+	unsigned long i, base, size;
+	mtrr_type type;
+
+	for (i = 0; i < num_var_ranges; i++) {
+		type = range_state[i].type;
+		if (type != MTRR_TYPE_WRBACK)
+			continue;
+		base = range_state[i].base_pfn;
+		size = range_state[i].size_pfn;
+		nr_range = add_range_with_merge(range, nr_range, base,
+						base + size - 1);
+	}
+	printk(KERN_DEBUG "After WB checking\n");
+	for (i = 0; i < nr_range; i++)
+		printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
+				 range[i].start, range[i].end + 1);
+
+	/* take out UC ranges */
+	for (i = 0; i < num_var_ranges; i++) {
+		type = range_state[i].type;
+		if (type != MTRR_TYPE_UNCACHABLE)
+			continue;
+		size = range_state[i].size_pfn;
+		if (!size)
+			continue;
+		base = range_state[i].base_pfn;
+		subtract_range(range, base, base + size - 1);
+	}
+	if (extra_remove_size)
+		subtract_range(range, extra_remove_base,
+				 extra_remove_base + extra_remove_size  - 1);
+
+	/* get new range num */
+	nr_range = 0;
+	for (i = 0; i < RANGE_NUM; i++) {
+		if (!range[i].end)
+			continue;
+		nr_range++;
+	}
+	printk(KERN_DEBUG "After UC checking\n");
+	for (i = 0; i < nr_range; i++)
+		printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
+			 range[i].start, range[i].end + 1);
+
+	/* sort the ranges */
+	sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
+	printk(KERN_DEBUG "After sorting\n");
+	for (i = 0; i < nr_range; i++)
+		printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
+				 range[i].start, range[i].end + 1);
+
+	return nr_range;
+}
+
+static struct res_range __initdata range[RANGE_NUM];
+
+#ifdef CONFIG_MTRR_SANITIZER
+
+static unsigned long __init sum_ranges(struct res_range *range, int nr_range)
+{
+	unsigned long sum;
+	int i;
+
+	sum = 0;
+	for (i = 0; i < nr_range; i++)
+		sum += range[i].end + 1 - range[i].start;
+
+	return sum;
+}
+
+static int enable_mtrr_cleanup __initdata =
+				 CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT;
+
+static int __init disable_mtrr_cleanup_setup(char *str)
+{
+	if (enable_mtrr_cleanup != -1)
+		enable_mtrr_cleanup = 0;
+	return 0;
+}
+early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);
+
+static int __init enable_mtrr_cleanup_setup(char *str)
+{
+	if (enable_mtrr_cleanup != -1)
+		enable_mtrr_cleanup = 1;
+	return 0;
+}
+early_param("enble_mtrr_cleanup", enable_mtrr_cleanup_setup);
+
 struct var_mtrr_state {
 	unsigned long	range_startk;
 	unsigned long	range_sizek;
 	unsigned long	chunk_sizek;
 	unsigned long	gran_sizek;
 	unsigned int	reg;
-	unsigned int	address_bits;
 };
 
 static void __init
 set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
-		unsigned char type, unsigned address_bits)
+		unsigned char type, unsigned int address_bits)
 {
 	u32 base_lo, base_hi, mask_lo, mask_hi;
 	u64 base, mask;
@@ -781,10 +862,34 @@ set_var_mtrr(unsigned int reg, unsigned 
 	fill_mtrr_var_range(reg, base_lo, base_hi, mask_lo, mask_hi);
 }
 
+static void __init
+save_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
+		unsigned char type)
+{
+	range_state[reg].base_pfn = basek >> (PAGE_SHIFT - 10);
+	range_state[reg].size_pfn = sizek >> (PAGE_SHIFT - 10);
+	range_state[reg].type = type;
+}
+
+static void __init
+set_var_mtrr_all(unsigned int address_bits)
+{
+	unsigned long basek, sizek;
+	unsigned char type;
+	unsigned int reg;
+
+	for (reg = 0; reg < num_var_ranges; reg++) {
+		basek = range_state[reg].base_pfn << (PAGE_SHIFT - 10);
+		sizek = range_state[reg].size_pfn << (PAGE_SHIFT - 10);
+		type = range_state[reg].type;
+
+		set_var_mtrr(reg, basek, sizek, type, address_bits);
+	}
+}
+
 static unsigned int __init
 range_to_mtrr(unsigned int reg, unsigned long range_startk,
-	      unsigned long range_sizek, unsigned char type,
-	      unsigned address_bits)
+	      unsigned long range_sizek, unsigned char type)
 {
 	if (!range_sizek || (reg >= num_var_ranges))
 		return reg;
@@ -803,12 +908,12 @@ range_to_mtrr(unsigned int reg, unsigned
 			align = max_align;
 
 		sizek = 1 << align;
-		printk(KERN_INFO "Setting variable MTRR %d, base: %ldMB, range: %ldMB, type %s\n",
+		printk(KERN_DEBUG "Setting variable MTRR %d, base: %ldMB, range: %ldMB, type %s\n",
 			reg, range_startk >> 10, sizek >> 10,
 			(type == MTRR_TYPE_UNCACHABLE)?"UC":
 			    ((type == MTRR_TYPE_WRBACK)?"WB":"Other")
 			);
-		set_var_mtrr(reg++, range_startk, sizek, type, address_bits);
+		save_var_mtrr(reg++, range_startk, sizek, type);
 		range_startk += sizek;
 		range_sizek -= sizek;
 		if (reg >= num_var_ranges)
@@ -817,10 +922,12 @@ range_to_mtrr(unsigned int reg, unsigned
 	return reg;
 }
 
-static void __init
-range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek)
+static unsigned __init
+range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek,
+			unsigned long sizek)
 {
 	unsigned long hole_basek, hole_sizek;
+	unsigned long second_basek, second_sizek;
 	unsigned long range0_basek, range0_sizek;
 	unsigned long range_basek, range_sizek;
 	unsigned long chunk_sizek;
@@ -828,64 +935,87 @@ range_to_mtrr_with_hole(struct var_mtrr_
 
 	hole_basek = 0;
 	hole_sizek = 0;
+	second_basek = 0;
+	second_sizek = 0;
 	chunk_sizek = state->chunk_sizek;
 	gran_sizek = state->gran_sizek;
 
 	/* align with gran size, prevent small block used up MTRRs */
 	range_basek = ALIGN(state->range_startk, gran_sizek);
 	if ((range_basek > basek) && basek)
-		return;
-	range_sizek = ALIGN(state->range_sizek - (range_basek - state->range_startk), gran_sizek);
+		return second_sizek;
+	state->range_sizek -= (range_basek - state->range_startk);
+	range_sizek = ALIGN(state->range_sizek, gran_sizek);
 
-	while (range_basek + range_sizek > (state->range_startk + state->range_sizek)) {
+	while (range_sizek > state->range_sizek) {
 		range_sizek -= gran_sizek;
 		if (!range_sizek)
-			return;
+			return second_sizek;
 	}
-	state->range_startk = range_basek;
 	state->range_sizek = range_sizek;
 
 	/* try to append some small hole */
 	range0_basek = state->range_startk;
 	range0_sizek = ALIGN(state->range_sizek, chunk_sizek);
 	if (range0_sizek == state->range_sizek) {
-			printk(KERN_INFO "rangeX: %016lx - %016lx\n", range0_basek<<10, (range0_basek + state->range_sizek)<<10);
-			state->reg = range_to_mtrr(state->reg, range0_basek,
-				state->range_sizek, MTRR_TYPE_WRBACK, state->address_bits);
-		return;
-	} else if (basek) {
-	    while (range0_basek + range0_sizek - chunk_sizek > basek) {
+		printk(KERN_DEBUG "rangeX: %016lx - %016lx\n", range0_basek<<10,
+				(range0_basek + state->range_sizek)<<10);
+		state->reg = range_to_mtrr(state->reg, range0_basek,
+				state->range_sizek, MTRR_TYPE_WRBACK);
+		return second_sizek;
+	}
+
+	range0_sizek -= chunk_sizek;
+	if (range0_sizek && sizek) {
+	    while (range0_basek + range0_sizek > (basek + sizek)) {
 		range0_sizek -= chunk_sizek;
 		if (!range0_sizek)
 			break;
 	    }
 	}
 
+	if (range0_sizek) {
+		printk(KERN_DEBUG "range0: %016lx - %016lx\n", range0_basek<<10,
+				(range0_basek + range0_sizek)<<10);
+		state->reg = range_to_mtrr(state->reg, range0_basek,
+				range0_sizek, MTRR_TYPE_WRBACK);
 
-	if (range0_sizek > chunk_sizek)
-		range0_sizek -= chunk_sizek;
-	printk(KERN_INFO "range0: %016lx - %016lx\n", range0_basek<<10, (range0_basek + range0_sizek)<<10);
-	state->reg = range_to_mtrr(state->reg, range0_basek,
-			range0_sizek, MTRR_TYPE_WRBACK, state->address_bits);
+	}
 
 	range_basek = range0_basek + range0_sizek;
 	range_sizek = chunk_sizek;
 
-	if ((range_sizek - (state->range_sizek - range0_sizek) < (chunk_sizek >> 1)) &&
-	    (range_basek + range_sizek <= basek)) {
-		hole_sizek = range_sizek - (state->range_sizek - range0_sizek);
-		hole_basek = range_basek + range_sizek - hole_sizek;
-	} else
+	if (range_basek + range_sizek > basek &&
+	    range_basek + range_sizek <= (basek + sizek)) {
+		second_basek = basek;
+		second_sizek = range_basek + range_sizek - basek;
+	}
+
+	if (range_sizek - (state->range_sizek - range0_sizek) - second_sizek
+	     < (chunk_sizek >> 1)) {
+		hole_sizek = range_sizek - (state->range_sizek - range0_sizek)
+				 - second_sizek;
+		hole_basek = range_basek + range_sizek - hole_sizek
+				 - second_sizek;
+	} else {
 		range_sizek = state->range_sizek - range0_sizek;
+		second_basek = 0;
+		second_sizek = 0;
+	}
 
-	printk(KERN_INFO "range: %016lx - %016lx\n", range_basek<<10, (range_basek + range_sizek)<<10);
-	state->reg = range_to_mtrr(state->reg, range_basek,
-			range_sizek, MTRR_TYPE_WRBACK, state->address_bits);
+	printk(KERN_DEBUG "range: %016lx - %016lx\n", range_basek<<10,
+			 (range_basek + range_sizek)<<10);
+	state->reg = range_to_mtrr(state->reg, range_basek, range_sizek,
+					 MTRR_TYPE_WRBACK);
 	if (hole_sizek) {
-		printk(KERN_INFO "hole: %016lx - %016lx\n", hole_basek<<10, (hole_basek + hole_sizek)<<10);
-		state->reg = range_to_mtrr(state->reg, hole_basek,
-				hole_sizek, MTRR_TYPE_UNCACHABLE, state->address_bits);
+		printk(KERN_DEBUG "hole: %016lx - %016lx\n", hole_basek<<10,
+				 (hole_basek + hole_sizek)<<10);
+		state->reg = range_to_mtrr(state->reg, hole_basek, hole_sizek,
+						 MTRR_TYPE_UNCACHABLE);
+
 	}
+
+	return second_sizek;
 }
 
 static void __init
@@ -893,6 +1023,7 @@ set_var_mtrr_range(struct var_mtrr_state
 		   unsigned long size_pfn)
 {
 	unsigned long basek, sizek;
+	unsigned long second_sizek = 0;
 
 	if (state->reg >= num_var_ranges)
 		return;
@@ -901,21 +1032,19 @@ set_var_mtrr_range(struct var_mtrr_state
 	sizek = size_pfn << (PAGE_SHIFT - 10);
 
 	/* See if I can merge with the last range */
-	if ((basek <= 1024) || (state->range_startk + state->range_sizek == basek)) {
+	if ((basek <= 1024) ||
+	    (state->range_startk + state->range_sizek == basek)) {
 		unsigned long endk = basek + sizek;
 		state->range_sizek = endk - state->range_startk;
 		return;
 	}
 	/* Write the range mtrrs */
-	if (state->range_sizek != 0) {
-		range_to_mtrr_with_hole(state, basek);
+	if (state->range_sizek != 0)
+		second_sizek = range_to_mtrr_with_hole(state, basek, sizek);
 
-		state->range_startk = 0;
-		state->range_sizek = 0;
-	}
 	/* Allocate an msr */
-	state->range_startk = basek;
-	state->range_sizek  = sizek;
+	state->range_startk = basek + second_sizek;
+	state->range_sizek  = sizek - second_sizek;
 }
 
 /* mininum size of mtrr block that can take hole */
@@ -931,7 +1060,7 @@ static int __init parse_mtrr_chunk_size_
 early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt);
 
 /* granity of mtrr of block */
-static u64 mtrr_gran_size __initdata = (1ULL<<20);
+static u64 mtrr_gran_size __initdata;
 
 static int __init parse_mtrr_gran_size_opt(char *p)
 {
@@ -942,91 +1071,79 @@ static int __init parse_mtrr_gran_size_o
 }
 early_param("mtrr_gran_size", parse_mtrr_gran_size_opt);
 
-static void __init
+static int nr_mtrr_spare_reg __initdata =
+				 CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT;
+
+static int __init parse_mtrr_spare_reg(char *arg)
+{
+	if (arg)
+		nr_mtrr_spare_reg = simple_strtoul(arg, NULL, 0);
+	return 0;
+}
+
+early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg);
+
+static int __init
 x86_setup_var_mtrrs(struct res_range *range, int nr_range,
-		    unsigned address_bits)
+		    u64 chunk_size, u64 gran_size)
 {
 	struct var_mtrr_state var_state;
 	int i;
+	int num_reg;
 
 	var_state.range_startk	= 0;
 	var_state.range_sizek	= 0;
 	var_state.reg		= 0;
-	var_state.address_bits	= address_bits;
-	var_state.chunk_sizek	= mtrr_chunk_size >> 10;
-	var_state.gran_sizek	= mtrr_gran_size >> 10;
+	var_state.chunk_sizek	= chunk_size >> 10;
+	var_state.gran_sizek	= gran_size >> 10;
+
+	memset(range_state, 0, sizeof(range_state));
 
 	/* Write the range etc */
 	for (i = 0; i < nr_range; i++)
-		set_var_mtrr_range(&var_state, range[i].start, range[i].end - range[i].start + 1);
+		set_var_mtrr_range(&var_state, range[i].start,
+				   range[i].end - range[i].start + 1);
 
 	/* Write the last range */
-	range_to_mtrr_with_hole(&var_state, 0);
-	printk(KERN_INFO "DONE variable MTRRs\n");
+	if (var_state.range_sizek != 0)
+		range_to_mtrr_with_hole(&var_state, 0, 0);
+	printk(KERN_DEBUG "DONE variable MTRRs\n");
+
+	num_reg = var_state.reg;
 	/* Clear out the extra MTRR's */
 	while (var_state.reg < num_var_ranges) {
-		set_var_mtrr(var_state.reg, 0, 0, 0, var_state.address_bits);
+		save_var_mtrr(var_state.reg, 0, 0, 0);
 		var_state.reg++;
 	}
-}
-
-static int __init
-x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
-		       unsigned long extra_remove_base,
-		       unsigned long extra_remove_size)
-{
-	unsigned long i, base, size;
-	mtrr_type type;
-
-	for (i = 0; i < num_var_ranges; i++) {
-		mtrr_if->get(i, &base, &size, &type);
-		if (type != MTRR_TYPE_WRBACK)
-			continue;
-		nr_range = add_range_with_merge(range, nr_range, base, base + size - 1);
-	}
-	printk(KERN_INFO "After WB checking\n");
-	for (i = 0; i < nr_range; i++)
-		printk(KERN_INFO "MTRR MAP PFN: %016lx - %016lx\n", range[i].start, range[i].end + 1);
 
-	/* take out UC ranges */
-	for (i = 0; i < num_var_ranges; i++) {
-		mtrr_if->get(i, &base, &size, &type);
-		if (type != MTRR_TYPE_UNCACHABLE)
-			continue;
-		if (!size)
-			continue;
-		subtract_range(range, base, base + size - 1);
-	}
-	if (extra_remove_size)
-		subtract_range(range, extra_remove_base,  extra_remove_base + extra_remove_size  - 1);
+	return num_reg;
+}
 
-	/* get new range num */
-	nr_range = 0;
-	for (i = 0; i < RANGE_NUM; i++) {
-		if (!range[i].end)
-			continue;
-		nr_range++;
-	}
-	printk(KERN_INFO "After UC checking\n");
-	for (i = 0; i < nr_range; i++)
-		printk(KERN_INFO "MTRR MAP PFN: %016lx - %016lx\n", range[i].start, range[i].end + 1);
+struct mtrr_cleanup_result {
+	unsigned long gran_sizek;
+	unsigned long chunk_sizek;
+	unsigned long lose_cover_sizek;
+	unsigned int num_reg;
+	int bad;
+};
 
-	/* sort the ranges */
-	sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
-	printk(KERN_INFO "After sorting\n");
-	for (i = 0; i < nr_range; i++)
-		printk(KERN_INFO "MTRR MAP PFN: %016lx - %016lx\n", range[i].start, range[i].end + 1);
+/* 2G, 1G, ..., 1M, so need 12 items and half maxtrix need 13*6 */
+#define NUM_RESULT 78
 
-	return nr_range;
-}
+static struct mtrr_cleanup_result __initdata result[NUM_RESULT];
+static struct res_range __initdata range_new[RANGE_NUM];
+static unsigned long __initdata min_loss_pfn[RANGE_NUM];
 
 static int __init mtrr_cleanup(unsigned address_bits)
 {
 	unsigned long extra_remove_base, extra_remove_size;
 	unsigned long i, base, size, def, dummy;
-	struct res_range range[RANGE_NUM];
 	mtrr_type type;
-	int nr_range;
+	int nr_range, nr_range_new;
+	u64 chunk_size, gran_size;
+	unsigned long range_sums, range_sums_new;
+	int index_good;
+	int num_reg_good;
 
 	/* extra one for all 0 */
 	int num[MTRR_NUM_TYPES + 1];
@@ -1038,10 +1155,20 @@ static int __init mtrr_cleanup(unsigned 
 	if (def != MTRR_TYPE_UNCACHABLE)
 		return 0;
 
+	/* get it and store it aside */
+	memset(range_state, 0, sizeof(range_state));
+	for (i = 0; i < num_var_ranges; i++) {
+		mtrr_if->get(i, &base, &size, &type);
+		range_state[i].base_pfn = base;
+		range_state[i].size_pfn = size;
+		range_state[i].type = type;
+	}
+
 	/* check entries number */
 	memset(num, 0, sizeof(num));
 	for (i = 0; i < num_var_ranges; i++) {
-		mtrr_if->get(i, &base, &size, &type);
+		type = range_state[i].type;
+		size = range_state[i].size_pfn;
 		if (type >= MTRR_NUM_TYPES)
 			continue;
 		if (!size)
@@ -1064,13 +1191,150 @@ static int __init mtrr_cleanup(unsigned 
 		extra_remove_base = 1 << (32 - PAGE_SHIFT);
 		extra_remove_size = (mtrr_tom2>>PAGE_SHIFT) - extra_remove_base;
 	}
-	nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base, extra_remove_size);
+	nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base,
+					  extra_remove_size);
+	range_sums = sum_ranges(range, nr_range);
+	printk(KERN_INFO "total RAM coverred: %ldM\n",
+			 range_sums >> (20 - PAGE_SHIFT));
+
+	if (mtrr_chunk_size && mtrr_gran_size) {
+		int num_reg;
+
+		/* convert ranges to var ranges state */
+		num_reg = x86_setup_var_mtrrs(range, nr_range, mtrr_chunk_size,
+						 mtrr_gran_size);
+
+		/* we got new setting in range_state, check it */
+		memset(range_new, 0, sizeof(range_new));
+		nr_range_new = x86_get_mtrr_mem_range(range_new, 0,
+					 extra_remove_base, extra_remove_size);
+		range_sums_new = sum_ranges(range_new, nr_range_new);
+
+		i = 0;
+		result[i].chunk_sizek = mtrr_chunk_size >> 10;
+		result[i].gran_sizek = mtrr_gran_size >> 10;
+		result[i].num_reg = num_reg;
+		if (range_sums < range_sums_new) {
+			result[i].lose_cover_sizek = (range_sums_new - range_sums) << (PAGE_SHIFT - 10);
+			result[i].bad = 1;
+		} else
+			result[i].lose_cover_sizek = (range_sums - range_sums_new) << (PAGE_SHIFT - 10);
+
+		printk(KERN_INFO " %sgran_size: %ldM  \tchunk_size: %ldM  \t",
+			 result[i].bad?" BAD ":"", result[i].gran_sizek >> 10,
+			 result[i].chunk_sizek >> 10);
+		printk(KERN_CONT "num_reg: %d  \tlose cover RAM: %s%ldM \n",
+			 result[i].num_reg, result[i].bad?"-":"",
+			 result[i].lose_cover_sizek >> 10);
+		if (!result[i].bad) {
+			set_var_mtrr_all(address_bits);
+			return 1;
+		}
+		printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, will find optimal one\n");
+		memset(result, 0, sizeof(result[0]));
+	}
 
-	/* convert ranges to var ranges state */
-	x86_setup_var_mtrrs(range, nr_range, address_bits);
+	i = 0;
+	memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn));
+	memset(result, 0, sizeof(result));
+	for (gran_size = (1ULL<<20); gran_size < (1ULL<<32); gran_size <<= 1) {
+		for (chunk_size = gran_size; chunk_size < (1ULL<<32);
+		     chunk_size <<= 1) {
+			int num_reg;
+
+			printk(KERN_INFO "\ngran_size: %lldM   chunk_size_size: %lldM\n", gran_size >> 20, chunk_size >> 20);
+			if (i >= NUM_RESULT)
+				continue;
+
+			/* convert ranges to var ranges state */
+			num_reg = x86_setup_var_mtrrs(range, nr_range,
+							 chunk_size, gran_size);
+
+			/* we got new setting in range_state, check it */
+			memset(range_new, 0, sizeof(range_new));
+			nr_range_new = x86_get_mtrr_mem_range(range_new, 0,
+					 extra_remove_base, extra_remove_size);
+			range_sums_new = sum_ranges(range_new, nr_range_new);
+
+			result[i].chunk_sizek = chunk_size >> 10;
+			result[i].gran_sizek = gran_size >> 10;
+			result[i].num_reg = num_reg;
+			if (range_sums < range_sums_new) {
+				result[i].lose_cover_sizek = (range_sums_new - range_sums) << (PAGE_SHIFT - 10);
+				result[i].bad = 1;
+			} else
+				result[i].lose_cover_sizek = (range_sums - range_sums_new) << (PAGE_SHIFT - 10);
+
+			if (!result[i].bad && (range_sums - range_sums_new <
+				 min_loss_pfn[num_reg]))
+					min_loss_pfn[num_reg] = range_sums - range_sums_new;
+			i++;
+		}
+	}
 
-	return 1;
+	/* print out all */
+	for (i = 0; i < NUM_RESULT; i++) {
+		printk(KERN_INFO "%sgran_size: %ldM  \tchunk_size: %ldM  \t",
+			 result[i].bad?"*BAD* ":" ", result[i].gran_sizek >> 10,
+			 result[i].chunk_sizek >> 10);
+		printk(KERN_CONT "num_reg: %d  \tlose cover RAM: %s%ldM \n",
+			 result[i].num_reg, result[i].bad?"-":"",
+			 result[i].lose_cover_sizek >> 10);
+	}
+
+	/* try to find the optimal index */
+	if (nr_mtrr_spare_reg >= num_var_ranges)
+		nr_mtrr_spare_reg = num_var_ranges - 1;
+	num_reg_good = -1;
+	for (i = 1; i < num_var_ranges + 1 - nr_mtrr_spare_reg; i++) {
+		if (!min_loss_pfn[i]) {
+			num_reg_good = i;
+			break;
+		}
+	}
+
+	index_good = -1;
+	if (num_reg_good != -1) {
+		for (i = 0; i < NUM_RESULT; i++) {
+			if (!result[i].bad && result[i].num_reg == num_reg_good) {
+				index_good = i;
+				break;
+			}
+		}
+	}
+
+	if (index_good != -1) {
+		printk(KERN_INFO "Found optimal setting for mtrr clean up\n");
+		i = index_good;
+		printk(KERN_INFO "gran_size: %ldM  \tchunk_size: %ldM  \t",
+				result[i].gran_sizek >> 10,
+				result[i].chunk_sizek >> 10);
+		printk(KERN_CONT "num_reg: %d  \tlose cover RAM: %ldM \n",
+				result[i].num_reg,
+				result[i].lose_cover_sizek >> 10);
+		/* convert ranges to var ranges state */
+		chunk_size = result[i].chunk_sizek;
+		chunk_size <<= 10;
+		gran_size = result[i].gran_sizek;
+		gran_size <<= 10;
+		x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size);
+		set_var_mtrr_all(address_bits);
+		return 1;
+	}
+
+	printk(KERN_INFO "mtrr_cleanup: can not find optimal value\n");
+	printk(KERN_INFO "please specify mtrr_gran_size/mtrr_chunk_size\n");
+
+	return 0;
 }
+#else
+static int __init mtrr_cleanup(unsigned address_bits)
+{
+	return 0;
+}
+#endif
+
+static int __initdata changed_by_mtrr_cleanup;
 
 static int disable_mtrr_trim;
 
@@ -1111,7 +1375,8 @@ int __init amd_special_default_mtrr(void
 	return 0;
 }
 
-static u64 __init real_trim_memory(unsigned long start_pfn, unsigned long limit_pfn)
+static u64 __init real_trim_memory(unsigned long start_pfn,
+				   unsigned long limit_pfn)
 {
 	u64 trim_start, trim_size;
 	trim_start = start_pfn;
@@ -1138,9 +1403,8 @@ int __init mtrr_trim_uncached_memory(uns
 {
 	unsigned long i, base, size, highest_pfn = 0, def, dummy;
 	mtrr_type type;
-	struct res_range range[RANGE_NUM];
 	int nr_range;
-	u64 total_real_trim_size;
+	u64 total_trim_size;
 
 	/* extra one for all 0 */
 	int num[MTRR_NUM_TYPES + 1];
@@ -1155,11 +1419,22 @@ int __init mtrr_trim_uncached_memory(uns
 	if (def != MTRR_TYPE_UNCACHABLE)
 		return 0;
 
-	/* Find highest cached pfn */
+	/* get it and store it aside */
+	memset(range_state, 0, sizeof(range_state));
 	for (i = 0; i < num_var_ranges; i++) {
 		mtrr_if->get(i, &base, &size, &type);
+		range_state[i].base_pfn = base;
+		range_state[i].size_pfn = size;
+		range_state[i].type = type;
+	}
+
+	/* Find highest cached pfn */
+	for (i = 0; i < num_var_ranges; i++) {
+		type = range_state[i].type;
 		if (type != MTRR_TYPE_WRBACK)
 			continue;
+		base = range_state[i].base_pfn;
+		size = range_state[i].size_pfn;
 		if (highest_pfn < base + size)
 			highest_pfn = base + size;
 	}
@@ -1177,9 +1452,10 @@ int __init mtrr_trim_uncached_memory(uns
 	/* check entries number */
 	memset(num, 0, sizeof(num));
 	for (i = 0; i < num_var_ranges; i++) {
-		mtrr_if->get(i, &base, &size, &type);
+		type = range_state[i].type;
 		if (type >= MTRR_NUM_TYPES)
 			continue;
+		size = range_state[i].size_pfn;
 		if (!size)
 			type = MTRR_NUM_TYPES;
 		num[type]++;
@@ -1205,26 +1481,28 @@ int __init mtrr_trim_uncached_memory(uns
 	}
 	nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0);
 
-	total_real_trim_size = 0;
+	total_trim_size = 0;
 	/* check the head */
 	if (range[0].start)
-		total_real_trim_size += real_trim_memory(0, range[0].start);
+		total_trim_size += real_trim_memory(0, range[0].start);
 	/* check the holes */
 	for (i = 0; i < nr_range - 1; i++) {
 		if (range[i].end + 1 < range[i+1].start)
-			total_real_trim_size += real_trim_memory(range[i].end + 1, range[i+1].start);
+			total_trim_size += real_trim_memory(range[i].end + 1,
+							    range[i+1].start);
 	}
 	/* check the top */
 	i = nr_range - 1;
 	if (range[i].end + 1 < end_pfn)
-		total_real_trim_size += real_trim_memory(range[i].end + 1, end_pfn);
+		total_trim_size += real_trim_memory(range[i].end + 1,
+							 end_pfn);
 
-	if (total_real_trim_size) {
+	if (total_trim_size) {
 		printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover"
 			" all of memory, losing %lluMB of RAM.\n",
-			total_real_trim_size >> 20);
+			total_trim_size >> 20);
 
-		if (enable_mtrr_cleanup < 1)
+		if (!changed_by_mtrr_cleanup)
 			WARN_ON(1);
 
 		printk(KERN_INFO "update e820 for mtrr\n");
@@ -1314,8 +1592,10 @@ void __init mtrr_bp_init(void)
 		if (use_intel()) {
 			get_mtrr_state();
 
-			if (mtrr_cleanup(phys_addr))
+			if (mtrr_cleanup(phys_addr)) {
+				changed_by_mtrr_cleanup = 1;
 				mtrr_if->set_all();
+			}
 
 		}
 	}
@@ -1355,7 +1635,7 @@ static int __init mtrr_init_finialize(vo
 	if (!mtrr_if)
 		return 0;
 	if (use_intel()) {
-		if (enable_mtrr_cleanup < 1)
+		if (!changed_by_mtrr_cleanup)
 			mtrr_state_warn();
 	} else {
 		/* The CPUs haven't MTRR and seem to not support SMP. They have
Index: linux-2.6/Documentation/kernel-parameters.txt
===================================================================
--- linux-2.6.orig/Documentation/kernel-parameters.txt
+++ linux-2.6/Documentation/kernel-parameters.txt
@@ -613,8 +613,17 @@ and is between 256 and 4096 characters. 
 			that could hold holes aka. UC entries.
 
 	mtrr_gran_size=nn[KMG] [X86]
-			used for mtrr cleanup. It is granity of mtrr block.
-			Big value could prevent small alignment use up MTRRs.
+			Used for mtrr cleanup. It is granularity of mtrr block.
+			Default is 1.
+			Large value could prevent small alignment from
+			using up MTRRs.
+
+	mtrr_spare_reg_nr=n [X86]
+			Format: <integer>
+			Range: 0,7 : spare reg number
+			Default : 1
+			Used for mtrr cleanup. It is spare mtrr entries number.
+			Set to 2 or more if your graphical card needs more.
 
 	disable_mtrr_trim [X86, Intel and AMD only]
 			By default the kernel will trim any uncacheable
Index: linux-2.6/arch/x86/Kconfig
===================================================================
--- linux-2.6.orig/arch/x86/Kconfig
+++ linux-2.6/arch/x86/Kconfig
@@ -1100,6 +1100,15 @@ config MTRR_SANITIZER_ENABLE_DEFAULT
 	help
 	  Enable mtrr cleanup default value
 
+config MTRR_SANITIZER_SPARE_REG_NR_DEFAULT
+	int "MTRR cleanup spare reg num (0-7)"
+	range 0 7
+	default "1"
+	depends on MTRR_SANITIZER
+	help
+	  mtrr cleanup spare entries default, it can be changed via
+	  mtrr_spare_reg_nr=
+
 config X86_PAT
 	bool
 	prompt "x86 PAT support"

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH] x86: mtrr cleanup for converting continuous to discrete - auto detect
  2008-05-02  0:35                       ` Yinghai Lu
@ 2008-05-02  1:18                         ` Gabriel C
  2008-05-02  1:55                           ` Yinghai Lu
  0 siblings, 1 reply; 89+ messages in thread
From: Gabriel C @ 2008-05-02  1:18 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Andrew Morton, Ingo Molnar, H. Peter Anvin, Thomas Gleixner,
	Mika Fischer, linux-kernel@vger.kernel.org

Yinghai Lu wrote:
> On Thu, May 1, 2008 at 5:29 PM, Gabriel C <nix.or.die@googlemail.com> wrote:
>> Yinghai Lu wrote:
>>  > On Thu, May 1, 2008 at 4:45 AM, Gabriel C <nix.or.die@googlemail.com> wrote:
>>  >> Yinghai Lu wrote:
>>  >>  > loop mtrr chunk_size and gran_size from 1M to 2G to find out optimal value.
>>  >>  >
>>  >>  > so user don't need to add mtrr_chunk_size and mtrr_gran_size,
>>  >>  >
>>  >>  > if optimal value is not found, print out all list to help select less optimal
>>  >>  > value.
>>  >>  >
>>  >>  > add mtrr_spare_reg_nr= so user could set 2 instead of 1, if the card need more entries.
>>  >>
>>  >>  WOW :)
>>  >>
>>  >>  With this patch all is working fine , no RAM is lost , X is fast ,
>>  >>  so far everything else seems to work fine. \o/
>>  >>
>>  >>  I will test on 32bit tomorrow and stress the box later on today to be sure everything works fine.
>>  >>
>>  >>  There is my dmesg , meminfo , mtrr output with this patch on top x86-latest :
>>  >>
>>  >>  http://frugalware.org/~crazy/mtrr_x86-latest/
>>  >
>>  > while look at that you boot log, it seems there is one bug about hole
>>  > position. but I look that code, it should already be handled.
>>  >
>>  > Can you send out boot msg and /proc/mtrr when using
>>  > disable_mtrr_cleanup command line?
>>
>>  Sure , there it is :
>>
>>  http://frugalware.org/~crazy/mtrr_x86-latest/dmesg2
>>  http://frugalware.org/~crazy/mtrr_x86-latest/proc_mtrr2
>>
>>  I'm still using this version of your patch , didn't got any time to update to v2.
>>  If you want me to try v2 tell me , I have some free time in about 30 minutes.
>>
> original
> 
> reg00: base=0xd0000000 (3328MB), size= 256MB: uncachable, count=1
> reg01: base=0xe0000000 (3584MB), size= 512MB: uncachable, count=1
> reg02: base=0x00000000 (   0MB), size=4096MB: write-back, count=1
> reg03: base=0x100000000 (4096MB), size= 512MB: write-back, count=1
> reg04: base=0x120000000 (4608MB), size= 128MB: write-back, count=1
> reg05: base=0x128000000 (4736MB), size=  64MB: write-back, count=1
> reg06: base=0xcf600000 (3318MB), size=   2MB: uncachable, count=1
> 
> after clean up
> 
> reg00: base=0x00000000 (   0MB), size=2048MB: write-back, count=1
> reg01: base=0x80000000 (2048MB), size=1024MB: write-back, count=1
> reg02: base=0xc0000000 (3072MB), size= 256MB: write-back, count=1
> reg03: base=0xcfe00000 (3326MB), size=   2MB: uncachable, count=1
> reg04: base=0x100000000 (4096MB), size= 512MB: write-back, count=1
> reg05: base=0x120000000 (4608MB), size= 256MB: write-back, count=1
> reg06: base=0x12c000000 (4800MB), size=  64MB: uncachable, count=1
> 
> so the hole base is not right, it should be at 3318MB instead of 3326MB.
> please hold to test v3 ...

All is still fine here after an quick test ( BTW that version is really chatty :P )

http://frugalware.org/~crazy/mtrr_x86-latest/dmesg_v3

> 
> Thanks
> 
> Yinghai Lu
> 


Gabriel

^ permalink raw reply	[flat|nested] 89+ messages in thread

* Re: [PATCH] x86: mtrr cleanup for converting continuous to discrete - auto detect
  2008-05-02  1:18                         ` Gabriel C
@ 2008-05-02  1:55                           ` Yinghai Lu
  0 siblings, 0 replies; 89+ messages in thread
From: Yinghai Lu @ 2008-05-02  1:55 UTC (permalink / raw)
  To: Gabriel C
  Cc: Andrew Morton, Ingo Molnar, H. Peter Anvin, Thomas Gleixner,
	Mika Fischer, linux-kernel@vger.kernel.org

On Thu, May 1, 2008 at 6:18 PM, Gabriel C <nix.or.die@googlemail.com> wrote:
>
> Yinghai Lu wrote:
>  > On Thu, May 1, 2008 at 5:29 PM, Gabriel C <nix.or.die@googlemail.com> wrote:
>  >> Yinghai Lu wrote:
>  >>  > On Thu, May 1, 2008 at 4:45 AM, Gabriel C <nix.or.die@googlemail.com> wrote:
>  >>  >> Yinghai Lu wrote:
>  >>  >>  > loop mtrr chunk_size and gran_size from 1M to 2G to find out optimal value.
>  >>  >>  >
>  >>  >>  > so user don't need to add mtrr_chunk_size and mtrr_gran_size,
>  >>  >>  >
>  >>  >>  > if optimal value is not found, print out all list to help select less optimal
>  >>  >>  > value.
>  >>  >>  >
>  >>  >>  > add mtrr_spare_reg_nr= so user could set 2 instead of 1, if the card need more entries.
>  >>  >>
>  >>  >>  WOW :)
>  >>  >>
>  >>  >>  With this patch all is working fine , no RAM is lost , X is fast ,
>  >>  >>  so far everything else seems to work fine. \o/
>  >>  >>
>  >>  >>  I will test on 32bit tomorrow and stress the box later on today to be sure everything works fine.
>  >>  >>
>  >>  >>  There is my dmesg , meminfo , mtrr output with this patch on top x86-latest :
>  >>  >>
>  >>  >>  http://frugalware.org/~crazy/mtrr_x86-latest/
>  >>  >
>  >>  > while look at that you boot log, it seems there is one bug about hole
>  >>  > position. but I look that code, it should already be handled.
>  >>  >
>  >>  > Can you send out boot msg and /proc/mtrr when using
>  >>  > disable_mtrr_cleanup command line?
>  >>
>  >>  Sure , there it is :
>  >>
>  >>  http://frugalware.org/~crazy/mtrr_x86-latest/dmesg2
>  >>  http://frugalware.org/~crazy/mtrr_x86-latest/proc_mtrr2
>  >>
>  >>  I'm still using this version of your patch , didn't got any time to update to v2.
>  >>  If you want me to try v2 tell me , I have some free time in about 30 minutes.
>  >>
>  > original
>  >
>  > reg00: base=0xd0000000 (3328MB), size= 256MB: uncachable, count=1
>  > reg01: base=0xe0000000 (3584MB), size= 512MB: uncachable, count=1
>  > reg02: base=0x00000000 (   0MB), size=4096MB: write-back, count=1
>  > reg03: base=0x100000000 (4096MB), size= 512MB: write-back, count=1
>  > reg04: base=0x120000000 (4608MB), size= 128MB: write-back, count=1
>  > reg05: base=0x128000000 (4736MB), size=  64MB: write-back, count=1
>  > reg06: base=0xcf600000 (3318MB), size=   2MB: uncachable, count=1
>  >
>  > after clean up
>  >
>  > reg00: base=0x00000000 (   0MB), size=2048MB: write-back, count=1
>  > reg01: base=0x80000000 (2048MB), size=1024MB: write-back, count=1
>  > reg02: base=0xc0000000 (3072MB), size= 256MB: write-back, count=1
>  > reg03: base=0xcfe00000 (3326MB), size=   2MB: uncachable, count=1
>  > reg04: base=0x100000000 (4096MB), size= 512MB: write-back, count=1
>  > reg05: base=0x120000000 (4608MB), size= 256MB: write-back, count=1
>  > reg06: base=0x12c000000 (4800MB), size=  64MB: uncachable, count=1
>  >
>  > so the hole base is not right, it should be at 3318MB instead of 3326MB.
>  > please hold to test v3 ...
>
>  All is still fine here after an quick test ( BTW that version is really chatty :P )

thanks. yeah, but can you remove debug in command line to see if it
still talks too much.

YH

^ permalink raw reply	[flat|nested] 89+ messages in thread

* [PATCH] x86: mtrr cleanup for converting continuous to discrete - auto detect v4
  2008-05-02  0:52                   ` [PATCH] x86: mtrr cleanup for converting continuous to discrete - auto detect v3 Yinghai Lu
@ 2008-05-02  9:40                     ` Yinghai Lu
  0 siblings, 0 replies; 89+ messages in thread
From: Yinghai Lu @ 2008-05-02  9:40 UTC (permalink / raw)
  To: Andrew Morton, Ingo Molnar, H. Peter Anvin, Thomas Gleixner,
	Gabriel C, Mika Fischer
  Cc: linux-kernel@vger.kernel.org


loop mtrr chunk_size and gran_size from 1M to 2G to find out optimal value.

so user don't need to add mtrr_chunk_size and mtrr_gran_size

if optimal value is not found, print out all list to help select less optimal
value.

add mtrr_spare_reg_nr= so user could set 2 instead of 1, if the card need more entries.

v2: find the one with more spare entries
v3: fix hole_basek offset
v4: tight the compare between range and range_new
    loop stop with 4g

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

Index: linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/main.c
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
@@ -610,28 +610,6 @@ static struct sysdev_driver mtrr_sysdev_
 	.resume		= mtrr_restore,
 };
 
-#ifdef CONFIG_MTRR_SANITIZER
-static int enable_mtrr_cleanup __initdata = CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT;
-#else
-static int enable_mtrr_cleanup __initdata = -1;
-#endif
-
-static int __init disable_mtrr_cleanup_setup(char *str)
-{
-	if (enable_mtrr_cleanup != -1)
-		enable_mtrr_cleanup = 0;
-	return 0;
-}
-early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);
-
-static int __init enable_mtrr_cleanup_setup(char *str)
-{
-	if (enable_mtrr_cleanup != -1)
-		enable_mtrr_cleanup = 1;
-	return 0;
-}
-early_param("enble_mtrr_cleanup", enable_mtrr_cleanup_setup);
-
 /* should be related to MTRR_VAR_RANGES nums */
 #define RANGE_NUM 256
 
@@ -702,13 +680,15 @@ subtract_range(struct res_range *range, 
 			continue;
 		}
 
-		if (start <= range[j].start && end < range[j].end && range[j].start < end + 1) {
+		if (start <= range[j].start && end < range[j].end &&
+		    range[j].start < end + 1) {
 			range[j].start = end + 1;
 			continue;
 		}
 
 
-		if (start > range[j].start && end >= range[j].end && range[j].end > start - 1) {
+		if (start > range[j].start && end >= range[j].end &&
+		    range[j].end > start - 1) {
 			range[j].end = start - 1;
 			continue;
 		}
@@ -743,18 +723,123 @@ static int __init cmp_range(const void *
 	return start1 - start2;
 }
 
+struct var_mtrr_range_state {
+	unsigned long base_pfn;
+	unsigned long size_pfn;
+	mtrr_type type;
+};
+
+struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
+
+static int __init
+x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
+		       unsigned long extra_remove_base,
+		       unsigned long extra_remove_size)
+{
+	unsigned long i, base, size;
+	mtrr_type type;
+
+	for (i = 0; i < num_var_ranges; i++) {
+		type = range_state[i].type;
+		if (type != MTRR_TYPE_WRBACK)
+			continue;
+		base = range_state[i].base_pfn;
+		size = range_state[i].size_pfn;
+		nr_range = add_range_with_merge(range, nr_range, base,
+						base + size - 1);
+	}
+	printk(KERN_DEBUG "After WB checking\n");
+	for (i = 0; i < nr_range; i++)
+		printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
+				 range[i].start, range[i].end + 1);
+
+	/* take out UC ranges */
+	for (i = 0; i < num_var_ranges; i++) {
+		type = range_state[i].type;
+		if (type != MTRR_TYPE_UNCACHABLE)
+			continue;
+		size = range_state[i].size_pfn;
+		if (!size)
+			continue;
+		base = range_state[i].base_pfn;
+		subtract_range(range, base, base + size - 1);
+	}
+	if (extra_remove_size)
+		subtract_range(range, extra_remove_base,
+				 extra_remove_base + extra_remove_size  - 1);
+
+	/* get new range num */
+	nr_range = 0;
+	for (i = 0; i < RANGE_NUM; i++) {
+		if (!range[i].end)
+			continue;
+		nr_range++;
+	}
+	printk(KERN_DEBUG "After UC checking\n");
+	for (i = 0; i < nr_range; i++)
+		printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
+			 range[i].start, range[i].end + 1);
+
+	/* sort the ranges */
+	sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
+	printk(KERN_DEBUG "After sorting\n");
+	for (i = 0; i < nr_range; i++)
+		printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
+				 range[i].start, range[i].end + 1);
+
+	/* clear those is not used */
+	for (i = nr_range; i < RANGE_NUM; i++)
+		memset(&range[i], 0, sizeof(range[i]));
+
+	return nr_range;
+}
+
+static struct res_range __initdata range[RANGE_NUM];
+
+#ifdef CONFIG_MTRR_SANITIZER
+
+static unsigned long __init sum_ranges(struct res_range *range, int nr_range)
+{
+	unsigned long sum;
+	int i;
+
+	sum = 0;
+	for (i = 0; i < nr_range; i++)
+		sum += range[i].end + 1 - range[i].start;
+
+	return sum;
+}
+
+static int enable_mtrr_cleanup __initdata =
+				 CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT;
+
+static int __init disable_mtrr_cleanup_setup(char *str)
+{
+	if (enable_mtrr_cleanup != -1)
+		enable_mtrr_cleanup = 0;
+	return 0;
+}
+early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);
+
+static int __init enable_mtrr_cleanup_setup(char *str)
+{
+	if (enable_mtrr_cleanup != -1)
+		enable_mtrr_cleanup = 1;
+	return 0;
+}
+early_param("enble_mtrr_cleanup", enable_mtrr_cleanup_setup);
+
 struct var_mtrr_state {
 	unsigned long	range_startk;
 	unsigned long	range_sizek;
 	unsigned long	chunk_sizek;
 	unsigned long	gran_sizek;
 	unsigned int	reg;
-	unsigned int	address_bits;
 };
 
 static void __init
 set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
-		unsigned char type, unsigned address_bits)
+		unsigned char type, unsigned int address_bits)
 {
 	u32 base_lo, base_hi, mask_lo, mask_hi;
 	u64 base, mask;
@@ -781,10 +866,34 @@ set_var_mtrr(unsigned int reg, unsigned 
 	fill_mtrr_var_range(reg, base_lo, base_hi, mask_lo, mask_hi);
 }
 
+static void __init
+save_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
+		unsigned char type)
+{
+	range_state[reg].base_pfn = basek >> (PAGE_SHIFT - 10);
+	range_state[reg].size_pfn = sizek >> (PAGE_SHIFT - 10);
+	range_state[reg].type = type;
+}
+
+static void __init
+set_var_mtrr_all(unsigned int address_bits)
+{
+	unsigned long basek, sizek;
+	unsigned char type;
+	unsigned int reg;
+
+	for (reg = 0; reg < num_var_ranges; reg++) {
+		basek = range_state[reg].base_pfn << (PAGE_SHIFT - 10);
+		sizek = range_state[reg].size_pfn << (PAGE_SHIFT - 10);
+		type = range_state[reg].type;
+
+		set_var_mtrr(reg, basek, sizek, type, address_bits);
+	}
+}
+
 static unsigned int __init
 range_to_mtrr(unsigned int reg, unsigned long range_startk,
-	      unsigned long range_sizek, unsigned char type,
-	      unsigned address_bits)
+	      unsigned long range_sizek, unsigned char type)
 {
 	if (!range_sizek || (reg >= num_var_ranges))
 		return reg;
@@ -803,12 +912,12 @@ range_to_mtrr(unsigned int reg, unsigned
 			align = max_align;
 
 		sizek = 1 << align;
-		printk(KERN_INFO "Setting variable MTRR %d, base: %ldMB, range: %ldMB, type %s\n",
+		printk(KERN_DEBUG "Setting variable MTRR %d, base: %ldMB, range: %ldMB, type %s\n",
 			reg, range_startk >> 10, sizek >> 10,
 			(type == MTRR_TYPE_UNCACHABLE)?"UC":
 			    ((type == MTRR_TYPE_WRBACK)?"WB":"Other")
 			);
-		set_var_mtrr(reg++, range_startk, sizek, type, address_bits);
+		save_var_mtrr(reg++, range_startk, sizek, type);
 		range_startk += sizek;
 		range_sizek -= sizek;
 		if (reg >= num_var_ranges)
@@ -817,10 +926,12 @@ range_to_mtrr(unsigned int reg, unsigned
 	return reg;
 }
 
-static void __init
-range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek)
+static unsigned __init
+range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek,
+			unsigned long sizek)
 {
 	unsigned long hole_basek, hole_sizek;
+	unsigned long second_basek, second_sizek;
 	unsigned long range0_basek, range0_sizek;
 	unsigned long range_basek, range_sizek;
 	unsigned long chunk_sizek;
@@ -828,64 +939,91 @@ range_to_mtrr_with_hole(struct var_mtrr_
 
 	hole_basek = 0;
 	hole_sizek = 0;
+	second_basek = 0;
+	second_sizek = 0;
 	chunk_sizek = state->chunk_sizek;
 	gran_sizek = state->gran_sizek;
 
 	/* align with gran size, prevent small block used up MTRRs */
 	range_basek = ALIGN(state->range_startk, gran_sizek);
 	if ((range_basek > basek) && basek)
-		return;
-	range_sizek = ALIGN(state->range_sizek - (range_basek - state->range_startk), gran_sizek);
+		return second_sizek;
+	state->range_sizek -= (range_basek - state->range_startk);
+	range_sizek = ALIGN(state->range_sizek, gran_sizek);
 
-	while (range_basek + range_sizek > (state->range_startk + state->range_sizek)) {
+	while (range_sizek > state->range_sizek) {
 		range_sizek -= gran_sizek;
 		if (!range_sizek)
-			return;
+			return 0;
 	}
-	state->range_startk = range_basek;
 	state->range_sizek = range_sizek;
 
 	/* try to append some small hole */
 	range0_basek = state->range_startk;
 	range0_sizek = ALIGN(state->range_sizek, chunk_sizek);
 	if (range0_sizek == state->range_sizek) {
-			printk(KERN_INFO "rangeX: %016lx - %016lx\n", range0_basek<<10, (range0_basek + state->range_sizek)<<10);
-			state->reg = range_to_mtrr(state->reg, range0_basek,
-				state->range_sizek, MTRR_TYPE_WRBACK, state->address_bits);
-		return;
-	} else if (basek) {
-	    while (range0_basek + range0_sizek - chunk_sizek > basek) {
+		printk(KERN_DEBUG "rangeX: %016lx - %016lx\n", range0_basek<<10,
+				(range0_basek + state->range_sizek)<<10);
+		state->reg = range_to_mtrr(state->reg, range0_basek,
+				state->range_sizek, MTRR_TYPE_WRBACK);
+		return 0;
+	}
+
+	range0_sizek -= chunk_sizek;
+	if (range0_sizek && sizek) {
+	    while (range0_basek + range0_sizek > (basek + sizek)) {
 		range0_sizek -= chunk_sizek;
 		if (!range0_sizek)
 			break;
 	    }
 	}
 
+	if (range0_sizek) {
+		printk(KERN_DEBUG "range0: %016lx - %016lx\n", range0_basek<<10,
+				(range0_basek + range0_sizek)<<10);
+		state->reg = range_to_mtrr(state->reg, range0_basek,
+				range0_sizek, MTRR_TYPE_WRBACK);
 
-	if (range0_sizek > chunk_sizek)
-		range0_sizek -= chunk_sizek;
-	printk(KERN_INFO "range0: %016lx - %016lx\n", range0_basek<<10, (range0_basek + range0_sizek)<<10);
-	state->reg = range_to_mtrr(state->reg, range0_basek,
-			range0_sizek, MTRR_TYPE_WRBACK, state->address_bits);
+	}
 
 	range_basek = range0_basek + range0_sizek;
 	range_sizek = chunk_sizek;
 
-	if ((range_sizek - (state->range_sizek - range0_sizek) < (chunk_sizek >> 1)) &&
-	    (range_basek + range_sizek <= basek)) {
-		hole_sizek = range_sizek - (state->range_sizek - range0_sizek);
-		hole_basek = range_basek + range_sizek - hole_sizek;
-	} else
+	if (range_basek + range_sizek > basek &&
+	    range_basek + range_sizek <= (basek + sizek)) {
+		/* one hole */
+		second_basek = basek;
+		second_sizek = range_basek + range_sizek - basek;
+	}
+
+	/* if last piece, only could one hole near end */
+	if ((second_basek || !basek) &&
+	    range_sizek - (state->range_sizek - range0_sizek) - second_sizek < (chunk_sizek >> 1)) {
+		/* one hole in middle (second_sizek is 0) or at end (second_sizek is 0 ) */
+		hole_sizek = range_sizek - (state->range_sizek - range0_sizek)
+				 - second_sizek;
+		hole_basek = range_basek + range_sizek - hole_sizek
+				 - second_sizek;
+	} else {
+		/* fallback for big hole, or several holes */
 		range_sizek = state->range_sizek - range0_sizek;
+		second_basek = 0;
+		second_sizek = 0;
+	}
 
-	printk(KERN_INFO "range: %016lx - %016lx\n", range_basek<<10, (range_basek + range_sizek)<<10);
-	state->reg = range_to_mtrr(state->reg, range_basek,
-			range_sizek, MTRR_TYPE_WRBACK, state->address_bits);
+	printk(KERN_DEBUG "range: %016lx - %016lx\n", range_basek<<10,
+			 (range_basek + range_sizek)<<10);
+	state->reg = range_to_mtrr(state->reg, range_basek, range_sizek,
+					 MTRR_TYPE_WRBACK);
 	if (hole_sizek) {
-		printk(KERN_INFO "hole: %016lx - %016lx\n", hole_basek<<10, (hole_basek + hole_sizek)<<10);
-		state->reg = range_to_mtrr(state->reg, hole_basek,
-				hole_sizek, MTRR_TYPE_UNCACHABLE, state->address_bits);
+		printk(KERN_DEBUG "hole: %016lx - %016lx\n", hole_basek<<10,
+				 (hole_basek + hole_sizek)<<10);
+		state->reg = range_to_mtrr(state->reg, hole_basek, hole_sizek,
+						 MTRR_TYPE_UNCACHABLE);
+
 	}
+
+	return second_sizek;
 }
 
 static void __init
@@ -893,6 +1031,7 @@ set_var_mtrr_range(struct var_mtrr_state
 		   unsigned long size_pfn)
 {
 	unsigned long basek, sizek;
+	unsigned long second_sizek = 0;
 
 	if (state->reg >= num_var_ranges)
 		return;
@@ -901,21 +1040,19 @@ set_var_mtrr_range(struct var_mtrr_state
 	sizek = size_pfn << (PAGE_SHIFT - 10);
 
 	/* See if I can merge with the last range */
-	if ((basek <= 1024) || (state->range_startk + state->range_sizek == basek)) {
+	if ((basek <= 1024) ||
+	    (state->range_startk + state->range_sizek == basek)) {
 		unsigned long endk = basek + sizek;
 		state->range_sizek = endk - state->range_startk;
 		return;
 	}
 	/* Write the range mtrrs */
-	if (state->range_sizek != 0) {
-		range_to_mtrr_with_hole(state, basek);
+	if (state->range_sizek != 0)
+		second_sizek = range_to_mtrr_with_hole(state, basek, sizek);
 
-		state->range_startk = 0;
-		state->range_sizek = 0;
-	}
 	/* Allocate an msr */
-	state->range_startk = basek;
-	state->range_sizek  = sizek;
+	state->range_startk = basek + second_sizek;
+	state->range_sizek  = sizek - second_sizek;
 }
 
 /* mininum size of mtrr block that can take hole */
@@ -931,7 +1068,7 @@ static int __init parse_mtrr_chunk_size_
 early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt);
 
 /* granity of mtrr of block */
-static u64 mtrr_gran_size __initdata = (1ULL<<20);
+static u64 mtrr_gran_size __initdata;
 
 static int __init parse_mtrr_gran_size_opt(char *p)
 {
@@ -942,91 +1079,79 @@ static int __init parse_mtrr_gran_size_o
 }
 early_param("mtrr_gran_size", parse_mtrr_gran_size_opt);
 
-static void __init
+static int nr_mtrr_spare_reg __initdata =
+				 CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT;
+
+static int __init parse_mtrr_spare_reg(char *arg)
+{
+	if (arg)
+		nr_mtrr_spare_reg = simple_strtoul(arg, NULL, 0);
+	return 0;
+}
+
+early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg);
+
+static int __init
 x86_setup_var_mtrrs(struct res_range *range, int nr_range,
-		    unsigned address_bits)
+		    u64 chunk_size, u64 gran_size)
 {
 	struct var_mtrr_state var_state;
 	int i;
+	int num_reg;
 
 	var_state.range_startk	= 0;
 	var_state.range_sizek	= 0;
 	var_state.reg		= 0;
-	var_state.address_bits	= address_bits;
-	var_state.chunk_sizek	= mtrr_chunk_size >> 10;
-	var_state.gran_sizek	= mtrr_gran_size >> 10;
+	var_state.chunk_sizek	= chunk_size >> 10;
+	var_state.gran_sizek	= gran_size >> 10;
+
+	memset(range_state, 0, sizeof(range_state));
 
 	/* Write the range etc */
 	for (i = 0; i < nr_range; i++)
-		set_var_mtrr_range(&var_state, range[i].start, range[i].end - range[i].start + 1);
+		set_var_mtrr_range(&var_state, range[i].start,
+				   range[i].end - range[i].start + 1);
 
 	/* Write the last range */
-	range_to_mtrr_with_hole(&var_state, 0);
-	printk(KERN_INFO "DONE variable MTRRs\n");
+	if (var_state.range_sizek != 0)
+		range_to_mtrr_with_hole(&var_state, 0, 0);
+	printk(KERN_DEBUG "DONE variable MTRRs\n");
+
+	num_reg = var_state.reg;
 	/* Clear out the extra MTRR's */
 	while (var_state.reg < num_var_ranges) {
-		set_var_mtrr(var_state.reg, 0, 0, 0, var_state.address_bits);
+		save_var_mtrr(var_state.reg, 0, 0, 0);
 		var_state.reg++;
 	}
-}
 
-static int __init
-x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
-		       unsigned long extra_remove_base,
-		       unsigned long extra_remove_size)
-{
-	unsigned long i, base, size;
-	mtrr_type type;
-
-	for (i = 0; i < num_var_ranges; i++) {
-		mtrr_if->get(i, &base, &size, &type);
-		if (type != MTRR_TYPE_WRBACK)
-			continue;
-		nr_range = add_range_with_merge(range, nr_range, base, base + size - 1);
-	}
-	printk(KERN_INFO "After WB checking\n");
-	for (i = 0; i < nr_range; i++)
-		printk(KERN_INFO "MTRR MAP PFN: %016lx - %016lx\n", range[i].start, range[i].end + 1);
-
-	/* take out UC ranges */
-	for (i = 0; i < num_var_ranges; i++) {
-		mtrr_if->get(i, &base, &size, &type);
-		if (type != MTRR_TYPE_UNCACHABLE)
-			continue;
-		if (!size)
-			continue;
-		subtract_range(range, base, base + size - 1);
-	}
-	if (extra_remove_size)
-		subtract_range(range, extra_remove_base,  extra_remove_base + extra_remove_size  - 1);
+	return num_reg;
+}
 
-	/* get new range num */
-	nr_range = 0;
-	for (i = 0; i < RANGE_NUM; i++) {
-		if (!range[i].end)
-			continue;
-		nr_range++;
-	}
-	printk(KERN_INFO "After UC checking\n");
-	for (i = 0; i < nr_range; i++)
-		printk(KERN_INFO "MTRR MAP PFN: %016lx - %016lx\n", range[i].start, range[i].end + 1);
+struct mtrr_cleanup_result {
+	unsigned long gran_sizek;
+	unsigned long chunk_sizek;
+	unsigned long lose_cover_sizek;
+	unsigned int num_reg;
+	int bad;
+};
 
-	/* sort the ranges */
-	sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
-	printk(KERN_INFO "After sorting\n");
-	for (i = 0; i < nr_range; i++)
-		printk(KERN_INFO "MTRR MAP PFN: %016lx - %016lx\n", range[i].start, range[i].end + 1);
+/* gran_size: 1M, 2M, ..., 2G, chunk size: gran_size, ..., 4G. so need (2+13)*6 */
+#define NUM_RESULT 90
 
-	return nr_range;
-}
+static struct mtrr_cleanup_result __initdata result[NUM_RESULT];
+static struct res_range __initdata range_new[RANGE_NUM];
+static unsigned long __initdata min_loss_pfn[RANGE_NUM];
 
 static int __init mtrr_cleanup(unsigned address_bits)
 {
 	unsigned long extra_remove_base, extra_remove_size;
 	unsigned long i, base, size, def, dummy;
-	struct res_range range[RANGE_NUM];
 	mtrr_type type;
-	int nr_range;
+	int nr_range, nr_range_new;
+	u64 chunk_size, gran_size;
+	unsigned long range_sums, range_sums_new;
+	int index_good;
+	int num_reg_good;
 
 	/* extra one for all 0 */
 	int num[MTRR_NUM_TYPES + 1];
@@ -1038,10 +1163,20 @@ static int __init mtrr_cleanup(unsigned 
 	if (def != MTRR_TYPE_UNCACHABLE)
 		return 0;
 
+	/* get it and store it aside */
+	memset(range_state, 0, sizeof(range_state));
+	for (i = 0; i < num_var_ranges; i++) {
+		mtrr_if->get(i, &base, &size, &type);
+		range_state[i].base_pfn = base;
+		range_state[i].size_pfn = size;
+		range_state[i].type = type;
+	}
+
 	/* check entries number */
 	memset(num, 0, sizeof(num));
 	for (i = 0; i < num_var_ranges; i++) {
-		mtrr_if->get(i, &base, &size, &type);
+		type = range_state[i].type;
+		size = range_state[i].size_pfn;
 		if (type >= MTRR_NUM_TYPES)
 			continue;
 		if (!size)
@@ -1064,13 +1199,159 @@ static int __init mtrr_cleanup(unsigned 
 		extra_remove_base = 1 << (32 - PAGE_SHIFT);
 		extra_remove_size = (mtrr_tom2>>PAGE_SHIFT) - extra_remove_base;
 	}
-	nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base, extra_remove_size);
+	nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base,
+					  extra_remove_size);
+	range_sums = sum_ranges(range, nr_range);
+	printk(KERN_INFO "total RAM coverred: %ldM\n",
+			 range_sums >> (20 - PAGE_SHIFT));
+
+	if (mtrr_chunk_size && mtrr_gran_size) {
+		int num_reg;
+
+		/* convert ranges to var ranges state */
+		num_reg = x86_setup_var_mtrrs(range, nr_range, mtrr_chunk_size,
+						 mtrr_gran_size);
+
+		/* we got new setting in range_state, check it */
+		memset(range_new, 0, sizeof(range_new));
+		nr_range_new = x86_get_mtrr_mem_range(range_new, 0,
+					 extra_remove_base, extra_remove_size);
+		range_sums_new = sum_ranges(range_new, nr_range_new);
+
+		i = 0;
+		result[i].chunk_sizek = mtrr_chunk_size >> 10;
+		result[i].gran_sizek = mtrr_gran_size >> 10;
+		result[i].num_reg = num_reg;
+		if (range_sums < range_sums_new) {
+			result[i].lose_cover_sizek = (range_sums_new - range_sums) << (PAGE_SHIFT - 10);
+			result[i].bad = 1;
+		} else
+			result[i].lose_cover_sizek = (range_sums - range_sums_new) << (PAGE_SHIFT - 10);
+
+		printk(KERN_INFO " %sgran_size: %ldM  \tchunk_size: %ldM  \t",
+			 result[i].bad?" BAD ":"", result[i].gran_sizek >> 10,
+			 result[i].chunk_sizek >> 10);
+		printk(KERN_CONT "num_reg: %d  \tlose cover RAM: %s%ldM \n",
+			 result[i].num_reg, result[i].bad?"-":"",
+			 result[i].lose_cover_sizek >> 10);
+		if (!result[i].bad) {
+			set_var_mtrr_all(address_bits);
+			return 1;
+		}
+		printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, will find optimal one\n");
+		memset(result, 0, sizeof(result[0]));
+	}
+
+	i = 0;
+	memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn));
+	memset(result, 0, sizeof(result));
+	for (gran_size = (1ULL<<20); gran_size < (1ULL<<32); gran_size <<= 1) {
+		for (chunk_size = gran_size; chunk_size < (1ULL<<33);
+		     chunk_size <<= 1) {
+			int num_reg;
+
+			printk(KERN_INFO "\ngran_size: %lldM   chunk_size_size: %lldM\n", gran_size >> 20, chunk_size >> 20);
+			if (i >= NUM_RESULT)
+				continue;
+
+			/* convert ranges to var ranges state */
+			num_reg = x86_setup_var_mtrrs(range, nr_range,
+							 chunk_size, gran_size);
+
+			/* we got new setting in range_state, check it */
+			memset(range_new, 0, sizeof(range_new));
+			nr_range_new = x86_get_mtrr_mem_range(range_new, 0,
+					 extra_remove_base, extra_remove_size);
+			range_sums_new = sum_ranges(range_new, nr_range_new);
+
+			result[i].chunk_sizek = chunk_size >> 10;
+			result[i].gran_sizek = gran_size >> 10;
+			result[i].num_reg = num_reg;
+			if (range_sums < range_sums_new) {
+				result[i].lose_cover_sizek = (range_sums_new - range_sums) << (PAGE_SHIFT - 10);
+				result[i].bad = 1;
+			} else
+				result[i].lose_cover_sizek = (range_sums - range_sums_new) << (PAGE_SHIFT - 10);
+
+			/* double check it */
+			if (!result[i].bad && !result[i].lose_cover_sizek) {
+				if (nr_range_new != nr_range ||
+					memcmp(range, range_new, sizeof(range)))
+						result[i].bad = 1;
+			}
+
+			if (!result[i].bad && (range_sums - range_sums_new <
+				 min_loss_pfn[num_reg]))
+					min_loss_pfn[num_reg] = range_sums - range_sums_new;
+			i++;
+		}
+	}
 
-	/* convert ranges to var ranges state */
-	x86_setup_var_mtrrs(range, nr_range, address_bits);
+	/* print out all */
+	for (i = 0; i < NUM_RESULT; i++) {
+		printk(KERN_INFO "%sgran_size: %ldM  \tchunk_size: %ldM  \t",
+			 result[i].bad?"*BAD* ":" ", result[i].gran_sizek >> 10,
+			 result[i].chunk_sizek >> 10);
+		printk(KERN_CONT "num_reg: %d  \tlose cover RAM: %s%ldM \n",
+			 result[i].num_reg, result[i].bad?"-":"",
+			 result[i].lose_cover_sizek >> 10);
+	}
+
+	/* try to find the optimal index */
+	if (nr_mtrr_spare_reg >= num_var_ranges)
+		nr_mtrr_spare_reg = num_var_ranges - 1;
+	num_reg_good = -1;
+	for (i = 1; i < num_var_ranges + 1 - nr_mtrr_spare_reg; i++) {
+		if (!min_loss_pfn[i]) {
+			num_reg_good = i;
+			break;
+		}
+	}
+
+	index_good = -1;
+	if (num_reg_good != -1) {
+		for (i = 0; i < NUM_RESULT; i++) {
+			if (!result[i].bad &&
+			    result[i].num_reg == num_reg_good &&
+			    !result[i].lose_cover_sizek) {
+				index_good = i;
+				break;
+			}
+		}
+	}
 
-	return 1;
+	if (index_good != -1) {
+		printk(KERN_INFO "Found optimal setting for mtrr clean up\n");
+		i = index_good;
+		printk(KERN_INFO "gran_size: %ldM  \tchunk_size: %ldM  \t",
+				result[i].gran_sizek >> 10,
+				result[i].chunk_sizek >> 10);
+		printk(KERN_CONT "num_reg: %d  \tlose cover RAM: %ldM \n",
+				result[i].num_reg,
+				result[i].lose_cover_sizek >> 10);
+		/* convert ranges to var ranges state */
+		chunk_size = result[i].chunk_sizek;
+		chunk_size <<= 10;
+		gran_size = result[i].gran_sizek;
+		gran_size <<= 10;
+		x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size);
+		set_var_mtrr_all(address_bits);
+		return 1;
+	}
+
+	printk(KERN_INFO "mtrr_cleanup: can not find optimal value\n");
+	printk(KERN_INFO "please specify mtrr_gran_size/mtrr_chunk_size\n");
+
+	return 0;
+}
+#else
+static int __init mtrr_cleanup(unsigned address_bits)
+{
+	return 0;
 }
+#endif
+
+static int __initdata changed_by_mtrr_cleanup;
 
 static int disable_mtrr_trim;
 
@@ -1111,7 +1392,8 @@ int __init amd_special_default_mtrr(void
 	return 0;
 }
 
-static u64 __init real_trim_memory(unsigned long start_pfn, unsigned long limit_pfn)
+static u64 __init real_trim_memory(unsigned long start_pfn,
+				   unsigned long limit_pfn)
 {
 	u64 trim_start, trim_size;
 	trim_start = start_pfn;
@@ -1138,9 +1420,8 @@ int __init mtrr_trim_uncached_memory(uns
 {
 	unsigned long i, base, size, highest_pfn = 0, def, dummy;
 	mtrr_type type;
-	struct res_range range[RANGE_NUM];
 	int nr_range;
-	u64 total_real_trim_size;
+	u64 total_trim_size;
 
 	/* extra one for all 0 */
 	int num[MTRR_NUM_TYPES + 1];
@@ -1155,11 +1436,22 @@ int __init mtrr_trim_uncached_memory(uns
 	if (def != MTRR_TYPE_UNCACHABLE)
 		return 0;
 
-	/* Find highest cached pfn */
+	/* get it and store it aside */
+	memset(range_state, 0, sizeof(range_state));
 	for (i = 0; i < num_var_ranges; i++) {
 		mtrr_if->get(i, &base, &size, &type);
+		range_state[i].base_pfn = base;
+		range_state[i].size_pfn = size;
+		range_state[i].type = type;
+	}
+
+	/* Find highest cached pfn */
+	for (i = 0; i < num_var_ranges; i++) {
+		type = range_state[i].type;
 		if (type != MTRR_TYPE_WRBACK)
 			continue;
+		base = range_state[i].base_pfn;
+		size = range_state[i].size_pfn;
 		if (highest_pfn < base + size)
 			highest_pfn = base + size;
 	}
@@ -1177,9 +1469,10 @@ int __init mtrr_trim_uncached_memory(uns
 	/* check entries number */
 	memset(num, 0, sizeof(num));
 	for (i = 0; i < num_var_ranges; i++) {
-		mtrr_if->get(i, &base, &size, &type);
+		type = range_state[i].type;
 		if (type >= MTRR_NUM_TYPES)
 			continue;
+		size = range_state[i].size_pfn;
 		if (!size)
 			type = MTRR_NUM_TYPES;
 		num[type]++;
@@ -1205,26 +1498,28 @@ int __init mtrr_trim_uncached_memory(uns
 	}
 	nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0);
 
-	total_real_trim_size = 0;
+	total_trim_size = 0;
 	/* check the head */
 	if (range[0].start)
-		total_real_trim_size += real_trim_memory(0, range[0].start);
+		total_trim_size += real_trim_memory(0, range[0].start);
 	/* check the holes */
 	for (i = 0; i < nr_range - 1; i++) {
 		if (range[i].end + 1 < range[i+1].start)
-			total_real_trim_size += real_trim_memory(range[i].end + 1, range[i+1].start);
+			total_trim_size += real_trim_memory(range[i].end + 1,
+							    range[i+1].start);
 	}
 	/* check the top */
 	i = nr_range - 1;
 	if (range[i].end + 1 < end_pfn)
-		total_real_trim_size += real_trim_memory(range[i].end + 1, end_pfn);
+		total_trim_size += real_trim_memory(range[i].end + 1,
+							 end_pfn);
 
-	if (total_real_trim_size) {
+	if (total_trim_size) {
 		printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover"
 			" all of memory, losing %lluMB of RAM.\n",
-			total_real_trim_size >> 20);
+			total_trim_size >> 20);
 
-		if (enable_mtrr_cleanup < 1)
+		if (!changed_by_mtrr_cleanup)
 			WARN_ON(1);
 
 		printk(KERN_INFO "update e820 for mtrr\n");
@@ -1314,8 +1609,10 @@ void __init mtrr_bp_init(void)
 		if (use_intel()) {
 			get_mtrr_state();
 
-			if (mtrr_cleanup(phys_addr))
+			if (mtrr_cleanup(phys_addr)) {
+				changed_by_mtrr_cleanup = 1;
 				mtrr_if->set_all();
+			}
 
 		}
 	}
@@ -1355,7 +1652,7 @@ static int __init mtrr_init_finialize(vo
 	if (!mtrr_if)
 		return 0;
 	if (use_intel()) {
-		if (enable_mtrr_cleanup < 1)
+		if (!changed_by_mtrr_cleanup)
 			mtrr_state_warn();
 	} else {
 		/* The CPUs haven't MTRR and seem to not support SMP. They have
Index: linux-2.6/Documentation/kernel-parameters.txt
===================================================================
--- linux-2.6.orig/Documentation/kernel-parameters.txt
+++ linux-2.6/Documentation/kernel-parameters.txt
@@ -613,8 +613,17 @@ and is between 256 and 4096 characters. 
 			that could hold holes aka. UC entries.
 
 	mtrr_gran_size=nn[KMG] [X86]
-			used for mtrr cleanup. It is granity of mtrr block.
-			Big value could prevent small alignment use up MTRRs.
+			Used for mtrr cleanup. It is granularity of mtrr block.
+			Default is 1.
+			Large value could prevent small alignment from
+			using up MTRRs.
+
+	mtrr_spare_reg_nr=n [X86]
+			Format: <integer>
+			Range: 0,7 : spare reg number
+			Default : 1
+			Used for mtrr cleanup. It is spare mtrr entries number.
+			Set to 2 or more if your graphical card needs more.
 
 	disable_mtrr_trim [X86, Intel and AMD only]
 			By default the kernel will trim any uncacheable
Index: linux-2.6/arch/x86/Kconfig
===================================================================
--- linux-2.6.orig/arch/x86/Kconfig
+++ linux-2.6/arch/x86/Kconfig
@@ -1100,6 +1100,15 @@ config MTRR_SANITIZER_ENABLE_DEFAULT
 	help
 	  Enable mtrr cleanup default value
 
+config MTRR_SANITIZER_SPARE_REG_NR_DEFAULT
+	int "MTRR cleanup spare reg num (0-7)"
+	range 0 7
+	default "1"
+	depends on MTRR_SANITIZER
+	help
+	  mtrr cleanup spare entries default, it can be changed via
+	  mtrr_spare_reg_nr=
+
 config X86_PAT
 	bool
 	prompt "x86 PAT support"

^ permalink raw reply	[flat|nested] 89+ messages in thread

end of thread, other threads:[~2008-05-02  9:40 UTC | newest]

Thread overview: 89+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-04-28  6:37 [PATCH] x86: mtrr cleanup for converting continuous to discrete layout Yinghai Lu
2008-04-28  9:06 ` [PATCH] x86: mtrr cleanup for converting continuous to discrete layout v2 Yinghai Lu
2008-04-28 13:08   ` Ingo Molnar
2008-04-28 13:49     ` Arjan van de Ven
2008-04-28 15:28       ` Mika Fischer
2008-04-28  5:50         ` Arjan van de Ven
2008-04-28 16:01         ` Gabriel C
2008-04-28 16:28           ` Mika Fischer
2008-04-28 19:44   ` [PATCH] x86: mtrr cleanup for converting continuous to discrete layout v3 Yinghai Lu
2008-04-28 20:15     ` Ingo Molnar
2008-04-28 20:18       ` Yinghai Lu
2008-04-28 20:29         ` Ingo Molnar
2008-04-28 20:16     ` [PATCH] x86: mtrr cleanup for converting continuous to discrete layout v4 Yinghai Lu
2008-04-28 22:05       ` [PATCH] x86: mtrr cleanup for converting continuous to discrete layout v5 Yinghai Lu
2008-04-28 22:36         ` Randy Dunlap
2008-04-28 22:47           ` Yinghai Lu
2008-04-29  2:42         ` Andrew Morton
2008-04-29  3:01           ` Yinghai Lu
     [not found]         ` <200804290157.30651.yhlu.kernel@gmail.com>
2008-04-29  8:59           ` [PATCH 2/2] x86: fix trimming e820 with MTRR holes Yinghai Lu
2008-04-29 11:35             ` Ingo Molnar
2008-04-29 17:18               ` Yinghai Lu
2008-04-29 17:20                 ` Yinghai Lu
2008-04-30  3:25             ` [PATCH] x86: fix trimming e820 with MTRR holes. - fix Yinghai Lu
2008-04-30 12:09               ` Ingo Molnar
2008-04-29  9:00         ` [PATCH 1/2] x86: mtrr cleanup for converting continuous to discrete layout v7 Yinghai Lu
2008-04-29  9:47           ` Gabriel C
2008-04-29 10:30             ` Yinghai Lu
2008-04-29 10:56               ` Yinghai Lu
2008-04-29 11:26                 ` Ingo Molnar
2008-04-29 11:51                 ` Gabriel C
2008-04-29 17:11                   ` Yinghai Lu
2008-04-29 20:25                     ` Gabriel C
2008-04-29 21:49                       ` Yinghai Lu
2008-04-29 23:56                         ` Gabriel C
2008-04-30  0:06                           ` Gabriel C
2008-04-30  0:38                             ` Yinghai Lu
2008-04-30  1:02                               ` Gabriel C
2008-04-30  3:00                                 ` Yinghai Lu
2008-04-30  3:29                                   ` Yinghai Lu
2008-04-30  4:12                                     ` Gabriel C
2008-04-30  4:25                                       ` Yinghai Lu
2008-04-30 12:04                                         ` Gabriel C
2008-04-30 16:26                                           ` Yinghai Lu
2008-04-30  0:13                           ` Yinghai Lu
2008-04-29 10:52           ` [PATCH 1/2] x86: mtrr cleanup for converting continuous to discrete layout v8 Yinghai Lu
2008-04-29 13:07             ` Ingo Molnar
2008-04-29 17:25               ` Yinghai Lu
2008-04-29 20:46             ` Randy Dunlap
2008-04-29 21:54               ` Yinghai Lu
2008-04-30  3:25             ` [PATCH] x86: mtrr cleanup for converting continuous to discrete layout v8 - fix Yinghai Lu
2008-04-30 12:09               ` Ingo Molnar
2008-05-01  8:00               ` [PATCH] x86: mtrr cleanup for converting continuous to discrete - auto detect Yinghai Lu
2008-05-01 11:45                 ` Gabriel C
2008-05-02  0:06                   ` Yinghai Lu
2008-05-02  0:29                     ` Gabriel C
2008-05-02  0:35                       ` Yinghai Lu
2008-05-02  1:18                         ` Gabriel C
2008-05-02  1:55                           ` Yinghai Lu
2008-05-01 12:09                 ` Mika Fischer
2008-05-01 16:35                   ` Yinghai Lu
2008-05-01 16:59                     ` Mika Fischer
2008-05-01 17:40                       ` Yinghai Lu
2008-05-01 15:09                 ` Randy Dunlap
2008-05-01 16:38                   ` Yinghai Lu
2008-05-01 18:57                 ` [PATCH] x86: mtrr cleanup for converting continuous to discrete - auto detect v2 Yinghai Lu
2008-05-01 19:42                   ` H. Peter Anvin
2008-05-01 21:02                     ` Yinghai Lu
2008-05-01 21:10                       ` H. Peter Anvin
2008-05-01 21:20                         ` Yinghai Lu
2008-05-01 21:26                           ` H. Peter Anvin
2008-05-01 21:31                             ` Yinghai Lu
2008-05-01 21:33                               ` H. Peter Anvin
2008-05-01 21:44                                 ` Yinghai Lu
2008-05-01 21:49                                   ` H. Peter Anvin
2008-05-01 22:52                                     ` Yinghai Lu
2008-05-01 22:57                                       ` H. Peter Anvin
2008-05-01 23:10                                         ` Yinghai Lu
2008-05-02  0:52                   ` [PATCH] x86: mtrr cleanup for converting continuous to discrete - auto detect v3 Yinghai Lu
2008-05-02  9:40                     ` [PATCH] x86: mtrr cleanup for converting continuous to discrete - auto detect v4 Yinghai Lu
2008-04-29 19:00         ` [PATCH] x86: mtrr cleanup for converting continuous to discrete layout v5 Eric W. Biederman
2008-04-29 20:04           ` Yinghai Lu
2008-04-29 20:29             ` Eric W. Biederman
2008-04-29 21:57               ` Yinghai Lu
2008-04-29 22:09                 ` Ingo Molnar
2008-04-29 22:18                   ` Yinghai Lu
2008-04-29 22:14                 ` Eric W. Biederman
2008-04-29 22:54                   ` Thomas Gleixner
2008-04-30  1:16                     ` Eric W. Biederman
2008-04-30  9:57                       ` Alan Cox

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).