* [patch] Ignore MCFG if the mmconfig area isn't reserved in the e820 table
@ 2006-03-23 18:22 Arjan van de Ven
2006-03-23 17:56 ` Andi Kleen
0 siblings, 1 reply; 21+ messages in thread
From: Arjan van de Ven @ 2006-03-23 18:22 UTC (permalink / raw)
To: linux-kernel; +Cc: akpm, ak
Hi,
There have been several machines that don't have a working MMCONFIG,
often because of a buggy MCFG table in the ACPI bios. This patch adds a
simple sanity check that detects a whole bunch of these cases, and when
it detects it, linux now boots rather than crash-and-burns.
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
diff -purN linux-2.6.16/arch/i386/kernel/setup.c linux-2.6.16-mmconfig/arch/i386/kernel/setup.c
--- linux-2.6.16/arch/i386/kernel/setup.c 2006-03-20 06:53:29.000000000 +0100
+++ linux-2.6.16-mmconfig/arch/i386/kernel/setup.c 2006-03-23 17:26:13.000000000 +0100
@@ -1377,6 +1377,31 @@ static void __init register_memory(void)
pci_mem_start, gapstart, gapsize);
}
+
+
+/*
+ * Check if an address is reserved in the e820 map
+ */
+int is_e820_reserved(u64 address)
+{
+ int i;
+ i = e820.nr_map;
+ while (--i >= 0) {
+ unsigned long long start = e820.map[i].addr;
+ unsigned long long end = start + e820.map[i].size;
+
+ if (address <=end && address >= start) {
+ if (e820.map[i].type == E820_RESERVED)
+ return 1;
+ else
+ return 0;
+ }
+ }
+ return 0;
+}
+
+
+
/* Use inline assembly to define this because the nops are defined
as inline assembly strings in the include files and we cannot
get them easily into strings. */
diff -purN linux-2.6.16/arch/i386/pci/mmconfig.c linux-2.6.16-mmconfig/arch/i386/pci/mmconfig.c
--- linux-2.6.16/arch/i386/pci/mmconfig.c 2006-03-20 06:53:29.000000000 +0100
+++ linux-2.6.16-mmconfig/arch/i386/pci/mmconfig.c 2006-03-23 17:26:13.000000000 +0100
@@ -12,6 +12,7 @@
#include <linux/pci.h>
#include <linux/init.h>
#include <linux/acpi.h>
+#include <asm/e820.h>
#include "pci.h"
#define mmcfg_virt_addr ((void __iomem *) fix_to_virt(FIX_PCIE_MCFG))
@@ -183,6 +184,17 @@ static int __init pci_mmcfg_init(void)
(pci_mmcfg_config[0].base_address == 0))
goto out;
+ /*
+ * several bioses have a buggy MCFG table. While this is hard
+ * to test for conclusively, we know the value is defective
+ * if the memory isn't marked reserved in the e820 table
+ */
+ if (!is_e820_reserved(pci_mmcfg_config[0].base_address)) {
+ printk(KERN_INFO "PCI: BIOS Bug: MCFG area is not reserved\n");
+ printk(KERN_INFO "PCI: Not using MMCONFIG.\n");
+ goto out;
+ }
+
printk(KERN_INFO "PCI: Using MMCONFIG\n");
raw_pci_ops = &pci_mmcfg;
pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF;
diff -purN linux-2.6.16/arch/x86_64/kernel/e820.c linux-2.6.16-mmconfig/arch/x86_64/kernel/e820.c
--- linux-2.6.16/arch/x86_64/kernel/e820.c 2006-03-20 06:53:29.000000000 +0100
+++ linux-2.6.16-mmconfig/arch/x86_64/kernel/e820.c 2006-03-23 17:27:04.000000000 +0100
@@ -639,3 +639,25 @@ __init void e820_setup_gap(void)
printk(KERN_INFO "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n",
pci_mem_start, gapstart, gapsize);
}
+
+/*
+ * Check if an address is reserved in the e820 map
+ */
+int is_e820_reserved(u64 address)
+{
+ int i;
+ i = e820.nr_map;
+ while (--i >= 0) {
+ unsigned long long start = e820.map[i].addr;
+ unsigned long long end = start + e820.map[i].size;
+
+ if (address <=end && address >= start) {
+ if (e820.map[i].type == E820_RESERVED)
+ return 1;
+ else
+ return 0;
+ }
+ }
+ return 0;
+}
+
diff -purN linux-2.6.16/arch/x86_64/pci/mmconfig.c linux-2.6.16-mmconfig/arch/x86_64/pci/mmconfig.c
--- linux-2.6.16/arch/x86_64/pci/mmconfig.c 2006-03-20 06:53:29.000000000 +0100
+++ linux-2.6.16-mmconfig/arch/x86_64/pci/mmconfig.c 2006-03-23 17:34:06.000000000 +0100
@@ -9,6 +9,7 @@
#include <linux/init.h>
#include <linux/acpi.h>
#include <linux/bitmap.h>
+#include <asm/e820.h>
#include "pci.h"
#define MMCONFIG_APER_SIZE (256*1024*1024)
@@ -161,6 +162,19 @@ static int __init pci_mmcfg_init(void)
(pci_mmcfg_config[0].base_address == 0))
return 0;
+
+ /*
+ * several bioses have a buggy MCFG table. While this is hard
+ * to test for conclusively, we know the value is defective
+ * if the memory isn't marked reserved in the e820 table
+ */
+ if (!is_e820_reserved(pci_mmcfg_config[0].base_address)) {
+ printk(KERN_INFO "PCI: BIOS Bug: MCFG area is not reserved\n");
+ printk(KERN_INFO "PCI: Not using MMCONFIG.\n");
+ return 0;
+ }
+
+
/* RED-PEN i386 doesn't do _nocache right now */
pci_mmcfg_virt = kmalloc(sizeof(*pci_mmcfg_virt) * pci_mmcfg_config_num, GFP_KERNEL);
if (pci_mmcfg_virt == NULL) {
diff -purN linux-2.6.16/include/asm-i386/e820.h linux-2.6.16-mmconfig/include/asm-i386/e820.h
--- linux-2.6.16/include/asm-i386/e820.h 2006-03-20 06:53:29.000000000 +0100
+++ linux-2.6.16-mmconfig/include/asm-i386/e820.h 2006-03-23 17:26:13.000000000 +0100
@@ -35,6 +35,9 @@ struct e820map {
};
extern struct e820map e820;
+
+extern int is_e820_reserved(u64 address);
+
#endif/*!__ASSEMBLY__*/
#endif/*__E820_HEADER*/
diff -purN linux-2.6.16/include/asm-x86_64/e820.h linux-2.6.16-mmconfig/include/asm-x86_64/e820.h
--- linux-2.6.16/include/asm-x86_64/e820.h 2006-03-20 06:53:29.000000000 +0100
+++ linux-2.6.16-mmconfig/include/asm-x86_64/e820.h 2006-03-23 17:27:41.000000000 +0100
@@ -47,6 +47,7 @@ extern void contig_e820_setup(void);
extern unsigned long e820_end_of_ram(void);
extern void e820_reserve_resources(void);
extern void e820_print_map(char *who);
+extern int is_e820_reserved(u64 address);
extern int e820_mapped(unsigned long start, unsigned long end, unsigned type);
extern void e820_bootmem_free(pg_data_t *pgdat, unsigned long start,unsigned long end);
^ permalink raw reply [flat|nested] 21+ messages in thread* Re: [patch] Ignore MCFG if the mmconfig area isn't reserved in the e820 table
2006-03-23 18:22 [patch] Ignore MCFG if the mmconfig area isn't reserved in the e820 table Arjan van de Ven
@ 2006-03-23 17:56 ` Andi Kleen
2006-03-23 19:02 ` Arjan van de Ven
0 siblings, 1 reply; 21+ messages in thread
From: Andi Kleen @ 2006-03-23 17:56 UTC (permalink / raw)
To: Arjan van de Ven; +Cc: linux-kernel, akpm
On Thursday 23 March 2006 19:22, Arjan van de Ven wrote:
> Hi,
>
> There have been several machines that don't have a working MMCONFIG,
> often because of a buggy MCFG table in the ACPI bios. This patch adds a
> simple sanity check that detects a whole bunch of these cases, and when
> it detects it, linux now boots rather than crash-and-burns.
Yes, MCFG is a pain recently. Looks like we did the grave mistake
of using something in the BIOS before Windows again.
> +
> +/*
> + * Check if an address is reserved in the e820 map
> + */
> +int is_e820_reserved(u64 address)
> +{
> + int i;
> + i = e820.nr_map;
> + while (--i >= 0) {
> + unsigned long long start = e820.map[i].addr;
> + unsigned long long end = start + e820.map[i].size;
> +
> + if (address <=end && address >= start) {
> + if (e820.map[i].type == E820_RESERVED)
> + return 1;
> + else
> + return 0;
> + }
> + }
> + return 0;
> +}
That is e820_mapped(address, address+size, E820_RESERVED)
And not having a size is definitely wrong on i386 too.
-Andi
^ permalink raw reply [flat|nested] 21+ messages in thread* Re: [patch] Ignore MCFG if the mmconfig area isn't reserved in the e820 table
2006-03-23 17:56 ` Andi Kleen
@ 2006-03-23 19:02 ` Arjan van de Ven
2006-03-23 19:15 ` Arjan van de Ven
0 siblings, 1 reply; 21+ messages in thread
From: Arjan van de Ven @ 2006-03-23 19:02 UTC (permalink / raw)
To: Andi Kleen; +Cc: linux-kernel, akpm
> That is e820_mapped(address, address+size, E820_RESERVED)
>
> And not having a size is definitely wrong on i386 too.
s/wrong/not selective enough/
and e820_mapped doesn't check this either anyway, at least not the way
you imply it does.
I'll do a new patch using this for x86_64 though, no need to make a
second function like this.
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [patch] Ignore MCFG if the mmconfig area isn't reserved in the e820 table
2006-03-23 19:02 ` Arjan van de Ven
@ 2006-03-23 19:15 ` Arjan van de Ven
2006-03-24 12:19 ` Andi Kleen
2006-03-24 15:22 ` [patch] Ignore MCFG if the mmconfig area isn't reserved in thee820 table Ashok Raj
0 siblings, 2 replies; 21+ messages in thread
From: Arjan van de Ven @ 2006-03-23 19:15 UTC (permalink / raw)
To: Andi Kleen; +Cc: akpm, linux-kernel
On Thu, 2006-03-23 at 20:02 +0100, Arjan van de Ven wrote:
> > That is e820_mapped(address, address+size, E820_RESERVED)
> >
> > And not having a size is definitely wrong on i386 too.
>
> s/wrong/not selective enough/
>
> and e820_mapped doesn't check this either anyway, at least not the way
> you imply it does.
>
> I'll do a new patch using this for x86_64 though, no need to make a
> second function like this.
There have been several machines that don't have a working MMCONFIG,
often because of a buggy MCFG table in the ACPI bios. This patch adds a
simple sanity check that detects a whole bunch of these cases, and when
it detects it, linux now boots rather than crash-and-burns. The accuracy
of this detection can in principle be improved if there was a "is this
entire range in e820 with THIS attribute", but no such function exist
and the complexity needed for this is not really worth it; this simple
check already catches most cases anyway.
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
diff -purN linux-2.6.16/arch/i386/kernel/setup.c linux-2.6.16-mmconfig/arch/i386/kernel/setup.c
--- linux-2.6.16/arch/i386/kernel/setup.c 2006-03-20 06:53:29.000000000 +0100
+++ linux-2.6.16-mmconfig/arch/i386/kernel/setup.c 2006-03-23 20:06:22.000000000 +0100
@@ -1377,6 +1377,27 @@ static void __init register_memory(void)
pci_mem_start, gapstart, gapsize);
}
+/*
+ * Check if an address is reserved in the e820 map
+ */
+int is_e820_reserved(u64 address)
+{
+ int i;
+ i = e820.nr_map;
+ while (--i >= 0) {
+ unsigned long long start = e820.map[i].addr;
+ unsigned long long end = start + e820.map[i].size;
+
+ if (address <=end && address >= start) {
+ if (e820.map[i].type == E820_RESERVED)
+ return 1;
+ else
+ return 0;
+ }
+ }
+ return 0;
+}
+
/* Use inline assembly to define this because the nops are defined
as inline assembly strings in the include files and we cannot
get them easily into strings. */
diff -purN linux-2.6.16/arch/i386/pci/mmconfig.c linux-2.6.16-mmconfig/arch/i386/pci/mmconfig.c
--- linux-2.6.16/arch/i386/pci/mmconfig.c 2006-03-20 06:53:29.000000000 +0100
+++ linux-2.6.16-mmconfig/arch/i386/pci/mmconfig.c 2006-03-23 20:06:22.000000000 +0100
@@ -12,6 +12,7 @@
#include <linux/pci.h>
#include <linux/init.h>
#include <linux/acpi.h>
+#include <asm/e820.h>
#include "pci.h"
#define mmcfg_virt_addr ((void __iomem *) fix_to_virt(FIX_PCIE_MCFG))
@@ -183,6 +184,17 @@ static int __init pci_mmcfg_init(void)
(pci_mmcfg_config[0].base_address == 0))
goto out;
+ /*
+ * several bioses have a buggy MCFG table. While this is hard
+ * to test for conclusively, we know the value is defective
+ * if the memory isn't marked reserved in the e820 table
+ */
+ if (!is_e820_reserved(pci_mmcfg_config[0].base_address)) {
+ printk(KERN_INFO "PCI: BIOS Bug: MCFG area is not reserved\n");
+ printk(KERN_INFO "PCI: Not using MMCONFIG.\n");
+ goto out;
+ }
+
printk(KERN_INFO "PCI: Using MMCONFIG\n");
raw_pci_ops = &pci_mmcfg;
pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF;
diff -purN linux-2.6.16/arch/x86_64/kernel/e820.c linux-2.6.16-mmconfig/arch/x86_64/kernel/e820.c
--- linux-2.6.16/arch/x86_64/kernel/e820.c 2006-03-20 06:53:29.000000000 +0100
+++ linux-2.6.16-mmconfig/arch/x86_64/kernel/e820.c 2006-03-23 20:05:33.000000000 +0100
@@ -80,6 +80,11 @@ static inline int bad_addr(unsigned long
return 0;
}
+/*
+ * This function returns 1 if any part of the <start, end> range is in the
+ * E820 map having "type". There may be parts in this range that are not in
+ * E820 at all and/or parts with different types in addition.
+ */
int __init e820_mapped(unsigned long start, unsigned long end, unsigned type)
{
int i;
diff -purN linux-2.6.16/arch/x86_64/pci/mmconfig.c linux-2.6.16-mmconfig/arch/x86_64/pci/mmconfig.c
--- linux-2.6.16/arch/x86_64/pci/mmconfig.c 2006-03-20 06:53:29.000000000 +0100
+++ linux-2.6.16-mmconfig/arch/x86_64/pci/mmconfig.c 2006-03-23 20:08:21.000000000 +0100
@@ -9,6 +9,7 @@
#include <linux/init.h>
#include <linux/acpi.h>
#include <linux/bitmap.h>
+#include <asm/e820.h>
#include "pci.h"
#define MMCONFIG_APER_SIZE (256*1024*1024)
@@ -161,6 +162,19 @@ static int __init pci_mmcfg_init(void)
(pci_mmcfg_config[0].base_address == 0))
return 0;
+ /*
+ * several bioses have a buggy MCFG table. While this is hard
+ * to test for conclusively, we know the value is defective
+ * if the memory isn't marked reserved in the e820 table
+ */
+ if (!e820_mapped(pci_mmcfg_config[0].base_address,
+ pci_mmcfg_config[0].base_address + MMCONFIG_APER_SIZE,
+ E820_RESERVED)) {
+ printk(KERN_INFO "PCI: BIOS Bug: MCFG area is not reserved\n");
+ printk(KERN_INFO "PCI: Not using MMCONFIG.\n");
+ return 0;
+ }
+
/* RED-PEN i386 doesn't do _nocache right now */
pci_mmcfg_virt = kmalloc(sizeof(*pci_mmcfg_virt) * pci_mmcfg_config_num, GFP_KERNEL);
if (pci_mmcfg_virt == NULL) {
diff -purN linux-2.6.16/include/asm-i386/e820.h linux-2.6.16-mmconfig/include/asm-i386/e820.h
--- linux-2.6.16/include/asm-i386/e820.h 2006-03-20 06:53:29.000000000 +0100
+++ linux-2.6.16-mmconfig/include/asm-i386/e820.h 2006-03-23 20:06:22.000000000 +0100
@@ -35,6 +35,9 @@ struct e820map {
};
extern struct e820map e820;
+
+extern int is_e820_reserved(u64 address);
+
#endif/*!__ASSEMBLY__*/
#endif/*__E820_HEADER*/
^ permalink raw reply [flat|nested] 21+ messages in thread* Re: [patch] Ignore MCFG if the mmconfig area isn't reserved in the e820 table
2006-03-23 19:15 ` Arjan van de Ven
@ 2006-03-24 12:19 ` Andi Kleen
2006-03-24 13:36 ` Arjan van de Ven
2006-03-24 21:25 ` Greg KH
2006-03-24 15:22 ` [patch] Ignore MCFG if the mmconfig area isn't reserved in thee820 table Ashok Raj
1 sibling, 2 replies; 21+ messages in thread
From: Andi Kleen @ 2006-03-24 12:19 UTC (permalink / raw)
To: Arjan van de Ven; +Cc: akpm, linux-kernel, gregkh
Arjan van de Ven <arjan@linux.intel.com> writes:
> On Thu, 2006-03-23 at 20:02 +0100, Arjan van de Ven wrote:
> > > That is e820_mapped(address, address+size, E820_RESERVED)
> > >
> > > And not having a size is definitely wrong on i386 too.
> >
> > s/wrong/not selective enough/
> >
> > and e820_mapped doesn't check this either anyway, at least not the way
> > you imply it does.
> >
> > I'll do a new patch using this for x86_64 though, no need to make a
> > second function like this.
>
>
> There have been several machines that don't have a working MMCONFIG,
> often because of a buggy MCFG table in the ACPI bios. This patch adds a
> simple sanity check that detects a whole bunch of these cases, and when
> it detects it, linux now boots rather than crash-and-burns. The accuracy
> of this detection can in principle be improved if there was a "is this
> entire range in e820 with THIS attribute", but no such function exist
> and the complexity needed for this is not really worth it; this simple
> check already catches most cases anyway.
I added the patch to my patchkit now. I also have an older patch (needs a bit
more cleanup) that checks for all busses if they are reachable using MCFG
Still needs some more work and interaction check with PCI hotplug though.
-Andi
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [patch] Ignore MCFG if the mmconfig area isn't reserved in the e820 table
2006-03-24 12:19 ` Andi Kleen
@ 2006-03-24 13:36 ` Arjan van de Ven
2006-03-24 21:25 ` Greg KH
1 sibling, 0 replies; 21+ messages in thread
From: Arjan van de Ven @ 2006-03-24 13:36 UTC (permalink / raw)
To: Andi Kleen; +Cc: akpm, linux-kernel, gregkh
Andi Kleen wrote:
> I added the patch to my patchkit now.
thanks
> I also have an older patch (needs a bit
> more cleanup) that checks for all busses if they are reachable using MCFG
> Still needs some more work and interaction check with PCI hotplug though.
>
yes more advanced tests are going to be useful; at least this one was simple and catches
a large portion of the problem cases.
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [patch] Ignore MCFG if the mmconfig area isn't reserved in the e820 table
2006-03-24 12:19 ` Andi Kleen
2006-03-24 13:36 ` Arjan van de Ven
@ 2006-03-24 21:25 ` Greg KH
1 sibling, 0 replies; 21+ messages in thread
From: Greg KH @ 2006-03-24 21:25 UTC (permalink / raw)
To: Andi Kleen; +Cc: Arjan van de Ven, akpm, linux-kernel, gregkh
On Fri, Mar 24, 2006 at 01:19:02PM +0100, Andi Kleen wrote:
> Arjan van de Ven <arjan@linux.intel.com> writes:
>
> > On Thu, 2006-03-23 at 20:02 +0100, Arjan van de Ven wrote:
> > > > That is e820_mapped(address, address+size, E820_RESERVED)
> > > >
> > > > And not having a size is definitely wrong on i386 too.
> > >
> > > s/wrong/not selective enough/
> > >
> > > and e820_mapped doesn't check this either anyway, at least not the way
> > > you imply it does.
> > >
> > > I'll do a new patch using this for x86_64 though, no need to make a
> > > second function like this.
> >
> >
> > There have been several machines that don't have a working MMCONFIG,
> > often because of a buggy MCFG table in the ACPI bios. This patch adds a
> > simple sanity check that detects a whole bunch of these cases, and when
> > it detects it, linux now boots rather than crash-and-burns. The accuracy
> > of this detection can in principle be improved if there was a "is this
> > entire range in e820 with THIS attribute", but no such function exist
> > and the complexity needed for this is not really worth it; this simple
> > check already catches most cases anyway.
>
> I added the patch to my patchkit now. I also have an older patch (needs a bit
> more cleanup) that checks for all busses if they are reachable using MCFG
> Still needs some more work and interaction check with PCI hotplug though.
If you need help with that, please let me know.
Otherwise I'll let you push this to Linus when you feel it's ready :)
thanks,
greg k-h
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [patch] Ignore MCFG if the mmconfig area isn't reserved in thee820 table
2006-03-23 19:15 ` Arjan van de Ven
2006-03-24 12:19 ` Andi Kleen
@ 2006-03-24 15:22 ` Ashok Raj
2006-03-24 15:24 ` Arjan van de Ven
1 sibling, 1 reply; 21+ messages in thread
From: Ashok Raj @ 2006-03-24 15:22 UTC (permalink / raw)
To: Arjan van de Ven; +Cc: Andi Kleen, akpm, linux-kernel
On Thu, Mar 23, 2006 at 11:15:19AM -0800, Arjan van de Ven wrote:
>
> >
> > I'll do a new patch using this for x86_64 though, no need to make a
> > second function like this.
>
> int __init e820_mapped(unsigned long start, unsigned long end,
> unsigned type)
Why not use the same type of function like x86_64 as well instead of the newly
added is_820_mapped()? If the purpose of both functions is the same, i386 could benefit
with same style code instead of a slight variant.
--
Cheers,
Ashok Raj
- Open Source Technology Center
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [patch] Ignore MCFG if the mmconfig area isn't reserved in thee820 table
2006-03-24 15:22 ` [patch] Ignore MCFG if the mmconfig area isn't reserved in thee820 table Ashok Raj
@ 2006-03-24 15:24 ` Arjan van de Ven
2006-03-24 15:39 ` Andi Kleen
0 siblings, 1 reply; 21+ messages in thread
From: Arjan van de Ven @ 2006-03-24 15:24 UTC (permalink / raw)
To: Ashok Raj; +Cc: Andi Kleen, akpm, linux-kernel
Ashok Raj wrote:
> On Thu, Mar 23, 2006 at 11:15:19AM -0800, Arjan van de Ven wrote:
>> >
>> > I'll do a new patch using this for x86_64 though, no need to make a
>> > second function like this.
>>
>> int __init e820_mapped(unsigned long start, unsigned long end,
>> unsigned type)
>
>
> Why not use the same type of function like x86_64 as well instead of the newly
> added is_820_mapped()? If the purpose of both functions is the same, i386 could benefit
> with same style code instead of a slight variant.
the purpose is not the same. the e820_mapped function is far less strict in its check
(I'm still afraid it is too weak for this purpose actually)
and it's not is_e820_mapped but is_e820_reserved()
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [patch] Ignore MCFG if the mmconfig area isn't reserved in thee820 table
2006-03-24 15:24 ` Arjan van de Ven
@ 2006-03-24 15:39 ` Andi Kleen
2006-03-24 15:42 ` Arjan van de Ven
0 siblings, 1 reply; 21+ messages in thread
From: Andi Kleen @ 2006-03-24 15:39 UTC (permalink / raw)
To: Arjan van de Ven; +Cc: Ashok Raj, akpm, linux-kernel
On Friday 24 March 2006 16:24, Arjan van de Ven wrote:
> Ashok Raj wrote:
> > On Thu, Mar 23, 2006 at 11:15:19AM -0800, Arjan van de Ven wrote:
> >> >
> >> > I'll do a new patch using this for x86_64 though, no need to make a
> >> > second function like this.
> >>
> >> int __init e820_mapped(unsigned long start, unsigned long end,
> >> unsigned type)
> >
> >
> > Why not use the same type of function like x86_64 as well instead of the newly
> > added is_820_mapped()? If the purpose of both functions is the same, i386 could benefit
> > with same style code instead of a slight variant.
>
> the purpose is not the same. the e820_mapped function is far less strict in its check
> (I'm still afraid it is too weak for this purpose actually)
In theory they should be the same. What do you think is different?
>
> and it's not is_e820_mapped but is_e820_reserved()
That's just a special case.
-Andi
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [patch] Ignore MCFG if the mmconfig area isn't reserved in thee820 table
2006-03-24 15:39 ` Andi Kleen
@ 2006-03-24 15:42 ` Arjan van de Ven
2006-03-24 15:48 ` Ashok Raj
2006-03-24 15:48 ` Andi Kleen
0 siblings, 2 replies; 21+ messages in thread
From: Arjan van de Ven @ 2006-03-24 15:42 UTC (permalink / raw)
To: Andi Kleen; +Cc: Ashok Raj, akpm, linux-kernel
Andi Kleen wrote:
> In theory they should be the same. What do you think is different?
in practice the x86-64 version returns "success" if there is one byte in the entire
memory range that complies with the requested type, even if the rest of the range is
of another type. What the ideal is for the purpose here is "is the entire range reserved",
but for now I'll settle for "is the start address reserved".
(and yes you can express the "is the start address reserved" as a question to the current function for
a 1 byte range, I probably should do that I suppose)
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [patch] Ignore MCFG if the mmconfig area isn't reserved in thee820 table
2006-03-24 15:42 ` Arjan van de Ven
@ 2006-03-24 15:48 ` Ashok Raj
2006-03-24 15:48 ` Arjan van de Ven
2006-03-24 15:48 ` Andi Kleen
1 sibling, 1 reply; 21+ messages in thread
From: Ashok Raj @ 2006-03-24 15:48 UTC (permalink / raw)
To: Arjan van de Ven; +Cc: Andi Kleen, Ashok Raj, akpm, linux-kernel
On Fri, Mar 24, 2006 at 04:42:17PM +0100, Arjan van de Ven wrote:
> Andi Kleen wrote:
> > In theory they should be the same. What do you think is different?
>
> in practice the x86-64 version returns "success" if there is one byte in the entire
> memory range that complies with the requested type, even if the rest of the range is
> of another type. What the ideal is for the purpose here is "is the entire range reserved",
> but for now I'll settle for "is the start address reserved".
>
> (and yes you can express the "is the start address reserved" as a question to the current function for
> a 1 byte range, I probably should do that I suppose)
or why not check
if (type == ei->type && start >= ei->addr && end <= (ei->addr + ei->size))
return 1;
will this make the range check check stricter?
--
Cheers,
Ashok Raj
- Open Source Technology Center
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [patch] Ignore MCFG if the mmconfig area isn't reserved in thee820 table
2006-03-24 15:48 ` Ashok Raj
@ 2006-03-24 15:48 ` Arjan van de Ven
0 siblings, 0 replies; 21+ messages in thread
From: Arjan van de Ven @ 2006-03-24 15:48 UTC (permalink / raw)
To: Ashok Raj; +Cc: Andi Kleen, akpm, linux-kernel
Ashok Raj wrote:
> On Fri, Mar 24, 2006 at 04:42:17PM +0100, Arjan van de Ven wrote:
>> Andi Kleen wrote:
>>> In theory they should be the same. What do you think is different?
>> in practice the x86-64 version returns "success" if there is one byte in the entire
>> memory range that complies with the requested type, even if the rest of the range is
>> of another type. What the ideal is for the purpose here is "is the entire range reserved",
>> but for now I'll settle for "is the start address reserved".
>>
>> (and yes you can express the "is the start address reserved" as a question to the current function for
>> a 1 byte range, I probably should do that I suppose)
>
> or why not check
>
> if (type == ei->type && start >= ei->addr && end <= (ei->addr + ei->size))
> return 1;
>
> will this make the range check check stricter?
that's not going to cut it; you can have 2 e820 entries that together span the range
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [patch] Ignore MCFG if the mmconfig area isn't reserved in thee820 table
2006-03-24 15:42 ` Arjan van de Ven
2006-03-24 15:48 ` Ashok Raj
@ 2006-03-24 15:48 ` Andi Kleen
2006-03-24 15:50 ` Arjan van de Ven
1 sibling, 1 reply; 21+ messages in thread
From: Andi Kleen @ 2006-03-24 15:48 UTC (permalink / raw)
To: Arjan van de Ven; +Cc: Ashok Raj, akpm, linux-kernel
On Friday 24 March 2006 16:42, Arjan van de Ven wrote:
> Andi Kleen wrote:
> > In theory they should be the same. What do you think is different?
>
> in practice the x86-64 version returns "success" if there is one byte in the entire
> memory range that complies with the requested type, even if the rest of the range is
> of another type.
I would consider that a bug. Please send fix.
-Andi
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [patch] Ignore MCFG if the mmconfig area isn't reserved in thee820 table
2006-03-24 15:48 ` Andi Kleen
@ 2006-03-24 15:50 ` Arjan van de Ven
0 siblings, 0 replies; 21+ messages in thread
From: Arjan van de Ven @ 2006-03-24 15:50 UTC (permalink / raw)
To: Andi Kleen; +Cc: Ashok Raj, akpm, linux-kernel
Andi Kleen wrote:
> On Friday 24 March 2006 16:42, Arjan van de Ven wrote:
>> Andi Kleen wrote:
>>> In theory they should be the same. What do you think is different?
>> in practice the x86-64 version returns "success" if there is one byte in the entire
>> memory range that complies with the requested type, even if the rest of the range is
>> of another type.
>
> I would consider that a bug. Please send fix.
I'm less sure. It's what the function does, and I can see very valid usage models for it;
to detect that a certain type is NOT present. And the code is clearly written with that goal
in mind at least. I'm tempted to write a real range function but I also was hoping to avoid
doing that, since for the MCFG test it really is a bit overkill.
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [patch] Ignore MCFG if the mmconfig area isn't reserved in the e820 table.
@ 2006-05-18 2:53 Konrad Rzeszutek
2006-05-18 13:03 ` Arjan van de Ven
0 siblings, 1 reply; 21+ messages in thread
From: Konrad Rzeszutek @ 2006-05-18 2:53 UTC (permalink / raw)
To: linux-kernel, konradr, konradr, arjan
> Hi,
> There have been several machines that don't have a working MMCONFIG,
> often because of a buggy MCFG table in the ACPI bios. This patch adds a
> simple sanity check that detects a whole bunch of these cases, and when
> it detects it, linux now boots rather than crash-and-burns.
> [snip]
Arjan,
I am not sure if your analysis and your solution to the problem is correct.
It was my understanding that any memory NOT defined in the E820 tables
is NOT considered system memory. Therefore memory addresses defined in the
ACPI MCFG table do not have to show up in the E820 table.
Also the ACPI spec v3.0 (pg 405 of PDF, section 14.2, titled:
"E820 Assumptions and Limitations") agrees with this:
"The BIOS does not return a range description for the memory mapping
of PCI devices, ISA Option ROMs, and the ISA PNP cards because the OS
has mechanisms available to detect them."
(The ACPI v2.0c spec has this in section 15.2).
Obviously specifications are sometimes outdated, or assumptions are made in
some specifications which are not in other specification, so I was wondering
if you could tell me which specification defines that the memory addresses
in ACPI MCFG table have to be reserved in the E820 tables?
If this is not a specification issue, I was wondering if perhaps for the
machines you refer to, their BIOS bug is that the E820 have memory ranges
which also encompass what MMCONF points to?
Thanks!
Konrad Rzeszutek
IBM, (617)-693-1718
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [patch] Ignore MCFG if the mmconfig area isn't reserved in the e820 table.
2006-05-18 2:53 [patch] Ignore MCFG if the mmconfig area isn't reserved in the e820 table Konrad Rzeszutek
@ 2006-05-18 13:03 ` Arjan van de Ven
2006-05-18 15:56 ` Konrad Rzeszutek
0 siblings, 1 reply; 21+ messages in thread
From: Arjan van de Ven @ 2006-05-18 13:03 UTC (permalink / raw)
To: konradr; +Cc: linux-kernel, konradr
Konrad Rzeszutek wrote:
>> Hi,
>> There have been several machines that don't have a working MMCONFIG,
>> often because of a buggy MCFG table in the ACPI bios. This patch adds a
>> simple sanity check that detects a whole bunch of these cases, and when
>> it detects it, linux now boots rather than crash-and-burns.
>> [snip]
>
> Arjan,
>
> I am not sure if your analysis and your solution to the problem is correct.
> It was my understanding that any memory NOT defined in the E820 tables
> is NOT considered system memory. Therefore memory addresses defined in the
> ACPI MCFG table do not have to show up in the E820 table.
the problem is that Linux considers these 'free game' and will happily put
something like IO windows for cardbus cards there.
> Also the ACPI spec v3.0 (pg 405 of PDF, section 14.2, titled:
> "E820 Assumptions and Limitations") agrees with this:
>
> "The BIOS does not return a range description for the memory mapping
> of PCI devices, ISA Option ROMs, and the ISA PNP cards because the OS
> has mechanisms available to detect them."
MCFG is none of these...
> If this is not a specification issue, I was wondering if perhaps for the
> machines you refer to, their BIOS bug is that the E820 have memory ranges
> which also encompass what MMCONF points to?
no their bug is mostly that MCFG is garbage in those bioses. It points plain to
the wrong place. They even reserved the correct range, just pointed mcfg at the
wrong place.
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [patch] Ignore MCFG if the mmconfig area isn't reserved in the e820 table.
2006-05-18 13:03 ` Arjan van de Ven
@ 2006-05-18 15:56 ` Konrad Rzeszutek
2006-05-18 16:46 ` Arjan van de Ven
0 siblings, 1 reply; 21+ messages in thread
From: Konrad Rzeszutek @ 2006-05-18 15:56 UTC (permalink / raw)
To: Arjan van de Ven; +Cc: linux-kernel, konradr
On Thu, May 18, 2006 at 06:03:36AM -0700, Arjan van de Ven wrote:
> Konrad Rzeszutek wrote:
> >>Hi,
> >>There have been several machines that don't have a working MMCONFIG,
> >>often because of a buggy MCFG table in the ACPI bios. This patch adds a
> >>simple sanity check that detects a whole bunch of these cases, and when
> >>it detects it, linux now boots rather than crash-and-burns.
> >>[snip]
> >
> >Arjan,
> >
> >I am not sure if your analysis and your solution to the problem is
> >correct. It was my understanding that any memory NOT defined in the E820
> >tables is NOT considered system memory. Therefore memory addresses defined
> >in the ACPI MCFG table do not have to show up in the E820 table.
>
> the problem is that Linux considers these 'free game' and will happily put
> something like IO windows for cardbus cards there.
I can see this being a definite problem. I do not have machines with
ACPI MCFG and PCMCIA capability so I cannot reproduce this behavior.
Interestingly enough, somebody had a similar thought on their mind and
implemented a function 'clear_kernel_mapping' which, well, unmaps the
pages from the addresses that ACPI MCFG points to - so that it would not
be used as 'free game'.
This does not inhibit PCMCIA drivers from using other memory addresses
that are not in E820 and not in ACPI MCFG. However, those are reserved
during boot up (for example the EBDA). Also, the ACPI marks many memory
segments as reserved (which are not neccesarily marked reserved in
E820) to help the OS.
As I mentioned earlier, I do not have a machine with PCMCIA capability
and ACPI MCFG - but if you have access to such a machine, I would be
happy to look with you into creating a more comprehensive solution.
>
> >Also the ACPI spec v3.0 (pg 405 of PDF, section 14.2, titled:
> >"E820 Assumptions and Limitations") agrees with this:
> >
> >"The BIOS does not return a range description for the memory mapping
> >of PCI devices, ISA Option ROMs, and the ISA PNP cards because the OS
> >has mechanisms available to detect them."
>
> MCFG is none of these...
This is a bit of gray area. The end result of MCFG or 0xCF8/0xCFF is to
configure PCI devices and retrieve PCI memory mapped addresses (IO MMU).
0xCF8/0xCFF is limited in what it can do (only 256 bytes of
configuration date can be controlled), while MMCONFIG allows to
configure advanced features of the PCI-X and PCI Express (4KB of conf
data). Neither of these addresses - IO MMU or MCFG _has_ to show up in
the E820 tables.
Sometimes the IO MMU reserved memory shows up in the E820 as reserved.
The reason for this was to make life for the OS easier. Some OSes mark
256MB the IOMMU as reserved without bothering to check what is in there
(which can be done via querying the PCI devices and constructing a memory
table of PCI devices memory mapped addresses and how they fall in that
256MB).
The PCI Express and PCI-X configuration data can (but not always depending
on how much IOMMU the machine has set) be in the 256MB segment that the OS
reserves for IO MMU. The MMCONFIG is usually next to the start of IOMMU
memory address. On some machines that segment starts at 0xE0000000 and shows
up in the E820 tables as reserved. On other machines, this IOMMU segment is
in 0xF4000000, while MMCFG is in 0xF0000000 (64MB for MCFG, and 192MB for IOMMU).
And that memory segment (0xF0000000) does not show up in E820 table.
(FYI: No need to set up the full 256MB for MCFG as there isn't enought
PCI slots to need so much).
If the MMCONFIG falls in that assumed memory mapped 256MB - then
the quote from section 14.2 can apply to that particular situation
"The BIOS does not return a range for the memory mapping of PCI devices ...";
The end result is that the MCFG and IOMMU can point to memory ranges
reserved in E820, but do not neccesarily have to.
>
> >If this is not a specification issue, I was wondering if perhaps for the
> >machines you refer to, their BIOS bug is that the E820 have memory ranges
> >which also encompass what MMCONF points to?
>
> no their bug is mostly that MCFG is garbage in those bioses. It points
> plain to
> the wrong place. They even reserved the correct range, just pointed mcfg at
> the
> wrong place.
>
That is definitely a problem - and the "sanity-check" can definitly bail
out on those BIOSes and not crash Linux. The other side of the coin is that
BIOSes that do implement the MCFG/E820 correctly are penalized: the
MMCONFIG capability on those machines is turned off when using Linux
2.6.17.
Could you provide more details on the BIOS? Did the vendor released an updated BIOS?
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [patch] Ignore MCFG if the mmconfig area isn't reserved in the e820 table.
2006-05-18 15:56 ` Konrad Rzeszutek
@ 2006-05-18 16:46 ` Arjan van de Ven
2006-05-18 18:06 ` Petr Vandrovec
0 siblings, 1 reply; 21+ messages in thread
From: Arjan van de Ven @ 2006-05-18 16:46 UTC (permalink / raw)
To: Konrad Rzeszutek; +Cc: linux-kernel, konradr
Konrad Rzeszutek wrote:
>>> Also the ACPI spec v3.0 (pg 405 of PDF, section 14.2, titled:
>>> "E820 Assumptions and Limitations") agrees with this:
>>>
>>> "The BIOS does not return a range description for the memory mapping
>>> of PCI devices, ISA Option ROMs, and the ISA PNP cards because the OS
>>> has mechanisms available to detect them."
>> MCFG is none of these...
>
> This is a bit of gray area.
How? The segment you quoted basically says "no need to put any "MEM" resources
into E820 because the OS will know about those from its PCI scan".
And for an OS to map pci devices like cardbus or hotplug PCI anywhere, it obviously
needs to know about PCI so this is entirely a fair assumption.
MCFG is new. It's not traditional and it generally isn't a MEM region on any
PCI device. The result is that OSes that do not know about MCFG (like Windows XP)
do not at all know that there is something there already in the address space.
Neither the wording of the spec pieces you quoted nor common sense can lead to
a "but it doesn't have to be marked reserved" interpretation... the OS just has
to know what address space is free for mapping things into, via one way or another.
> If the MMCONFIG falls in that assumed memory mapped 256MB - then
> the quote from section 14.2 can apply to that particular situation
> "The BIOS does not return a range for the memory mapping of PCI devices ...";
yes this is the MEM BARs. Different animal.
>
>>> If this is not a specification issue, I was wondering if perhaps for the
>>> machines you refer to, their BIOS bug is that the E820 have memory ranges
>>> which also encompass what MMCONF points to?
>> no their bug is mostly that MCFG is garbage in those bioses. It points
>> plain to
>> the wrong place. They even reserved the correct range, just pointed mcfg at
>> the
>> wrong place.
>>
>
> That is definitely a problem - and the "sanity-check" can definitly bail
> out on those BIOSes and not crash Linux. The other side of the coin is that
> BIOSes that do implement the MCFG/E820 correctly are penalized:
I hereby contest that it's implemented correctly if it's not marked reserved...
> the
> MMCONFIG capability on those machines is turned off when using Linux
> 2.6.17.
>
> Could you provide more details on the BIOS? Did the vendor released an updated BIOS?
there were several, some I can't talk about.
Let me ask you a counter question: could you provide more details on the opposite case, and did the
vendor release an updated, fixed BIOS ?
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [patch] Ignore MCFG if the mmconfig area isn't reserved in the e820 table.
2006-05-18 16:46 ` Arjan van de Ven
@ 2006-05-18 18:06 ` Petr Vandrovec
2006-05-18 18:15 ` Arjan van de Ven
0 siblings, 1 reply; 21+ messages in thread
From: Petr Vandrovec @ 2006-05-18 18:06 UTC (permalink / raw)
To: Arjan van de Ven; +Cc: Konrad Rzeszutek, linux-kernel, konradr
Arjan van de Ven wrote:
> Konrad Rzeszutek wrote:
>
>> That is definitely a problem - and the "sanity-check" can definitly bail
>> out on those BIOSes and not crash Linux. The other side of the coin is
>> that BIOSes that do implement the MCFG/E820 correctly are penalized:
>
> I hereby contest that it's implemented correctly if it's not marked
> reserved...
PCI Firmware Specification 3.0
(http://www.pcisig.com/members/downloads/specifications/conventional/pcifw_r3.0.pdf),
page 42, notes for table 4-2, paragraph 2 says that firmware must report MCFG as
reserved region. Last sentence of same paragraph says that resources may be
optionally marked reserved by E820 or EFIGetMemoryMap, but must be always
reported as motherboard resources through ACPI (for exact citation please see
document itself, it is not freely available so I'm not going to copy-paste text
from it without written permission from pcisig...).
So it seems to me that BIOS not reporting MMCONFIG as reserved through E820 is
compliant, and what matters is that MMCONFIG must be reported as ACPI
motherboard resource.
Petr
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [patch] Ignore MCFG if the mmconfig area isn't reserved in the e820 table.
2006-05-18 18:06 ` Petr Vandrovec
@ 2006-05-18 18:15 ` Arjan van de Ven
0 siblings, 0 replies; 21+ messages in thread
From: Arjan van de Ven @ 2006-05-18 18:15 UTC (permalink / raw)
To: Petr Vandrovec; +Cc: Konrad Rzeszutek, linux-kernel, konradr
Petr Vandrovec wrote:
> Arjan van de Ven wrote:
>> Konrad Rzeszutek wrote:
>>
>>> That is definitely a problem - and the "sanity-check" can definitly bail
>>> out on those BIOSes and not crash Linux. The other side of the coin
>>> is that BIOSes that do implement the MCFG/E820 correctly are penalized:
>>
>> I hereby contest that it's implemented correctly if it's not marked
>> reserved...
>
> PCI Firmware Specification 3.0
> (http://www.pcisig.com/members/downloads/specifications/conventional/pcifw_r3.0.pdf),
> page 42, notes for table 4-2, paragraph 2 says that firmware must report
> MCFG as reserved region. Last sentence of same paragraph says that
> resources may be optionally marked reserved by E820 or EFIGetMemoryMap,
resources == BARs, MCFG is a whole different beast
> but must be always reported as motherboard resources through ACPI (for
> exact citation please see document itself, it is not freely available so
> I'm not going to copy-paste text from it without written permission from
> pcisig...).
>
> So it seems to me that BIOS not reporting MMCONFIG as reserved through
> E820 is compliant, and what matters is that MMCONFIG must be reported as
> ACPI motherboard resource.
I think that's not the right interpretation; resources==BARs in this context.
I'll find a way to get that document and recheck to make sure...
^ permalink raw reply [flat|nested] 21+ messages in thread
end of thread, other threads:[~2006-05-18 18:15 UTC | newest]
Thread overview: 21+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-03-23 18:22 [patch] Ignore MCFG if the mmconfig area isn't reserved in the e820 table Arjan van de Ven
2006-03-23 17:56 ` Andi Kleen
2006-03-23 19:02 ` Arjan van de Ven
2006-03-23 19:15 ` Arjan van de Ven
2006-03-24 12:19 ` Andi Kleen
2006-03-24 13:36 ` Arjan van de Ven
2006-03-24 21:25 ` Greg KH
2006-03-24 15:22 ` [patch] Ignore MCFG if the mmconfig area isn't reserved in thee820 table Ashok Raj
2006-03-24 15:24 ` Arjan van de Ven
2006-03-24 15:39 ` Andi Kleen
2006-03-24 15:42 ` Arjan van de Ven
2006-03-24 15:48 ` Ashok Raj
2006-03-24 15:48 ` Arjan van de Ven
2006-03-24 15:48 ` Andi Kleen
2006-03-24 15:50 ` Arjan van de Ven
-- strict thread matches above, loose matches on Subject: below --
2006-05-18 2:53 [patch] Ignore MCFG if the mmconfig area isn't reserved in the e820 table Konrad Rzeszutek
2006-05-18 13:03 ` Arjan van de Ven
2006-05-18 15:56 ` Konrad Rzeszutek
2006-05-18 16:46 ` Arjan van de Ven
2006-05-18 18:06 ` Petr Vandrovec
2006-05-18 18:15 ` Arjan van de Ven
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox