* [RFC PATCH] PCI MMCONFIG: add validation against ACPI motherboard resources
@ 2007-04-30 2:14 Robert Hancock
2007-04-30 2:59 ` Randy Dunlap
` (2 more replies)
0 siblings, 3 replies; 61+ messages in thread
From: Robert Hancock @ 2007-04-30 2:14 UTC (permalink / raw)
To: linux-kernel; +Cc: Jesse Barnes, Andi Kleen, Chuck Ebbert, Len Brown
This patch adds validation of the MMCONFIG table against the ACPI reserved
motherboard resources. If the MMCONFIG table is found to be reserved in
ACPI, we don't bother checking the E820 table. The PCI Express firmware spec
apparently tells BIOS developers that reservation in ACPI is required and
E820 reservation is optional, so checking against ACPI first makes sense.
Many BIOSes don't reserve the MMCONFIG region in E820 even though it is
perfectly functional; the existing check needlessly disables MMCONFIG in
these cases.
In order to do this, MMCONFIG setup has been split into two phases. If PCI
configuration type 1 is not available (like on EFI Macs) then MMCONFIG is
enabled early as before. Otherwise, it is enabled later after the ACPI
interpreter is enabled, since we need to be able to execute control methods
in order to check the ACPI reserved resources. Presently this is just triggered
off the end of ACPI interpreter initialization.
There are a few other behavioral changes here:
-Validate all MMCONFIG configurations provided, not just the first one.
-Validate that the entire required length of each configuration, as determined by the
provided ending bus number, is reserved, not just the minimum required allocation.
-Validate that the area is reserved even if we read it from the chipset directly
and not from the MCFG table. This catches the case where the BIOS didn't set the
location properly in the chipset and has mapped it over other things it shouldn't have.
This might be overly pessimistic - we might be able to instead verify that no other
reserved resources (like chipset registers) are inside this memory range.
Some testing is needed to see if this rejects MMCONFIG on all systems where it
is problematic. There were some patches floating around to read the table
location out of the chipset for Intel 915 and 965; I think the author found the
latter to be problematic since the chipset had the table mapped over top of
motherboard resources. The extra checking here may catch that case if we add
that chipset-specific support.
Applies to 2.6.21.1.
Signed-off-by: Robert Hancock <hancockr@shaw.ca>
diff -up linux-2.6.21.1/arch/i386/pci/init.c linux-2.6.21.1edit/arch/i386/pci/init.c
--- linux-2.6.21.1/arch/i386/pci/init.c 2007-04-27 15:49:26.000000000 -0600
+++ linux-2.6.21.1edit/arch/i386/pci/init.c 2007-04-29 18:36:32.000000000 -0600
@@ -12,7 +12,7 @@ static __init int pci_access_init(void)
type = pci_direct_probe();
#endif
#ifdef CONFIG_PCI_MMCONFIG
- pci_mmcfg_init(type);
+ pci_mmcfg_early_init(type);
#endif
if (raw_pci_ops)
return 0;
diff -up linux-2.6.21.1/arch/i386/pci/mmconfig-shared.c linux-2.6.21.1edit/arch/i386/pci/mmconfig-shared.c
--- linux-2.6.21.1/arch/i386/pci/mmconfig-shared.c 2007-04-27 15:49:26.000000000 -0600
+++ linux-2.6.21.1edit/arch/i386/pci/mmconfig-shared.c 2007-04-29 19:47:57.000000000 -0600
@@ -191,9 +191,77 @@ static void __init pci_mmcfg_insert_reso
}
}
-static void __init pci_mmcfg_reject_broken(int type)
+/*
+ * acpi_walk_resources() callback: check whether one resource entry fully
+ * contains the MMCONFIG range handed in through @data.
+ *
+ * @res:  resource entry currently being visited
+ * @data: struct resource describing the MMCONFIG range; callers pass an
+ *        inclusive end address.  Its flags field is set to 1 when a
+ *        containing entry is found.
+ *
+ * Returns AE_CTRL_TERMINATE once a containing entry has been found,
+ * AE_OK otherwise so the walk moves on to the next entry.
+ */
+static acpi_status __init check_mcfg_resource(struct acpi_resource *res,
+					      void *data)
+{
+	struct resource *mcfg_res = data;
+	struct acpi_resource_address64 address;
+	acpi_status status;
+
+	if (res->type == ACPI_RESOURCE_TYPE_FIXED_MEMORY32) {
+		struct acpi_resource_fixed_memory32 *fixmem32 =
+			&res->data.fixed_memory32;
+		/*
+		 * Widen before adding so that a window ending exactly at
+		 * 4GB does not wrap around to 0.
+		 */
+		u64 fixmem_limit = (u64)fixmem32->address +
+				   fixmem32->address_length;
+
+		/* end is inclusive, the limit is exclusive: use '<' */
+		if ((mcfg_res->start >= fixmem32->address) &&
+		    (mcfg_res->end < fixmem_limit)) {
+			mcfg_res->flags = 1;
+			return AE_CTRL_TERMINATE;
+		}
+	}
+	if ((res->type != ACPI_RESOURCE_TYPE_ADDRESS32) &&
+	    (res->type != ACPI_RESOURCE_TYPE_ADDRESS64))
+		return AE_OK;
+
+	status = acpi_resource_to_address64(res, &address);
+	if (ACPI_FAILURE(status) || (address.address_length <= 0) ||
+	    (address.resource_type != ACPI_MEMORY_RANGE))
+		return AE_OK;
+
+	/* same inclusive-end vs. exclusive-limit comparison as above */
+	if ((mcfg_res->start >= address.minimum) &&
+	    (mcfg_res->end <
+	     (address.minimum + address.address_length))) {
+		mcfg_res->flags = 1;
+		return AE_CTRL_TERMINATE;
+	}
+	return AE_OK;
+}
+
+/*
+ * acpi_get_devices() callback: walk the _CRS entries of one device looking
+ * for an entry that covers the MMCONFIG range passed in @context (a
+ * struct resource).  Ends the device walk as soon as the range has been
+ * flagged as found; otherwise lets it continue with the next device.
+ */
+static acpi_status __init find_mboard_resource(acpi_handle handle, u32 lvl,
+					       void *context, void **rv)
+{
+	struct resource *wanted = context;
+
+	acpi_walk_resources(handle, METHOD_NAME__CRS,
+			    check_mcfg_resource, wanted);
+
+	return wanted->flags ? AE_CTRL_TERMINATE : AE_OK;
+}
+
+/*
+ * Report whether the range [start, end] is covered by a resource of an
+ * ACPI device matching PNP0C01 or, failing that, PNP0C02.
+ *
+ * Returns non-zero when a covering resource was found, 0 otherwise.
+ */
+static int __init is_acpi_reserved(unsigned long start, unsigned long end)
+{
+	struct resource range;
+
+	range.flags = 0;
+	range.start = start;
+	range.end = end;
+
+	acpi_get_devices("PNP0C01", find_mboard_resource, &range, NULL);
+	if (range.flags)
+		return range.flags;
+
+	/* nothing under PNP0C01, try the second hardware ID */
+	acpi_get_devices("PNP0C02", find_mboard_resource, &range, NULL);
+	return range.flags;
+}
+
+static void __init pci_mmcfg_reject_broken(void)
{
typeof(pci_mmcfg_config[0]) *cfg;
+ int i;
if ((pci_mmcfg_config_num == 0) ||
(pci_mmcfg_config == NULL) ||
@@ -213,18 +281,36 @@ static void __init pci_mmcfg_reject_brok
"Rejected as broken MCFG.\n");
goto reject;
}
-
- /*
- * Only do this check when type 1 works. If it doesn't work
- * assume we run on a Mac and always use MCFG
- */
- if (type == 1 && !e820_all_mapped(cfg->address,
- cfg->address + MMCONFIG_APER_MIN,
- E820_RESERVED)) {
- printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %Lx is not"
- " E820-reserved\n", cfg->address);
- goto reject;
+
+	/*
+	 * Every MCFG entry has to be covered by a reservation: the ACPI
+	 * motherboard resources are consulted first, E820 only as a fallback.
+	 */
+	for (i = 0; i < pci_mmcfg_config_num; i++) {
+		u32 size;
+
+		/*
+		 * Select the entry before sizing it; computing size first
+		 * would use the bus range of the previously checked entry.
+		 */
+		cfg = &pci_mmcfg_config[i];
+		/* 1MB (1 << 20) per bus, for end_bus_number + 1 buses */
+		size = (cfg->end_bus_number + 1) << 20;
+		printk(KERN_NOTICE "PCI: MCFG configuration %d: base %p segment %hu buses %u - %u\n",
+		       i, (void*)cfg->address, cfg->pci_segment,
+		       (unsigned int)cfg->start_bus_number,
+		       (unsigned int)cfg->end_bus_number);
+		if (is_acpi_reserved(cfg->address,
+				     cfg->address + size - 1))
+			printk(KERN_NOTICE "PCI: MCFG area at %Lx reserved "
+			       "in ACPI motherboard resources\n",
+			       cfg->address);
+		else {
+			printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %Lx is not "
+			       "reserved in ACPI motherboard resources\n",
+			       cfg->address);
+			/* Don't try to do this check unless configuration type 1 is
+			   available. */
+			if ((pci_probe & PCI_PROBE_CONF1) &&
+			    e820_all_mapped(cfg->address,
+					    cfg->address + size - 1,
+					    E820_RESERVED))
+				printk(KERN_NOTICE "PCI: MCFG area at %Lx reserved in E820\n",
+				       cfg->address);
+			else
+				goto reject;
+		}
+	}
+
return;
reject:
@@ -234,20 +320,46 @@ reject:
pci_mmcfg_config_num = 0;
}
-void __init pci_mmcfg_init(int type)
+/*
+ * Early MMCONFIG setup, used only when type 1 configuration access is not
+ * available.  Parses the MCFG table and, if it yields a usable first entry
+ * and the arch setup succeeds, makes MMCONFIG the only probe method.
+ *
+ * @type: configuration access type detected by the caller; 1 means type 1
+ *        access works and MMCONFIG setup is deferred.
+ */
+void __init pci_mmcfg_early_init(int type)
+{
+	/*
+	 * Nothing to do if MMCONFIG is disabled.  If type 1 access is
+	 * available there is no need to enable MMCONFIG yet either: it can
+	 * be deferred until the ACPI interpreter is available to better
+	 * validate things.
+	 */
+	if (!(pci_probe & PCI_PROBE_MMCONF) || type == 1)
+		return;
+
+	acpi_table_parse(ACPI_SIG_MCFG, acpi_parse_mcfg);
+
+	/* no table, no entries, or a first entry without a base address */
+	if (!pci_mmcfg_config_num || !pci_mmcfg_config ||
+	    !pci_mmcfg_config[0].address)
+		return;
+
+	if (!pci_mmcfg_arch_init())
+		return;
+
+	pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF;
+}
+
+void __init pci_mmcfg_late_init(void)
{
int known_bridge = 0;
+ /* MMCONFIG disabled */
if ((pci_probe & PCI_PROBE_MMCONF) == 0)
return;
+
+ /* MMCONFIG already enabled */
+ if (!(pci_probe & PCI_PROBE_MASK & ~PCI_PROBE_MMCONF))
+ return;
- if (type == 1 && pci_mmcfg_check_hostbridge())
+ if ((pci_probe & PCI_PROBE_CONF1) && pci_mmcfg_check_hostbridge())
known_bridge = 1;
-
- if (!known_bridge) {
+ else
acpi_table_parse(ACPI_SIG_MCFG, acpi_parse_mcfg);
- pci_mmcfg_reject_broken(type);
- }
+
+ pci_mmcfg_reject_broken();
if ((pci_mmcfg_config_num == 0) ||
(pci_mmcfg_config == NULL) ||
@@ -255,7 +367,7 @@ void __init pci_mmcfg_init(int type)
return;
if (pci_mmcfg_arch_init()) {
- if (type == 1)
+ if (pci_probe & PCI_PROBE_CONF1)
unreachable_devices();
if (known_bridge)
pci_mmcfg_insert_resources();
diff -up linux-2.6.21.1/arch/i386/pci/pci.h linux-2.6.21.1edit/arch/i386/pci/pci.h
--- linux-2.6.21.1/arch/i386/pci/pci.h 2007-04-27 15:49:26.000000000 -0600
+++ linux-2.6.21.1edit/arch/i386/pci/pci.h 2007-04-29 18:33:21.000000000 -0600
@@ -91,7 +91,8 @@ extern int pci_conf1_read(unsigned int s
extern int pci_direct_probe(void);
extern void pci_direct_init(int type);
extern void pci_pcbios_init(void);
-extern void pci_mmcfg_init(int type);
+extern void pci_mmcfg_early_init(int type);
+extern void pci_mmcfg_late_init(void);
extern void pcibios_sort(void);
/* pci-mmconfig.c */
diff -ruw linux-2.6.21.1/drivers/acpi/bus.c linux-2.6.21.1edit/drivers/acpi/bus.c
--- linux-2.6.21.1/drivers/acpi/bus.c 2007-04-27 15:49:26.000000000 -0600
+++ linux-2.6.21.1edit/drivers/acpi/bus.c 2007-04-29 19:22:07.000000000 -0600
@@ -42,6 +42,7 @@
ACPI_MODULE_NAME("bus");
#ifdef CONFIG_X86
extern void __init acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger);
+extern void __init pci_mmcfg_late_init(void);
#endif
struct acpi_device *acpi_root;
@@ -753,6 +754,9 @@
result = acpi_bus_init();
if (!result) {
+#ifdef CONFIG_X86
+ pci_mmcfg_late_init();
+#endif
#ifdef CONFIG_PM_LEGACY
if (!PM_IS_ACTIVE())
pm_active = 1;
^ permalink raw reply [flat|nested] 61+ messages in thread* Re: [RFC PATCH] PCI MMCONFIG: add validation against ACPI motherboard resources 2007-04-30 2:14 [RFC PATCH] PCI MMCONFIG: add validation against ACPI motherboard resources Robert Hancock @ 2007-04-30 2:59 ` Randy Dunlap 2007-04-30 22:59 ` Olivier Galibert 2007-05-24 0:02 ` Jesse Barnes 2 siblings, 0 replies; 61+ messages in thread From: Randy Dunlap @ 2007-04-30 2:59 UTC (permalink / raw) To: Robert Hancock Cc: linux-kernel, Jesse Barnes, Andi Kleen, Chuck Ebbert, Len Brown On Sun, 29 Apr 2007 20:14:37 -0600 Robert Hancock wrote: > diff -up linux-2.6.21.1/arch/i386/pci/mmconfig-shared.c linux-2.6.21.1edit/arch/i386/pci/mmconfig-shared.c > --- linux-2.6.21.1/arch/i386/pci/mmconfig-shared.c 2007-04-27 15:49:26.000000000 -0600 > +++ linux-2.6.21.1edit/arch/i386/pci/mmconfig-shared.c 2007-04-29 19:47:57.000000000 -0600 > @@ -191,9 +191,77 @@ static void __init pci_mmcfg_insert_reso > } > } > > + > +static int __init is_acpi_reserved(unsigned long start, unsigned long end) > +{ > + struct resource mcfg_res; > + > + mcfg_res.start = start; > + mcfg_res.end = end; > + mcfg_res.flags = 0; > + > + acpi_get_devices("PNP0C01", find_mboard_resource, &mcfg_res, NULL); > + > + if( !mcfg_res.flags ) if (!mcfg_res.flags) > + acpi_get_devices("PNP0C02", find_mboard_resource, &mcfg_res, NULL); > + > + return mcfg_res.flags; > +} > + > +static void __init pci_mmcfg_reject_broken(void) > { > typeof(pci_mmcfg_config[0]) *cfg; > + int i; > > if ((pci_mmcfg_config_num == 0) || > (pci_mmcfg_config == NULL) || > @@ -213,18 +281,36 @@ static void __init pci_mmcfg_reject_brok > "Rejected as broken MCFG.\n"); > goto reject; > } > - > - /* > - * Only do this check when type 1 works. 
If it doesn't work > - * assume we run on a Mac and always use MCFG > - */ > - if (type == 1 && !e820_all_mapped(cfg->address, > - cfg->address + MMCONFIG_APER_MIN, > - E820_RESERVED)) { > - printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %Lx is not" > - " E820-reserved\n", cfg->address); > - goto reject; > + > + for(i=0; i < pci_mmcfg_config_num; i++) { for (i = 0; i < pci_mmcfg_config_num; i++) { > + u32 size = (cfg->end_bus_number + 1) << 20; > + cfg = &pci_mmcfg_config[i]; > + printk(KERN_NOTICE "PCI: MCFG configuration %d: base %p segment %hu buses %u - %u\n", line too long. > + i, (void*)cfg->address, cfg->pci_segment, > + (unsigned int)cfg->start_bus_number, > + (unsigned int)cfg->end_bus_number); > + if(is_acpi_reserved(cfg->address, if ( > + cfg->address + size - 1)) > + printk(KERN_NOTICE "PCI: MCFG area at %Lx reserved " > + "in ACPI motherboard resources\n", > + cfg->address); > + else { > + printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %Lx is not " > + "reserved in ACPI motherboard resources\n", > + cfg->address); > + /* Don't try to do this check unless configuration type 1 is > + available. */ > + if((pci_probe & PCI_PROBE_CONF1) && if (( and that line ends with a space :( > + e820_all_mapped(cfg->address, > + cfg->address + size - 1, > + E820_RESERVED)) > + printk(KERN_NOTICE "PCI: MCFG area at %Lx reserved in E820\n", > + cfg->address); > + else > + goto reject; > + } > } > + > return; > > reject: > @@ -234,20 +320,46 @@ reject: > pci_mmcfg_config_num = 0; > } > > -void __init pci_mmcfg_init(int type) > +void __init pci_mmcfg_early_init(int type) > +{ > + if ((pci_probe & PCI_PROBE_MMCONF) == 0) > + return; > + > + /* If type 1 access is available, no need to enable MMCONFIG yet, we can > + defer until later when the ACPI interpreter is available to better > + validate things. 
*/ > + if( type == 1 ) if (type == 1) > + return; > + > + acpi_table_parse(ACPI_SIG_MCFG, acpi_parse_mcfg); > + > + if ((pci_mmcfg_config_num == 0) || > + (pci_mmcfg_config == NULL) || > + (pci_mmcfg_config[0].address == 0)) > + return; > + > + if (pci_mmcfg_arch_init()) > + pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF; > +} > diff -ruw linux-2.6.21.1/drivers/acpi/bus.c linux-2.6.21.1edit/drivers/acpi/bus.c > --- linux-2.6.21.1/drivers/acpi/bus.c 2007-04-27 15:49:26.000000000 -0600 > +++ linux-2.6.21.1edit/drivers/acpi/bus.c 2007-04-29 19:22:07.000000000 -0600 > @@ -42,6 +42,7 @@ > ACPI_MODULE_NAME("bus"); > #ifdef CONFIG_X86 > extern void __init acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger); > +extern void __init pci_mmcfg_late_init(void); externs should go in header files. (not that you started this here) > #endif > > struct acpi_device *acpi_root; > @@ -753,6 +754,9 @@ > result = acpi_bus_init(); > > if (!result) { > +#ifdef CONFIG_X86 > + pci_mmcfg_late_init(); > +#endif > #ifdef CONFIG_PM_LEGACY > if (!PM_IS_ACTIVE()) > pm_active = 1; > - --- ~Randy *** Remember to use Documentation/SubmitChecklist when testing your code *** ^ permalink raw reply [flat|nested] 61+ messages in thread
* Re: [RFC PATCH] PCI MMCONFIG: add validation against ACPI motherboard resources 2007-04-30 2:14 [RFC PATCH] PCI MMCONFIG: add validation against ACPI motherboard resources Robert Hancock 2007-04-30 2:59 ` Randy Dunlap @ 2007-04-30 22:59 ` Olivier Galibert 2007-04-30 23:26 ` Robert Hancock ` (2 more replies) 2007-05-24 0:02 ` Jesse Barnes 2 siblings, 3 replies; 61+ messages in thread From: Olivier Galibert @ 2007-04-30 22:59 UTC (permalink / raw) To: Robert Hancock Cc: linux-kernel, Jesse Barnes, Andi Kleen, Chuck Ebbert, Len Brown On Sun, Apr 29, 2007 at 08:14:37PM -0600, Robert Hancock wrote: > -Validate that the area is reserved even if we read it from the > chipset directly and not from the MCFG table. This catches the case > where the BIOS didn't set the location properly in the chipset and > has mapped it over other things it shouldn't have. This might be > overly pessimistic - we might be able to instead verify that no > other reserved resources (like chipset registers) are inside this > memory range. I have a fundamental problem with that: you don't validate a higher reliability information against a lower one. The chipset registers are high reliability. Modulo unknown hardware erratas and bugs in the code (and accepting f0000000 is in practice a bug in the code, the docs are starting to catch up with it too), the chipset *will* decode mmconfig at the looked up address no matter what. On the other side, the ACPI data is bios generated, and that is well known to be horribly unreliable. Hell, if it was reliable we could just use the MFCG ACPI table without questions. So you can check the ACPI stuff for coherency (MFCG vs. the rest), you can validate the ACPI stuff against the results of the lookup if you want, but validating the lookup against ACPI is nonsensical. OG. ^ permalink raw reply [flat|nested] 61+ messages in thread
* Re: [RFC PATCH] PCI MMCONFIG: add validation against ACPI motherboard resources 2007-04-30 22:59 ` Olivier Galibert @ 2007-04-30 23:26 ` Robert Hancock 2007-05-01 16:48 ` Jesse Barnes 2007-05-02 2:41 ` Jesse Barnes 2 siblings, 0 replies; 61+ messages in thread From: Robert Hancock @ 2007-04-30 23:26 UTC (permalink / raw) To: Olivier Galibert, linux-kernel, Jesse Barnes, Andi Kleen, Chuck Ebbert, Len Brown Olivier Galibert wrote: > On Sun, Apr 29, 2007 at 08:14:37PM -0600, Robert Hancock wrote: >> -Validate that the area is reserved even if we read it from the >> chipset directly and not from the MCFG table. This catches the case >> where the BIOS didn't set the location properly in the chipset and >> has mapped it over other things it shouldn't have. This might be >> overly pessimistic - we might be able to instead verify that no >> other reserved resources (like chipset registers) are inside this >> memory range. > > I have a fundamental problem with that: you don't validate a higher > reliability information against a lower one. The chipset registers > are high reliability. Modulo unknown hardware erratas and bugs in the > code (and accepting f0000000 is in practice a bug in the code, the > docs are starting to catch up with it too), the chipset *will* decode > mmconfig at the looked up address no matter what. On the other side, > the ACPI data is bios generated, and that is well known to be horribly > unreliable. Hell, if it was reliable we could just use the MFCG ACPI > table without questions. > > So you can check the ACPI stuff for coherency (MFCG vs. the rest), you > can validate the ACPI stuff against the results of the lookup if you > want, but validating the lookup against ACPI is nonsensical. The problem is that in the event the MMCONFIG table is assigned to an address range that conflicts with other devices, there's no guarantee that MMCONFIG will have the higher decode priority. 
Apparently this is exactly the case on some boards, the MMCONFIG is mapped on top of chipset registers, and when we think we're accessing the MMCONFIG table we're really scrambling random chipset registers and hosing things. So we can't just blindly trust the values as being usable even when they come directly from the chipset. As I mentioned, though, really what we want to verify in this case is that no other reserved resources fall inside the range being decoded by the chipset. I may see if I can code something to do this. Essentially, though, the existing patch effectively does this, on the assumption that the board won't have conflicting reserved resources in the ACPI tables, which is probably a safe assumption as Windows would likely be terribly unhappy with that.. -- Robert Hancock Saskatoon, SK, Canada To email, remove "nospam" from hancockr@nospamshaw.ca Home Page: http://www.roberthancock.com/ ^ permalink raw reply [flat|nested] 61+ messages in thread
* Re: [RFC PATCH] PCI MMCONFIG: add validation against ACPI motherboard resources 2007-04-30 22:59 ` Olivier Galibert 2007-04-30 23:26 ` Robert Hancock @ 2007-05-01 16:48 ` Jesse Barnes 2007-05-02 2:41 ` Jesse Barnes 2 siblings, 0 replies; 61+ messages in thread From: Jesse Barnes @ 2007-05-01 16:48 UTC (permalink / raw) To: Olivier Galibert Cc: Robert Hancock, linux-kernel, Andi Kleen, Chuck Ebbert, Len Brown On Monday, April 30, 2007, Olivier Galibert wrote: > On Sun, Apr 29, 2007 at 08:14:37PM -0600, Robert Hancock wrote: > > -Validate that the area is reserved even if we read it from the > > chipset directly and not from the MCFG table. This catches the case > > where the BIOS didn't set the location properly in the chipset and > > has mapped it over other things it shouldn't have. This might be > > overly pessimistic - we might be able to instead verify that no > > other reserved resources (like chipset registers) are inside this > > memory range. > > I have a fundamental problem with that: you don't validate a higher > reliability information against a lower one. The chipset registers > are high reliability. Modulo unknown hardware erratas and bugs in the > code (and accepting f0000000 is in practice a bug in the code, the > docs are starting to catch up with it too), the chipset *will* decode > mmconfig at the looked up address no matter what. On the other side, > the ACPI data is bios generated, and that is well known to be horribly > unreliable. Hell, if it was reliable we could just use the MFCG ACPI > table without questions. We need to look at the register, but we may not want to use it if it looks too confused. If it doesn't agree with what we see in ACPI, we likely have a problem due to the issues Robert outlined in his other mail. Jesse ^ permalink raw reply [flat|nested] 61+ messages in thread
* Re: [RFC PATCH] PCI MMCONFIG: add validation against ACPI motherboard resources 2007-04-30 22:59 ` Olivier Galibert 2007-04-30 23:26 ` Robert Hancock 2007-05-01 16:48 ` Jesse Barnes @ 2007-05-02 2:41 ` Jesse Barnes 2007-05-02 2:56 ` Jesse Barnes 2 siblings, 1 reply; 61+ messages in thread From: Jesse Barnes @ 2007-05-02 2:41 UTC (permalink / raw) To: Olivier Galibert Cc: Robert Hancock, linux-kernel, Andi Kleen, Chuck Ebbert, Len Brown On Monday, April 30, 2007, Olivier Galibert wrote: > On Sun, Apr 29, 2007 at 08:14:37PM -0600, Robert Hancock wrote: > > -Validate that the area is reserved even if we read it from the > > chipset directly and not from the MCFG table. This catches the case > > where the BIOS didn't set the location properly in the chipset and > > has mapped it over other things it shouldn't have. This might be > > overly pessimistic - we might be able to instead verify that no > > other reserved resources (like chipset registers) are inside this > > memory range. > > I have a fundamental problem with that: you don't validate a higher > reliability information against a lower one. The chipset registers > are high reliability. Modulo unknown hardware erratas and bugs in the > code (and accepting f0000000 is in practice a bug in the code, the > docs are starting to catch up with it too), the chipset *will* decode > mmconfig at the looked up address no matter what. On the other side, > the ACPI data is bios generated, and that is well known to be horribly > unreliable. Hell, if it was reliable we could just use the MFCG ACPI > table without questions. Now that I've read his patch closely I think you're right. Robert, it looks like you'll trust acpi_table_parse if pci_mmcfg_check_hostbridge returns a failure. I think it should be treated with a higher priority. 
If pci_mmcfg_check_hostbridge returns a failure, there's no way MCFG space can work, so we should disable it unconditionally in that case (even if ACPI says "trust me, when have I ever lied to you?"). I'm testing it now on my 965... Thanks, Jesse ^ permalink raw reply [flat|nested] 61+ messages in thread
* Re: [RFC PATCH] PCI MMCONFIG: add validation against ACPI motherboard resources 2007-05-02 2:41 ` Jesse Barnes @ 2007-05-02 2:56 ` Jesse Barnes 2007-05-02 5:27 ` Jesse Barnes 0 siblings, 1 reply; 61+ messages in thread From: Jesse Barnes @ 2007-05-02 2:56 UTC (permalink / raw) To: Olivier Galibert Cc: Robert Hancock, linux-kernel, Andi Kleen, Chuck Ebbert, Len Brown On Tuesday, May 01, 2007, Jesse Barnes wrote: > On Monday, April 30, 2007, Olivier Galibert wrote: > > On Sun, Apr 29, 2007 at 08:14:37PM -0600, Robert Hancock wrote: > > > -Validate that the area is reserved even if we read it from the > > > chipset directly and not from the MCFG table. This catches the case > > > where the BIOS didn't set the location properly in the chipset and > > > has mapped it over other things it shouldn't have. This might be > > > overly pessimistic - we might be able to instead verify that no > > > other reserved resources (like chipset registers) are inside this > > > memory range. > > > > I have a fundamental problem with that: you don't validate a higher > > reliability information against a lower one. The chipset registers > > are high reliability. Modulo unknown hardware erratas and bugs in the > > code (and accepting f0000000 is in practice a bug in the code, the > > docs are starting to catch up with it too), the chipset *will* decode > > mmconfig at the looked up address no matter what. On the other side, > > the ACPI data is bios generated, and that is well known to be horribly > > unreliable. Hell, if it was reliable we could just use the MFCG ACPI > > table without questions. > > Now that I've read his patch closely I think you're right. > > Robert, it looks like you'll trust acpi_table_parse if > pci_mmcfg_check_hostbridge returns a failure. I think it should be > treated with a higher priority. 
If pci_mmcfg_check_hostbridge returns a > failure, there's no way MCFG space can work, so we should disable it > unconditionally in that case (even if ACPI says "trust me, when have I > ever lied to you?"). > > I'm testing it now on my 965... Bah... nevermind Robert, I see you're doing this already in pci_mmcfg_reject_broken. I'm about to reboot & test now. Jesse ^ permalink raw reply [flat|nested] 61+ messages in thread
* Re: [RFC PATCH] PCI MMCONFIG: add validation against ACPI motherboard resources 2007-05-02 2:56 ` Jesse Barnes @ 2007-05-02 5:27 ` Jesse Barnes 2007-05-02 14:34 ` Robert Hancock 0 siblings, 1 reply; 61+ messages in thread From: Jesse Barnes @ 2007-05-02 5:27 UTC (permalink / raw) To: Olivier Galibert Cc: Robert Hancock, linux-kernel, Andi Kleen, Chuck Ebbert, Len Brown On Tuesday, May 01, 2007, Jesse Barnes wrote: > > I'm testing it now on my 965... > > Bah... nevermind Robert, I see you're doing this already in > pci_mmcfg_reject_broken. I'm about to reboot & test now. Ok, I've tested a bit on my 965 (after re-adding my old patch to support it) and the new checks are more complete, but my BIOS still appears to be buggy. The extended config space (as defined by the register) is at 0xf0000000 (full value is 0xf0000003 indicating 128M enabled). The ACPI MCFG table has this space reserved according to Robert's new code, but the machine hangs due to the address space aliasing Olivier mentioned awhile back. I don't have a PCIe card to test with (or any devices that require extended config space that I know of) so I can't really tell if Windows supports PCIe on this platform, but if it does I don't see how it would w/o having a full bridge driver and sophisticated address space allocation builtin. I'm going to try updating my BIOS, but if that doesn't solve this problem, I'm not sure what we can do about it. Should pci_mmcfg_insert_resources check for conflicts? Should we just blacklist certain boards? I can try pinging our BIOS folks about this board to see what was intended, but I'm sure this won't be the only board we have problems with, so we'll need to address it generically somehow. Thanks, Jesse ^ permalink raw reply [flat|nested] 61+ messages in thread
* Re: [RFC PATCH] PCI MMCONFIG: add validation against ACPI motherboard resources 2007-05-02 5:27 ` Jesse Barnes @ 2007-05-02 14:34 ` Robert Hancock 2007-05-02 17:57 ` Jesse Barnes 0 siblings, 1 reply; 61+ messages in thread From: Robert Hancock @ 2007-05-02 14:34 UTC (permalink / raw) To: Jesse Barnes Cc: Olivier Galibert, linux-kernel, Andi Kleen, Chuck Ebbert, Len Brown Jesse Barnes wrote: > On Tuesday, May 01, 2007, Jesse Barnes wrote: >>> I'm testing it now on my 965... >> Bah... nevermind Robert, I see you're doing this already in >> pci_mmcfg_reject_broken. I'm about to reboot & test now. > > Ok, I've tested a bit on my 965 (after re-adding my old patch to support > it) and the new checks are more complete, but my BIOS still appears to be > buggy. > > The extended config space (as defined by the register) is at 0xf0000000 > (full value is 0xf0000003 indicating 128M enabled). The ACPI MCFG table > has this space reserved according to Robert's new code, but the machine > hangs due to the address space aliasing Olivier mentioned awhile back. I > don't have a PCIe card to test with (or any devices that require extended > config space that I know of) so I can't really tell if Windows supports > PCIe on this platform, but if it does I don't see how it would w/o having > a full bridge driver and sophisticated address space allocation builtin. Windows XP doesn't use MMCONFIG or any extended configuration space. I believe Vista is supposed to, though. Not sure how they are handling this issue. > > I'm going to try updating my BIOS, but if that doesn't solve this problem, > I'm not sure what we can do about it. Should pci_mmcfg_insert_resources > check for conflicts? Should we just blacklist certain boards? I can try > pinging our BIOS folks about this board to see what was intended, but I'm > sure this won't be the only board we have problems with, so we'll need to > address it generically somehow. 
Can you post what your board has for PNPACPI reserved resources (I believe they're in /sys/devices/pnp0/*/resources IIRC, don't have a Linux box handy right now). Full dmesg would also be useful, I think it dumps out those reservations at boot nowadays.. -- Robert Hancock Saskatoon, SK, Canada To email, remove "nospam" from hancockr@nospamshaw.ca Home Page: http://www.roberthancock.com/ ^ permalink raw reply [flat|nested] 61+ messages in thread
* Re: [RFC PATCH] PCI MMCONFIG: add validation against ACPI motherboard resources 2007-05-02 14:34 ` Robert Hancock @ 2007-05-02 17:57 ` Jesse Barnes 2007-05-02 23:45 ` Robert Hancock 0 siblings, 1 reply; 61+ messages in thread From: Jesse Barnes @ 2007-05-02 17:57 UTC (permalink / raw) To: Robert Hancock Cc: Olivier Galibert, linux-kernel, Andi Kleen, Chuck Ebbert, Len Brown [-- Attachment #1: Type: text/plain, Size: 1675 bytes --] On Wednesday, May 2, 2007 7:34 am Robert Hancock wrote: > Jesse Barnes wrote: > > On Tuesday, May 01, 2007, Jesse Barnes wrote: > >>> I'm testing it now on my 965... > >> > >> Bah... nevermind Robert, I see you're doing this already in > >> pci_mmcfg_reject_broken. I'm about to reboot & test now. > > > > Ok, I've tested a bit on my 965 (after re-adding my old patch to > > support it) and the new checks are more complete, but my BIOS still > > appears to be buggy. > > > > The extended config space (as defined by the register) is at > > 0xf0000000 (full value is 0xf0000003 indicating 128M enabled). The > > ACPI MCFG table has this space reserved according to Robert's new > > code, but the machine hangs due to the address space aliasing > > Olivier mentioned awhile back. I don't have a PCIe card to test > > with (or any devices that require extended config space that I know > > of) so I can't really tell if Windows supports PCIe on this > > platform, but if it does I don't see how it would w/o having a full > > bridge driver and sophisticated address space allocation builtin. > > Windows XP doesn't use MMCONFIG or any extended configuration space. > I believe Vista is supposed to, though. Not sure how they are > handling this issue. Oh right... Vista will be the first to fully support PCIe & mcfg... > Can you post what your board has for PNPACPI reserved resources (I > believe they're in /sys/devices/pnp0/*/resources IIRC, don't have a > Linux box handy right now). 
Full dmesg would also be useful, I think > it dumps out those reservations at boot nowadays.. BIOS update didn't help. Here's the boot log and a dump of the pnp0 resources. Jesse [-- Attachment #2: pnp.out --] [-- Type: text/plain, Size: 691 bytes --] state = active io 0xcf8-0xcff state = active mem 0xf0000000-0xf7ffffff mem 0xfed13000-0xfed13fff mem 0xfed14000-0xfed17fff mem 0xfed18000-0xfed18fff state = active io 0x0-0xf io 0x81-0x83 io 0x87-0x87 io 0x89-0x8b io 0x8f-0x8f io 0xc0-0xdf dma 4 state = active io 0x70-0x71 io 0x74-0x77 irq 8 state = active io 0xf0-0xf0 irq 13 state = active io 0x61-0x61 state = active io 0x500-0x53f io 0x400-0x47f io 0x92-0x92 io 0x680-0x6ff io 0x10-0x1f io 0x72-0x73 io 0x80-0x80 io 0x84-0x86 state = active io 0x60-0x60 io 0x64-0x64 state = active io 0x378-0x37f io 0x778-0x77f irq 7 state = active io 0x3f8-0x3ff irq 4 state = active mem 0xfec00000-0xfec000ff state = active mem 0xfed00000-0xfed03fff [-- Attachment #3: boot.out --] [-- Type: text/plain, Size: 25550 bytes --] Linux version 2.6.21-1.3116.fc7 (brewbuilder@hs20-bc2-3.build.redhat.com) (gcc version 4.1.2 20070424 (Red Hat 4.1.2-11)) #1 SMP Thu Apr 26 10:17:55 EDT 2007 Command line: ro root=LABEL=/ rhgb quiet selinux=0 BIOS-provided physical RAM map: BIOS-e820: 0000000000000000 - 000000000008f000 (usable) BIOS-e820: 000000000008f000 - 00000000000a0000 (reserved) BIOS-e820: 00000000000e0000 - 0000000000100000 (reserved) BIOS-e820: 0000000000100000 - 00000000bd643000 (usable) BIOS-e820: 00000000bd643000 - 00000000bd6c6000 (ACPI NVS) BIOS-e820: 00000000bd6c6000 - 00000000be5fb000 (usable) BIOS-e820: 00000000be5fb000 - 00000000be608000 (reserved) BIOS-e820: 00000000be608000 - 00000000be6a5000 (usable) BIOS-e820: 00000000be6a5000 - 00000000be6aa000 (ACPI data) BIOS-e820: 00000000be6aa000 - 00000000be6ab000 (usable) BIOS-e820: 00000000be6ab000 - 00000000be6f2000 (ACPI NVS) BIOS-e820: 00000000be6f2000 - 00000000be6ff000 (ACPI data) BIOS-e820: 00000000be6ff000 - 00000000be700000 
(usable) BIOS-e820: 00000000be700000 - 00000000bf000000 (reserved) BIOS-e820: 00000000fff00000 - 0000000100000000 (reserved) Entering add_active_range(0, 0, 143) 0 entries of 3200 used Entering add_active_range(0, 256, 775747) 1 entries of 3200 used Entering add_active_range(0, 775878, 779771) 2 entries of 3200 used Entering add_active_range(0, 779784, 779941) 3 entries of 3200 used Entering add_active_range(0, 779946, 779947) 4 entries of 3200 used Entering add_active_range(0, 780031, 780032) 5 entries of 3200 used end_pfn_map = 1048576 DMI 2.4 present. ACPI: RSDP 000FE020, 0014 (r0 INTEL ) ACPI: RSDT BE6FD038, 0050 (r1 INTEL DG965OT 68C 1000013) ACPI: FACP BE6FC000, 00F4 (r2 INTEL DG965OT 68C MSFT 1000013) ACPI: DSDT BE6F7000, 40E9 (r1 INTEL DG965OT 68C MSFT 1000013) ACPI: FACS BE6AB000, 0040 ACPI: APIC BE6F6000, 0078 (r1 INTEL DG965OT 68C MSFT 1000013) ACPI: WDDT BE6F5000, 0040 (r1 INTEL DG965OT 68C MSFT 1000013) ACPI: MCFG BE6F4000, 003C (r1 INTEL DG965OT 68C MSFT 1000013) ACPI: ASF! 
BE6F3000, 00A6 (r32 INTEL DG965OT 68C MSFT 1000013) ACPI: HPET BE6F2000, 0038 (r1 INTEL DG965OT 68C MSFT 1000013) ACPI: SSDT BE6A9000, 01BC (r1 INTEL CpuPm 68C MSFT 1000013) ACPI: SSDT BE6A8000, 0175 (r1 INTEL Cpu0Ist 68C MSFT 1000013) ACPI: SSDT BE6A7000, 0175 (r1 INTEL Cpu1Ist 68C MSFT 1000013) ACPI: SSDT BE6A6000, 0175 (r1 INTEL Cpu2Ist 68C MSFT 1000013) ACPI: SSDT BE6A5000, 0175 (r1 INTEL Cpu3Ist 68C MSFT 1000013) No NUMA configuration found Faking a node at 0000000000000000-00000000be700000 Entering add_active_range(0, 0, 143) 0 entries of 3200 used Entering add_active_range(0, 256, 775747) 1 entries of 3200 used Entering add_active_range(0, 775878, 779771) 2 entries of 3200 used Entering add_active_range(0, 779784, 779941) 3 entries of 3200 used Entering add_active_range(0, 779946, 779947) 4 entries of 3200 used Entering add_active_range(0, 780031, 780032) 5 entries of 3200 used Bootmem setup node 0 0000000000000000-00000000be700000 Zone PFN ranges: DMA 0 -> 4096 DMA32 4096 -> 1048576 Normal 1048576 -> 1048576 early_node_map[6] active PFN ranges 0: 0 -> 143 0: 256 -> 775747 0: 775878 -> 779771 0: 779784 -> 779941 0: 779946 -> 779947 0: 780031 -> 780032 On node 0 totalpages: 779686 DMA zone: 88 pages used for memmap DMA zone: 2367 pages reserved DMA zone: 1528 pages, LIFO batch:0 DMA32 zone: 16670 pages used for memmap DMA32 zone: 759033 pages, LIFO batch:31 Normal zone: 0 pages used for memmap ACPI: PM-Timer IO Port: 0x408 ACPI: Local APIC address 0xfee00000 ACPI: LAPIC (acpi_id[0x01] lapic_id[0x00] enabled) Processor #0 (Bootup-CPU) ACPI: LAPIC (acpi_id[0x02] lapic_id[0x01] enabled) Processor #1 ACPI: LAPIC (acpi_id[0x03] lapic_id[0x82] disabled) ACPI: LAPIC (acpi_id[0x04] lapic_id[0x83] disabled) ACPI: LAPIC_NMI (acpi_id[0x01] dfl dfl lint[0x1]) ACPI: LAPIC_NMI (acpi_id[0x02] dfl dfl lint[0x1]) ACPI: IOAPIC (id[0x02] address[0xfec00000] gsi_base[0]) IOAPIC[0]: apic_id 2, address 0xfec00000, GSI 0-23 ACPI: INT_SRC_OVR (bus 0 bus_irq 0 global_irq 2 dfl dfl) 
ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 9 high level) ACPI: IRQ0 used by override. ACPI: IRQ2 used by override. ACPI: IRQ9 used by override. Setting APIC routing to physical flat ACPI: HPET id: 0x8086a201 base: 0xfed00000 Using ACPI (MADT) for SMP configuration information Nosave address range: 000000000008f000 - 00000000000a0000 Nosave address range: 00000000000a0000 - 00000000000e0000 Nosave address range: 00000000000e0000 - 0000000000100000 Nosave address range: 00000000bd643000 - 00000000bd6c6000 Nosave address range: 00000000be5fb000 - 00000000be608000 Nosave address range: 00000000be6a5000 - 00000000be6aa000 Nosave address range: 00000000be6ab000 - 00000000be6f2000 Nosave address range: 00000000be6f2000 - 00000000be6ff000 Allocating PCI resources starting at c0000000 (gap: bf000000:40f00000) SMP: Allowing 4 CPUs, 2 hotplug CPUs PERCPU: Allocating 44672 bytes of per cpu data Built 1 zonelists. Total pages: 760561 Kernel command line: ro root=LABEL=/ rhgb quiet selinux=0 Initializing CPU#0 PID hash table entries: 4096 (order: 12, 32768 bytes) time.c: Detected 2397.602 MHz processor. Console: colour VGA+ 80x25 Lock dependency validator: Copyright (c) 2006 Red Hat, Inc., Ingo Molnar ... MAX_LOCKDEP_SUBCLASSES: 8 ... MAX_LOCK_DEPTH: 30 ... MAX_LOCKDEP_KEYS: 2048 ... CLASSHASH_SIZE: 1024 ... MAX_LOCKDEP_ENTRIES: 8192 ... MAX_LOCKDEP_CHAINS: 16384 ... CHAINHASH_SIZE: 8192 memory used by lock dependency info: 1648 kB per task-struct memory footprint: 1680 bytes Dentry cache hash table entries: 524288 (order: 10, 4194304 bytes) Inode-cache hash table entries: 262144 (order: 9, 2097152 bytes) Checking aperture... Calgary: detecting Calgary via BIOS EBDA area Calgary: Unable to locate Rio Grande table in EBDA - bailing! Memory: 3031908k/3120128k available (2465k kernel code, 86836k reserved, 1445k data, 332k init) Calibrating delay using timer specific routine.. 4798.40 BogoMIPS (lpj=2399204) Security Framework v1.0.0 initialized SELinux: Disabled at boot. 
Capability LSM initialized Mount-cache hash table entries: 256 CPU: L1 I cache: 32K, L1 D cache: 32K CPU: L2 cache: 4096K CPU 0/0 -> Node 0 using mwait in idle threads. CPU: Physical Processor ID: 0 CPU: Processor Core ID: 0 CPU0: Thermal monitoring enabled (TM2) lockdep: not fixing up alternatives. ACPI: Core revision 20070126 Using local APIC timer interrupts. result 16649994 Detected 16.649 MHz APIC timer. lockdep: not fixing up alternatives. Booting processor 1/2 APIC 0x1 Initializing CPU#1 Calibrating delay using timer specific routine.. 4795.18 BogoMIPS (lpj=2397591) CPU: L1 I cache: 32K, L1 D cache: 32K CPU: L2 cache: 4096K CPU 1/1 -> Node 0 CPU: Physical Processor ID: 0 CPU: Processor Core ID: 1 CPU1: Thermal monitoring enabled (TM2) Intel(R) Core(TM)2 CPU 6600 @ 2.40GHz stepping 06 checking TSC synchronization [CPU#0 -> CPU#1]: passed. Brought up 2 CPUs sizeof(vma)=176 bytes sizeof(page)=88 bytes sizeof(inode)=1008 bytes sizeof(dentry)=248 bytes sizeof(ext3inode)=1376 bytes sizeof(buffer_head)=104 bytes sizeof(skbuff)=248 bytes sizeof(task_struct)=3776 bytes migration_cost=15 NET: Registered protocol family 16 ACPI: bus type pci registered PCI: Using configuration type 1 ACPI: Interpreter enabled ACPI: (supports S0 S3 S4 S5) ACPI: Using IOAPIC for interrupt routing ACPI: PCI Root Bridge [PCI0] (0000:00) PCI: Probing PCI hardware (bus 00) Boot video device is 0000:00:02.0 PCI quirk: region 0400-047f claimed by ICH6 ACPI/GPIO/TCO PCI quirk: region 0500-053f claimed by ICH6 GPIO PCI: Transparent bridge - 0000:00:1e.0 ACPI: PCI Interrupt Routing Table [\_SB_.PCI0._PRT] ACPI: PCI Interrupt Routing Table [\_SB_.PCI0.P32_._PRT] ACPI: PCI Interrupt Routing Table [\_SB_.PCI0.PEX0._PRT] ACPI: PCI Interrupt Routing Table [\_SB_.PCI0.PEX1._PRT] ACPI: PCI Interrupt Routing Table [\_SB_.PCI0.PEX2._PRT] ACPI: PCI Interrupt Routing Table [\_SB_.PCI0.PEX3._PRT] ACPI: PCI Interrupt Routing Table [\_SB_.PCI0.PEX4._PRT] ACPI: PCI Interrupt Link [LNKA] (IRQs 3 4 5 7 9 10 *11 
12) ACPI: PCI Interrupt Link [LNKB] (IRQs 3 4 5 7 *9 10 11 12) ACPI: PCI Interrupt Link [LNKC] (IRQs 3 4 5 7 9 10 *11 12) ACPI: PCI Interrupt Link [LNKD] (IRQs 3 4 5 7 9 10 *11 12) ACPI: PCI Interrupt Link [LNKE] (IRQs 3 4 5 7 *9 10 11 12) ACPI: PCI Interrupt Link [LNKF] (IRQs 3 4 5 7 9 *10 11 12) ACPI: PCI Interrupt Link [LNKG] (IRQs 3 4 5 7 9 10 11 12) *0, disabled. ACPI: PCI Interrupt Link [LNKH] (IRQs 3 4 5 7 9 *10 11 12) Linux Plug and Play Support v0.97 (c) Adam Belay pnp: PnP ACPI init pnp: PnP ACPI: found 12 devices usbcore: registered new interface driver usbfs usbcore: registered new interface driver hub usbcore: registered new device driver usb PCI: Using ACPI for IRQ routing PCI: If a device doesn't work, try "pci=routeirq". If it helps, post a report NetLabel: Initializing NetLabel: domain hash size = 128 NetLabel: protocols = UNLABELED CIPSOv4 NetLabel: unlabeled traffic allowed by default PCI-GART: No AMD northbridge found. hpet0: at MMIO 0xfed00000, IRQs 2, 8, 0 hpet0: 3 64-bit timers, 14318180 Hz pnp: 00:01: iomem range 0xf0000000-0xf7ffffff has been reserved pnp: 00:01: iomem range 0xfed13000-0xfed13fff has been reserved pnp: 00:01: iomem range 0xfed14000-0xfed17fff has been reserved pnp: 00:01: iomem range 0xfed18000-0xfed18fff has been reserved pnp: 00:06: ioport range 0x500-0x53f has been reserved pnp: 00:06: ioport range 0x400-0x47f has been reserved pnp: 00:06: ioport range 0x680-0x6ff has been reserved Time: tsc clocksource has been installed. PCI: Ignore bogus resource 6 [0:0] of 0000:00:02.0 PCI: Bridge: 0000:00:1c.0 IO window: disabled. MEM window: d0500000-d05fffff PREFETCH window: disabled. PCI: Bridge: 0000:00:1c.1 IO window: 2000-2fff MEM window: d0100000-d01fffff PREFETCH window: disabled. PCI: Bridge: 0000:00:1c.2 IO window: disabled. MEM window: d0600000-d06fffff PREFETCH window: disabled. PCI: Bridge: 0000:00:1c.3 IO window: disabled. MEM window: d0700000-d07fffff PREFETCH window: disabled. 
PCI: Bridge: 0000:00:1c.4 IO window: disabled. MEM window: d0800000-d08fffff PREFETCH window: disabled. PCI: Bridge: 0000:00:1e.0 IO window: 1000-1fff MEM window: d0000000-d00fffff PREFETCH window: disabled. ACPI: PCI Interrupt 0000:00:1c.0[A] -> GSI 17 (level, low) -> IRQ 17 PCI: Setting latency timer of device 0000:00:1c.0 to 64 ACPI: PCI Interrupt 0000:00:1c.1[B] -> GSI 16 (level, low) -> IRQ 16 PCI: Setting latency timer of device 0000:00:1c.1 to 64 ACPI: PCI Interrupt 0000:00:1c.2[C] -> GSI 18 (level, low) -> IRQ 18 PCI: Setting latency timer of device 0000:00:1c.2 to 64 ACPI: PCI Interrupt 0000:00:1c.3[D] -> GSI 19 (level, low) -> IRQ 19 PCI: Setting latency timer of device 0000:00:1c.3 to 64 ACPI: PCI Interrupt 0000:00:1c.4[A] -> GSI 17 (level, low) -> IRQ 17 PCI: Setting latency timer of device 0000:00:1c.4 to 64 PCI: Setting latency timer of device 0000:00:1e.0 to 64 NET: Registered protocol family 2 IP route cache hash table entries: 131072 (order: 8, 1048576 bytes) TCP established hash table entries: 65536 (order: 10, 4194304 bytes) TCP bind hash table entries: 65536 (order: 9, 3670016 bytes) TCP: Hash tables configured (established 65536 bind 65536) TCP reno registered checking if image is initramfs... it is Freeing initrd memory: 3078k freed audit: initializing netlink socket (disabled) audit(1178092655.600:1): initialized Total HugeTLB memory allocated, 0 VFS: Disk quotas dquot_6.5.1 Dquot-cache hash table entries: 512 (order 0, 4096 bytes) ksign: Installing public key data Loading keyring - Added public key A777C0AD109752E5 - User ID: Red Hat, Inc. 
(Kernel Module GPG key) io scheduler noop registered io scheduler anticipatory registered io scheduler deadline registered io scheduler cfq registered (default) PCI: Setting latency timer of device 0000:00:1c.0 to 64 assign_interrupt_mode Found MSI capability Allocate Port Service[0000:00:1c.0:pcie00] Allocate Port Service[0000:00:1c.0:pcie02] PCI: Setting latency timer of device 0000:00:1c.1 to 64 assign_interrupt_mode Found MSI capability Allocate Port Service[0000:00:1c.1:pcie00] Allocate Port Service[0000:00:1c.1:pcie02] PCI: Setting latency timer of device 0000:00:1c.2 to 64 assign_interrupt_mode Found MSI capability Allocate Port Service[0000:00:1c.2:pcie00] Allocate Port Service[0000:00:1c.2:pcie02] PCI: Setting latency timer of device 0000:00:1c.3 to 64 assign_interrupt_mode Found MSI capability Allocate Port Service[0000:00:1c.3:pcie00] Allocate Port Service[0000:00:1c.3:pcie02] PCI: Setting latency timer of device 0000:00:1c.4 to 64 assign_interrupt_mode Found MSI capability Allocate Port Service[0000:00:1c.4:pcie00] Allocate Port Service[0000:00:1c.4:pcie02] pci_hotplug: PCI Hot Plug PCI Core version: 0.5 ACPI: Processor [CPU0] (supports 8 throttling states) ACPI: Processor [CPU1] (supports 8 throttling states) ACPI Exception (processor_core-0783): AE_NOT_FOUND, Processor Device is not present [20070126] ACPI Exception (processor_core-0783): AE_NOT_FOUND, Processor Device is not present [20070126] Real Time Clock Driver v1.12ac hpet_resources: 0xfed00000 is busy Non-volatile memory driver v1.2 Linux agpgart interface v0.102 (c) Dave Jones agpgart: Detected an Intel 965G Chipset. agpgart: Detected 7676K stolen memory. 
agpgart: AGP aperture is 256M @ 0xc0000000 Serial: 8250/16550 driver $Revision: 1.90 $ 4 ports, IRQ sharing enabled serial8250: ttyS0 at I/O 0x3f8 (irq = 4) is a 16550A 00:09: ttyS0 at I/O 0x3f8 (irq = 4) is a 16550A RAMDISK driver initialized: 16 RAM disks of 16384K size 4096 blocksize input: Macintosh mouse button emulation as /class/input/input0 usbcore: registered new interface driver libusual usbcore: registered new interface driver hiddev usbcore: registered new interface driver usbhid drivers/usb/input/hid-core.c: v2.6:USB HID core driver PNP: No PS/2 controller found. Probing ports directly. serio: i8042 KBD port at 0x60,0x64 irq 1 serio: i8042 AUX port at 0x60,0x64 irq 12 mice: PS/2 mouse device common for all mice TCP bic registered Initializing XFRM netlink socket NET: Registered protocol family 1 NET: Registered protocol family 17 Freeing unused kernel memory: 332k freed Write protecting the kernel read-only data: 1058k USB Universal Host Controller Interface driver v3.0 ACPI: PCI Interrupt 0000:00:1a.0[A] -> GSI 16 (level, low) -> IRQ 16 PCI: Setting latency timer of device 0000:00:1a.0 to 64 uhci_hcd 0000:00:1a.0: UHCI Host Controller uhci_hcd 0000:00:1a.0: new USB bus registered, assigned bus number 1 uhci_hcd 0000:00:1a.0: irq 16, io base 0x000030c0 usb usb1: configuration #1 chosen from 1 choice hub 1-0:1.0: USB hub found hub 1-0:1.0: 2 ports detected ACPI: PCI Interrupt 0000:00:1a.1[B] -> GSI 21 (level, low) -> IRQ 21 PCI: Setting latency timer of device 0000:00:1a.1 to 64 uhci_hcd 0000:00:1a.1: UHCI Host Controller uhci_hcd 0000:00:1a.1: new USB bus registered, assigned bus number 2 uhci_hcd 0000:00:1a.1: irq 21, io base 0x000030a0 usb usb2: configuration #1 chosen from 1 choice hub 2-0:1.0: USB hub found hub 2-0:1.0: 2 ports detected ACPI: PCI Interrupt 0000:00:1d.0[A] -> GSI 23 (level, low) -> IRQ 23 PCI: Setting latency timer of device 0000:00:1d.0 to 64 uhci_hcd 0000:00:1d.0: UHCI Host Controller uhci_hcd 0000:00:1d.0: new USB bus registered, 
assigned bus number 3 uhci_hcd 0000:00:1d.0: irq 23, io base 0x00003080 usb usb3: configuration #1 chosen from 1 choice hub 3-0:1.0: USB hub found hub 3-0:1.0: 2 ports detected ACPI: PCI Interrupt 0000:00:1d.1[B] -> GSI 19 (level, low) -> IRQ 19 PCI: Setting latency timer of device 0000:00:1d.1 to 64 uhci_hcd 0000:00:1d.1: UHCI Host Controller uhci_hcd 0000:00:1d.1: new USB bus registered, assigned bus number 4 uhci_hcd 0000:00:1d.1: irq 19, io base 0x00003060 usb usb4: configuration #1 chosen from 1 choice hub 4-0:1.0: USB hub found hub 4-0:1.0: 2 ports detected ACPI: PCI Interrupt 0000:00:1d.2[C] -> GSI 18 (level, low) -> IRQ 18 PCI: Setting latency timer of device 0000:00:1d.2 to 64 uhci_hcd 0000:00:1d.2: UHCI Host Controller uhci_hcd 0000:00:1d.2: new USB bus registered, assigned bus number 5 uhci_hcd 0000:00:1d.2: irq 18, io base 0x00003040 usb usb5: configuration #1 chosen from 1 choice hub 5-0:1.0: USB hub found hub 5-0:1.0: 2 ports detected usb 3-2: new full speed USB device using uhci_hcd and address 2 ohci_hcd: 2006 August 04 USB 1.1 'Open' Host Controller (OHCI) Driver ACPI: PCI Interrupt 0000:00:1a.7[C] -> GSI 18 (level, low) -> IRQ 18 PCI: Setting latency timer of device 0000:00:1a.7 to 64 ehci_hcd 0000:00:1a.7: EHCI Host Controller ehci_hcd 0000:00:1a.7: new USB bus registered, assigned bus number 6 ehci_hcd 0000:00:1a.7: debug port 1 PCI: cache line size of 32 is not supported by device 0000:00:1a.7 ehci_hcd 0000:00:1a.7: irq 18, io mem 0xd0421c00 ehci_hcd 0000:00:1a.7: USB 2.0 started, EHCI 1.00, driver 10 Dec 2004 usb usb6: configuration #1 chosen from 1 choice hub 6-0:1.0: USB hub found hub 6-0:1.0: 4 ports detected ACPI: PCI Interrupt 0000:00:1d.7[A] -> GSI 23 (level, low) -> IRQ 23 PCI: Setting latency timer of device 0000:00:1d.7 to 64 ehci_hcd 0000:00:1d.7: EHCI Host Controller ehci_hcd 0000:00:1d.7: new USB bus registered, assigned bus number 7 ehci_hcd 0000:00:1d.7: debug port 1 PCI: cache line size of 32 is not supported by device 
0000:00:1d.7 ehci_hcd 0000:00:1d.7: irq 23, io mem 0xd0421800 ehci_hcd 0000:00:1d.7: USB 2.0 started, EHCI 1.00, driver 10 Dec 2004 usb usb7: configuration #1 chosen from 1 choice hub 7-0:1.0: USB hub found hub 7-0:1.0: 6 ports detected usb 3-2: device descriptor read/all, error -71 SCSI subsystem initialized libata version 2.20 loaded. ahci 0000:00:1f.2: version 2.1 ACPI: PCI Interrupt 0000:00:1f.2[A] -> GSI 19 (level, low) -> IRQ 19 usb 7-2: new high speed USB device using ehci_hcd and address 2 usb 7-2: configuration #1 chosen from 1 choice hub 7-2:1.0: USB hub found hub 7-2:1.0: 2 ports detected usb 7-2.1: new high speed USB device using ehci_hcd and address 3 usb 7-2.1: configuration #1 chosen from 1 choice hub 7-2.1:1.0: USB hub found hub 7-2.1:1.0: 4 ports detected usb 7-2.2: new low speed USB device using ehci_hcd and address 4 PCI: Setting latency timer of device 0000:00:1f.2 to 64 ahci 0000:00:1f.2: AHCI 0001.0100 32 slots 6 ports 3 Gbps 0x3f impl SATA mode ahci 0000:00:1f.2: flags: 64bit ncq led clo pio slum part ata1: SATA max UDMA/133 cmd 0xffffc2000002c100 ctl 0x0000000000000000 bmdma 0x0000000000000000 irq 19 ata2: SATA max UDMA/133 cmd 0xffffc2000002c180 ctl 0x0000000000000000 bmdma 0x0000000000000000 irq 19 ata3: SATA max UDMA/133 cmd 0xffffc2000002c200 ctl 0x0000000000000000 bmdma 0x0000000000000000 irq 19 ata4: SATA max UDMA/133 cmd 0xffffc2000002c280 ctl 0x0000000000000000 bmdma 0x0000000000000000 irq 19 ata5: SATA max UDMA/133 cmd 0xffffc2000002c300 ctl 0x0000000000000000 bmdma 0x0000000000000000 irq 19 ata6: SATA max UDMA/133 cmd 0xffffc2000002c380 ctl 0x0000000000000000 bmdma 0x0000000000000000 irq 19 scsi0 : ahci usb 7-2.2: configuration #1 chosen from 1 choice input: THRUSTMASTER FireStorm Dual Power as /class/input/input1 input: USB HID v1.10 Gamepad [THRUSTMASTER FireStorm Dual Power] on usb-0000:00:1d.7-2.2 usb 7-2.1.1: new high speed USB device using ehci_hcd and address 5 usb 7-2.1.1: configuration #1 chosen from 1 choice libusual: 
modprobe for usb-storage succeeded, but module is not present ata1: SATA link up 3.0 Gbps (SStatus 123 SControl 300) ata1.00: ATA-7: SAMSUNG HD160JJ, WU100-33, max UDMA/100 ata1.00: 312581808 sectors, multi 0: LBA48 ata1.00: configured for UDMA/100 scsi1 : ahci usb 7-2.1.2: new low speed USB device using ehci_hcd and address 6 usb 7-2.1.2: configuration #1 chosen from 1 choice input: Logitech USB Receiver as /class/input/input2 input: USB HID v1.10 Keyboard [Logitech USB Receiver] on usb-0000:00:1d.7-2.1.2 input: Logitech USB Receiver as /class/input/input3 input: USB HID v1.10 Mouse [Logitech USB Receiver] on usb-0000:00:1d.7-2.1.2 ata2: SATA link up 3.0 Gbps (SStatus 123 SControl 300) ata2.00: ATA-7: SAMSUNG HD160JJ, WU100-33, max UDMA/100 ata2.00: 312581808 sectors, multi 0: LBA48 ata2.00: configured for UDMA/100 scsi2 : ahci ata3: SATA link down (SStatus 0 SControl 300) scsi3 : ahci ata4: SATA link down (SStatus 0 SControl 300) scsi4 : ahci ata5: SATA link down (SStatus 0 SControl 300) scsi5 : ahci ata6: SATA link down (SStatus 0 SControl 300) scsi 0:0:0:0: Direct-Access ATA SAMSUNG HD160JJ WU10 PQ: 0 ANSI: 5 SCSI device sda: 312581808 512-byte hdwr sectors (160042 MB) sda: Write Protect is off sda: Mode Sense: 00 3a 00 00 SCSI device sda: write cache: enabled, read cache: enabled, doesn't support DPO or FUA SCSI device sda: 312581808 512-byte hdwr sectors (160042 MB) sda: Write Protect is off sda: Mode Sense: 00 3a 00 00 SCSI device sda: write cache: enabled, read cache: enabled, doesn't support DPO or FUA sda: sda1 sd 0:0:0:0: Attached scsi disk sda scsi 1:0:0:0: Direct-Access ATA SAMSUNG HD160JJ WU10 PQ: 0 ANSI: 5 SCSI device sdb: 312581808 512-byte hdwr sectors (160042 MB) sdb: Write Protect is off sdb: Mode Sense: 00 3a 00 00 SCSI device sdb: write cache: enabled, read cache: enabled, doesn't support DPO or FUA SCSI device sdb: 312581808 512-byte hdwr sectors (160042 MB) sdb: Write Protect is off sdb: Mode Sense: 00 3a 00 00 SCSI device sdb: write cache: 
enabled, read cache: enabled, doesn't support DPO or FUA sdb: sdb1 sdb2 sdb3 sd 1:0:0:0: Attached scsi disk sdb Initializing USB Mass Storage driver... scsi6 : SCSI emulation for USB Mass Storage devices usb-storage: device found at 5 usbcore: registered new interface driver usb-storage USB Mass Storage support registered. usb-storage: waiting for device to settle before scanning usb-storage: device scan complete scsi 6:0:0:0: Direct-Access Generic Flash HS-CF 4.44 PQ: 0 ANSI: 0 scsi 6:0:0:1: Direct-Access Generic Flash HS-COMBO 4.44 PQ: 0 ANSI: 0 sd 6:0:0:0: Attached scsi removable disk sdc sd 6:0:0:1: Attached scsi removable disk sdd kjournald starting. Commit interval 5 seconds EXT3-fs: mounted filesystem with ordered data mode. sd 0:0:0:0: Attached scsi generic sg0 type 0 sd 1:0:0:0: Attached scsi generic sg1 type 0 sd 6:0:0:0: Attached scsi generic sg2 type 0 sd 6:0:0:1: Attached scsi generic sg3 type 0 iTCO_vendor_support: vendor-support=0 input: PC Speaker as /class/input/input4 iTCO_wdt: Intel TCO WatchDog Timer Driver v1.01 (21-Jan-2007) iTCO_wdt: Found a ICH8DH TCO device (Version=2, TCOBASE=0x0460) iTCO_wdt: initialized. heartbeat=30 sec (nowayout=0) shpchp: Standard Hot Plug PCI Controller Driver version: 0.4 Intel(R) PRO/1000 Network Driver - version 7.3.20-k2-NAPI Copyright (c) 1999-2006 Intel Corporation. ACPI: PCI Interrupt 0000:00:19.0[A] -> GSI 20 (level, low) -> IRQ 20 PCI: Setting latency timer of device 0000:00:19.0 to 64 e1000: 0000:00:19.0: e1000_probe: (PCI Express:2.5Gb/s:Width x1) 00:16:76:c9:22:af parport: PnPBIOS parport detected. 
parport0: PC-style at 0x378 (0x778), irq 7 [PCSPP,TRISTATE] e1000: eth0: e1000_probe: Intel(R) PRO/1000 Network Connection ACPI: PCI Interrupt 0000:00:1f.3[B] -> GSI 21 (level, low) -> IRQ 21 ACPI: PCI Interrupt 0000:06:03.0[A] -> GSI 19 (level, low) -> IRQ 19 fw_ohci: Added fw-ohci device 0000:06:03.0, OHCI version 1.10 rtc_cmos 00:03: rtc core: registered rtc_cmos as rtc0 rtc_cmos: probe of 00:03 failed with error -16 gameport: EMU10K1 is pci0000:06:00.1/gameport0, io 0x1020, speed 1036kHz ACPI: PCI Interrupt 0000:02:00.0[A] -> GSI 17 (level, low) -> IRQ 17 PCI: Setting latency timer of device 0000:02:00.0 to 64 ata7: PATA max UDMA/100 cmd 0x0000000000012018 ctl 0x0000000000012026 bmdma 0x0000000000012000 irq 17 scsi7 : pata_marvell BAR5:00:00 01:7F 02:22 03:CA 04:00 05:00 06:00 07:00 08:00 09:00 0A:00 0B:00 0C:01 0D:00 0E:00 0F:00 ata7.00: ATAPI, max UDMA/33 fw_core: created new fw device fw0 (0 config rom retries) ata7.00: configured for UDMA/33 scsi 7:0:0:0: CD-ROM _NEC DVD_RW ND-3550A 1.05 PQ: 0 ANSI: 5 scsi 7:0:0:0: Attached scsi generic sg4 type 5 ACPI: PCI Interrupt 0000:06:00.0[A] -> GSI 21 (level, low) -> IRQ 21 sr0: scsi3-mmc drive: 48x/48x writer cd/rw xa/form2 cdda tray Uniform CD-ROM driver Revision: 3.20 sr 7:0:0:0: Attached scsi CD-ROM sr0 floppy0: no floppy controllers found lp0: using parport0 (interrupt-driven). lp0: console ready NET: Registered protocol family 10 lo: Disabled Privacy Extensions Mobile IPv6 ACPI: PCI Interrupt 0000:00:02.0[A] -> GSI 16 (level, low) -> IRQ 16 No dock devices found. input: Power Button (FF) as /class/input/input5 ACPI: Power Button (FF) [PWRF] input: Sleep Button (CM) as /class/input/input6 ACPI: Sleep Button (CM) [SLPB] ibm_acpi: ec object not found device-mapper: ioctl: 4.11.0-ioctl (2006-10-12) initialised: dm-devel@redhat.com EXT3 FS on sdb1, internal journal kjournald starting. Commit interval 5 seconds EXT3 FS on sdb3, internal journal EXT3-fs: mounted filesystem with ordered data mode. 
fuse init (API version 7.8) Adding 2096472k swap on /dev/sdb2. Priority:-1 extents:1 across:2096472k e1000: eth0: e1000_request_irq: Unable to allocate MSI interrupt Error: -22 e1000: eth0: e1000_watchdog: NIC Link is Up 100 Mbps Full Duplex, Flow Control: RX/TX e1000: eth0: e1000_watchdog: 10/100 speed: disabling TSO eth0: no IPv6 routers present [drm] Initialized drm 1.1.0 20060810 [drm] Initialized i915 1.6.0 20060119 on minor 0 ^ permalink raw reply [flat|nested] 61+ messages in thread
* Re: [RFC PATCH] PCI MMCONFIG: add validation against ACPI motherboard resources 2007-05-02 17:57 ` Jesse Barnes @ 2007-05-02 23:45 ` Robert Hancock 2007-05-02 23:54 ` Jesse Barnes 0 siblings, 1 reply; 61+ messages in thread From: Robert Hancock @ 2007-05-02 23:45 UTC (permalink / raw) To: Jesse Barnes Cc: Olivier Galibert, linux-kernel, Andi Kleen, Chuck Ebbert, Len Brown Jesse Barnes wrote: > On Wednesday, May 2, 2007 7:34 am Robert Hancock wrote: >> Jesse Barnes wrote: >>> On Tuesday, May 01, 2007, Jesse Barnes wrote: >>>>> I'm testing it now on my 965... >>>> Bah... nevermind Robert, I see you're doing this already in >>>> pci_mmcfg_reject_broken. I'm about to reboot & test now. >>> Ok, I've tested a bit on my 965 (after re-adding my old patch to >>> support it) and the new checks are more complete, but my BIOS still >>> appears to be buggy. >>> >>> The extended config space (as defined by the register) is at >>> 0xf0000000 (full value is 0xf0000003 indicating 128M enabled). The >>> ACPI MCFG table has this space reserved according to Robert's new >>> code, but the machine hangs due to the address space aliasing >>> Olivier mentioned awhile back. I don't have a PCIe card to test >>> with (or any devices that require extended config space that I know >>> of) so I can't really tell if Windows supports PCIe on this >>> platform, but if it does I don't see how it would w/o having a full >>> bridge driver and sophisticated address space allocation builtin. >> Windows XP doesn't use MMCONFIG or any extended configuration space. >> I believe Vista is supposed to, though. Not sure how they are >> handling this issue. > > Oh right... Vista will be the first to fully support PCIe & mcfg... > >> Can you post what your board has for PNPACPI reserved resources (I >> believe they're in /sys/devices/pnp0/*/resources IIRC, don't have a >> Linux box handy right now). Full dmesg would also be useful, I think >> it dumps out those reservations at boot nowadays.. 
> > BIOS update didn't help. Here's the boot log and a dump of the pnp0 > resources. Curious.. It looks like the ACPI resources have the correct reservation for the MMCONFIG window according to what the register says the location should be. There's no other reservations that overlap with that range (f0000000-f7ffffff), and according to the 965 datasheet there's nothing that's hard-coded to occupy that memory range. I can't really see what this range could be conflicting with. What happens if you take out the chipset register detection, does the MCFG table give you the same result? Wonder if they're doing something funny with start/end bus values or something in their table. There's some code in my patch that prints out the important data from the MCFG table, can you tell me what that shows with the chipset detection taken out? If that doesn't provide any useful information, I think we may need some assistance from Intel chipset/motherboard people to figure out what is going on here.. -- Robert Hancock Saskatoon, SK, Canada To email, remove "nospam" from hancockr@nospamshaw.ca Home Page: http://www.roberthancock.com/ ^ permalink raw reply [flat|nested] 61+ messages in thread
* Re: [RFC PATCH] PCI MMCONFIG: add validation against ACPI motherboard resources 2007-05-02 23:45 ` Robert Hancock @ 2007-05-02 23:54 ` Jesse Barnes 2007-05-04 21:06 ` Jesse Barnes 0 siblings, 1 reply; 61+ messages in thread From: Jesse Barnes @ 2007-05-02 23:54 UTC (permalink / raw) To: Robert Hancock Cc: Olivier Galibert, linux-kernel, Andi Kleen, Chuck Ebbert, Len Brown On Wednesday, May 2, 2007 4:45 pm Robert Hancock wrote: > Jesse Barnes wrote: > > On Wednesday, May 2, 2007 7:34 am Robert Hancock wrote: > >> Jesse Barnes wrote: > >>> On Tuesday, May 01, 2007, Jesse Barnes wrote: > >>>>> I'm testing it now on my 965... > >>>> > >>>> Bah... nevermind Robert, I see you're doing this already in > >>>> pci_mmcfg_reject_broken. I'm about to reboot & test now. > >>> > >>> Ok, I've tested a bit on my 965 (after re-adding my old patch to > >>> support it) and the new checks are more complete, but my BIOS > >>> still appears to be buggy. > >>> > >>> The extended config space (as defined by the register) is at > >>> 0xf0000000 (full value is 0xf0000003 indicating 128M enabled). > >>> The ACPI MCFG table has this space reserved according to Robert's > >>> new code, but the machine hangs due to the address space aliasing > >>> Olivier mentioned awhile back. I don't have a PCIe card to test > >>> with (or any devices that require extended config space that I > >>> know of) so I can't really tell if Windows supports PCIe on this > >>> platform, but if it does I don't see how it would w/o having a > >>> full bridge driver and sophisticated address space allocation > >>> builtin. > >> > >> Windows XP doesn't use MMCONFIG or any extended configuration > >> space. I believe Vista is supposed to, though. Not sure how they > >> are handling this issue. > > > > Oh right... Vista will be the first to fully support PCIe & mcfg... 
> > > >> Can you post what your board has for PNPACPI reserved resources (I > >> believe they're in /sys/devices/pnp0/*/resources IIRC, don't have > >> a Linux box handy right now). Full dmesg would also be useful, I > >> think it dumps out those reservations at boot nowadays.. > > > > BIOS update didn't help. Here's the boot log and a dump of the > > pnp0 resources. > > Curious.. It looks like the ACPI resources have the correct > reservation for the MMCONFIG window according to what the register > says the location should be. There's no other reservations that > overlap with that range (f0000000-f7ffffff), and according to the 965 > datasheet there's nothing that's hard-coded to occupy that memory > range. I can't really see what this range could be conflicting with. Yeah, it's strange. Even /proc/iomem from a working boot looks ok: d0700000-d07fffff : PCI Bus #04 d0800000-d08fffff : PCI Bus #05 f0000000-f7ffffff : pnp 00:01 fec00000-fec00fff : IOAPIC 0 fed00000-fed003ff : HPET 0 > What happens if you take out the chipset register detection, does the > MCFG table give you the same result? Wonder if they're doing > something funny with start/end bus values or something in their > table. There's some code in my patch that prints out the important > data from the MCFG table, can you tell me what that shows with the > chipset detection taken out? Yeah, I'll look a little more closely. It could also be that another register needs tweaking somewhere to actually get the bridge to decode the space. > If that doesn't provide any useful information, I think we may need > some assistance from Intel chipset/motherboard people to figure out > what is going on here.. I'm talking with them now, hopefully they'll shed some light on it. Thanks, Jesse ^ permalink raw reply [flat|nested] 61+ messages in thread
* Re: [RFC PATCH] PCI MMCONFIG: add validation against ACPI motherboard resources 2007-05-02 23:54 ` Jesse Barnes @ 2007-05-04 21:06 ` Jesse Barnes 2007-05-05 0:22 ` Robert Hancock 2007-05-21 19:10 ` Jesse Barnes 0 siblings, 2 replies; 61+ messages in thread From: Jesse Barnes @ 2007-05-04 21:06 UTC (permalink / raw) To: Robert Hancock Cc: Olivier Galibert, linux-kernel, Andi Kleen, Chuck Ebbert, Len Brown On Wednesday, May 2, 2007 4:54 pm Jesse Barnes wrote: > > What happens if you take out the chipset register detection, does > > the MCFG table give you the same result? Wonder if they're doing > > something funny with start/end bus values or something in their > > table. There's some code in my patch that prints out the important > > data from the MCFG table, can you tell me what that shows with the > > chipset detection taken out? > > Yeah, I'll look a little more closely. It could also be that another > register needs tweaking somewhere to actually get the bridge to > decode the space. > > > If that doesn't provide any useful information, I think we may need > > some assistance from Intel chipset/motherboard people to figure out > > what is going on here.. > > I'm talking with them now, hopefully they'll shed some light on it. I did a little more debugging this morning, and found that I can actually do reads from the space described by ACPI and the device register, but later when ACPI actually scans the root bridges, it hangs. Specifically the call to pci_acpi_scan_root in pci_root.c:acpi_pci_root_add() never seems to return. I'll walk through that logic when I get back to my test box, but it's also worth noting that Vista's MCFG on this machine apparently works ok too. Jesse ^ permalink raw reply [flat|nested] 61+ messages in thread
* Re: [RFC PATCH] PCI MMCONFIG: add validation against ACPI motherboard resources 2007-05-04 21:06 ` Jesse Barnes @ 2007-05-05 0:22 ` Robert Hancock 2007-05-21 19:10 ` Jesse Barnes 1 sibling, 0 replies; 61+ messages in thread From: Robert Hancock @ 2007-05-05 0:22 UTC (permalink / raw) To: Jesse Barnes Cc: Olivier Galibert, linux-kernel, Andi Kleen, Chuck Ebbert, Len Brown Jesse Barnes wrote: > On Wednesday, May 2, 2007 4:54 pm Jesse Barnes wrote: >>> What happens if you take out the chipset register detection, does >>> the MCFG table give you the same result? Wonder if they're doing >>> something funny with start/end bus values or something in their >>> table. There's some code in my patch that prints out the important >>> data from the MCFG table, can you tell me what that shows with the >>> chipset detection taken out? >> Yeah, I'll look a little more closely. It could also be that another >> register needs tweaking somewhere to actually get the bridge to >> decode the space. >> >>> If that doesn't provide any useful information, I think we may need >>> some assistance from Intel chipset/motherboard people to figure out >>> what is going on here.. >> I'm talking with them now, hopefully they'll shed some light on it. > > I did a little more debugging this morning, and found that I can > actually do reads from the space described by ACPI and the device > register, but later when ACPI actually scans the root bridges, it > hangs. Specifically the call to pci_acpi_scan_root in > pci_root.c:acpi_pci_root_add() never seems to return. > > I'll walk through that logic when I get back to my test box, but it's > also worth noting that Vista's MCFG on this machine apparently works ok > too. I would try sticking some debug in arch/x86_64/pci/mmconfig.c at the beginning and end of pci_mmcfg_read and pci_mmcfg_write to print the seg, bus, devfn and reg for each read and write. 
Hopefully that will track down the one that is causing the lockup, if it is an actual MMCONFIG access that's doing it.. -- Robert Hancock Saskatoon, SK, Canada To email, remove "nospam" from hancockr@nospamshaw.ca Home Page: http://www.roberthancock.com/ ^ permalink raw reply [flat|nested] 61+ messages in thread
* Re: [RFC PATCH] PCI MMCONFIG: add validation against ACPI motherboard resources 2007-05-04 21:06 ` Jesse Barnes 2007-05-05 0:22 ` Robert Hancock @ 2007-05-21 19:10 ` Jesse Barnes 2007-05-21 19:26 ` Robert Hancock 1 sibling, 1 reply; 61+ messages in thread From: Jesse Barnes @ 2007-05-21 19:10 UTC (permalink / raw) To: Robert Hancock Cc: Olivier Galibert, linux-kernel, Andi Kleen, Chuck Ebbert, Len Brown > > > What happens if you take out the chipset register detection, does > > > the MCFG table give you the same result? Wonder if they're doing > > > something funny with start/end bus values or something in their > > > table. There's some code in my patch that prints out the important > > > data from the MCFG table, can you tell me what that shows with the > > > chipset detection taken out? I can't see how any MCFG based accesses could work on this box, but I don't know why. According to the boot log (with our code patched in but disabled after checking the ACPI reserved status), the space is fine: ... ACPI: (supports S0 S3 S4 S5) ACPI: Using IOAPIC for interrupt routing pciexbar lo: 0xf0000003 pciexbar hi: 0x00000000 Enabled MCFG space at 0x00000000f0000000, size 134217728 PCI: Found Intel Corporation G965 Express Memory Controller Hub with MMCONFIG support. PCI: MCFG configuration 0: base 00000000f0000000 segment 0 buses 0 - 127 PCI: MCFG area at f0000000 reserved in ACPI motherboard resources PCI: Not using MMCONFIG. <-- due to the 'goto reject' after if (is_acpi_reserved) { ... } PM: Adding info for acpi:acpi_system:00 PM: Adding info for acpi:button_power:00 ... Same thing happens if I disable the chipset specific code and just use the ACPI stuff you added. If I leave it enabled, several config cycles work fine, but the box eventually hangs after probing 24 devices or so. 
I don't see anything else mapped into this space, and the MTRRs seem ok, so either there's something hidden in this memory range or there's another chipset register that needs poking to fully enable this space properly. Sysrq doesn't seem to work, and I don't see any events in my machine log, so figuring out exactly why it's hanging is a bit difficult. Any ideas on what to try next? I'll see if I can get some more details from our BIOS folks and do yet another pass over the documentation to see if there's something I'm missing. Thanks, Jesse ^ permalink raw reply [flat|nested] 61+ messages in thread
* Re: [RFC PATCH] PCI MMCONFIG: add validation against ACPI motherboard resources 2007-05-21 19:10 ` Jesse Barnes @ 2007-05-21 19:26 ` Robert Hancock 2007-05-21 20:07 ` Jesse Barnes 0 siblings, 1 reply; 61+ messages in thread From: Robert Hancock @ 2007-05-21 19:26 UTC (permalink / raw) To: Jesse Barnes Cc: Olivier Galibert, linux-kernel, Andi Kleen, Chuck Ebbert, Len Brown Jesse Barnes wrote: >>>> What happens if you take out the chipset register detection, does >>>> the MCFG table give you the same result? Wonder if they're doing >>>> something funny with start/end bus values or something in their >>>> table. There's some code in my patch that prints out the important >>>> data from the MCFG table, can you tell me what that shows with the >>>> chipset detection taken out? > > I can't see how any MCFG based accesses could work on this box, but I > don't know why. According to the boot log (with our code patched in > but disabled after checking the ACPI reserved status), the space is fine: > > ... > ACPI: (supports S0 S3 S4 S5) > ACPI: Using IOAPIC for interrupt routing > pciexbar lo: 0xf0000003 > pciexbar hi: 0x00000000 > Enabled MCFG space at 0x00000000f0000000, size 134217728 > PCI: Found Intel Corporation G965 Express Memory Controller Hub with MMCONFIG support. > PCI: MCFG configuration 0: base 00000000f0000000 segment 0 buses 0 - 127 > PCI: MCFG area at f0000000 reserved in ACPI motherboard resources > PCI: Not using MMCONFIG. <-- due to the 'goto reject' after > if (is_acpi_reserved) { ... } > PM: Adding info for acpi:acpi_system:00 > PM: Adding info for acpi:button_power:00 > ... > > Same thing happens if I disable the chipset specific code and just use > the ACPI stuff you added. > > If I leave it enabled, several config cycles work fine, but the box > eventually hangs after probing 24 devices or so. 
I don't see anything > else mapped into this space, and the MTRRs seem ok, so either there's > something hidden in this memory range or there's another chipset register > that needs poking to fully enable this space properly. > > Sysrq doesn't seem to work, and I don't see any events in my machine log, > so figuring out exactly why it's hanging is a bit difficult. > > Any ideas on what to try next? I'll see if I can get some more details > from our BIOS folks and do yet another pass over the documentation to see > if there's something I'm missing. Can you find out which config access (bus, device, function, address) is the one that hangs the box? I assume that either the corresponding address in the MCFG table is problematic (i.e. has something else mapped over it), or maybe that device just doesn't like being probed with MCFG somehow. -- Robert Hancock Saskatoon, SK, Canada To email, remove "nospam" from hancockr@nospamshaw.ca Home Page: http://www.roberthancock.com/ ^ permalink raw reply [flat|nested] 61+ messages in thread
* Re: [RFC PATCH] PCI MMCONFIG: add validation against ACPI motherboard resources 2007-05-21 19:26 ` Robert Hancock @ 2007-05-21 20:07 ` Jesse Barnes 2007-05-21 20:22 ` Jesse Barnes 0 siblings, 1 reply; 61+ messages in thread From: Jesse Barnes @ 2007-05-21 20:07 UTC (permalink / raw) To: Robert Hancock Cc: Olivier Galibert, linux-kernel, Andi Kleen, Chuck Ebbert, Len Brown On Monday, May 21, 2007, Robert Hancock wrote: > > If I leave it enabled, several config cycles work fine, but the box > > eventually hangs after probing 24 devices or so. I don't see anything > > else mapped into this space, and the MTRRs seem ok, so either there's > > something hidden in this memory range or there's another chipset > > register that needs poking to fully enable this space properly. > > > > Sysrq doesn't seem to work, and I don't see any events in my machine > > log, so figuring out exactly why it's hanging is a bit difficult. > > > > Any ideas on what to try next? I'll see if I can get some more > > details from our BIOS folks and do yet another pass over the > > documentation to see if there's something I'm missing. > > Can you find out which config access (bus, device, function, address) is > the one that hangs the box? I assume that either the corresponding > address in the MCFG table is problematic (i.e. has something else mapped > over it), or maybe that device just doesn't like being probed with MCFG > somehow. Yeah, I've got that data... just a sec while I make sure it's reproducible... Aha, I hadn't decoded the devfn before, looks like it's dying on an access to the graphics device (bus 0, slot 2, function 0): ... pci_mmcfg_read: 0, 0, 0x10, 0x18, 4 = 0xc000000c pci_mmcfg_read: 0, 0, 0x10, 0x18, 4 = <hang> ... Offset 0x18 into the graphics config space should be the graphics memory range address, and 0xc000000c is the correct value. But for some reason it hangs on the second access. It hangs here every time. Jesse ^ permalink raw reply [flat|nested] 61+ messages in thread
* Re: [RFC PATCH] PCI MMCONFIG: add validation against ACPI motherboard resources 2007-05-21 20:07 ` Jesse Barnes @ 2007-05-21 20:22 ` Jesse Barnes 2007-05-23 0:31 ` Robert Hancock 0 siblings, 1 reply; 61+ messages in thread From: Jesse Barnes @ 2007-05-21 20:22 UTC (permalink / raw) To: Robert Hancock Cc: Olivier Galibert, linux-kernel, Andi Kleen, Chuck Ebbert, Len Brown On Monday, May 21, 2007, Jesse Barnes wrote: > Yeah, I've got that data... just a sec while I make sure it's > reproducable... > > Aha, I hadn't decoded the devfn before, looks like it's dying on an > access to the graphics device (bus 0, slot 2, device 0): > > ... > pci_mmcfg_read: 0, 0, 0x10, 0x18, 4 = 0xc000000c > pci_mmcfg_read: 0, 0, 0x10, 0x18, 4 = <hang> > ... > > Offset 0x18 into the graphics config space should be the graphics memory > range address, and 0xc000000c is the correct value. But for some reason > it hangs on the second access. > > It hangs here everytime. That register is in the config space BAR region, so it should be ok to write 0xffffffff to it and read it back to size the register. However, it's after writing the 0xffffffff to it and trying to read it back that the machine hangs. I didn't see any accesses to the command register to disable decoding (at least not via the mmconfig methods), so maybe that's broken during MCFG based probing? Jesse ^ permalink raw reply [flat|nested] 61+ messages in thread
* Re: [RFC PATCH] PCI MMCONFIG: add validation against ACPI motherboard resources 2007-05-21 20:22 ` Jesse Barnes @ 2007-05-23 0:31 ` Robert Hancock 2007-05-23 0:38 ` Jesse Barnes 0 siblings, 1 reply; 61+ messages in thread From: Robert Hancock @ 2007-05-23 0:31 UTC (permalink / raw) To: Jesse Barnes Cc: Olivier Galibert, linux-kernel, Andi Kleen, Chuck Ebbert, Len Brown Jesse Barnes wrote: > On Monday, May 21, 2007, Jesse Barnes wrote: >> Yeah, I've got that data... just a sec while I make sure it's >> reproducable... >> >> Aha, I hadn't decoded the devfn before, looks like it's dying on an >> access to the graphics device (bus 0, slot 2, device 0): >> >> ... >> pci_mmcfg_read: 0, 0, 0x10, 0x18, 4 = 0xc000000c >> pci_mmcfg_read: 0, 0, 0x10, 0x18, 4 = <hang> >> ... >> >> Offset 0x18 into the graphics config space should be the graphics memory >> range address, and 0xc000000c is the correct value. But for some reason >> it hangs on the second access. >> >> It hangs here everytime. > > That register is in the config space BAR region, so it should be ok to > write 0xffffffff to it and read it back to size the register. However, > it's after writing the 0xffffffff to it and trying to read it back that > the machine hangs. I didn't see any accesses to the command register to > disable decoding (at least not via the mmconfig methods), so maybe that's > broken during MCFG based probing? Eww. I don't see where we disable the decode at all while we probe the BARs on the device. That seems like a bad thing, especially with the way we probe 64-bit BARs (do the low 32 bits first and then the high 32 bits). This means the base address effectively gets set to 0xfffffff0 momentarily, which might cause some issues. 
I'd try adding some code inside pci_setup_device (drivers/pci/probe.c) to disable PCI_COMMAND_IO and PCI_COMMAND_MEMORY on the device when probing devices with the standard header type and then restoring the previous command bits afterwards, and see what effect that has. It'll be interesting if it does, since obviously it seems to work as it is with non-MMCONFIG access methods. Maybe the base address being set like that interferes with MMCONFIG access itself somehow? -- Robert Hancock Saskatoon, SK, Canada To email, remove "nospam" from hancockr@nospamshaw.ca Home Page: http://www.roberthancock.com/ ^ permalink raw reply [flat|nested] 61+ messages in thread
* Re: [RFC PATCH] PCI MMCONFIG: add validation against ACPI motherboard resources 2007-05-23 0:31 ` Robert Hancock @ 2007-05-23 0:38 ` Jesse Barnes 2007-05-23 0:53 ` Robert Hancock 2007-05-23 1:06 ` Robert Hancock 0 siblings, 2 replies; 61+ messages in thread From: Jesse Barnes @ 2007-05-23 0:38 UTC (permalink / raw) To: Robert Hancock Cc: Olivier Galibert, linux-kernel, Andi Kleen, Chuck Ebbert, Len Brown On Tuesday, May 22, 2007, Robert Hancock wrote: > Eww. I don't see where we disable the decode at all while we probe the > BARs on the device. That seems like a bad thing, especially with the way > we probe 64-bit BARs (do the low 32 bits first and then the high 32 > bits). This means the base address effectively gets set to 0xfffffff0 > momentarily, which might cause some issues. I'm a bit shocked that things work as well as they do without the disabling... > I'd try adding some code inside pci_setup_device (drivers/pci/probe.c) > to disable PCI_COMMAND_IO and PCI_COMMAND_MEMORY on the device when > probing devices with the standard header type and then restoring the > previous command bits afterwards, and see what effect that has. It'll be > interesting if it does, since obviously it seems to work as it is with > non-MMCONFIG access methods. Maybe the base address being set like that > interferes with MMCONFIG access itself somehow? I tried that, and it seems to get past probing the graphics device at least, but it hangs a bit later. It could be that the enable/disable I added wasn't correct though, I didn't check to see which one I should disable in the command word, which may be a problem (just disabled them both every probe). I'll try again with more precise enable/disable semantics. Jesse ^ permalink raw reply [flat|nested] 61+ messages in thread
* Re: [RFC PATCH] PCI MMCONFIG: add validation against ACPI motherboard resources 2007-05-23 0:38 ` Jesse Barnes @ 2007-05-23 0:53 ` Robert Hancock 2007-05-23 0:56 ` Jesse Barnes 2007-05-23 1:06 ` Robert Hancock 1 sibling, 1 reply; 61+ messages in thread From: Robert Hancock @ 2007-05-23 0:53 UTC (permalink / raw) To: Jesse Barnes Cc: Olivier Galibert, linux-kernel, Andi Kleen, Chuck Ebbert, Len Brown Jesse Barnes wrote: > On Tuesday, May 22, 2007, Robert Hancock wrote: >> Eww. I don't see where we disable the decode at all while we probe the >> BARs on the device. That seems like a bad thing, especially with the way >> we probe 64-bit BARs (do the low 32 bits first and then the high 32 >> bits). This means the base address effectively gets set to 0xfffffff0 >> momentarily, which might cause some issues. > > I'm a bit shocked that things work as well as they do without the > disabling... > >> I'd try adding some code inside pci_setup_device (drivers/pci/probe.c) >> to disable PCI_COMMAND_IO and PCI_COMMAND_MEMORY on the device when >> probing devices with the standard header type and then restoring the >> previous command bits afterwards, and see what effect that has. It'll be >> interesting if it does, since obviously it seems to work as it is with >> non-MMCONFIG access methods. Maybe the base address being set like that >> interferes with MMCONFIG access itself somehow? > > I tried that, and it seems to get past probing the graphics device at > least, but it hangs a bit later. It could be that the enable/disable I > added wasn't correct though, I didn't check to see which one I should > disable in the command word, which may be a problem (just disabled them > both every probe). I'll try again with more precise enable/disable > semantics. It'd be interesting to see at what access it ran into trouble next, at least if it's consistent. Could be that some device doesn't like having the decode disabled.. 
-- Robert Hancock Saskatoon, SK, Canada To email, remove "nospam" from hancockr@nospamshaw.ca Home Page: http://www.roberthancock.com/ ^ permalink raw reply [flat|nested] 61+ messages in thread
* Re: [RFC PATCH] PCI MMCONFIG: add validation against ACPI motherboard resources 2007-05-23 0:53 ` Robert Hancock @ 2007-05-23 0:56 ` Jesse Barnes 0 siblings, 0 replies; 61+ messages in thread From: Jesse Barnes @ 2007-05-23 0:56 UTC (permalink / raw) To: Robert Hancock Cc: Olivier Galibert, linux-kernel, Andi Kleen, Chuck Ebbert, Len Brown On Tuesday, May 22, 2007, Robert Hancock wrote: > Jesse Barnes wrote: > > On Tuesday, May 22, 2007, Robert Hancock wrote: > >> Eww. I don't see where we disable the decode at all while we probe > >> the BARs on the device. That seems like a bad thing, especially with > >> the way we probe 64-bit BARs (do the low 32 bits first and then the > >> high 32 bits). This means the base address effectively gets set to > >> 0xfffffff0 momentarily, which might cause some issues. > > > > I'm a bit shocked that things work as well as they do without the > > disabling... > > > >> I'd try adding some code inside pci_setup_device > >> (drivers/pci/probe.c) to disable PCI_COMMAND_IO and > >> PCI_COMMAND_MEMORY on the device when probing devices with the > >> standard header type and then restoring the previous command bits > >> afterwards, and see what effect that has. It'll be interesting if it > >> does, since obviously it seems to work as it is with non-MMCONFIG > >> access methods. Maybe the base address being set like that interferes > >> with MMCONFIG access itself somehow? > > > > I tried that, and it seems to get past probing the graphics device at > > least, but it hangs a bit later. It could be that the enable/disable > > I added wasn't correct though, I didn't check to see which one I > > should disable in the command word, which may be a problem (just > > disabled them both every probe). I'll try again with more precise > > enable/disable semantics. > > It'd be interesting to see at what access it ran into trouble next, at > least if it's consistent. Could be that some device doesn't like having > the decode disabled.. 
I think it actually gets through the probing but hangs elsewhere, but I'll have to test again to be sure. Jesse ^ permalink raw reply [flat|nested] 61+ messages in thread
* Re: [RFC PATCH] PCI MMCONFIG: add validation against ACPI motherboard resources 2007-05-23 0:38 ` Jesse Barnes 2007-05-23 0:53 ` Robert Hancock @ 2007-05-23 1:06 ` Robert Hancock 2007-05-23 18:52 ` Jesse Barnes 1 sibling, 1 reply; 61+ messages in thread From: Robert Hancock @ 2007-05-23 1:06 UTC (permalink / raw) To: Jesse Barnes Cc: Olivier Galibert, linux-kernel, Andi Kleen, Chuck Ebbert, Len Brown Jesse Barnes wrote: > On Tuesday, May 22, 2007, Robert Hancock wrote: >> Eww. I don't see where we disable the decode at all while we probe the >> BARs on the device. That seems like a bad thing, especially with the way >> we probe 64-bit BARs (do the low 32 bits first and then the high 32 >> bits). This means the base address effectively gets set to 0xfffffff0 >> momentarily, which might cause some issues. > > I'm a bit shocked that things work as well as they do without the > disabling... > >> I'd try adding some code inside pci_setup_device (drivers/pci/probe.c) >> to disable PCI_COMMAND_IO and PCI_COMMAND_MEMORY on the device when >> probing devices with the standard header type and then restoring the >> previous command bits afterwards, and see what effect that has. It'll be >> interesting if it does, since obviously it seems to work as it is with >> non-MMCONFIG access methods. Maybe the base address being set like that >> interferes with MMCONFIG access itself somehow? > > I tried that, and it seems to get past probing the graphics device at > least, but it hangs a bit later. It could be that the enable/disable I > added wasn't correct though, I didn't check to see which one I should > disable in the command word, which may be a problem (just disabled them > both every probe). I'll try again with more precise enable/disable > semantics. 
There was a big discussion about this back in 2002, in which Linus wasn't overly enthused about disabling the decode during probing due to risk of causing problems with some devices: http://lkml.org/lkml/2002/12/19/145 In this particular case (64-bit BAR) we might be able to avoid the problem by changing the order in which we probe the two halves of the address, i.e. change the top half to 0xffffffff before messing with the bottom half and then change it back last. That way, we end up mapping it way to the top of 64-bit address space, which hopefully is less likely to conflict.. -- Robert Hancock Saskatoon, SK, Canada To email, remove "nospam" from hancockr@nospamshaw.ca Home Page: http://www.roberthancock.com/ ^ permalink raw reply [flat|nested] 61+ messages in thread
* Re: [RFC PATCH] PCI MMCONFIG: add validation against ACPI motherboard resources 2007-05-23 1:06 ` Robert Hancock @ 2007-05-23 18:52 ` Jesse Barnes 2007-05-23 20:20 ` Linus Torvalds 2007-05-23 23:04 ` Robert Hancock 0 siblings, 2 replies; 61+ messages in thread From: Jesse Barnes @ 2007-05-23 18:52 UTC (permalink / raw) To: Robert Hancock Cc: Olivier Galibert, linux-kernel, Andi Kleen, Chuck Ebbert, Len Brown, Linus Torvalds On Tuesday, May 22, 2007 6:06 pm Robert Hancock wrote: > There was a big discussion about this back in 2002, in which Linus > wasn't overly enthused about disabling the decode during probing due > to risk of causing problems with some devices: > > http://lkml.org/lkml/2002/12/19/145 > > In this particular case (64-bit BAR) we might be able to avoid the > problem by changing the order in which we probe the two halves of the > address, i.e. change the top half to 0xffffffff before messing with > the bottom half and then change it back last. That way, we end up > mapping it way to the top of 64-bit address space, which hopefully is > less likely to conflict.. Fixed it (finally). I don't think moving the 64 bit probing around would make a difference, since we'd restore its original value anyway before moving on to the 32 bit probe which is where I think the problem is. I think what's happening is the probe is writing 0xffffffff to the video device, which is in the GMCH, and without memory decoding disabled, it'll start claiming PCI config access cycles causing the problems I saw. So my code to disable I/O and memory decode was actually working but I had a bug in the re-enable path so all my devices were staying disabled. :) So here's the patch I used (along with your ACPI patch and my 965 MCFG enable patch of course). The probing code could probably use a bit more cleanup, but this patch limits itself to implementing PCI_COMMAND based I/O and memory space decode disabling during size probing. 
We might want to do this unconditionally if we're using mmconfig based configuration access, since I imagine other machines might end up having similar address space layouts that would cause problems. Linus, since you were the one concerned about breaking working setups, what do you think? Should we use this approach, or specifically quirk out cases where mmconfig space might conflict with BAR probing? Thanks, Jesse diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index e48fcf0..69dfe0c 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -170,6 +170,48 @@ static inline int is_64bit_memory(u32 mask) return 0; } +#define BAR_IS_MEMORY(bar) (((bar) & PCI_BASE_ADDRESS_SPACE) == \ + PCI_BASE_ADDRESS_SPACE_MEMORY) + +/** + * pci_bar_size - get raw PCI BAR size + * @dev: PCI device + * @reg: BAR to probe + * + * Use basic PCI probing: + * - save original BAR value + * - disable MEM or IO decode as appropriate in PCI_COMMAND reg + * - write all 1s to the BAR + * - read back value + * - reenble MEM or IO decode as necessary + * - write original value back + * + * Returns raw BAR size to caller. 
+ */ +static u32 pci_bar_size(struct pci_dev *dev, unsigned int reg) +{ + u32 orig_reg, sz; + u16 orig_cmd; + + pci_read_config_dword(dev, reg, &orig_reg); + pci_read_config_word(dev, PCI_COMMAND, &orig_cmd); + + if (BAR_IS_MEMORY(orig_reg)) + pci_write_config_word(dev, PCI_COMMAND, + orig_cmd & ~PCI_COMMAND_MEMORY); + else + pci_write_config_word(dev, PCI_COMMAND, + orig_cmd & ~PCI_COMMAND_IO); + + pci_write_config_dword(dev, reg, 0xffffffff); + pci_read_config_dword(dev, reg, &sz); + pci_write_config_dword(dev, reg, orig_reg); + + pci_write_config_word(dev, PCI_COMMAND, orig_cmd); + + return sz; +} + static void pci_read_bases(struct pci_dev *dev, unsigned int howmany, int rom) { unsigned int pos, reg, next; @@ -185,17 +227,15 @@ static void pci_read_bases(struct pci_dev *dev, unsigned int howmany, int rom) res = &dev->resource[pos]; res->name = pci_name(dev); reg = PCI_BASE_ADDRESS_0 + (pos << 2); + pci_read_config_dword(dev, reg, &l); - pci_write_config_dword(dev, reg, ~0); - pci_read_config_dword(dev, reg, &sz); - pci_write_config_dword(dev, reg, l); + sz = pci_bar_size(dev, reg); if (!sz || sz == 0xffffffff) continue; if (l == 0xffffffff) l = 0; raw_sz = sz; - if ((l & PCI_BASE_ADDRESS_SPACE) == - PCI_BASE_ADDRESS_SPACE_MEMORY) { + if (BAR_IS_MEMORY(l)) { sz = pci_size(l, sz, (u32)PCI_BASE_ADDRESS_MEM_MASK); /* * For 64bit prefetchable memory sz could be 0, if the @@ -219,9 +259,7 @@ static void pci_read_bases(struct pci_dev *dev, unsigned int howmany, int rom) u32 szhi, lhi; pci_read_config_dword(dev, reg+4, &lhi); - pci_write_config_dword(dev, reg+4, ~0); - pci_read_config_dword(dev, reg+4, &szhi); - pci_write_config_dword(dev, reg+4, lhi); + szhi = pci_bar_size(dev, reg+4); sz64 = ((u64)szhi << 32) | raw_sz; l64 = ((u64)lhi << 32) | l; sz64 = pci_size64(l64, sz64, PCI_BASE_ADDRESS_MEM_MASK); ^ permalink raw reply related [flat|nested] 61+ messages in thread
* Re: [RFC PATCH] PCI MMCONFIG: add validation against ACPI motherboard resources 2007-05-23 18:52 ` Jesse Barnes @ 2007-05-23 20:20 ` Linus Torvalds 2007-05-23 20:38 ` Alan Cox ` (2 more replies) 2007-05-23 23:04 ` Robert Hancock 1 sibling, 3 replies; 61+ messages in thread From: Linus Torvalds @ 2007-05-23 20:20 UTC (permalink / raw) To: Jesse Barnes Cc: Robert Hancock, Olivier Galibert, linux-kernel, Andi Kleen, Chuck Ebbert, Len Brown On Wed, 23 May 2007, Jesse Barnes wrote: > > Fixed it (finally). I don't think moving the 64 bit probing around > would make a difference, since we'd restore its original value anyway > before moving on to the 32 bit probe which is where I think the problem > is. Well, the thing is, I'm pretty sure there is at least one northbridge that stops memory accesses from the CPU when you turn off the MEM bit on it. Oops, you just killed the machine. Looking at the 925X datasheet (which I happened to have around in my google search history because of the discussions of the sky2 DMA problems), it looks like at least that one just hardcodes the MEM bit to be 1, and thus writing to it is a total no-op. But I really think that clearing the MEM bit for at least the host bridge is conceptually quite wrong, even if it might turn out that all chipsets end up just saying (like Intel) "screw it, the user is insane, we're not going to actually do what he asks us to do". Do we really want to be that insane? Turn off memory accesses when probing the CPU host bridge? So at a _minimum_ I would say that that thing needs to be more careful about host bridges. Maybe it's not needed, who knows? > Linus, since you were the one concerned about breaking working setups, > what do you think? Should we use this approach, or specifically quirk > out cases where mmconfig space might conflict with BAR probing? So see above. I think at a minimum, we should consider the host bridge special. 
I also suspect that we'd be simply better off if we didn't use mmconfig at all unless we _have_ to. Why use mmconfig for the standard BAR accesses? Is there really any reason? I can understand using it for extended config space, since then the old-fashioned approach won't work. But for normal accesses? What's the point, really? mmconfig seems to be fundamentally designed to be impossible to bootstrap off, so there's no way you can have a machine that _only_ supports mmconfig. So why do people seem to think it's so wonderful? Please fill me in on this fundamental mystery. Quite frankly, if we just didn't use mmconfig, the whole issue would go away. Isn't _that_ the much better solution? Linus ^ permalink raw reply [flat|nested] 61+ messages in thread
* Re: [RFC PATCH] PCI MMCONFIG: add validation against ACPI motherboard resources 2007-05-23 20:20 ` Linus Torvalds @ 2007-05-23 20:38 ` Alan Cox 2007-05-23 20:45 ` Linus Torvalds 2007-05-23 20:49 ` Jesse Barnes 2007-05-23 23:04 ` Robert Hancock 2 siblings, 1 reply; 61+ messages in thread From: Alan Cox @ 2007-05-23 20:38 UTC (permalink / raw) To: Linus Torvalds Cc: Jesse Barnes, Robert Hancock, Olivier Galibert, linux-kernel, Andi Kleen, Chuck Ebbert, Len Brown > Well, the thing is, I'm pretty sure there is at least one northbridge that > stops memory accesses from the CPU when you turn off the MEM bit on it. > Oops, you just killed the machine. CS5520. But it doesn't have 64bit or PCI Express. Alan ^ permalink raw reply [flat|nested] 61+ messages in thread
* Re: [RFC PATCH] PCI MMCONFIG: add validation against ACPI motherboard resources 2007-05-23 20:38 ` Alan Cox @ 2007-05-23 20:45 ` Linus Torvalds 0 siblings, 0 replies; 61+ messages in thread From: Linus Torvalds @ 2007-05-23 20:45 UTC (permalink / raw) To: Alan Cox Cc: Jesse Barnes, Robert Hancock, Olivier Galibert, linux-kernel, Andi Kleen, Chuck Ebbert, Len Brown On Wed, 23 May 2007, Alan Cox wrote: > > > Well, the thing is, I'm pretty sure there is at least one northbridge that > > stops memory accesses from the CPU when you turn off the MEM bit on it. > > Oops, you just killed the machine. > > CS5520. But it doesn't have 64bit or PCI Express. That patch does it for _all_ PCI probing. So it would turn any machine using that northbridge into a brick. Linus ^ permalink raw reply [flat|nested] 61+ messages in thread
* Re: [RFC PATCH] PCI MMCONFIG: add validation against ACPI motherboard resources 2007-05-23 20:20 ` Linus Torvalds 2007-05-23 20:38 ` Alan Cox @ 2007-05-23 20:49 ` Jesse Barnes 2007-05-23 20:56 ` Linus Torvalds 2007-05-23 23:04 ` Robert Hancock 2 siblings, 1 reply; 61+ messages in thread From: Jesse Barnes @ 2007-05-23 20:49 UTC (permalink / raw) To: Linus Torvalds Cc: Robert Hancock, Olivier Galibert, linux-kernel, Andi Kleen, Chuck Ebbert, Len Brown On Wednesday, May 23, 2007 1:20 pm Linus Torvalds wrote: > On Wed, 23 May 2007, Jesse Barnes wrote: > > Fixed it (finally). I don't think moving the 64 bit probing around > > would make a difference, since we'd restore its original value > > anyway before moving on to the 32 bit probe which is where I think > > the problem is. > > Well, the thing is, I'm pretty sure there is at least one northbridge > that stops memory accesses from the CPU when you turn off the MEM bit > on it. Oops, you just killed the machine. Wow, that sounds like a pretty lame host bridge. > Looking at the 925X datasheet (which I happened to have around in my > google search history because of the discussions of the sky2 DMA > problems), it looks like at least that one just hardcodes the MEM bit > to be 1, and thus writing to it is a total no-op. > > But I really think that clearing the MEM bit for at least the host > bridge is conceptually quite wrong, even if it might turn out that > all chipsets end up just saying (like Intel) "screw it, the user is > insane, we're not going to actually do what he asks us to do". > > Do we really want to be that insane? Turn off memory accesses when > probing the CPU host bridge? > > So at a _minimum_ I would say that that thing needs to be more > careful about host bridges. Maybe it's not needed, who knows? 
I'm not sure either, but the PCI spec is pretty clear about how probing ought to be done, and it seems that other OSes do the disabling (though I'm not sure about how they handle broken host bridges like the one you mention). > I also suspect that we'd be simply better off if we didn't use > mmconfig at all unless we _have_ to. Why use mmconfig for the > standard BAR accesses? Is there really any reason? I can understand > using it for extended config space, since then the old-fashioned > approach won't work. But for normal accesses? What's the point, > really? Yeah, it's mainly needed for extended config space and PCIe (lots of regular PCIe features are in the extended space and are assumed to be accessible). > mmconfig seems to be fundamentally designed to be impossible to > bootstrap off, so there's no way you can have a machine that _only_ > supports mmconfig. So why do people seem to think it's so wonderful? > Please fill me in on this fundamental mystery. Well, non-x86 people I think are fairly used to it, for one. > Quite frankly, if we just didn't use mmconfig, the whole issue would > go away. Isn't _that_ the much better solution? Not for systems with PCIe... and the platforms I've been having trouble with have PCIe slots, so I'd really like mmconfig to be used at least on machines with PCIe bridges. For other machines, it probably doesn't matter much. I don't know of any regular PCI devices offhand that really need extended config space. Jesse ^ permalink raw reply [flat|nested] 61+ messages in thread
* Re: [RFC PATCH] PCI MMCONFIG: add validation against ACPI motherboard resources 2007-05-23 20:49 ` Jesse Barnes @ 2007-05-23 20:56 ` Linus Torvalds 2007-05-23 21:03 ` Jesse Barnes 2007-05-23 21:20 ` Jesse Barnes 0 siblings, 2 replies; 61+ messages in thread From: Linus Torvalds @ 2007-05-23 20:56 UTC (permalink / raw) To: Jesse Barnes Cc: Robert Hancock, Olivier Galibert, linux-kernel, Andi Kleen, Chuck Ebbert, Len Brown On Wed, 23 May 2007, Jesse Barnes wrote: > On Wednesday, May 23, 2007 1:20 pm Linus Torvalds wrote: > > On Wed, 23 May 2007, Jesse Barnes wrote: > > > Fixed it (finally). I don't think moving the 64 bit probing around > > > would make a difference, since we'd restore its original value > > > anyway before moving on to the 32 bit probe which is where I think > > > the problem is. > > > > Well, the thing is, I'm pretty sure there is at least one northbridge > > that stops memory accesses from the CPU when you turn off the MEM bit > > on it. Oops, you just killed the machine. > > Wow, that sounds like a pretty lame host bridge. Umm. Why? Think about it. You ASKED it to stop forwarding memory. So who is lamer: the chip that does what it is told, or the software that tells it to do it? I'd vote for the software. Any programmer who expects the hardware to "just do what I mean, not what I say" is not a programmer, but a dreamer. You told it to not forward memory. Why complain when it does as told? > > Quite frankly, if we just didn't use mmconfig, the whole issue would > > go away. Isn't _that_ the much better solution? > > Not for systems with PCIe... and the platforms I've been having trouble > with have PCIe slots, so I'd really like mmconfig to be used at least > on machines with PCIe bridges. For other machines, it probably doesn't > matter much. I don't know of any regular PCI devices offhand that > really need extended config space. Ehh. Even for PCIe, why not use the normal accesses for the first 256 bytes? Problem solved. 
Linus ^ permalink raw reply [flat|nested] 61+ messages in thread
* Re: [RFC PATCH] PCI MMCONFIG: add validation against ACPI motherboard resources 2007-05-23 20:56 ` Linus Torvalds @ 2007-05-23 21:03 ` Jesse Barnes 2007-05-23 21:09 ` Jeff Garzik 2007-05-23 21:54 ` Linus Torvalds 2007-05-23 21:20 ` Jesse Barnes 1 sibling, 2 replies; 61+ messages in thread From: Jesse Barnes @ 2007-05-23 21:03 UTC (permalink / raw) To: Linus Torvalds Cc: Robert Hancock, Olivier Galibert, linux-kernel, Andi Kleen, Chuck Ebbert, Len Brown On Wednesday, May 23, 2007 1:56 pm Linus Torvalds wrote: > On Wed, 23 May 2007, Jesse Barnes wrote: > > On Wednesday, May 23, 2007 1:20 pm Linus Torvalds wrote: > > > On Wed, 23 May 2007, Jesse Barnes wrote: > > > > Fixed it (finally). I don't think moving the 64 bit probing > > > > around would make a difference, since we'd restore its original > > > > value anyway before moving on to the 32 bit probe which is > > > > where I think the problem is. > > > > > > Well, the thing is, I'm pretty sure there is at least one > > > northbridge that stops memory accesses from the CPU when you turn > > > off the MEM bit on it. Oops, you just killed the machine. > > > > Wow, that sounds like a pretty lame host bridge. > > Umm. Why? Think about it. > > You ASKED it to stop forwarding memory. > > So who is lamer: the chip that does what it is told, or the software > that tells it to do it? > > I'd vote for the software. Any programmer who expects the hardware to > "just do what I mean, not what I say" is not a programmer, but a > dreamer. > > You told it to not forward memory. Why complain when it does as told? Well, because that's not actually very useful functionality, and likely makes software that seems "obviously" correct wrt the PCI spec break. > > > Quite frankly, if we just didn't use mmconfig, the whole issue > > > would go away. Isn't _that_ the much better solution? > > > > Not for systems with PCIe... 
and the platforms I've been having > > trouble with have PCIe slots, so I'd really like mmconfig to be > > used at least on machines with PCIe bridges. For other machines, > > it probably doesn't matter much. I don't know of any regular PCI > > devices offhand that really need extended config space. > > Ehh. Even for PCIe, why not use the normal accesses for the first 256 > bytes? Problem solved. Yeah, that's another option. Would just mean an additional conditional in the mmconfig code, I'll give it a try... Apparently Vista will move away from using type 1 config space accesses though, so if we keep using it, we'll probably run into some lame board that assumes you're using mmconfig at some point in the near future. But then again, we're often on that less tested path (e.g. with ACPI), so maybe that doesn't matter much. Jesse ^ permalink raw reply [flat|nested] 61+ messages in thread
* Re: [RFC PATCH] PCI MMCONFIG: add validation against ACPI motherboard resources 2007-05-23 21:03 ` Jesse Barnes @ 2007-05-23 21:09 ` Jeff Garzik 2007-05-23 21:35 ` Alan Cox 2007-05-23 23:07 ` Stephen Hemminger 2007-05-23 21:54 ` Linus Torvalds 1 sibling, 2 replies; 61+ messages in thread From: Jeff Garzik @ 2007-05-23 21:09 UTC (permalink / raw) To: Jesse Barnes Cc: Linus Torvalds, Robert Hancock, Olivier Galibert, linux-kernel, Andi Kleen, Chuck Ebbert, Len Brown Jesse Barnes wrote: > Apparently Vista will move away from using type 1 config space accesses > though, so if we keep using it, we'll probably run into some lame board Yep. > that assumes you're using mmconfig at some point in the near future. > But then again, we're often on that less tested path (e.g. with ACPI), > so maybe that doesn't matter much. One of the reasons why hardware vendors want to move away from traditional accesses is to be able to use the larger config space in PCI-Express, rather than being locked into the 256-byte legacy PCI config space. Several modern PCI-Express devices utilize the upper config space, but due to legacy reasons the registers are usually ones that do not require OS drivers to know about (like BIST stuff or diagnostic registers). Expect that to change, as MS shakes out the bugs (or maybe we are doing their job for them?). Jeff ^ permalink raw reply [flat|nested] 61+ messages in thread
* Re: [RFC PATCH] PCI MMCONFIG: add validation against ACPI motherboard resources 2007-05-23 21:09 ` Jeff Garzik @ 2007-05-23 21:35 ` Alan Cox 2007-05-23 21:35 ` Jeff Garzik 2007-05-23 21:37 ` Jesse Barnes 2007-05-23 23:07 ` Stephen Hemminger 1 sibling, 2 replies; 61+ messages in thread From: Alan Cox @ 2007-05-23 21:35 UTC (permalink / raw) To: Jeff Garzik Cc: Jesse Barnes, Linus Torvalds, Robert Hancock, Olivier Galibert, linux-kernel, Andi Kleen, Chuck Ebbert, Len Brown > One of the reasons why hardware vendors want to move away from > traditional accesses is to be able to use the larger config space in > PCI-Express, rather than being locked into the 256-byte legacy PCI > config space. Mostly for treacherous computing extensions where subsets of the config space can only be accessed by signed machines blessed by your favourite movie company and video card vendor... > Expect that to change, as MS shakes out the bugs (or maybe we are doing > their job for them?). The longer it takes - the better. Alan ^ permalink raw reply [flat|nested] 61+ messages in thread
* Re: [RFC PATCH] PCI MMCONFIG: add validation against ACPI motherboard resources 2007-05-23 21:35 ` Alan Cox @ 2007-05-23 21:35 ` Jeff Garzik 2007-05-23 21:37 ` Jesse Barnes 1 sibling, 0 replies; 61+ messages in thread From: Jeff Garzik @ 2007-05-23 21:35 UTC (permalink / raw) To: Alan Cox Cc: Jesse Barnes, Linus Torvalds, Robert Hancock, Olivier Galibert, linux-kernel, Andi Kleen, Chuck Ebbert, Len Brown Alan Cox wrote: >> One of the reasons why hardware vendors want to move away from >> traditional accesses is to be able to use the larger config space in >> PCI-Express, rather than being locked into the 256-byte legacy PCI >> config space. > > Mostly for treacherous computing extensions where subsets of the config > space can only be accessed by signed machines blessed by your favourite > movie company and video card vendor... Um, no, Mr. Paranoia, it's a standard part of the spec. Jeff ^ permalink raw reply [flat|nested] 61+ messages in thread
* Re: [RFC PATCH] PCI MMCONFIG: add validation against ACPI motherboard resources 2007-05-23 21:35 ` Alan Cox 2007-05-23 21:35 ` Jeff Garzik @ 2007-05-23 21:37 ` Jesse Barnes 2007-05-23 21:42 ` Jeff Garzik 1 sibling, 1 reply; 61+ messages in thread From: Jesse Barnes @ 2007-05-23 21:37 UTC (permalink / raw) To: Alan Cox Cc: Jeff Garzik, Linus Torvalds, Robert Hancock, Olivier Galibert, linux-kernel, Andi Kleen, Chuck Ebbert, Len Brown On Wednesday, May 23, 2007 2:35 pm Alan Cox wrote: > > One of the reasons why hardware vendors want to move away from > > traditional accesses is to be able to use the larger config space > > in PCI-Express, rather than being locked into the 256-byte legacy > > PCI config space. > > Mostly for treacherous computing extensions where subsets of the > config space can only be accessed by signed machines blessed by your > favourite movie company and video card vendor... I hate "trusted" platform garbage as much as the next guy (where "trusted" means the actual user can't trust it, just the seller), but I think there are legitimate uses of extended space as well, PCIe AER uses it iirc, so don't dismiss it on those grounds. :) Jesse ^ permalink raw reply [flat|nested] 61+ messages in thread
* Re: [RFC PATCH] PCI MMCONFIG: add validation against ACPI motherboard resources 2007-05-23 21:37 ` Jesse Barnes @ 2007-05-23 21:42 ` Jeff Garzik 0 siblings, 0 replies; 61+ messages in thread From: Jeff Garzik @ 2007-05-23 21:42 UTC (permalink / raw) To: Jesse Barnes Cc: Alan Cox, Linus Torvalds, Robert Hancock, Olivier Galibert, linux-kernel, Andi Kleen, Chuck Ebbert, Len Brown Jesse Barnes wrote: > On Wednesday, May 23, 2007 2:35 pm Alan Cox wrote: >>> One of the reasons why hardware vendors want to move away from >>> traditional accesses is to be able to use the larger config space >>> in PCI-Express, rather than being locked into the 256-byte legacy >>> PCI config space. >> Mostly for treacherous computing extensions where subsets of the >> config space can only be accessed by signed machines blessed by your >> favourite movie company and video card vendor... > > I hate "trusted" platform garbage as much as the next guy > (where "trusted" means the actual user can't trust it, just the > seller), but I think there are legitimate uses of extended space as > well, PCIe AER uses it iirc, so don't dismiss it on those grounds. :) Indeed. It's just a register space. Assuming one register space is "more evil" than another, simply because it is bigger, is.. well.. silly. Jeff ^ permalink raw reply [flat|nested] 61+ messages in thread
* Re: [RFC PATCH] PCI MMCONFIG: add validation against ACPI motherboard resources 2007-05-23 21:09 ` Jeff Garzik 2007-05-23 21:35 ` Alan Cox @ 2007-05-23 23:07 ` Stephen Hemminger 1 sibling, 0 replies; 61+ messages in thread From: Stephen Hemminger @ 2007-05-23 23:07 UTC (permalink / raw) To: linux-kernel On Wed, 23 May 2007 17:09:37 -0400 Jeff Garzik <jgarzik@pobox.com> wrote: > Jesse Barnes wrote: > > Apparently Vista will move away from using type 1 config space accesses > > though, so if we keep using it, we'll probably run into some lame board > > Yep. > > > > that assumes you're using mmconfig at some point in the near future. > > But then again, we're often on that less tested path (e.g. with ACPI), > > so maybe that doesn't matter much. > > One of the reasons why hardware vendors want to move away from > traditional accesses is to be able to use the larger config space in > PCI-Express, rather than being locked into the 256-byte legacy PCI > config space. > > Several modern PCI-Express devices utilize the upper config space, but > due to legacy reasons the registers are usually ones that do not require > OS drivers to know about (like BIST stuff or diagnostic registers). > > Expect that to change, as MS shakes out the bugs (or maybe we are doing > their job for them?). > On some PCI-Express boards, if you don't clear the advanced error reporting registers on boot up, they will cause an IRQ storm. The AER registers are above the 256-byte boundary. In fact, the AER support in Linux should depend on MMCONFIG. -- Stephen Hemminger <shemminger@linux-foundation.org> ^ permalink raw reply [flat|nested] 61+ messages in thread
* Re: [RFC PATCH] PCI MMCONFIG: add validation against ACPI motherboard resources 2007-05-23 21:03 ` Jesse Barnes 2007-05-23 21:09 ` Jeff Garzik @ 2007-05-23 21:54 ` Linus Torvalds 2007-05-23 22:06 ` Jesse Barnes 1 sibling, 1 reply; 61+ messages in thread From: Linus Torvalds @ 2007-05-23 21:54 UTC (permalink / raw) To: Jesse Barnes Cc: Robert Hancock, Olivier Galibert, linux-kernel, Andi Kleen, Chuck Ebbert, Len Brown On Wed, 23 May 2007, Jesse Barnes wrote: > > You told it to not forward memory. Why complain when it does as told? > > Well, because that's not actually very useful functionality, and likely > makes software that seems "obviously" correct wrt the PCI spec break. I agree that a chip that doesn't do it isn't broken either, but the fact is, there is never any reason to disable MEM/IO on a host bridge. Doing so is senseless - it can never be a valid operation. So I dispute the "obviously correct" part. It's _not_ obviously correct at all. To get back to the MMIO example: even if you were to never shut off RAM, if you turn off just PCI MMIO on the northbridge, what is a mmconfig cycle supposed to do? It's not going to _work_ if you disable MEM accesses. So again, the only sane situation is: don't do it then! You claim that hardware shouldn't do it, but I don't think software is in any different situation at all! If it's insane to do, then software shouldn't do it. It's just insane to turn off the MEM bit. There's simply no valid reason to. And any PCI spec that says you should is *broken*, or written by somebody who really only meant to talk about normal PCI devices, not bridges. > Apparently Vista will move away from using type 1 config space accesses > though, so if we keep using it, we'll probably run into some lame board > that assumes you're using mmconfig at some point in the near future. How are those boards going to set up mmconfig? The whole standard is broken, since there is no way to set it up. Trust the firmware? What a piece of crap! 
Linus ^ permalink raw reply [flat|nested] 61+ messages in thread
* Re: [RFC PATCH] PCI MMCONFIG: add validation against ACPI motherboard resources 2007-05-23 21:54 ` Linus Torvalds @ 2007-05-23 22:06 ` Jesse Barnes 2007-05-23 22:16 ` Linus Torvalds 0 siblings, 1 reply; 61+ messages in thread From: Jesse Barnes @ 2007-05-23 22:06 UTC (permalink / raw) To: Linus Torvalds Cc: Robert Hancock, Olivier Galibert, linux-kernel, Andi Kleen, Chuck Ebbert, Len Brown On Wednesday, May 23, 2007 2:54 pm Linus Torvalds wrote: > On Wed, 23 May 2007, Jesse Barnes wrote: > > > You told it to not forward memory. Why complain when it does as > > > told? > > > > Well, because that's not actually very useful functionality, and > > likely makes software that seems "obviously" correct wrt the PCI > > spec break. > > I agree that a chip that doesn't do it isn't broken either, but the > fact is, there is never any reason to disable MEM/IO on a host > bridge. Doing so is senseless - it can never be a valid operation. So > I duspute the "obviously correct" part. It's _not_ obviously correct > at all. > > To get back to the MMIO example: even if you were to never shut off > RAM, if you turn off just PCI MMIO on the northbridge, what is a > mmconfig cycle supposed to do? It's not going to _work_ if you > disable MEM accesses. > > So again, the only sane situation is: don't do it then! You claim > that hardware shouldn't do it, but I don't think software is in any > different situation at all! If it's insane to do, then software > shouldn't do it. > > It's just insane to turn off the MEM bit. There's simply no valid > reason to. And any PCI spec that says you should is *broken*, or > written by somebody who really only meant to talk about normal PCI > devices, not bridges. Well theoretically for just sizing BARs, turning off the MEM bit should be fine, since your next accesses should only be to config space until the MEM bit is reenabled. 
But if RAM accesses really are disabled, then you'd better be sure all the code you need is already in cache, or you'll get into trouble. So yeah, I guess special handling for host bridges is needed, but that doesn't seem like a big deal. > > Apparently Vista will move away from using type 1 config space > > accesses though, so if we keep using it, we'll probably run into > > some lame board that assumes you're using mmconfig at some point in > > the near future. > > How are those boards going to set up mmconfig? The whole standard is > broken, since there is no way to set it up. > > Trust the firmware? What a piece of crap! What do you mean? You set it up the normal way, by poking at config space to see what's there, then size the BARs (disabling mem and I/O accesses in PCI_COMMAND shouldn't affect config space cycles afaik). You just have to be careful to disable decoding for I/O and memory regions, especially if your mmconfig space overlaps with what the devices end up with in their BARs. Which is why my initial patch works ok (because fortunately the Intel host bridges hard code the mem decode bit to 1 too). Jesse ^ permalink raw reply [flat|nested] 61+ messages in thread
* Re: [RFC PATCH] PCI MMCONFIG: add validation against ACPI motherboard resources 2007-05-23 22:06 ` Jesse Barnes @ 2007-05-23 22:16 ` Linus Torvalds 2007-05-23 22:28 ` Jesse Barnes 2007-05-23 23:04 ` David Miller 0 siblings, 2 replies; 61+ messages in thread From: Linus Torvalds @ 2007-05-23 22:16 UTC (permalink / raw) To: Jesse Barnes Cc: Robert Hancock, Olivier Galibert, linux-kernel, Andi Kleen, Chuck Ebbert, Len Brown On Wed, 23 May 2007, Jesse Barnes wrote: > > > > How are those boards going to set up mmconfig? The whole standard is > > broken, since there is no way to set it up. > > > > Trust the firmware? What a piece of crap! > > What do you mean? You set it up the normal way, by poking at config > space to see what's there HOW DO YOU GET TO THE CONFIG SPACE IN THE FIRST PLACE? The reason mmconfig is *BROKEN*CRAP* is that you cannot bootstrap it. There's no standard way to even figure out WHERE IT IS! So we depend on firmware tables that are known to be broken! That crap should be seen for the crap it is! Dammit, how hard can it be to just admit that mmconfig isn't that great? Linus ^ permalink raw reply [flat|nested] 61+ messages in thread
* Re: [RFC PATCH] PCI MMCONFIG: add validation against ACPI motherboard resources 2007-05-23 22:16 ` Linus Torvalds @ 2007-05-23 22:28 ` Jesse Barnes 2007-05-23 23:04 ` David Miller 1 sibling, 0 replies; 61+ messages in thread From: Jesse Barnes @ 2007-05-23 22:28 UTC (permalink / raw) To: Linus Torvalds Cc: Robert Hancock, Olivier Galibert, linux-kernel, Andi Kleen, Chuck Ebbert, Len Brown On Wednesday, May 23, 2007 3:16 pm Linus Torvalds wrote: > On Wed, 23 May 2007, Jesse Barnes wrote: > > > How are those boards going to set up mmconfig? The whole standard > > > is broken, since there is no way to set it up. > > > > > > Trust the firmware? What a piece of crap! > > > > What do you mean? You set it up the normal way, by poking at > > config space to see what's there > > HOW DO YOU GET TO THE CONFIG SPACE IN THE FIRST PLACE? > > The reason mmconfig is *BROKEN*CRAP* is that you cannot bootstrap it. > There's no standard way to even figure out WHERE IT IS! > > So we depend on firmware tables that are known to be broken! > > That crap should be seen for the crap it is! Dammit, how hard can it > be to just admit that mmconfig isn't that great? Ah, yeah, that's platform specific, I thought you were confused about how the sizing worked. On x86, we either have to look at ACPI tables (yay) or use type 1 config accesses to get at the mmconfig base register (which is what the patches Olivier and I posted do). On ia64 there are firmware calls to do config space accesses. Not sure about other platforms. I'm not claiming mmconfig is great and we should make everything use it, but we do need it these days, so we should figure out a good way of getting at it. Jesse ^ permalink raw reply [flat|nested] 61+ messages in thread
* Re: [RFC PATCH] PCI MMCONFIG: add validation against ACPI motherboard resources 2007-05-23 22:16 ` Linus Torvalds 2007-05-23 22:28 ` Jesse Barnes @ 2007-05-23 23:04 ` David Miller 2007-05-23 23:11 ` Jesse Barnes 1 sibling, 1 reply; 61+ messages in thread From: David Miller @ 2007-05-23 23:04 UTC (permalink / raw) To: torvalds; +Cc: jbarnes, hancockr, galibert, linux-kernel, ak, cebbert, lenb From: Linus Torvalds <torvalds@linux-foundation.org> Date: Wed, 23 May 2007 15:16:23 -0700 (PDT) > That crap should be seen for the crap it is! Dammit, how hard can it > be to just admit that mmconfig isn't that great? I knew mmconfig was broken conceptually the first time I started seeing write posting "bug fixes" for it that would do a read back from PCI config space via mmconfig to post the write, which of course has potential side-effects on the device and is absolutely illegal if the write just performed put the device into a PM state or whatever. Truth is stranger than fiction at times. MMCONFIG is very much an ill-conceived idea. ^ permalink raw reply [flat|nested] 61+ messages in thread
* Re: [RFC PATCH] PCI MMCONFIG: add validation against ACPI motherboard resources 2007-05-23 23:04 ` David Miller @ 2007-05-23 23:11 ` Jesse Barnes 2007-05-23 23:15 ` Robert Hancock 0 siblings, 1 reply; 61+ messages in thread From: Jesse Barnes @ 2007-05-23 23:11 UTC (permalink / raw) To: David Miller Cc: torvalds, hancockr, galibert, linux-kernel, ak, cebbert, lenb On Wednesday, May 23, 2007 4:04 pm David Miller wrote: > From: Linus Torvalds <torvalds@linux-foundation.org> > Date: Wed, 23 May 2007 15:16:23 -0700 (PDT) > > > That crap should be seen for the crap it is! Dammit, how hard can > > it be to just admit that mmconfig isn't that great? > > I knew mmconfig was broken conceptually the first time I started > seeing write posting "bug fixes" for it that would do a read back > from PCI config space via mmconfig to post the write, which of course > has potential side-effects on the device and is absolutely illegal if > the write just performed put the device into a PM state or whatever. I've actually seen that specific form of posted write flushing cause crashes on some machines, so yes, it sucks. Unfortunately, I don't think we have any other way of getting at extended config space on x86, unless EFI provides methods or something, but I'm not sure that would be an improvement... Jesse ^ permalink raw reply [flat|nested] 61+ messages in thread
* Re: [RFC PATCH] PCI MMCONFIG: add validation against ACPI motherboard resources 2007-05-23 23:11 ` Jesse Barnes @ 2007-05-23 23:15 ` Robert Hancock 2007-05-23 23:21 ` Jesse Barnes 0 siblings, 1 reply; 61+ messages in thread From: Robert Hancock @ 2007-05-23 23:15 UTC (permalink / raw) To: Jesse Barnes Cc: David Miller, torvalds, galibert, linux-kernel, ak, cebbert, lenb Jesse Barnes wrote: > On Wednesday, May 23, 2007 4:04 pm David Miller wrote: >> From: Linus Torvalds <torvalds@linux-foundation.org> >> Date: Wed, 23 May 2007 15:16:23 -0700 (PDT) >> >>> That crap should be seen for the crap it is! Dammit, how hard can >>> it be to just admit that mmconfig isn't that great? >> I knew mmconfig was broken conceptually the first time I started >> seeing write posting "bug fixes" for it that would do a read back >> from PCI config space via mmconfig to post the write, which of course >> has potential side-effects on the device and is absolutely illegal if >> the write just performed put the device into a PM state or whatever. > > I've actually seen that specific form of posted write flushing cause > crashes on some machines, so yes, it sucks. > > Unfortunately, I don't think we have any other way of getting at > extended config space on x86, unless EFI provides methods or something, > but I'm not sure that would be an improvement... That "fix" shouldn't be needed at all, the MMCONFIG memory range shouldn't be covered by PCI ordering rules, so there should be no such thing as write posting. I suspect that the author of such patch(es) was doing so out of some misguided sense that it was needed. (And if there is some chipset where it is actually needed, better just disable MMCONFIG on that one, as there's no way to use it sanely.) -- Robert Hancock Saskatoon, SK, Canada To email, remove "nospam" from hancockr@nospamshaw.ca Home Page: http://www.roberthancock.com/ ^ permalink raw reply [flat|nested] 61+ messages in thread
* Re: [RFC PATCH] PCI MMCONFIG: add validation against ACPI motherboard resources 2007-05-23 23:15 ` Robert Hancock @ 2007-05-23 23:21 ` Jesse Barnes 0 siblings, 0 replies; 61+ messages in thread From: Jesse Barnes @ 2007-05-23 23:21 UTC (permalink / raw) To: Robert Hancock Cc: David Miller, torvalds, galibert, linux-kernel, ak, cebbert, lenb On Wednesday, May 23, 2007 4:15 pm Robert Hancock wrote: > Jesse Barnes wrote: > > On Wednesday, May 23, 2007 4:04 pm David Miller wrote: > >> From: Linus Torvalds <torvalds@linux-foundation.org> > >> Date: Wed, 23 May 2007 15:16:23 -0700 (PDT) > >> > >>> That crap should be seen for the crap it is! Dammit, how hard can > >>> it be to just admit that mmconfig isn't that great? > >> > >> I knew mmconfig was broken conceptually the first time I started > >> seeing write posting "bug fixes" for it that would do a read back > >> from PCI config space via mmconfig to post the write, which of > >> course has potential side-effects on the device and is absolutely > >> illegal if the write just performed put the device into a PM state > >> or whatever. > > > > I've actually seen that specific form of posted write flushing > > cause crashes on some machines, so yes, it sucks. > > > > Unfortunately, I don't think we have any other way of getting at > > extended config space on x86, unless EFI provides methods or > > something, but I'm not sure that would be an improvement... > > That "fix" shouldn't be needed at all, the MMCONFIG memory range > shouldn't be covered by PCI ordering rules, so there should be no > such thing as write posting. I suspect that the author of such > patch(es) was doing so out of some misguided sense that it was > needed. (And if there is some chipset where it is actually needed, > better just disable MMCONFIG on that one, as there's no way to use it > sanely.) PCI allows write posting, and on systems using mmconfig the posting generally (unfortunately) extends to that space as well. 
So drivers need to deal with it somehow. But many get it wrong. Jesse ^ permalink raw reply [flat|nested] 61+ messages in thread
* Re: [RFC PATCH] PCI MMCONFIG: add validation against ACPI motherboard resources 2007-05-23 20:56 ` Linus Torvalds 2007-05-23 21:03 ` Jesse Barnes @ 2007-05-23 21:20 ` Jesse Barnes 2007-05-23 22:24 ` Olivier Galibert 1 sibling, 1 reply; 61+ messages in thread From: Jesse Barnes @ 2007-05-23 21:20 UTC (permalink / raw) To: Linus Torvalds Cc: Robert Hancock, Olivier Galibert, linux-kernel, Andi Kleen, Chuck Ebbert, Len Brown On Wednesday, May 23, 2007 1:56 pm Linus Torvalds wrote: > > Not for systems with PCIe... and the platforms I've been having > > trouble with have PCIe slots, so I'd really like mmconfig to be > > used at least on machines with PCIe bridges. For other machines, > > it probably doesn't matter much. I don't know of any regular PCI > > devices offhand that really need extended config space. > > Ehh. Even for PCIe, why not use the normal accesses for the first 256 > bytes? Problem solved. Ok, this patch also works. We still need to enable mmconfig space for PCIe and extended config space, but we can continue to use type 1 accesses for legacy PCI config space cycles to avoid decode trouble with mmconfig based BAR sizing. Assuming Robert's and my patches to enable mmconfig space go in, we'd want a similar patch to the i386 mmconfig code. 
Jesse diff --git a/arch/x86_64/pci/mmconfig.c b/arch/x86_64/pci/mmconfig.c index 65d8273..5052f80 100644 --- a/arch/x86_64/pci/mmconfig.c +++ b/arch/x86_64/pci/mmconfig.c @@ -61,7 +61,7 @@ static int pci_mmcfg_read(unsigned int seg, unsigned int bus, } addr = pci_dev_base(seg, bus, devfn); - if (!addr) + if (!addr || reg < 256) /* Use type 1 for non-extended access */ return pci_conf1_read(seg,bus,devfn,reg,len,value); switch (len) { @@ -89,7 +89,7 @@ static int pci_mmcfg_write(unsigned int seg, unsigned int bus, return -EINVAL; addr = pci_dev_base(seg, bus, devfn); - if (!addr) + if (!addr || reg < 256) /* Use type 1 for non-extended access */ return pci_conf1_write(seg,bus,devfn,reg,len,value); switch (len) { ^ permalink raw reply related [flat|nested] 61+ messages in thread
* Re: [RFC PATCH] PCI MMCONFIG: add validation against ACPI motherboard resources 2007-05-23 21:20 ` Jesse Barnes @ 2007-05-23 22:24 ` Olivier Galibert 2007-05-23 22:31 ` Jesse Barnes 2007-05-23 22:48 ` Linus Torvalds 0 siblings, 2 replies; 61+ messages in thread From: Olivier Galibert @ 2007-05-23 22:24 UTC (permalink / raw) To: Jesse Barnes Cc: Linus Torvalds, Robert Hancock, linux-kernel, Andi Kleen, Chuck Ebbert, Len Brown On Wed, May 23, 2007 at 02:20:23PM -0700, Jesse Barnes wrote: > On Wednesday, May 23, 2007 1:56 pm Linus Torvalds wrote: > > Ehh. Even for PCIe, why not use the normal accesses for the first 256 > > bytes? Problem solved. > > Ok, this patch also works. We still need to enable mmconfig space for > PCIe and extended config space, but we can continue to use type 1 > accesses for legacy PCI config space cycles to avoid decode trouble > with mmconfig based BAR sizing. Isn't that a mac-intel instant killer? AFAIK they don't have type1, period. OG. ^ permalink raw reply [flat|nested] 61+ messages in thread
* Re: [RFC PATCH] PCI MMCONFIG: add validation against ACPI motherboard resources 2007-05-23 22:24 ` Olivier Galibert @ 2007-05-23 22:31 ` Jesse Barnes 2007-05-23 22:48 ` Linus Torvalds 1 sibling, 0 replies; 61+ messages in thread From: Jesse Barnes @ 2007-05-23 22:31 UTC (permalink / raw) To: Olivier Galibert Cc: Linus Torvalds, Robert Hancock, linux-kernel, Andi Kleen, Chuck Ebbert, Len Brown On Wednesday, May 23, 2007 3:24 pm Olivier Galibert wrote: > On Wed, May 23, 2007 at 02:20:23PM -0700, Jesse Barnes wrote: > > On Wednesday, May 23, 2007 1:56 pm Linus Torvalds wrote: > > > Ehh. Even for PCIe, why not use the normal accesses for the first > > > 256 bytes? Problem solved. > > > > Ok, this patch also works. We still need to enable mmconfig space > > for PCIe and extended config space, but we can continue to use type > > 1 accesses for legacy PCI config space cycles to avoid decode > > trouble with mmconfig based BAR sizing. > > Isn't that a mac-intel instant killer? AFAIK they don't have type1, > period. Yuck. I'll have to add a check for type 1 then... but that also means Macs will probably want the decode disable stuff I posted earlier. Jesse ^ permalink raw reply [flat|nested] 61+ messages in thread
* Re: [RFC PATCH] PCI MMCONFIG: add validation against ACPI motherboard resources 2007-05-23 22:24 ` Olivier Galibert 2007-05-23 22:31 ` Jesse Barnes @ 2007-05-23 22:48 ` Linus Torvalds 2007-05-23 22:55 ` Jesse Barnes 1 sibling, 1 reply; 61+ messages in thread From: Linus Torvalds @ 2007-05-23 22:48 UTC (permalink / raw) To: Olivier Galibert Cc: Jesse Barnes, Robert Hancock, linux-kernel, Andi Kleen, Chuck Ebbert, Len Brown On Thu, 24 May 2007, Olivier Galibert wrote: > > Isn't that a mac-intel instant killer? AFAIK they don't have type1, > period. mac-intel are totally standard Intel chipsets. They have all of conf1/conf2/mmconfig afaik. I just happily booted my mac-mini with "pci=nommconf", nothing bad happened, and the kernel says PCI: Using configuration type 1 and I don't think you even _can_ disable conf1 type accesses: they are deep in the Intel chipsets. Of course, in a virtualized environment, anything can happen. Virtual machines prefer mmconf, because you can use page-level remapping to hide devices or make pseudo-devices show up by mapping in pages that have nothing to do with the true hardware. So no, I don't think Alan was totally smoking crack when he talked about "trusted" computing. Read the above paragraph a few times. (You can do it with trapping IO port accesses too, but it's going to cost you a lot, so if you want to make a fast but untrustworthy setup, MMIO is the better option). Linus ^ permalink raw reply [flat|nested] 61+ messages in thread
* Re: [RFC PATCH] PCI MMCONFIG: add validation against ACPI motherboard resources 2007-05-23 22:48 ` Linus Torvalds @ 2007-05-23 22:55 ` Jesse Barnes 2007-05-24 0:21 ` Linus Torvalds 0 siblings, 1 reply; 61+ messages in thread From: Jesse Barnes @ 2007-05-23 22:55 UTC (permalink / raw) To: Linus Torvalds Cc: Olivier Galibert, Robert Hancock, linux-kernel, Andi Kleen, Chuck Ebbert, Len Brown On Wednesday, May 23, 2007 3:48 pm Linus Torvalds wrote: > On Thu, 24 May 2007, Olivier Galibert wrote: > > Isn't that a mac-intel instant killer? AFAIK they don't have > > type1, period. > > mac-intel are totally standard Intel chipsets. They have all of > conf1/conf2/mmconfig afaik. > > I just happily booted my mac-mini with "pci=nommconf", nothing bad > happened, and the kernel says > > PCI: Using configuration type 1 > > and I don't think you even _can_ disable conf1 type accesses: they > are deep in the Intel chipsets. After I sent my last message I realized the same thing... though I occasionally hear people talk about removing it (I seriously doubt that will ever happen). I don't even think there's a way to disable type 1 config access on Intel chipsets... So the last patch is ok then, as long as we can find mmconfig space in the first place, but that's a separate problem for another set of patches (ones that seem to be working fairly well now btw). Jesse ^ permalink raw reply [flat|nested] 61+ messages in thread
* Re: [RFC PATCH] PCI MMCONFIG: add validation against ACPI motherboard resources 2007-05-23 22:55 ` Jesse Barnes @ 2007-05-24 0:21 ` Linus Torvalds 2007-05-24 2:59 ` Jesse Barnes 2007-05-24 6:18 ` Jeff Garzik 0 siblings, 2 replies; 61+ messages in thread From: Linus Torvalds @ 2007-05-24 0:21 UTC (permalink / raw) To: Jesse Barnes Cc: Olivier Galibert, Robert Hancock, linux-kernel, Andi Kleen, Chuck Ebbert, Len Brown On Wed, 23 May 2007, Jesse Barnes wrote: > > After I sent my last message I realized the same thing... though I > occasionally hear people talk about removing it (I seriously doubt that > will ever happen). I don't even think there's a way to disable type 1 > config access on Intel chipsets... Considering that the chipsets still have support for features that *really* aren't used (and haven't been used in over a decade), I doubt the conf1 thing is going away any time soon. Things like: A20 gate, 15-16MB holes, i387 FP exception on irq 13 are totally pointless in this day and age. Things like the DMA controller are getting there, along with PS/2 keyboard support. So there's a lot of things that are likely to be removed before conf1 accesses would. Removing CONF1 accesses would break every single current OS, they'll do that ten years from now at the earliest. Linus ^ permalink raw reply [flat|nested] 61+ messages in thread
* Re: [RFC PATCH] PCI MMCONFIG: add validation against ACPI motherboard resources 2007-05-24 0:21 ` Linus Torvalds @ 2007-05-24 2:59 ` Jesse Barnes 2007-05-24 3:18 ` Linus Torvalds 2007-05-24 6:18 ` Jeff Garzik 1 sibling, 1 reply; 61+ messages in thread From: Jesse Barnes @ 2007-05-24 2:59 UTC (permalink / raw) To: Linus Torvalds Cc: Olivier Galibert, Robert Hancock, linux-kernel, Andi Kleen, Chuck Ebbert, Len Brown On Wednesday, May 23, 2007 5:21:13 Linus Torvalds wrote: > On Wed, 23 May 2007, Jesse Barnes wrote: > > After I sent my last message I realized the same thing... though I > > occasionally hear people talk about removing it (I seriously doubt that > > will ever happen). I don't even think there's a way to disable type 1 > > config access on Intel chipsets... > > Considering that the chipsets still have support for features that > *really* aren't used (and haven't been used in over a decade), I doubt the > conf1 thing is going away any time soon. > > Things like: A20 gate, 15-16MB holes, i387 FP exception on irq 13 are > totally pointless in this day and age. Things like the DMA controller are > getting there, along with PS/2 keyboard support. > > So there's a lot of things that are likely to be removed before conf1 > accesses would. Removing CONF1 accesses would break every single current > OS, they'll do that ten years from now at the earliest. So what do you think? You ok with enabling mmconfig if it's available as long as we use type 1 accesses for non-extended stuff? If so, I think the patches are pretty much ready... Thanks, Jesse ^ permalink raw reply [flat|nested] 61+ messages in thread
* Re: [RFC PATCH] PCI MMCONFIG: add validation against ACPI motherboard resources 2007-05-24 2:59 ` Jesse Barnes @ 2007-05-24 3:18 ` Linus Torvalds 2007-05-24 3:20 ` Linus Torvalds 0 siblings, 1 reply; 61+ messages in thread From: Linus Torvalds @ 2007-05-24 3:18 UTC (permalink / raw) To: Jesse Barnes Cc: Olivier Galibert, Robert Hancock, linux-kernel, Andi Kleen, Chuck Ebbert, Len Brown On Wed, 23 May 2007, Jesse Barnes wrote: > > So what do you think? You ok with enabling mmconfig if it's available as long > as we use type 1 accesses for non-extended stuff? If so, I think the patches > are pretty much ready... Sure. I think mmconfig is perfectly sane if it falls back to conf1 accesses for legacy stuff.. And I also actually think that your patch to disable MMIO/PIO when testing the BAR size is fine - I just think that it should likely only be done for non-bridge devices (or at least non-host-bridge). Linus ^ permalink raw reply [flat|nested] 61+ messages in thread
* Re: [RFC PATCH] PCI MMCONFIG: add validation against ACPI motherboard resources 2007-05-24 3:18 ` Linus Torvalds @ 2007-05-24 3:20 ` Linus Torvalds 2007-05-24 3:40 ` Jesse Barnes 0 siblings, 1 reply; 61+ messages in thread From: Linus Torvalds @ 2007-05-24 3:20 UTC (permalink / raw) To: Jesse Barnes Cc: Olivier Galibert, Robert Hancock, linux-kernel, Andi Kleen, Chuck Ebbert, Len Brown On Wed, 23 May 2007, Linus Torvalds wrote: > > Sure. I think mmconfig is perfectly sane if it falls back to conf1 > accesses for legacy stuff.. .. but without a regression, it's obviously a post-2.6.22 thing, I guess I should make that clear, just because I think people send me patches after -rc1 way too eagerly just because they think it fixes a bug. Basically if it's not something that has _ever_ worked some way, it's not a bug, it's a feature ;) Linus ^ permalink raw reply [flat|nested] 61+ messages in thread
* Re: [RFC PATCH] PCI MMCONFIG: add validation against ACPI motherboard resources 2007-05-24 3:20 ` Linus Torvalds @ 2007-05-24 3:40 ` Jesse Barnes 2007-05-24 5:19 ` Robert Hancock 0 siblings, 1 reply; 61+ messages in thread From: Jesse Barnes @ 2007-05-24 3:40 UTC (permalink / raw) To: Linus Torvalds Cc: Olivier Galibert, Robert Hancock, linux-kernel, Andi Kleen, Chuck Ebbert, Len Brown On Wednesday, May 23, 2007 8:20:14 Linus Torvalds wrote: > On Wed, 23 May 2007, Linus Torvalds wrote: > > Sure. I think mmconfig is perfectly sane if it falls back to conf1 > > accesses for legacy stuff.. > > .. but without a regression, it's obviously a post-2.6.22 thing, I guess I > should make that clear, just because I think people send me patches after > -rc1 way too eagerly just because they think it fixes a bug. > > Basically if it's not somethign that has _ever_ worked some way, it's not > a bug, it's a feature ;) No, I know better than to send something after your merge window closes. I have no desire to be flamed even further on this topic. :) And come to think of it, adding the enable/disable bits might be good even with the patch to make legacy accesses go through type 1, since PCIe BAR probing is probably done the same way (I haven't looked) and so we might run into the same problems there. Thanks, Jesse ^ permalink raw reply [flat|nested] 61+ messages in thread
* Re: [RFC PATCH] PCI MMCONFIG: add validation against ACPI motherboard resources 2007-05-24 3:40 ` Jesse Barnes @ 2007-05-24 5:19 ` Robert Hancock 0 siblings, 0 replies; 61+ messages in thread From: Robert Hancock @ 2007-05-24 5:19 UTC (permalink / raw) To: Jesse Barnes Cc: Linus Torvalds, Olivier Galibert, linux-kernel, Andi Kleen, Chuck Ebbert, Len Brown Jesse Barnes wrote: > On Wednesday, May 23, 2007 8:20:14 Linus Torvalds wrote: >> On Wed, 23 May 2007, Linus Torvalds wrote: >>> Sure. I think mmconfig is perfectly sane if it falls back to conf1 >>> accesses for legacy stuff.. >> .. but without a regression, it's obviously a post-2.6.22 thing, I guess I >> should make that clear, just because I think people send me patches after >> -rc1 way too eagerly just because they think it fixes a bug. >> >> Basically if it's not somethign that has _ever_ worked some way, it's not >> a bug, it's a feature ;) > > No, I know better than to send something after your merge window closes. I > have no desire to be flamed even further on this topic. :) > > And come to think of it, adding the enable/disable bits might be good even > with the patch to make legacy accesses go through type 1, since PCIe BAR > probing is probably done the same way (I haven't looked) and so we might run > into the same problems there. I think that disabling decode on non-host-bridge devices during the BAR sizing is something we should at least try, indeed. The issue I have with forcing legacy config space accesses to type1 is that it would make it much less obvious if the MMCONFIG access wasn't working properly. You'd likely be able to boot up but then wonder why something that does extended config space accesses didn't work or hung the box. As I mentioned before, either we trust the MMCONFIG or we don't, and if we decide that we don't on a particular box, we should really be shutting it off entirely. 
Hopefully with the ACPI reservation checking patch and the disable-decode-during-BAR-sizing patch we wouldn't need to add that restriction. But yes, post-2.6.22 for all of this :-) -- Robert Hancock Saskatoon, SK, Canada To email, remove "nospam" from hancockr@nospamshaw.ca Home Page: http://www.roberthancock.com/ ^ permalink raw reply [flat|nested] 61+ messages in thread
* Re: [RFC PATCH] PCI MMCONFIG: add validation against ACPI motherboard resources 2007-05-24 0:21 ` Linus Torvalds 2007-05-24 2:59 ` Jesse Barnes @ 2007-05-24 6:18 ` Jeff Garzik 2007-05-24 15:42 ` Linus Torvalds 1 sibling, 1 reply; 61+ messages in thread From: Jeff Garzik @ 2007-05-24 6:18 UTC (permalink / raw) To: Linus Torvalds Cc: Jesse Barnes, Olivier Galibert, Robert Hancock, linux-kernel, Andi Kleen, Chuck Ebbert, Len Brown Linus Torvalds wrote: > Things like: A20 gate, 15-16MB holes, i387 FP exception on irq 13 are > totally pointless in this day and age. Things like the DMA controller are > getting there, along with PS/2 keyboard support. The latest Intel chipset I have (ICH9) is legacy free: no serial port and no PS/2 ports. I had to disable the Linux PS2 input drivers completely, just to get the thing to boot. Whee, "progress". :) Jeff, digging out that USB debug cable cuz there's no serial ^ permalink raw reply [flat|nested] 61+ messages in thread
* Re: [RFC PATCH] PCI MMCONFIG: add validation against ACPI motherboard resources 2007-05-24 6:18 ` Jeff Garzik @ 2007-05-24 15:42 ` Linus Torvalds 0 siblings, 0 replies; 61+ messages in thread From: Linus Torvalds @ 2007-05-24 15:42 UTC (permalink / raw) To: Jeff Garzik Cc: Jesse Barnes, Olivier Galibert, Robert Hancock, linux-kernel, Andi Kleen, Chuck Ebbert, Len Brown On Thu, 24 May 2007, Jeff Garzik wrote: > > The latest Intel chipset I have (ICH9) is legacy free: no serial port and no > PS/2 ports. I had to disable the Linux PS2 input drivers completely, just to > get the thing to boot. Ahh, that would be a bug. Can you help trying to debug where it locks up? I'm also surprised, since on the mac mini I have, I already have: i8042.c: No controller found. and it all works beautifully. Of course, it only did that after the horrible crud that is "grub" got fixed, because the bootloader used to wait forever, but I thought the kernel itself was able to handle a missing PS/2 controller fine. Linus ^ permalink raw reply [flat|nested] 61+ messages in thread
* Re: [RFC PATCH] PCI MMCONFIG: add validation against ACPI motherboard resources 2007-05-23 20:20 ` Linus Torvalds 2007-05-23 20:38 ` Alan Cox 2007-05-23 20:49 ` Jesse Barnes @ 2007-05-23 23:04 ` Robert Hancock 2 siblings, 0 replies; 61+ messages in thread From: Robert Hancock @ 2007-05-23 23:04 UTC (permalink / raw) To: Linus Torvalds Cc: Jesse Barnes, Olivier Galibert, linux-kernel, Andi Kleen, Chuck Ebbert, Len Brown Linus Torvalds wrote: > > On Wed, 23 May 2007, Jesse Barnes wrote: >> Fixed it (finally). I don't think moving the 64 bit probing around >> would make a difference, since we'd restore its original value anyway >> before moving on to the 32 bit probe which is where I think the problem >> is. > > Well, the thing is, I'm pretty sure there is at least one northbridge that > stops memory accesses from the CPU when you turn off the MEM bit on it. > Oops, you just killed the machine. Which is retarded, since the command bits are only supposed to be for memory ranges that are part of the BARs, it's not supposed to completely kill the device function. Unless somehow the memory on that system is accessed through the PCI bus or something. Anyway, it's something we have to deal with. > > Looking at the 925X datasheet (which I happened to have around in my > google search history because of the discussions of the sky2 DMA > problems), it looks like at least that one just hardcodes the MEM bit to > be 1, and thus writing to it is a total no-op. > > But I really think that clearing the MEM bit for at least the host bridge > is conceptually quite wrong, even if it might turn out that all chipsets > end up just saying (like Intel) "screw it, the user is insane, we're not > going to actually do what he asks us to do". > > Do we really want to be that insane? Turn off memory accesses when probing > the CPU host bridge? > > So at a _minimum_ I would say that that thing needs to be more careful > about host bridges. Maybe it's not needed, who knows? 
I think we should likely avoid disabling the command bits on host bridges (maybe any bridge) due to this risk of disabling something that will break things. Ideally we can get around this without doing any disabling at all, as noted in my last email. > >> Linus, since you were the one concerned about breaking working setups, >> what do you think? Should we use this approach, or specifically quirk >> out cases where mmconfig space might conflict with BAR probing? > > So see above. I think at a minimum, we should consider the host bridge > special. > > I also suspect that we'd be simply better off if we didn't use mmconfig at > all unless we _have_ to. Why use mmconfig for the standard BAR accesses? > Is there really any reason? I can understand using it for extended config > space, since then the old-fashioned approach won't work. But for normal > accesses? What's the point, really? Why not? Either you trust that the MMCONFIG is working or you don't. If you trust it, you might as well use it for everything, and if you don't, you can't risk using it for anything. If there are problems that show up only with MMCONFIG, doing what you propose would simply cover them up until somebody actually tried accessing extended config space. > mmconfig seems to be fundamentally designed to be impossible to bootstrap > off, so there's no way you can have a machine that _only_ supports > mmconfig. So why do people seem to think it's so wonderful? Please fill me > in on this fundamental mystery. Sure you can bootstrap off it, you just need to have some way to know where to find it (either ACPI or some other system-specific mechanism). > > Quite frankly, if we just didn't use mmconfig, the whole issue would go > away. Isn't _that_ the much better solution? I don't think that is going to be viable in the long run now that Windows Vista is out and MS is actually encouraging HW developers to allow using that config space.. 
-- Robert Hancock Saskatoon, SK, Canada To email, remove "nospam" from hancockr@nospamshaw.ca Home Page: http://www.roberthancock.com/ ^ permalink raw reply [flat|nested] 61+ messages in thread
* Re: [RFC PATCH] PCI MMCONFIG: add validation against ACPI motherboard resources 2007-05-23 18:52 ` Jesse Barnes 2007-05-23 20:20 ` Linus Torvalds @ 2007-05-23 23:04 ` Robert Hancock 2007-05-23 23:06 ` Jesse Barnes 1 sibling, 1 reply; 61+ messages in thread From: Robert Hancock @ 2007-05-23 23:04 UTC (permalink / raw) To: Jesse Barnes Cc: Olivier Galibert, linux-kernel, Andi Kleen, Chuck Ebbert, Len Brown, Linus Torvalds Jesse Barnes wrote: > On Tuesday, May 22, 2007 6:06 pm Robert Hancock wrote: >> There was a big discussion about this back in 2002, in which Linus >> wasn't overly enthused about disabling the decode during probing due >> to risk of causing problems with some devices: >> >> http://lkml.org/lkml/2002/12/19/145 >> >> In this particular case (64-bit BAR) we might be able to avoid the >> problem by changing the order in which we probe the two halves of the >> address, i.e. change the top half to 0xffffffff before messing with >> the bottom half and then change it back last. That way, we end up >> mapping it way to the top of 64-bit address space, which hopefully is >> less likely to conflict.. > > Fixed it (finally). I don't think moving the 64 bit probing around > would make a difference, since we'd restore its original value anyway > before moving on to the 32 bit probe which is where I think the problem > is. 
You couldn't just reorder the code the way it is now, you'd have to rearrange the way we do things for 64-bit BARs: -write FFFFFFFF to high part of 64-bit address (we end up moving the BAR to 0xFFFFFFFFC0000000 for example) -If any bits stick, we know what the size is now (more than 4GB of decode), so just change it back, we're done -If not, we need to check the low part, so write FFFFFFFF to low part of 64-bit address (BAR moves to 0xFFFFFFFFFFFFFFFF) -Check which bits stick and calculate the address -Change the low part of the address back (BAR moves to 0xFFFFFFFFC0000000) -Change the high part of the address back (BAR moves to the original 0xC0000000 address) This means that at no point do we map the BAR anywhere near the top of 32-bit memory, so we should avoid this issue in this particular case. I don't think this strategy is too likely to break anything, surely less likely than disabling command bits. Jesse, you might want to try hacking up something like this and see what happens. -- Robert Hancock Saskatoon, SK, Canada To email, remove "nospam" from hancockr@nospamshaw.ca Home Page: http://www.roberthancock.com/ ^ permalink raw reply [flat|nested] 61+ messages in thread
* Re: [RFC PATCH] PCI MMCONFIG: add validation against ACPI motherboard resources 2007-05-23 23:04 ` Robert Hancock @ 2007-05-23 23:06 ` Jesse Barnes 0 siblings, 0 replies; 61+ messages in thread From: Jesse Barnes @ 2007-05-23 23:06 UTC (permalink / raw) To: Robert Hancock Cc: Olivier Galibert, linux-kernel, Andi Kleen, Chuck Ebbert, Len Brown, Linus Torvalds On Wednesday, May 23, 2007 4:04 pm Robert Hancock wrote: > Jesse Barnes wrote: > > On Tuesday, May 22, 2007 6:06 pm Robert Hancock wrote: > >> There was a big discussion about this back in 2002, in which Linus > >> wasn't overly enthused about disabling the decode during probing > >> due to risk of causing problems with some devices: > >> > >> http://lkml.org/lkml/2002/12/19/145 > >> > >> In this particular case (64-bit BAR) we might be able to avoid the > >> problem by changing the order in which we probe the two halves of > >> the address, i.e. change the top half to 0xffffffff before messing > >> with the bottom half and then change it back last. That way, we > >> end up mapping it way to the top of 64-bit address space, which > >> hopefully is less likely to conflict.. > > > > Fixed it (finally). I don't think moving the 64 bit probing around > > would make a difference, since we'd restore its original value > > anyway before moving on to the 32 bit probe which is where I think > > the problem is. 
> > You couldn't just reorder the code the way it is now, you'd have to > rearrange the way we do things for 64-bit BARs: > > -write FFFFFFFF to high part of 64-bit address (we end up moving the > BAR to 0xFFFFFFFFC0000000 for example) > -If any bits stick, we know what the size is now (more than 4GB of > decode), so just change it back, we're done > -If not, we need to check the low part, so write FFFFFFFF to low part > of 64-bit address (BAR moves to 0xFFFFFFFFFFFFFFFF) > -Check which bits stick and calculate the address > -Change the low part of the address back (BAR moves to > 0xFFFFFFFFC000000) -Change the high part of the address back (BAR > moves to the original 0xC0000000 address) > > This means that at no point do we map the BAR anywhere near the top > of 32-bit memory, so we should avoid this issue in this particular > case. I don't think this strategy is too likely to break anything, > surely less likely than disabling command bits. Jesse, you might want > to try hacking up something like this and see what happens. Ah yeah, that would probably work in this particular case, but doesn't seem very general. I think just using type 1 accesses for non-extended config space is a bit more solid. Jesse ^ permalink raw reply [flat|nested] 61+ messages in thread
* Re: [RFC PATCH] PCI MMCONFIG: add validation against ACPI motherboard resources 2007-04-30 2:14 [RFC PATCH] PCI MMCONFIG: add validation against ACPI motherboard resources Robert Hancock 2007-04-30 2:59 ` Randy Dunlap 2007-04-30 22:59 ` Olivier Galibert @ 2007-05-24 0:02 ` Jesse Barnes 2 siblings, 0 replies; 61+ messages in thread From: Jesse Barnes @ 2007-05-24 0:02 UTC (permalink / raw) To: Robert Hancock; +Cc: linux-kernel, Andi Kleen, Chuck Ebbert, Len Brown On Sunday, April 29, 2007 7:14 pm Robert Hancock wrote: > This path adds validation of the MMCONFIG table against the ACPI > reserved motherboard resources. If the MMCONFIG table is found to be > reserved in ACPI, we don't bother checking the E820 table. The PCI > Express firmware spec apparently tells BIOS developers that > reservation in ACPI is required and E820 reservation is optional, so > checking against ACPI first makes sense. Many BIOSes don't reserve > the MMCONFIG region in E820 even though it is perfectly functional, > the existing check needlessly disables MMCONFIG in these cases. > > In order to do this, MMCONFIG setup has been split into two phases. > If PCI configuration type 1 is not available (like on EFI Macs) then > MMCONFIG is enabled early as before. Otherwise, it is enabled later > after the ACPI interpreter is enabled, since we need to be able to > execute control methods in order to check the ACPI reserved > resources. Presently this is just triggered off the end of ACPI > interpreter initialization. > > There are a few other behavioral changes here: > > -Validate all MMCONFIG configurations provided, not just the first > one. > > -Validate the entire required length of each configuration according > to the provided ending bus number is reserved, not just the minimum > required allocation. > > -Validate that the area is reserved even if we read it from the > chipset directly and not from the MCFG table. 
This catches the case > where the BIOS didn't set the location properly in the chipset and > has mapped it over other things it shouldn't have. This might be > overly pessimistic - we might be able to instead verify that no other > reserved resources (like chipset registers) are inside this memory > range. > > Some testing is needed to see if this rejects MMCONFIG on all systems > where it is problematic. There were some patches floating around to > read the table location out of the chipset for Intel 915 and 965, I > think the author found the latter to be problematic since the chipset > had the table mapped over top of motherboard resources. The extra > checking here may catch that case if we add that chipset-specific > support. > > Applies to 2.6.21.1. > > Signed-off-by: Robert Hancock <hancockr@shaw.ca> Acked-by: Jesse Barnes <jesse.barnes@intel.com> As long as we get a fix for the mmconfig based probing issues in the other thread, I think this patch should go in. Robert, maybe you could submit it along with this one (and an i386 equivalent)? Type 1 config access is already a fallback for mmconfig for x86_64 at least, so it should be safe for non-extended access too, and it avoids problems with our lack of decode disable in the generic PCI probing code. 
Signed-off-by: Jesse Barnes <jesse.barnes@intel.com> diff --git a/arch/x86_64/pci/mmconfig.c b/arch/x86_64/pci/mmconfig.c index 65d8273..5052f80 100644 --- a/arch/x86_64/pci/mmconfig.c +++ b/arch/x86_64/pci/mmconfig.c @@ -61,7 +61,7 @@ static int pci_mmcfg_read(unsigned int seg, unsigned int bus, } addr = pci_dev_base(seg, bus, devfn); - if (!addr) + if (!addr || reg < 256) /* Use type 1 for non-extended access */ return pci_conf1_read(seg,bus,devfn,reg,len,value); switch (len) { @@ -89,7 +89,7 @@ static int pci_mmcfg_write(unsigned int seg, unsigned int bus, return -EINVAL; addr = pci_dev_base(seg, bus, devfn); - if (!addr) + if (!addr || reg < 256) /* Use type 1 for non-extended access */ return pci_conf1_write(seg,bus,devfn,reg,len,value); switch (len) { ^ permalink raw reply related [flat|nested] 61+ messages in thread
end of thread, other threads:[~2007-05-24 15:43 UTC | newest] Thread overview: 61+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2007-04-30 2:14 [RFC PATCH] PCI MMCONFIG: add validation against ACPI motherboard resources Robert Hancock 2007-04-30 2:59 ` Randy Dunlap 2007-04-30 22:59 ` Olivier Galibert 2007-04-30 23:26 ` Robert Hancock 2007-05-01 16:48 ` Jesse Barnes 2007-05-02 2:41 ` Jesse Barnes 2007-05-02 2:56 ` Jesse Barnes 2007-05-02 5:27 ` Jesse Barnes 2007-05-02 14:34 ` Robert Hancock 2007-05-02 17:57 ` Jesse Barnes 2007-05-02 23:45 ` Robert Hancock 2007-05-02 23:54 ` Jesse Barnes 2007-05-04 21:06 ` Jesse Barnes 2007-05-05 0:22 ` Robert Hancock 2007-05-21 19:10 ` Jesse Barnes 2007-05-21 19:26 ` Robert Hancock 2007-05-21 20:07 ` Jesse Barnes 2007-05-21 20:22 ` Jesse Barnes 2007-05-23 0:31 ` Robert Hancock 2007-05-23 0:38 ` Jesse Barnes 2007-05-23 0:53 ` Robert Hancock 2007-05-23 0:56 ` Jesse Barnes 2007-05-23 1:06 ` Robert Hancock 2007-05-23 18:52 ` Jesse Barnes 2007-05-23 20:20 ` Linus Torvalds 2007-05-23 20:38 ` Alan Cox 2007-05-23 20:45 ` Linus Torvalds 2007-05-23 20:49 ` Jesse Barnes 2007-05-23 20:56 ` Linus Torvalds 2007-05-23 21:03 ` Jesse Barnes 2007-05-23 21:09 ` Jeff Garzik 2007-05-23 21:35 ` Alan Cox 2007-05-23 21:35 ` Jeff Garzik 2007-05-23 21:37 ` Jesse Barnes 2007-05-23 21:42 ` Jeff Garzik 2007-05-23 23:07 ` Stephen Hemminger 2007-05-23 21:54 ` Linus Torvalds 2007-05-23 22:06 ` Jesse Barnes 2007-05-23 22:16 ` Linus Torvalds 2007-05-23 22:28 ` Jesse Barnes 2007-05-23 23:04 ` David Miller 2007-05-23 23:11 ` Jesse Barnes 2007-05-23 23:15 ` Robert Hancock 2007-05-23 23:21 ` Jesse Barnes 2007-05-23 21:20 ` Jesse Barnes 2007-05-23 22:24 ` Olivier Galibert 2007-05-23 22:31 ` Jesse Barnes 2007-05-23 22:48 ` Linus Torvalds 2007-05-23 22:55 ` Jesse Barnes 2007-05-24 0:21 ` Linus Torvalds 2007-05-24 2:59 ` Jesse Barnes 2007-05-24 3:18 ` Linus Torvalds 2007-05-24 3:20 ` Linus Torvalds 2007-05-24 3:40 ` Jesse 
Barnes 2007-05-24 5:19 ` Robert Hancock 2007-05-24 6:18 ` Jeff Garzik 2007-05-24 15:42 ` Linus Torvalds 2007-05-23 23:04 ` Robert Hancock 2007-05-23 23:04 ` Robert Hancock 2007-05-23 23:06 ` Jesse Barnes 2007-05-24 0:02 ` Jesse Barnes
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox