* [PATCH 3/3] Add NumaChip quirk
2011-07-22 10:44 [PATCH 1/3] Add Numachip APIC support Daniel J Blueman
@ 2011-07-22 10:44 ` Daniel J Blueman
0 siblings, 0 replies; 15+ messages in thread
From: Daniel J Blueman @ 2011-07-22 10:44 UTC (permalink / raw)
To: Ingo Molnar, Thomas Gleixner, H Peter Anvin
Cc: Steffen Persvold, linux-kernel, x86, Daniel J Blueman
Add quirk for Numascale's NumaChip to prevent resource conflicts.
Signed-off-by: Steffen Persvold <sp@numascale.com>
Signed-off-by: Daniel J Blueman <daniel@numascale-asia.com>
---
drivers/pci/quirks.c | 14 ++++++++++++++
include/linux/pci_ids.h | 4 ++++
2 files changed, 18 insertions(+), 0 deletions(-)
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 02145e9..4db74e0 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -2679,6 +2679,20 @@ static void __devinit quirk_hotplug_bridge(struct pci_dev *dev)
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_HINT, 0x0020, quirk_hotplug_bridge);
+/* Clear all resources on NumaChip to avoid conflicts */
+static void __devinit numachip_resource_fixup(struct pci_dev *pdev)
+{
+ int i;
+
+ for (i = PCI_STD_RESOURCES; i <= PCI_ROM_RESOURCE; i++)
+ memset(&pdev->resource[i], 0, sizeof(pdev->resource[i]));
+
+ dev_notice(&pdev->dev, "Disabled all PCI resources for NumaChip\n");
+}
+
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NUMASCALE,
+ PCI_DEVICE_ID_NUMASCALE_NUMACHIP0, numachip_resource_fixup);
+
/*
* This is a quirk for the Ricoh MMC controller found as a part of
* some mulifunction chips.
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index f8910e1..50d7319 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2390,6 +2390,10 @@
#define PCI_VENDOR_ID_AZWAVE 0x1a3b
+#define PCI_VENDOR_ID_NUMASCALE 0x1b47
+#define PCI_DEVICE_ID_NUMASCALE_NUMACHIP0 0x0601
+#define PCI_DEVICE_ID_NUMASCALE_NUMACHIP1 0x0602
+
#define PCI_VENDOR_ID_TEKRAM 0x1de1
#define PCI_DEVICE_ID_TEKRAM_DC290 0xdc29
--
1.7.4.1
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [PATCH 1/3] Add Numachip APIC support
@ 2011-10-18 8:22 Daniel J Blueman
2011-10-18 8:22 ` [PATCH 2/3] Add multi-node boot support Daniel J Blueman
2011-10-18 8:22 ` [PATCH 3/3] Add NumaChip quirk Daniel J Blueman
0 siblings, 2 replies; 15+ messages in thread
From: Daniel J Blueman @ 2011-10-18 8:22 UTC (permalink / raw)
To: Ingo Molnar, Thomas Gleixner, H Peter Anvin
Cc: Steffen Persvold, linux-kernel, x86, Daniel J Blueman
Add support for Numascale's NumaChip APIC mapping mechanism to allow
booting more than ~168 cores.
v2:
- [Steffen] enumerate only accessible northbridges
- [Daniel] rediffed and validated against 3.1-rc10
Signed-off-by: Steffen Persvold <sp@numascale.com>
Signed-off-by: Daniel J Blueman <daniel@numascale-asia.com>
---
arch/x86/Kconfig | 12 +
arch/x86/include/asm/numachip/numachip.h | 29 +++
arch/x86/include/asm/numachip/numachip_csr.h | 173 +++++++++++++
arch/x86/kernel/apic/Makefile | 1 +
arch/x86/kernel/apic/apic_numachip.c | 332 ++++++++++++++++++++++++++
arch/x86/kernel/cpu/intel_cacheinfo.c | 4 +
arch/x86/kernel/smpboot.c | 6 +-
7 files changed, 556 insertions(+), 1 deletions(-)
create mode 100644 arch/x86/include/asm/numachip/numachip.h
create mode 100644 arch/x86/include/asm/numachip/numachip_csr.h
create mode 100644 arch/x86/kernel/apic/apic_numachip.c
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 6a47bb2..712ea9c 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -344,6 +344,7 @@ config X86_EXTENDED_PLATFORM
If you enable this option then you'll be able to select support
for the following (non-PC) 64 bit x86 platforms:
+ Numascale NumaChip
ScaleMP vSMP
SGI Ultraviolet
@@ -352,6 +353,17 @@ config X86_EXTENDED_PLATFORM
endif
# This is an alphabetically sorted list of 64 bit extended platforms
# Please maintain the alphabetic order if and when there are additions
+config X86_NUMACHIP
+ bool "Numascale NumaChip"
+ depends on X86_64
+ depends on X86_EXTENDED_PLATFORM
+ depends on NUMA
+ depends on X86_X2APIC
+ depends on !EDAC_AMD64
+ ---help---
+ Adds support for Numascale NumaChip large-SMP systems. Needed to
+ enable more than ~168 cores.
+ If you don't have one of these, you should say N here.
config X86_VSMP
bool "ScaleMP vSMP"
diff --git a/arch/x86/include/asm/numachip/numachip.h b/arch/x86/include/asm/numachip/numachip.h
new file mode 100644
index 0000000..ccc3584
--- /dev/null
+++ b/arch/x86/include/asm/numachip/numachip.h
@@ -0,0 +1,29 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Numascale NumaConnect-Specific Header file
+ *
+ * Copyright (C) 2011 Numascale AS. All rights reserved.
+ *
+ * Send feedback to <support@numascale.com>
+ *
+ */
+
+#ifndef _ASM_X86_NUMACHIP_NUMACHIP_H
+#define _ASM_X86_NUMACHIP_NUMACHIP_H
+
+#ifdef CONFIG_X86_NUMACHIP
+
+extern int is_numachip_system(void);
+extern void numachip_system_init(void);
+
+#else /* X86_NUMACHIP */
+
+static inline int is_numachip_system(void) { return 0; }
+static inline void numachip_system_init(void) { }
+
+#endif /* X86_NUMACHIP */
+
+#endif /* _ASM_X86_NUMACHIP_NUMACHIP_H */
diff --git a/arch/x86/include/asm/numachip/numachip_csr.h b/arch/x86/include/asm/numachip/numachip_csr.h
new file mode 100644
index 0000000..e6d8bc9
--- /dev/null
+++ b/arch/x86/include/asm/numachip/numachip_csr.h
@@ -0,0 +1,173 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Numascale NumaConnect-Specific Header file
+ *
+ * Copyright (C) 2011 Numascale AS. All rights reserved.
+ *
+ * Send feedback to <support@numascale.com>
+ *
+ */
+
+#ifndef _ASM_X86_NUMACHIP_NUMACHIP_CSR_H
+#define _ASM_X86_NUMACHIP_NUMACHIP_CSR_H
+
+#include <linux/numa.h>
+#include <linux/percpu.h>
+#include <linux/io.h>
+#include <linux/swab.h>
+#include <asm/types.h>
+#include <asm/processor.h>
+
+#define NUMACHIP_CSR_NODE_SHIFT 16
+#define NUMACHIP_CSR_NODE_BITS(p) \
+ (((unsigned long)(p)) << NUMACHIP_CSR_NODE_SHIFT)
+
+#define NUMACHIP_CSR_NODE_MASK 0x0fff /* 4K nodes */
+
+/* 32K CSR space, b15 indicates geo/non-geo */
+#define NUMACHIP_CSR_OFFSET_MASK 0x7fffUL
+
+/* Global CSR space covers all 4K possible nodes with 64K CSR space per node */
+#define NUMACHIP_GLOBAL_CSR_BASE 0x3fff00000000ULL
+#define NUMACHIP_GLOBAL_CSR_LIM 0x3fff0fffffffULL
+#define NUMACHIP_GLOBAL_CSR_SIZE \
+ (NUMACHIP_GLOBAL_CSR_LIM - NUMACHIP_GLOBAL_CSR_BASE + 1)
+
+/* Local CSR space starts in global CSR space with "nodeid" = 0xfff0; however,
+ when using the direct mapping on x86_64, both start and size need to be
+ aligned to PMD_SIZE, which is 2M */
+#define NUMACHIP_LOCAL_CSR_BASE 0x3ffffe000000ULL
+#define NUMACHIP_LOCAL_CSR_LIM 0x3fffffffffffULL
+#define NUMACHIP_LOCAL_CSR_SIZE \
+ (NUMACHIP_LOCAL_CSR_LIM - NUMACHIP_LOCAL_CSR_BASE + 1)
+
+static inline void *numachip_global_csr_address(int node, unsigned long offset)
+{
+ return __va(NUMACHIP_GLOBAL_CSR_BASE | (1UL << 15) |
+ NUMACHIP_CSR_NODE_BITS(node & NUMACHIP_CSR_NODE_MASK) |
+ (offset & NUMACHIP_CSR_OFFSET_MASK));
+}
+
+static inline void *numachip_local_csr_address(unsigned long offset)
+{
+ return __va(NUMACHIP_LOCAL_CSR_BASE | (1UL << 15) |
+ NUMACHIP_CSR_NODE_BITS(0xfff0) |
+ (offset & NUMACHIP_CSR_OFFSET_MASK));
+}
+
+static inline unsigned int numachip_read_global_csr(int node,
+ unsigned long offset)
+{
+ return swab32(readl(numachip_global_csr_address(node, offset)));
+}
+
+static inline void numachip_write_global_csr(int node, unsigned long offset,
+ unsigned int val)
+{
+ writel(swab32(val), numachip_global_csr_address(node, offset));
+}
+
+static inline unsigned int numachip_read_local_csr(unsigned long offset)
+{
+ return swab32(readl(numachip_local_csr_address(offset)));
+}
+
+static inline void numachip_write_local_csr(unsigned long offset,
+ unsigned int val)
+{
+ writel(swab32(val), numachip_local_csr_address(offset));
+}
+
+/* ========================================================================= */
+/* NUMACHIP_CSR_G0_STATE_CLEAR */
+/* ========================================================================= */
+
+#define NUMACHIP_CSR_G0_STATE_CLEAR (0x000 + (0 << 12))
+union numachip_csr_g0_state_clear {
+ unsigned int v;
+ struct numachip_csr_g0_state_clear_s {
+ unsigned int _state:2;
+ unsigned int _rsvd_2_6:5;
+ unsigned int _lost:1;
+ unsigned int _rsvd_8_31:24;
+ } s;
+};
+
+/* ========================================================================= */
+/* NUMACHIP_CSR_G0_NODE_IDS */
+/* ========================================================================= */
+
+#define NUMACHIP_CSR_G0_NODE_IDS (0x008 + (0 << 12))
+union numachip_csr_g0_node_ids {
+ unsigned int v;
+ struct numachip_csr_g0_node_ids_s {
+ unsigned int _initialid:16;
+ unsigned int _nodeid:12;
+ unsigned int _rsvd_28_31:4;
+ } s;
+};
+
+/* ========================================================================= */
+/* NUMACHIP_CSR_G3_EXT_INTERRUPT_GEN */
+/* ========================================================================= */
+
+#define NUMACHIP_CSR_G3_EXT_INTERRUPT_GEN (0x030 + (3 << 12))
+union numachip_csr_g3_ext_interrupt_gen {
+ unsigned int v;
+ struct numachip_csr_g3_ext_interrupt_gen_s {
+ unsigned int _vector:8;
+ unsigned int _msgtype:3;
+ unsigned int _index:5;
+ unsigned int _destination_apic_id:16;
+ } s;
+};
+
+/* ========================================================================= */
+/* NUMACHIP_CSR_G3_EXT_INTERRUPT_STATUS */
+/* ========================================================================= */
+
+#define NUMACHIP_CSR_G3_EXT_INTERRUPT_STATUS (0x034 + (3 << 12))
+union numachip_csr_g3_ext_interrupt_status {
+ unsigned int v;
+ struct numachip_csr_g3_ext_interrupt_status_s {
+ unsigned int _result:32;
+ } s;
+};
+
+/* ========================================================================= */
+/* NUMACHIP_CSR_G3_EXT_INTERRUPT_DEST */
+/* ========================================================================= */
+
+#define NUMACHIP_CSR_G3_EXT_INTERRUPT_DEST (0x038 + (3 << 12))
+union numachip_csr_g3_ext_interrupt_dest {
+ unsigned int v;
+ struct numachip_csr_g3_ext_interrupt_dest_s {
+ unsigned int _interrupt:8;
+ unsigned int _rsvd_8_31:24;
+ } s;
+};
+
+/* ========================================================================= */
+/* NUMACHIP_CSR_G3_NC_ATT_MAP_SELECT */
+/* ========================================================================= */
+
+#define NUMACHIP_CSR_G3_NC_ATT_MAP_SELECT (0x7fc + (3 << 12))
+union numachip_csr_g3_nc_att_map_select {
+ unsigned int v;
+ struct numachip_csr_g3_nc_att_map_select_s {
+ unsigned int _upper_address_bits:4;
+ unsigned int _select_ram:4;
+ unsigned int _rsvd_8_31:24;
+ } s;
+};
+
+/* ========================================================================= */
+/* NUMACHIP_CSR_G3_NC_ATT_MAP_SELECT_0-255 */
+/* ========================================================================= */
+
+#define NUMACHIP_CSR_G3_NC_ATT_MAP_SELECT_0 (0x800 + (3 << 12))
+
+#endif /* _ASM_X86_NUMACHIP_NUMACHIP_CSR_H */
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
index 767fd04..0ae0323 100644
--- a/arch/x86/kernel/apic/Makefile
+++ b/arch/x86/kernel/apic/Makefile
@@ -10,6 +10,7 @@ obj-$(CONFIG_SMP) += ipi.o
ifeq ($(CONFIG_X86_64),y)
# APIC probe will depend on the listing order here
+obj-$(CONFIG_X86_NUMACHIP) += apic_numachip.o
obj-$(CONFIG_X86_UV) += x2apic_uv_x.o
obj-$(CONFIG_X86_X2APIC) += x2apic_phys.o
obj-$(CONFIG_X86_X2APIC) += x2apic_cluster.o
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
new file mode 100644
index 0000000..e999afa
--- /dev/null
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -0,0 +1,332 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Numascale NumaConnect-Specific APIC Code
+ *
+ * Copyright (C) 2011 Numascale AS. All rights reserved.
+ *
+ * Send feedback to <support@numascale.com>
+ *
+ */
+
+#include <linux/errno.h>
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <linux/hardirq.h>
+#include <linux/delay.h>
+
+#include <asm/numachip/numachip.h>
+#include <asm/numachip/numachip_csr.h>
+#include <asm/smp.h>
+#include <asm/apic.h>
+#include <asm/ipi.h>
+
+static int numachip_system;
+
+static struct apic apic_numachip;
+
+static int numachip_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+ if (!strncmp(oem_id, "NUMASC", 6)) {
+ numachip_system = 1;
+ return 1;
+ }
+
+ return 0;
+}
+
+int is_numachip_system(void)
+{
+ return numachip_system == 1;
+}
+EXPORT_SYMBOL_GPL(is_numachip_system);
+
+/*
+ * Set up the logical destination ID.
+ *
+ * Intel recommends to set DFR, LDR and TPR before enabling
+ * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
+ * document number 292116).
+ */
+static void numachip_init_apic_ldr(void)
+{
+ unsigned long val;
+ unsigned long num, id;
+
+ num = smp_processor_id();
+ id = 1UL << num;
+ apic_write(APIC_DFR, APIC_DFR_FLAT);
+ val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
+ val |= SET_APIC_LOGICAL_ID(id);
+ apic_write(APIC_LDR, val);
+}
+
+static unsigned int get_apic_id(unsigned long x)
+{
+ unsigned long value;
+ unsigned int id;
+
+ rdmsrl(MSR_FAM10H_NODE_ID, value);
+ id = ((x >> 24) & 0xffU) | ((value << 2) & 0x3f00U);
+
+ return id;
+}
+
+static unsigned long set_apic_id(unsigned int id)
+{
+ unsigned long x;
+
+ x = ((id & 0xffU) << 24);
+ return x;
+}
+
+static unsigned int read_xapic_id(void)
+{
+ return get_apic_id(apic_read(APIC_ID));
+}
+
+static int numachip_apic_id_registered(void)
+{
+ return physid_isset(read_xapic_id(), phys_cpu_present_map);
+}
+
+static int numachip_phys_pkg_id(int initial_apic_id, int index_msb)
+{
+ return initial_apic_id >> index_msb;
+}
+
+static const struct cpumask *numachip_target_cpus(void)
+{
+ return cpu_online_mask;
+}
+
+static void numachip_vector_allocation_domain(int cpu, struct cpumask *retmask)
+{
+ cpumask_clear(retmask);
+ cpumask_set_cpu(cpu, retmask);
+}
+
+static int __cpuinit numachip_wakeup_secondary(int phys_apicid,
+ unsigned long start_rip)
+{
+#ifdef CONFIG_SMP
+ union numachip_csr_g3_ext_interrupt_gen int_gen;
+ unsigned long flags;
+
+ int_gen.s._destination_apic_id = phys_apicid;
+ int_gen.s._vector = 0;
+ int_gen.s._msgtype = APIC_DM_INIT >> 8;
+ int_gen.s._index = 0;
+
+ local_irq_save(flags);
+ numachip_write_local_csr(NUMACHIP_CSR_G3_EXT_INTERRUPT_GEN,
+ int_gen.v);
+ local_irq_restore(flags);
+
+ mdelay(10);
+
+ int_gen.s._msgtype = APIC_DM_STARTUP >> 8;
+ int_gen.s._vector = start_rip >> 12;
+
+ local_irq_save(flags);
+ numachip_write_local_csr(NUMACHIP_CSR_G3_EXT_INTERRUPT_GEN,
+ int_gen.v);
+ local_irq_restore(flags);
+
+ atomic_set(&init_deasserted, 1);
+#endif
+ return 0;
+}
+
+static void numachip_send_IPI_one(int cpu, int vector)
+{
+ union numachip_csr_g3_ext_interrupt_gen int_gen;
+ int apicid = per_cpu(x86_cpu_to_apicid, cpu);
+
+ int_gen.s._destination_apic_id = apicid;
+ int_gen.s._vector = vector;
+ int_gen.s._msgtype =
+ (vector == NMI_VECTOR ? APIC_DM_NMI : APIC_DM_FIXED) >> 8;
+ int_gen.s._index = 0;
+
+ numachip_write_local_csr(NUMACHIP_CSR_G3_EXT_INTERRUPT_GEN,
+ int_gen.v);
+}
+
+static void numachip_send_IPI_mask(const struct cpumask *mask, int vector)
+{
+ unsigned int cpu;
+ unsigned long flags;
+
+ local_irq_save(flags);
+ for_each_cpu(cpu, mask)
+ numachip_send_IPI_one(cpu, vector);
+ local_irq_restore(flags);
+}
+
+static void numachip_send_IPI_mask_allbutself(const struct cpumask *mask,
+ int vector)
+{
+ unsigned int this_cpu = smp_processor_id();
+ unsigned int cpu;
+ unsigned long flags;
+
+ local_irq_save(flags);
+ for_each_cpu(cpu, mask) {
+ if (cpu != this_cpu)
+ numachip_send_IPI_one(cpu, vector);
+ }
+ local_irq_restore(flags);
+}
+
+static void numachip_send_IPI_allbutself(int vector)
+{
+ unsigned int this_cpu = smp_processor_id();
+ unsigned int cpu;
+ unsigned long flags;
+
+ local_irq_save(flags);
+ for_each_online_cpu(cpu) {
+ if (cpu != this_cpu)
+ numachip_send_IPI_one(cpu, vector);
+ }
+ local_irq_restore(flags);
+}
+
+static void numachip_send_IPI_all(int vector)
+{
+ numachip_send_IPI_mask(cpu_online_mask, vector);
+}
+
+static void numachip_send_IPI_self(int vector)
+{
+ __default_send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
+}
+
+static unsigned int numachip_cpu_mask_to_apicid(const struct cpumask *cpumask)
+{
+ int cpu;
+
+ /*
+ * We're using fixed IRQ delivery, can only return one phys APIC ID.
+ * May as well be the first.
+ */
+ cpu = cpumask_first(cpumask);
+ if ((unsigned)cpu < nr_cpu_ids)
+ return per_cpu(x86_cpu_to_apicid, cpu);
+ else
+ return BAD_APICID;
+}
+
+static unsigned int
+numachip_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+ const struct cpumask *andmask)
+{
+ int cpu;
+
+ /*
+ * We're using fixed IRQ delivery, can only return one phys APIC ID.
+ * May as well be the first.
+ */
+ for_each_cpu_and(cpu, cpumask, andmask) {
+ if (cpumask_test_cpu(cpu, cpu_online_mask))
+ break;
+ }
+ return per_cpu(x86_cpu_to_apicid, cpu);
+}
+
+static int __init numachip_probe(void)
+{
+ return apic == &apic_numachip;
+}
+
+static struct apic apic_numachip __refconst = {
+
+ .name = "NumaConnect system",
+ .probe = numachip_probe,
+ .acpi_madt_oem_check = numachip_acpi_madt_oem_check,
+ .apic_id_registered = numachip_apic_id_registered,
+
+ .irq_delivery_mode = dest_Fixed,
+ .irq_dest_mode = 0, /* physical */
+
+ .target_cpus = numachip_target_cpus,
+ .disable_esr = 0,
+ .dest_logical = 0,
+ .check_apicid_used = NULL,
+ .check_apicid_present = NULL,
+
+ .vector_allocation_domain = numachip_vector_allocation_domain,
+ .init_apic_ldr = numachip_init_apic_ldr,
+
+ .ioapic_phys_id_map = NULL,
+ .setup_apic_routing = NULL,
+ .multi_timer_check = NULL,
+ .cpu_present_to_apicid = default_cpu_present_to_apicid,
+ .apicid_to_cpu_present = NULL,
+ .setup_portio_remap = NULL,
+ .check_phys_apicid_present = default_check_phys_apicid_present,
+ .enable_apic_mode = NULL,
+ .phys_pkg_id = numachip_phys_pkg_id,
+ .mps_oem_check = NULL,
+
+ .get_apic_id = get_apic_id,
+ .set_apic_id = set_apic_id,
+ .apic_id_mask = 0xffU << 24,
+
+ .cpu_mask_to_apicid = numachip_cpu_mask_to_apicid,
+ .cpu_mask_to_apicid_and = numachip_cpu_mask_to_apicid_and,
+
+ .send_IPI_mask = numachip_send_IPI_mask,
+ .send_IPI_mask_allbutself = numachip_send_IPI_mask_allbutself,
+ .send_IPI_allbutself = numachip_send_IPI_allbutself,
+ .send_IPI_all = numachip_send_IPI_all,
+ .send_IPI_self = numachip_send_IPI_self,
+
+ .wakeup_secondary_cpu = numachip_wakeup_secondary,
+ .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
+ .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
+ .wait_for_init_deassert = NULL,
+ .smp_callin_clear_local_apic = NULL,
+ .inquire_remote_apic = NULL, /* REMRD not supported */
+
+ .read = native_apic_mem_read,
+ .write = native_apic_mem_write,
+ .icr_read = native_apic_icr_read,
+ .icr_write = native_apic_icr_write,
+ .wait_icr_idle = native_apic_wait_icr_idle,
+ .safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
+};
+
+static void __init map_csrs(void)
+{
+ printk(KERN_INFO "NumaChip: Mapping local CSR space (%016llx - %016llx)\n",
+ NUMACHIP_LOCAL_CSR_BASE,
+ NUMACHIP_LOCAL_CSR_BASE + NUMACHIP_LOCAL_CSR_SIZE - 1);
+ init_extra_mapping_uc(NUMACHIP_LOCAL_CSR_BASE, NUMACHIP_LOCAL_CSR_SIZE);
+
+ printk(KERN_INFO "NumaChip: Mapping global CSR space (%016llx - %016llx)\n",
+ NUMACHIP_GLOBAL_CSR_BASE,
+ NUMACHIP_GLOBAL_CSR_BASE + NUMACHIP_GLOBAL_CSR_SIZE - 1);
+ init_extra_mapping_uc(NUMACHIP_GLOBAL_CSR_BASE,
+ NUMACHIP_GLOBAL_CSR_SIZE);
+}
+
+void __init numachip_system_init(void)
+{
+ unsigned int val;
+
+ map_csrs();
+
+ val = numachip_read_local_csr(NUMACHIP_CSR_G0_NODE_IDS);
+ printk(KERN_INFO "NumaChip: Local NodeID = %08x\n", val);
+}
+
+apic_driver(apic_numachip);
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index c105c53..ca0b70e 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -354,6 +354,10 @@ static void __cpuinit amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf,
node = amd_get_nb_id(smp_processor_id());
+ /* sanity check, in case we haven't allocated enough */
+ if (node >= amd_nb_num())
+ return;
+
if (!l3_caches[node].nb) {
l3_caches[node].nb = node_to_amd_nb(node);
amd_calc_l3_indices(&l3_caches[node]);
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 9f548cb..f4b93de 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -66,6 +66,7 @@
#include <asm/apic.h>
#include <asm/io_apic.h>
#include <asm/setup.h>
+#include <asm/numachip/numachip.h>
#include <asm/uv/uv.h>
#include <linux/mc146818rtc.h>
@@ -513,7 +514,7 @@ wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip)
return (send_status | accept_status);
}
-static int __cpuinit
+int __cpuinit
wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
{
unsigned long send_status, accept_status = 0;
@@ -1094,6 +1095,9 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
print_cpu_info(&cpu_data(0));
x86_init.timers.setup_percpu_clockev();
+ if (is_numachip_system())
+ numachip_system_init();
+
if (is_uv_system())
uv_system_init();
--
1.7.5.4
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [PATCH 2/3] Add multi-node boot support
2011-10-18 8:22 [PATCH 1/3] Add Numachip APIC support Daniel J Blueman
@ 2011-10-18 8:22 ` Daniel J Blueman
2011-10-18 8:22 ` [PATCH 3/3] Add NumaChip quirk Daniel J Blueman
1 sibling, 0 replies; 15+ messages in thread
From: Daniel J Blueman @ 2011-10-18 8:22 UTC (permalink / raw)
To: Ingo Molnar, Thomas Gleixner, H Peter Anvin
Cc: Steffen Persvold, linux-kernel, x86, Daniel J Blueman
Fix booting multi-node systems with Numascale's NumaChip.
v2:
- [Daniel] rediffed and validated against 3.1-rc10
Signed-off-by: Steffen Persvold <sp@numascale.com>
Signed-off-by: Daniel J Blueman <daniel@numascale-asia.com>
---
arch/x86/kernel/cpu/amd.c | 7 +++++++
1 files changed, 7 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index b13ed39..4b551f5 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -7,6 +7,7 @@
#include <asm/apic.h>
#include <asm/cpu.h>
#include <asm/pci-direct.h>
+#include <asm/numachip/numachip.h>
#ifdef CONFIG_X86_64
# include <asm/numa_64.h>
@@ -350,6 +351,12 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
node = numa_cpu_node(cpu);
if (node == NUMA_NO_NODE)
node = per_cpu(cpu_llc_id, cpu);
+ else if (is_numachip_system()) {
+ /* fix for multi-node systems: set phys_proc_id and cpu_llc_id
+ to node number from ACPI SRAT table */
+ c->phys_proc_id = node;
+ per_cpu(cpu_llc_id, cpu) = node;
+ }
if (!node_online(node)) {
/*
--
1.7.5.4
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [PATCH 3/3] Add NumaChip quirk
2011-10-18 8:22 [PATCH 1/3] Add Numachip APIC support Daniel J Blueman
2011-10-18 8:22 ` [PATCH 2/3] Add multi-node boot support Daniel J Blueman
@ 2011-10-18 8:22 ` Daniel J Blueman
2011-10-25 13:54 ` Thomas Gleixner
2011-10-25 14:38 ` Bjorn Helgaas
1 sibling, 2 replies; 15+ messages in thread
From: Daniel J Blueman @ 2011-10-18 8:22 UTC (permalink / raw)
To: Ingo Molnar, Thomas Gleixner, H Peter Anvin
Cc: Steffen Persvold, linux-kernel, x86, Daniel J Blueman
Add quirk for Numascale's NumaChip to prevent resource conflicts.
v2:
- [Daniel] rediffed and validated against 3.1-rc10
Signed-off-by: Steffen Persvold <sp@numascale.com>
Signed-off-by: Daniel J Blueman <daniel@numascale-asia.com>
---
drivers/pci/quirks.c | 14 ++++++++++++++
include/linux/pci_ids.h | 4 ++++
2 files changed, 18 insertions(+), 0 deletions(-)
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 1196f61..051b793 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -2679,6 +2679,20 @@ static void __devinit quirk_hotplug_bridge(struct pci_dev *dev)
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_HINT, 0x0020, quirk_hotplug_bridge);
+/* Clear all resources on NumaChip to avoid conflicts */
+static void __devinit numachip_resource_fixup(struct pci_dev *pdev)
+{
+ int i;
+
+ for (i = PCI_STD_RESOURCES; i <= PCI_ROM_RESOURCE; i++)
+ memset(&pdev->resource[i], 0, sizeof(pdev->resource[i]));
+
+ dev_notice(&pdev->dev, "Disabled all PCI resources for NumaChip\n");
+}
+
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NUMASCALE,
+ PCI_DEVICE_ID_NUMASCALE_NUMACHIP0, numachip_resource_fixup);
+
/*
* This is a quirk for the Ricoh MMC controller found as a part of
* some mulifunction chips.
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index ae96bbe..02e0959 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2400,6 +2400,10 @@
#define PCI_VENDOR_ID_AZWAVE 0x1a3b
+#define PCI_VENDOR_ID_NUMASCALE 0x1b47
+#define PCI_DEVICE_ID_NUMASCALE_NUMACHIP0 0x0601
+#define PCI_DEVICE_ID_NUMASCALE_NUMACHIP1 0x0602
+
#define PCI_VENDOR_ID_TEKRAM 0x1de1
#define PCI_DEVICE_ID_TEKRAM_DC290 0xdc29
--
1.7.5.4
^ permalink raw reply related [flat|nested] 15+ messages in thread
* Re: [PATCH 3/3] Add NumaChip quirk
2011-10-18 8:22 ` [PATCH 3/3] Add NumaChip quirk Daniel J Blueman
@ 2011-10-25 13:54 ` Thomas Gleixner
2011-10-25 14:17 ` Jesse Barnes
2011-10-26 3:12 ` Daniel J Blueman
2011-10-25 14:38 ` Bjorn Helgaas
1 sibling, 2 replies; 15+ messages in thread
From: Thomas Gleixner @ 2011-10-25 13:54 UTC (permalink / raw)
To: Daniel J Blueman
Cc: Ingo Molnar, H Peter Anvin, Steffen Persvold, linux-kernel, x86
Daniel,
On Tue, 18 Oct 2011, Daniel J Blueman wrote:
> Add quirk for Numascale's NumaChip to prevent resource conflicts.
>
> v2:
> - [Daniel] rediffed and validated against 3.1-rc10
>
> Signed-off-by: Steffen Persvold <sp@numascale.com>
So, who wrote that code? If Steffen, then you should put a From:
.... when sending patches.
> Signed-off-by: Daniel J Blueman <daniel@numascale-asia.com>
> ---
> drivers/pci/quirks.c | 14 ++++++++++++++
> include/linux/pci_ids.h | 4 ++++
> 2 files changed, 18 insertions(+), 0 deletions(-)
Jesse,
can you take that or should I push it through x86 ?
Thanks,
tglx
> diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
> index 1196f61..051b793 100644
> --- a/drivers/pci/quirks.c
> +++ b/drivers/pci/quirks.c
> @@ -2679,6 +2679,20 @@ static void __devinit quirk_hotplug_bridge(struct pci_dev *dev)
>
> DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_HINT, 0x0020, quirk_hotplug_bridge);
>
> +/* Clear all resources on NumaChip to avoid conflicts */
> +static void __devinit numachip_resource_fixup(struct pci_dev *pdev)
> +{
> + int i;
> +
> + for (i = PCI_STD_RESOURCES; i <= PCI_ROM_RESOURCE; i++)
> + memset(&pdev->resource[i], 0, sizeof(pdev->resource[i]));
> +
> + dev_notice(&pdev->dev, "Disabled all PCI resources for NumaChip\n");
> +}
> +
> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NUMASCALE,
> + PCI_DEVICE_ID_NUMASCALE_NUMACHIP0, numachip_resource_fixup);
> +
> /*
> * This is a quirk for the Ricoh MMC controller found as a part of
> * some mulifunction chips.
> diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
> index ae96bbe..02e0959 100644
> --- a/include/linux/pci_ids.h
> +++ b/include/linux/pci_ids.h
> @@ -2400,6 +2400,10 @@
>
> #define PCI_VENDOR_ID_AZWAVE 0x1a3b
>
> +#define PCI_VENDOR_ID_NUMASCALE 0x1b47
> +#define PCI_DEVICE_ID_NUMASCALE_NUMACHIP0 0x0601
> +#define PCI_DEVICE_ID_NUMASCALE_NUMACHIP1 0x0602
> +
> #define PCI_VENDOR_ID_TEKRAM 0x1de1
> #define PCI_DEVICE_ID_TEKRAM_DC290 0xdc29
>
> --
> 1.7.5.4
>
>
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 3/3] Add NumaChip quirk
2011-10-25 13:54 ` Thomas Gleixner
@ 2011-10-25 14:17 ` Jesse Barnes
2011-10-26 3:12 ` Daniel J Blueman
1 sibling, 0 replies; 15+ messages in thread
From: Jesse Barnes @ 2011-10-25 14:17 UTC (permalink / raw)
To: Thomas Gleixner
Cc: Daniel J Blueman, Ingo Molnar, H Peter Anvin, Steffen Persvold,
linux-kernel, x86
On Tue, 25 Oct 2011 15:54:29 +0200 (CEST)
Thomas Gleixner <tglx@linutronix.de> wrote:
> Daniel,
>
> On Tue, 18 Oct 2011, Daniel J Blueman wrote:
>
> > Add quirk for Numascale's NumaChip to prevent resource conflicts.
> >
> > v2:
> > - [Daniel] rediffed and validated against 3.1-rc10
> >
> > Signed-off-by: Steffen Persvold <sp@numascale.com>
>
> So, who wrote that code? If Steffen, then you should put a From:
> .... when sending patches.
>
> > Signed-off-by: Daniel J Blueman <daniel@numascale-asia.com>
> > ---
> > drivers/pci/quirks.c | 14 ++++++++++++++
> > include/linux/pci_ids.h | 4 ++++
> > 2 files changed, 18 insertions(+), 0 deletions(-)
>
> Jesse,
>
> can you take that or should I push it through x86 ?
I can pull it in.
Thanks,
Jesse
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 3/3] Add NumaChip quirk
2011-10-18 8:22 ` [PATCH 3/3] Add NumaChip quirk Daniel J Blueman
2011-10-25 13:54 ` Thomas Gleixner
@ 2011-10-25 14:38 ` Bjorn Helgaas
2011-10-25 15:36 ` Steffen Persvold
1 sibling, 1 reply; 15+ messages in thread
From: Bjorn Helgaas @ 2011-10-25 14:38 UTC (permalink / raw)
To: Daniel J Blueman
Cc: Ingo Molnar, Thomas Gleixner, H Peter Anvin, Steffen Persvold,
linux-kernel, x86, Jesse Barnes
On Tue, Oct 18, 2011 at 2:22 AM, Daniel J Blueman
<daniel@numascale-asia.com> wrote:
> Add quirk for Numascale's NumaChip to prevent resource conflicts.
>
> v2:
> - [Daniel] rediffed and validated against 3.1-rc10
>
> Signed-off-by: Steffen Persvold <sp@numascale.com>
> Signed-off-by: Daniel J Blueman <daniel@numascale-asia.com>
> ---
> drivers/pci/quirks.c | 14 ++++++++++++++
> include/linux/pci_ids.h | 4 ++++
> 2 files changed, 18 insertions(+), 0 deletions(-)
>
> diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
> index 1196f61..051b793 100644
> --- a/drivers/pci/quirks.c
> +++ b/drivers/pci/quirks.c
> @@ -2679,6 +2679,20 @@ static void __devinit quirk_hotplug_bridge(struct pci_dev *dev)
>
> DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_HINT, 0x0020, quirk_hotplug_bridge);
>
> +/* Clear all resources on NumaChip to avoid conflicts */
> +static void __devinit numachip_resource_fixup(struct pci_dev *pdev)
> +{
> + int i;
> +
> + for (i = PCI_STD_RESOURCES; i <= PCI_ROM_RESOURCE; i++)
> + memset(&pdev->resource[i], 0, sizeof(pdev->resource[i]));
> +
> + dev_notice(&pdev->dev, "Disabled all PCI resources for NumaChip\n");
> +}
> +
> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NUMASCALE,
> + PCI_DEVICE_ID_NUMASCALE_NUMACHIP0, numachip_resource_fixup);
This feels like a band-aid ... what's the background here? I can see
that you're disabling PCI resources, and I can read that this avoids
conflicts, but what's the underlying cause of the conflict?
I wonder if there's a more generic problem that should be fixed
differently. Presumably the NumaChip designers put those BARs there
for a reason, and often when we report "conflicts," it's really a clue
that we're doing something wrong in host bridge discovery or in
generic PCI.
Can you post a complete dmesg log showing the conflict?
Bjorn
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 3/3] Add NumaChip quirk
2011-10-25 14:38 ` Bjorn Helgaas
@ 2011-10-25 15:36 ` Steffen Persvold
2011-10-25 17:15 ` Bjorn Helgaas
0 siblings, 1 reply; 15+ messages in thread
From: Steffen Persvold @ 2011-10-25 15:36 UTC (permalink / raw)
To: Bjorn Helgaas
Cc: Daniel J Blueman, Ingo Molnar, Thomas Gleixner, H Peter Anvin,
linux-kernel, x86, Jesse Barnes, 'Arne Georg Gleditsch'
On 10/25/2011 16:38, Bjorn Helgaas wrote:
> On Tue, Oct 18, 2011 at 2:22 AM, Daniel J Blueman
> <daniel@numascale-asia.com> wrote:
>> Add quirk for Numascale's NumaChip to prevent resource conflicts.
[]
> This feels like a band-aid ... what's the background here? I can see
> that you're disabling PCI resources, and I can read that this avoids
> conflicts, but what's the underlying cause of the conflict?
>
> I wonder if there's a more generic problem that should be fixed
> differently. Presumably the NumaChip designers put those BARs there
> for a reason, and often when we report "conflicts," it's really a clue
> that we're doing something wrong in host bridge discovery or in
> generic PCI.
>
> Can you post a complete dmesg log showing the conflict?
>
Hi Bjorn,
The issue is a bit complicated, but here's the story: NumaChip is a
coherent NorthBridge device on AMD systems (i.e. part of the coherent
fabric) but the BIOS does *not* assign any resources to it, in fact the
BIOS skips our device entirely leaving our BAR registers at HW init
values (0x00000000). This is in fact by design (AMD AGESA code). This
isn't really a big issue for us anyway because we have other means of
reaching our CSR logic (not going into detail, but the other patches in
this patchset would reveal how we do that).
Linux, however, finds our device when scanning the PCI buses (because it
responds to config space requests) and thinks that we have a BAR0
that starts at 0x00000000, which obviously isn't correct. In addition, in
the bootloader that we've written for NumaChip systems (to bring them
all together as a huge coherent system) we had to use the expansion rom
config space register (F0x030) as kind of a "scratch register".
The end result is :
[ 4.636297] pci 0000:00:1a.0: reg 10: [mem 0x00000000-0x000fffff]
[ 4.640317] pci 0000:00:1a.0: reg 30: [mem 0x3fff0000-0x3fffffff pref]
[ 4.644369] pci 0000:00:1a.1: [1b47:0602] type 0 class 0x000600
Neither of these resources is real; our device will not respond to
requests to either of those windows.
The conflict we refer to in the patch is that since Linux thinks we have
those windows assigned to us, we get conflicts later on with real devices :
[ 5.887856] pnp 00:0e: disabling [mem 0x00000000-0x0009ffff] because
it overlaps 0000:00:1a.0 BAR 0 [mem 0x00000000-0x000fffff]
[ 5.899525] pnp 00:0e: disabling [mem 0x000c0000-0x000cffff] because
it overlaps 0000:00:1a.0 BAR 0 [mem 0x00000000-0x000fffff]
[ 5.911002] pnp 00:0e: disabling [mem 0x000e0000-0x000fffff] because
it overlaps 0000:00:1a.0 BAR 0 [mem 0x00000000-0x000fffff]
I guess technically, the Linux PCI bus probing code should check the
Command register (offset 0x4) to see if MemorySpace is enabled (which in
our case it won't be) before checking the BAR registers.
From my perspective however, it seemed easier and less intrusive to
just hook in a quirk for our device since we do not use those BAR
settings anyway in any form once Linux is booted.
I hope this clears things up a bit. If you have any other questions don't
hesitate to ask.
Kind regards,
--
Steffen Persvold, Chief Architect NumaChip
Numascale AS - www.numascale.com
Tel: +47 92 49 25 54 Skype: spersvold
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 3/3] Add NumaChip quirk
2011-10-25 15:36 ` Steffen Persvold
@ 2011-10-25 17:15 ` Bjorn Helgaas
2011-10-25 17:33 ` Steffen Persvold
0 siblings, 1 reply; 15+ messages in thread
From: Bjorn Helgaas @ 2011-10-25 17:15 UTC (permalink / raw)
To: Steffen Persvold
Cc: Daniel J Blueman, Ingo Molnar, Thomas Gleixner, H Peter Anvin,
linux-kernel, x86, Jesse Barnes, Arne Georg Gleditsch, linux-pci
[+cc linux-pci]
On Tue, Oct 25, 2011 at 9:36 AM, Steffen Persvold <sp@numascale.com> wrote:
> On 10/25/2011 16:38, Bjorn Helgaas wrote:
>>
>> On Tue, Oct 18, 2011 at 2:22 AM, Daniel J Blueman
>> <daniel@numascale-asia.com> wrote:
>>>
>>> Add quirk for Numascale's NumaChip to prevent resource conflicts.
>
> []
>>
>> This feels like a band-aid ... what's the background here? I can see
>> that you're disabling PCI resources, and I can read that this avoids
>> conflicts, but what's the underlying cause of the conflict?
>>
>> I wonder if there's a more generic problem that should be fixed
>> differently. Presumably the NumaChip designers put those BARs there
>> for a reason, and often when we report "conflicts," it's really a clue
>> that we're doing something wrong in host bridge discovery or in
>> generic PCI.
>>
>> Can you post a complete dmesg log showing the conflict?
>>
>
> Hi Bjorn,
>
> The issue is a bit complicated, but here's the story; NumaChip is a coherent
> NorthBridge device on AMD systems (i.e part of the coherent fabric) but the
> BIOS does *not* assign any resources to it, in fact the BIOS skips our
> device entirely leaving our BAR registers at HW init values (0x00000000).
> This is in fact by design (AMD AGESA code). This isn't really a big issue
> for us anyway because we have other means of reaching our CSR logic (not
> going into detail, but the other patches in this patchset would reveal how
> we do that).
>
> Linux however, when scanning the PCI buses finds our device (because it is
> responding to config space requests) it thinks that we got a BAR0 that
> starts at 0x00000000 which obviously isn't correct. In addition, in the
> bootloader that we've written for NumaChip systems (to bring them all
> together as a huge coherent system) we had to use the expansion rom config
> space register (F0x030) as kind of a "scratch register".
We do treat zero as a special value when found in BARs, but that's
sort of a muddy area. On x86, a zero-valued BAR is not very useful
because typically there's RAM at address zero and PCI host bridges
don't usually perform address translation. But on architectures like
ia64/alpha/parisc/powerpc/etc., where host bridges often *do*
translate addresses, zero might be a perfectly valid BAR value.
> The end result is :
>
> [ 4.636297] pci 0000:00:1a.0: reg 10: [mem 0x00000000-0x000fffff]
> [ 4.640317] pci 0000:00:1a.0: reg 30: [mem 0x3fff0000-0x3fffffff pref]
> [ 4.644369] pci 0000:00:1a.1: [1b47:0602] type 0 class 0x000600
>
> Neither of these resources are real, our device will not respond to requests
> to any of those windows.
>
> The conflict we refer to in the patch is that since Linux thinks we have
> those windows assigned to us, we get conflicts later on with real devices :
>
> [ 5.887856] pnp 00:0e: disabling [mem 0x00000000-0x0009ffff] because it
> overlaps 0000:00:1a.0 BAR 0 [mem 0x00000000-0x000fffff]
> [ 5.899525] pnp 00:0e: disabling [mem 0x000c0000-0x000cffff] because it
> overlaps 0000:00:1a.0 BAR 0 [mem 0x00000000-0x000fffff]
> [ 5.911002] pnp 00:0e: disabling [mem 0x000e0000-0x000fffff] because it
> overlaps 0000:00:1a.0 BAR 0 [mem 0x00000000-0x000fffff]
Yeah, this is gross, and this is definitely something Linux is doing
wrong. We don't have a consistent way of marking PCI BARs as
"disabled," so every zero-valued BAR seems to conflict with PNP
devices. Typically there are motherboard devices like your 00:0e that
reserve regions of low memory.
Lots of machines complain like this, not just NumaChip, and there's no
real ill effect. We say we're disabling a PNP device resource, but we
don't actually evaluate an _SRS method to tell the BIOS to do
anything. So I think we complain about the conflict but don't do
anything else.
> I guess technically, the Linux PCI bus probing code should check the Command
> register (offset 0x4) to see if MemorySpace is enabled (which in our case it
> won't be) before checking the BAR registers.
The question is how we handle a device with MemorySpace disabled. In
most cases, I think we want to assign BAR resources to it so that if a
driver claims the device, we can enable MemorySpace and the device
will work. If the BIOS leaves MemorySpace disabled and Linux doesn't
assign BAR space at boot-time, we may be stuck because in general we
can't assign resources dynamically. Dynamic assignment might require
moving other devices, enlarging bridge windows, etc., which Linux
currently doesn't support.
NumaChip sounds like an exception because you know you never care
about using those BARs. But I'm curious -- it looks like Linux didn't
even try to assign resources to them. I thought something in the
pci_assign_unassigned_resources() path would have tried to do
something with them. If we *did* assign resources to those BARs, I
assume nothing would break, since there's no driver that actually uses
them. Right?
Bjorn
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 3/3] Add NumaChip quirk
2011-10-25 17:15 ` Bjorn Helgaas
@ 2011-10-25 17:33 ` Steffen Persvold
2011-10-25 18:09 ` Arne Georg Gleditsch
2011-10-25 19:57 ` Bjorn Helgaas
0 siblings, 2 replies; 15+ messages in thread
From: Steffen Persvold @ 2011-10-25 17:33 UTC (permalink / raw)
To: Bjorn Helgaas
Cc: Daniel J Blueman, Ingo Molnar, Thomas Gleixner, H Peter Anvin,
linux-kernel, x86, Jesse Barnes, Arne Georg Gleditsch, linux-pci
On 10/25/2011 19:15, Bjorn Helgaas wrote:
> [+cc linux-pci]
>
[]
>>
>> The conflict we refer to in the patch is that since Linux thinks we have
>> those windows assigned to us, we get conflicts later on with real devices :
>>
>> [ 5.887856] pnp 00:0e: disabling [mem 0x00000000-0x0009ffff] because it
>> overlaps 0000:00:1a.0 BAR 0 [mem 0x00000000-0x000fffff]
>> [ 5.899525] pnp 00:0e: disabling [mem 0x000c0000-0x000cffff] because it
>> overlaps 0000:00:1a.0 BAR 0 [mem 0x00000000-0x000fffff]
>> [ 5.911002] pnp 00:0e: disabling [mem 0x000e0000-0x000fffff] because it
>> overlaps 0000:00:1a.0 BAR 0 [mem 0x00000000-0x000fffff]
>
> Yeah, this is gross, and this is definitely something Linux is doing
> wrong. We don't have a consistent way of marking PCI BARs as
> "disabled," so every zero-valued BAR seems to conflict with PNP
> devices. Typically there are motherboard devices like your 00:0e that
> reserve regions of low memory.
>
> Lots of machines complain like this, not just NumaChip, and there's no
> real ill effect. We say we're disabling a PNP device resource, but we
> don't actually evaluate an _SRS method to tell the BIOS to do
> anything. So I think we complain about the conflict but don't do
> anything else.
Ah, ok. I didn't dive into it, so I didn't see if anything bad(tm)
really happened when that PNP device got "disabled" resources.
>
>> I guess technically, the Linux PCI bus probing code should check the Command
>> register (offset 0x4) to see if MemorySpace is enabled (which in our case it
>> won't be) before checking the BAR registers.
>
> The question is how we handle a device with MemorySpace disabled. In
> most cases, I think we want to assign BAR resources to it so that if a
> driver claims the device, we can enable MemorySpace and the device
> will work. If the BIOS leaves MemorySpace disabled and Linux doesn't
> assign BAR space at boot-time, we may be stuck because in general we
> can't assign resources dynamically. Dynamic assignment might require
> moving other devices, enlarging bridge windows, etc., which Linux
> currently doesn't support.
Yes, I didn't want to open that can of worms :)
>
> NumaChip sounds like an exception because you know you never care
> about using those BARs. But I'm curious -- it looks like Linux didn't
> even try to assign resources to them. I thought something in the
> pci_assign_unassigned_resources() path would have tried to do
> something with them. If we *did* assign resources to those BARs, I
> assume nothing would break, since there's no driver that actually uses
> them. Right?
>
Correct, the BARs are there and if something sensible were written to
them (and MemorySpace was enabled in the Command register) NumaChip
*would* respond to mmio accesses to that address range. However, those
BARs are only for memory mapped configuration register (CSR) access
which can be accessed in a "reserved" address range anyway which is what
we refer to as global CSR space. This is what the 2 other sub-patches
are using for talking to the NumaChip, and if someone would write any
driver at some point it would most likely be tied to that type of access
anyway.
The BARs can only be used for "local" CSR space which isn't that useful
anyway so we don't bother assigning anything to it (or don't care if
anything is). Besides, in a cluster with 1000s of NumaChip nodes all
connected together (with different PCI segments etc. etc.) you really
don't want to assign BARs anyway since you already have the global CSR
space.
With this background, would you agree that it makes sense to still have
the quirk, or would you solve it in a different way?
Cheers,
--
Steffen Persvold, Chief Architect NumaChip
Numascale AS - www.numascale.com
Tel: +47 92 49 25 54 Skype: spersvold
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 3/3] Add NumaChip quirk
2011-10-25 17:33 ` Steffen Persvold
@ 2011-10-25 18:09 ` Arne Georg Gleditsch
2011-10-25 20:03 ` Bjorn Helgaas
2011-10-25 19:57 ` Bjorn Helgaas
1 sibling, 1 reply; 15+ messages in thread
From: Arne Georg Gleditsch @ 2011-10-25 18:09 UTC (permalink / raw)
To: Steffen Persvold
Cc: Bjorn Helgaas, Daniel J Blueman, Ingo Molnar, Thomas Gleixner,
H Peter Anvin, linux-kernel, x86, Jesse Barnes, linux-pci
On 25. okt. 2011 19:33, Steffen Persvold wrote:
> On 10/25/2011 19:15, Bjorn Helgaas wrote:
>> NumaChip sounds like an exception because you know you never care
>> about using those BARs. But I'm curious -- it looks like Linux didn't
>> even try to assign resources to them. I thought something in the
>> pci_assign_unassigned_resources() path would have tried to do
>> something with them. If we *did* assign resources to those BARs, I
>> assume nothing would break, since there's no driver that actually uses
>> them. Right?
>>
>
> Correct, the BARs are there and if something sensible were written to
> them (and MemorySpace was enabled in the Command register) NumaChip
> *would* respond to mmio accesses to that address range.
A minor point: adjusting the BARs would not strictly speaking be
sufficient for the NumaChip to respond, as it would never see these
accesses unless the [MMIO address range]->[HyperTransport node/link]
registers of the CPU NorthBridges were also updated with the relevant
ranges. This is a bit messy, but in a way much the same issue as when
secondary southbridges are connected to secondary CPUs in any other
HyperTransport-based system.
Perhaps an alternative to this NumaChip-specific quirk would be to
special-case BARs belonging to "PCI" devices 00:18 - 00:1f in AMD
Opteron systems. These always indicate coherent HT devices and fiddling
with the CPU NB maps is going to be required if anything is changed
regarding the BAR assignments here.
--
Arne.
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 3/3] Add NumaChip quirk
2011-10-25 17:33 ` Steffen Persvold
2011-10-25 18:09 ` Arne Georg Gleditsch
@ 2011-10-25 19:57 ` Bjorn Helgaas
1 sibling, 0 replies; 15+ messages in thread
From: Bjorn Helgaas @ 2011-10-25 19:57 UTC (permalink / raw)
To: Steffen Persvold
Cc: Daniel J Blueman, Ingo Molnar, Thomas Gleixner, H Peter Anvin,
linux-kernel, x86, Jesse Barnes, Arne Georg Gleditsch, linux-pci
On Tue, Oct 25, 2011 at 11:33 AM, Steffen Persvold <sp@numascale.com> wrote:
> On 10/25/2011 19:15, Bjorn Helgaas wrote:
>>
>> [+cc linux-pci]
>>
> []
>>>
>>> The conflict we refer to in the patch is that since Linux thinks we have
>>> those windows assigned to us, we get conflicts later on with real devices
>>> :
>>>
>>> [ 5.887856] pnp 00:0e: disabling [mem 0x00000000-0x0009ffff] because
>>> it
>>> overlaps 0000:00:1a.0 BAR 0 [mem 0x00000000-0x000fffff]
>>> [ 5.899525] pnp 00:0e: disabling [mem 0x000c0000-0x000cffff] because
>>> it
>>> overlaps 0000:00:1a.0 BAR 0 [mem 0x00000000-0x000fffff]
>>> [ 5.911002] pnp 00:0e: disabling [mem 0x000e0000-0x000fffff] because
>>> it
>>> overlaps 0000:00:1a.0 BAR 0 [mem 0x00000000-0x000fffff]
>>
>> Yeah, this is gross, and this is definitely something Linux is doing
>> wrong. We don't have a consistent way of marking PCI BARs as
>> "disabled," so every zero-valued BAR seems to conflict with PNP
>> devices. Typically there are motherboard devices like your 00:0e that
>> reserve regions of low memory.
>>
>> Lots of machines complain like this, not just NumaChip, and there's no
>> real ill effect. We say we're disabling a PNP device resource, but we
>> don't actually evaluate an _SRS method to tell the BIOS to do
>> anything. So I think we complain about the conflict but don't do
>> anything else.
>
> Ah, ok. I didn't dive into it, so I didn't see if anything bad(tm) really
> happened when that PNP device got "disabled" resources.
>
>>
>>> I guess technically, the Linux PCI bus probing code should check the
>>> Command
>>> register (offset 0x4) to see if MemorySpace is enabled (which in our case
>>> it
>>> won't be) before checking the BAR registers.
>>
>> The question is how we handle a device with MemorySpace disabled. In
>> most cases, I think we want to assign BAR resources to it so that if a
>> driver claims the device, we can enable MemorySpace and the device
>> will work. If the BIOS leaves MemorySpace disabled and Linux doesn't
>> assign BAR space at boot-time, we may be stuck because in general we
>> can't assign resources dynamically. Dynamic assignment might require
>> moving other devices, enlarging bridge windows, etc., which Linux
>> currently doesn't support.
>
> Yes, I didn't want to open that can of worms :)
>
>>
>> NumaChip sounds like an exception because you know you never care
>> about using those BARs. But I'm curious -- it looks like Linux didn't
>> even try to assign resources to them. I thought something in the
>> pci_assign_unassigned_resources() path would have tried to do
>> something with them. If we *did* assign resources to those BARs, I
>> assume nothing would break, since there's no driver that actually uses
>> them. Right?
>>
>
> Correct, the BARs are there and if something sensible were written to them
> (and MemorySpace was enabled in the Command register) NumaChip *would*
> respond to mmio accesses to that address range. However, those BARs are only
> for memory mapped configuration register (CSR) access which can be accessed
> in a "reserved" address range anyway which is what we refer to as global CSR
> space. This is what the 2 other sub-patches are using for talking to the
> NumaChip, and if someone would write any driver at some point it would most
> likely be tied to that type of access anyway.
>
> The BARs can only be used for "local" CSR space which isn't that useful
> anyway so we don't bother assigning anything to it (or don't care if
> anything is). Besides, in a cluster with 1000s of NumaChip nodes all
> connected together (with different PCI segments etc. etc.) you really don't
> want to assign BARs anyway since you already have the global CSR space.
>
> With this background, would you agree that it makes sense to still have the
> quirk or would you go about and solve it in a different way ?
In this case, I think the only thing the quirk does for you is to get
rid of the warning. I don't think it will make anything work better,
so my inclination would be to just skip the quirk.
Tangent: I looked at the other patches, specifically
https://lkml.org/lkml/2011/7/22/97, and I see some of the global CSR
space stuff. I assume you have ACPI devices that describe the global
CSR address space? I don't see anything in the patch that reserves
that space in iomem_resource. There should be some mechanism for
reserving that space to prevent collisions. For PCI devices, normally
that's the BAR. For other devices on x86, normally that's ACPI.
Bjorn
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 3/3] Add NumaChip quirk
2011-10-25 18:09 ` Arne Georg Gleditsch
@ 2011-10-25 20:03 ` Bjorn Helgaas
0 siblings, 0 replies; 15+ messages in thread
From: Bjorn Helgaas @ 2011-10-25 20:03 UTC (permalink / raw)
To: Arne Georg Gleditsch
Cc: Steffen Persvold, Daniel J Blueman, Ingo Molnar, Thomas Gleixner,
H Peter Anvin, linux-kernel, x86, Jesse Barnes, linux-pci
On Tue, Oct 25, 2011 at 12:09 PM, Arne Georg Gleditsch
<arne.gleditsch@numascale.com> wrote:
> On 25. okt. 2011 19:33, Steffen Persvold wrote:
>> On 10/25/2011 19:15, Bjorn Helgaas wrote:
>>> NumaChip sounds like an exception because you know you never care
>>> about using those BARs. But I'm curious -- it looks like Linux didn't
>>> even try to assign resources to them. I thought something in the
>>> pci_assign_unassigned_resources() path would have tried to do
>>> something with them. If we *did* assign resources to those BARs, I
>>> assume nothing would break, since there's no driver that actually uses
>>> them. Right?
>>>
>>
>> Correct, the BARs are there and if something sensible were written to
>> them (and MemorySpace was enabled in the Command register) NumaChip
>> *would* respond to mmio accesses to that address range.
>
> A minor point: adjusting the BARs would not strictly speaking be
> sufficient for the NumaChip to respond, as it would never see these
> accesses unless the [MMIO address range]->[HyperTransport node/link]
> registers of the CPU NorthBridges were also updated with the relevant
> ranges. This is a bit messy, but in a way much the same issue as when
> secondary southbridges are connected to secondary CPUs in any other
> HyperTransport-based system.
The [MMIO address range]->[HyperTransport node/link] registers you're
talking about are the implementation side of the PCI host bridge
abstraction. You should have an ACPI host bridge device (PNP0A03 or
PNP0A08) that describes the apertures, and if Linux assigns resources
to the BARs, it will only use areas inside the apertures. If there's
no available space in the apertures, we just leave the device alone
(maybe this explains why we didn't assign anything).
> Perhaps an alternative to this NumaChip-specific quirk would be to
> special-case BARs belonging to "PCI" devices 00:18 - 00:1f in AMD
> Opteron systems. These always indicate coherent HT devices and fiddling
> with the CPU NB maps are going to be required if anything is changed
> regarding the BAR assignments here.
I don't think there's a need to special-case these AMD BARs. If the
BIOS programs them correctly (inside a PCI host bridge aperture
reported via ACPI), Linux won't touch them.
Bjorn
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 3/3] Add NumaChip quirk
2011-10-25 13:54 ` Thomas Gleixner
2011-10-25 14:17 ` Jesse Barnes
@ 2011-10-26 3:12 ` Daniel J Blueman
1 sibling, 0 replies; 15+ messages in thread
From: Daniel J Blueman @ 2011-10-26 3:12 UTC (permalink / raw)
To: Thomas Gleixner
Cc: Ingo Molnar, H Peter Anvin, Steffen Persvold, linux-kernel, x86
On 25/10/2011 21:54, Thomas Gleixner wrote:
> Daniel,
>
> On Tue, 18 Oct 2011, Daniel J Blueman wrote:
>
>> Add quirk for Numascale's NumaChip to prevent resource conflicts.
>>
>> v2:
>> - [Daniel] rediffed and validated against 3.1-rc10
>>
>> Signed-off-by: Steffen Persvold<sp@numascale.com>
> So, who wrote that code? If Steffen, then you should put a From:
> .... when sending patches.
Steffen wrote the code; I reviewed and addressed compliance issues
(hence the Signed-off-by ordering).
I'll add the From header and send the patches again.
Thanks Thomas,
Daniel
--
Daniel J Blueman
Principal Software Engineer, Numascale Asia
^ permalink raw reply [flat|nested] 15+ messages in thread
* [PATCH 3/3] Add NumaChip quirk
2011-10-26 6:07 [PATCH 1/3] Add Numachip APIC support Daniel J Blueman
@ 2011-10-26 6:07 ` Daniel J Blueman
0 siblings, 0 replies; 15+ messages in thread
From: Daniel J Blueman @ 2011-10-26 6:07 UTC (permalink / raw)
To: Jesse Barnes, Thomas Gleixner
Cc: Ingo Molnar, H Peter Anvin, Steffen Persvold, linux-kernel, x86,
Daniel J Blueman
From: Steffen Persvold <sp@numascale.com>
Add quirk for Numascale's NumaChip to prevent resource conflicts.
v2:
- [Daniel] rediffed and validated against 3.1-rc10
Signed-off-by: Steffen Persvold <sp@numascale.com>
Signed-off-by: Daniel J Blueman <daniel@numascale-asia.com>
---
drivers/pci/quirks.c | 14 ++++++++++++++
include/linux/pci_ids.h | 4 ++++
2 files changed, 18 insertions(+), 0 deletions(-)
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 1196f61..051b793 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -2679,6 +2679,20 @@ static void __devinit quirk_hotplug_bridge(struct pci_dev *dev)
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_HINT, 0x0020, quirk_hotplug_bridge);
+/* Clear all resources on NumaChip to avoid conflicts */
+static void __devinit numachip_resource_fixup(struct pci_dev *pdev)
+{
+ int i;
+
+ for (i = PCI_STD_RESOURCES; i <= PCI_ROM_RESOURCE; i++)
+ memset(&pdev->resource[i], 0, sizeof(pdev->resource[i]));
+
+ dev_notice(&pdev->dev, "Disabled all PCI resources for NumaChip\n");
+}
+
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NUMASCALE,
+ PCI_DEVICE_ID_NUMASCALE_NUMACHIP0, numachip_resource_fixup);
+
/*
* This is a quirk for the Ricoh MMC controller found as a part of
* some mulifunction chips.
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index ae96bbe..02e0959 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2400,6 +2400,10 @@
#define PCI_VENDOR_ID_AZWAVE 0x1a3b
+#define PCI_VENDOR_ID_NUMASCALE 0x1b47
+#define PCI_DEVICE_ID_NUMASCALE_NUMACHIP0 0x0601
+#define PCI_DEVICE_ID_NUMASCALE_NUMACHIP1 0x0602
+
#define PCI_VENDOR_ID_TEKRAM 0x1de1
#define PCI_DEVICE_ID_TEKRAM_DC290 0xdc29
--
1.7.5.4
^ permalink raw reply related [flat|nested] 15+ messages in thread
end of thread, other threads:[~2011-10-26 6:08 UTC | newest]
Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-10-18 8:22 [PATCH 1/3] Add Numachip APIC support Daniel J Blueman
2011-10-18 8:22 ` [PATCH 2/3] Add multi-node boot support Daniel J Blueman
2011-10-18 8:22 ` [PATCH 3/3] Add NumaChip quirk Daniel J Blueman
2011-10-25 13:54 ` Thomas Gleixner
2011-10-25 14:17 ` Jesse Barnes
2011-10-26 3:12 ` Daniel J Blueman
2011-10-25 14:38 ` Bjorn Helgaas
2011-10-25 15:36 ` Steffen Persvold
2011-10-25 17:15 ` Bjorn Helgaas
2011-10-25 17:33 ` Steffen Persvold
2011-10-25 18:09 ` Arne Georg Gleditsch
2011-10-25 20:03 ` Bjorn Helgaas
2011-10-25 19:57 ` Bjorn Helgaas
-- strict thread matches above, loose matches on Subject: below --
2011-10-26 6:07 [PATCH 1/3] Add Numachip APIC support Daniel J Blueman
2011-10-26 6:07 ` [PATCH 3/3] Add NumaChip quirk Daniel J Blueman
2011-07-22 10:44 [PATCH 1/3] Add Numachip APIC support Daniel J Blueman
2011-07-22 10:44 ` [PATCH 3/3] Add NumaChip quirk Daniel J Blueman
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox