[PULL}: latest tip/cpus4096 changes

public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed

* [PULL}: latest tip/cpus4096 changes
@ 2009-01-16  9:05 Mike Travis
  2009-01-16  9:25 ` Ingo Molnar
                   ` (3 more replies)
  0 siblings, 4 replies; 21+ messages in thread
From: Mike Travis @ 2009-01-16  9:05 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Rusty Russell, LKML


Hi Ingo,

Please pull the following 'fairly lightweight' changes for tip/cpus4096.
(Well, except for "cpumask: use work_on_cpu in acpi-cpufreq.c for drv_read and drv_write"
which has been tested to be more reliable now.)

Thanks!
Mike
--- 
The following changes since commit c99dbbe9f8f6b3e9383e64710217e873431d1c31:
  Mike Travis (1):
        sched: fix warning on ia64

are available in the git repository at:

  ssh://master.kernel.org/pub/scm/linux/kernel/git/travis/linux-2.6-cpus4096-for-ingo master

Mike Travis (6):
      cpumask: use work_on_cpu in acpi-cpufreq.c for drv_read and drv_write
      x86: cleanup remaining cpumask_t code in microcode_core.c
      xen: reduce static memory usage
      x86: reduce static memory usage in microcode_core.c
      kgdb: reduce static memory usage in kgdb.c
      acpi: reduce memory required for apic_version

Rusty Russell (2):
      cpumask: don't try to get_online_cpus() in work_on_cpu.
      work_on_cpu: Use our own workqueue.

 arch/x86/include/asm/microcode.h           |    2 +-
 arch/x86/include/asm/mpspec.h              |   22 +++++++
 arch/x86/kernel/acpi/boot.c                |    8 +-
 arch/x86/kernel/apic.c                     |   70 ++++++++++++++++++++++-
 arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c |   22 +++----
 arch/x86/kernel/io_apic.c                  |    2 +-
 arch/x86/kernel/microcode_core.c           |   85 ++++++++++++++++++----------
 arch/x86/kernel/setup_percpu.c             |    3 +
 arch/x86/kernel/smpboot.c                  |    6 +-
 arch/x86/kernel/visws_quirks.c             |    2 +-
 drivers/xen/events.c                       |   10 ++-
 kernel/kgdb.c                              |   10 +++-
 kernel/workqueue.c                         |   20 +++---
 13 files changed, 193 insertions(+), 69 deletions(-)

commit 4eadffe68fb5f1d4c18ee2bbcb91f5c79f434db5
Author: Mike Travis <travis@sgi.com>
Date:   Fri Jan 16 00:22:34 2009 -0800

    acpi: reduce memory required for apic_version
    
    Impact: reduce memory usage
    
    By moving the initial static apic_version array into __initdata
    memory, and allocating a correctly sized one once the number of
    apic's is known, reduces the memory required when the MAX_APICS
    is >= 256.  This deals with this memory bump when NR_CPUS bumped
    from 128 to 4096:
    
       1020   131072  +130052 +12750%  apic_version(.bss)
    
    Since apic_version is lightly used, a simple lookup is used to
    convert apicid -> version.
    
    If MAX_APICS < 256, then the current apic_version[MAX_APIC] array
    is left in place.
    
    Signed-off-by: Mike Travis <travis@sgi.com>

diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index 62d14ce..ec01fab 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -5,7 +5,29 @@
 
 #include <asm/mpspec_def.h>
 
+#if MAX_APICS < 256
 extern int apic_version[MAX_APICS];
+static inline int add_apic_version(unsigned int apicid, int version)
+{
+	apic_version[apicid] = version;
+	return 0;
+}
+
+static inline int get_apic_version(unsigned int apicid)
+{
+	return apic_version[apicid];
+}
+
+static inline void cleanup_apic_version(void)
+{
+}
+
+#else	/* MAX_APICS >= 256 */
+int __cpuinit add_apic_version(unsigned int apicid, int version);
+int get_apic_version(unsigned int apicid);
+void __init cleanup_apic_version(void);
+#endif
+
 extern int pic_mode;
 
 #ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index d37593c..0ea7036 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -254,7 +254,7 @@ static void __cpuinit acpi_register_lapic(int id, u8 enabled)
 	}
 
 	if (boot_cpu_physical_apicid != -1U)
-		ver = apic_version[boot_cpu_physical_apicid];
+		ver = get_apic_version(boot_cpu_physical_apicid);
 
 	generic_processor_info(id, ver);
 }
@@ -789,8 +789,8 @@ static void __init acpi_register_lapic_address(unsigned long address)
 	set_fixmap_nocache(FIX_APIC_BASE, address);
 	if (boot_cpu_physical_apicid == -1U) {
 		boot_cpu_physical_apicid  = read_apic_id();
-		apic_version[boot_cpu_physical_apicid] =
-			 GET_APIC_VERSION(apic_read(APIC_LVR));
+		add_apic_version(boot_cpu_physical_apicid,
+			 GET_APIC_VERSION(apic_read(APIC_LVR)));
 	}
 }
 
@@ -903,7 +903,7 @@ static u8 __init uniq_ioapic_id(u8 id)
 {
 #ifdef CONFIG_X86_32
 	if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
-	    !APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
+	    !APIC_XAPIC(get_apic_version(boot_cpu_physical_apicid)))
 		return io_apic_get_unique_id(nr_ioapics, id);
 	else
 		return id;
diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index 0f830e4..2182094 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -1562,8 +1562,69 @@ void __init init_apic_mappings(void)
  * This initializes the IO-APIC and APIC hardware if this is
  * a UP kernel.
  */
+
+#if MAX_APICS < 256
 int apic_version[MAX_APICS];
 
+#else
+struct apic_version_info {
+	unsigned int apicid;
+	int version;
+};
+
+struct apic_version_info _apic_version_info[CONFIG_NR_CPUS] __initdata;
+struct apic_version_info *apic_version_info __refdata = _apic_version_info;
+int nr_apic_version_info;
+
+/* can be called either during init or cpu hotplug add */
+int __cpuinit add_apic_version(unsigned int apicid, int version)
+{
+	int i;
+
+	for (i = 0; i < nr_apic_version_info; i++)
+		if (apicid == apic_version_info[i].apicid) {
+			apic_version_info[i].version = version;
+			return 0;
+		}
+
+	if (likely(nr_apic_version_info < nr_cpu_ids)) {
+		i = nr_apic_version_info++;
+		apic_version_info[i].apicid = apicid;
+		apic_version_info[i].version = version;
+		return 0;
+	}
+	return -ENOMEM;
+}
+
+/* lookup version for apic, usually first one (boot cpu) */
+int get_apic_version(unsigned int apicid)
+{
+	int i;
+
+	for (i = 0; i < nr_apic_version_info; i++)
+		if (apicid == apic_version_info[i].apicid)
+			return apic_version_info[i].version;
+
+	return 0;
+}
+
+/* allocate permanent apic_version structure */
+void __init cleanup_apic_version(void)
+{
+	size_t size;
+	int i;
+
+	/* allows disabled_cpus to be brought online */
+	size = nr_cpu_ids * sizeof(*apic_version_info);
+	apic_version_info = alloc_bootmem(size);
+
+	/* copy version info from initial array to permanent array */
+	for (i = 0; i < nr_apic_version_info; i++)
+		apic_version_info[i] = _apic_version_info[i];
+}
+
+#endif /* MAX_APICS >= 256 */
+
 int __init APIC_init_uniprocessor(void)
 {
 #ifdef CONFIG_X86_64
@@ -1584,7 +1645,7 @@ int __init APIC_init_uniprocessor(void)
 	 * Complain if the BIOS pretends there is one.
 	 */
 	if (!cpu_has_apic &&
-	    APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
+	    APIC_INTEGRATED(get_apic_version(boot_cpu_physical_apicid))) {
 		pr_err("BIOS bug, local APIC 0x%x not detected!...\n",
 			boot_cpu_physical_apicid);
 		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
@@ -1816,7 +1877,12 @@ void __cpuinit generic_processor_info(int apicid, int version)
 				version);
 		version = 0x10;
 	}
-	apic_version[apicid] = version;
+	if (unlikely(add_apic_version(apicid, version)) < 0) {
+		pr_warning(
+			"ACPI: cannot add apicid 0x%x version: out of memory\n",
+			apicid);
+		return;
+	}
 
 	if (num_processors >= nr_cpu_ids) {
 		int max = nr_cpu_ids;
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 1579869..e575c3c 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -2103,7 +2103,7 @@ static void __init setup_ioapic_ids_from_mpc(void)
 	 * no meaning without the serial APIC bus.
 	 */
 	if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
-		|| APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
+		|| APIC_XAPIC(get_apic_version(boot_cpu_physical_apicid)))
 		return;
 	/*
 	 * This is broken; anything with a real cpu count has to
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 55c4607..fb7a461 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -209,6 +209,9 @@ void __init setup_per_cpu_areas(void)
 
 	/* Setup cpu initialized, callin, callout masks */
 	setup_cpu_local_masks();
+
+	/* Cleanup apic_version array */
+	cleanup_apic_version();
 }
 
 #endif
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index bb1a3b1..ae2c845 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -592,7 +592,7 @@ wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip)
 	 * Give the other CPU some time to accept the IPI.
 	 */
 	udelay(200);
-	if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
+	if (APIC_INTEGRATED(get_apic_version(boot_cpu_physical_apicid))) {
 		maxlvt = lapic_get_maxlvt();
 		if (maxlvt > 3)			/* Due to the Pentium erratum 3AP.  */
 			apic_write(APIC_ESR, 0);
@@ -625,7 +625,7 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
 	/*
 	 * Be paranoid about clearing APIC errors.
 	 */
-	if (APIC_INTEGRATED(apic_version[phys_apicid])) {
+	if (APIC_INTEGRATED(get_apic_version(phys_apicid))) {
 		if (maxlvt > 3)		/* Due to the Pentium erratum 3AP.  */
 			apic_write(APIC_ESR, 0);
 		apic_read(APIC_ESR);
@@ -665,7 +665,7 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
 	 * Determine this based on the APIC version.
 	 * If we don't have an integrated APIC, don't send the STARTUP IPIs.
 	 */
-	if (APIC_INTEGRATED(apic_version[phys_apicid]))
+	if (APIC_INTEGRATED(get_apic_version(phys_apicid)))
 		num_starts = 2;
 	else
 		num_starts = 0;
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c
index d801d06..7fe2b25 100644
--- a/arch/x86/kernel/visws_quirks.c
+++ b/arch/x86/kernel/visws_quirks.c
@@ -211,7 +211,7 @@ static void __init MP_processor_info(struct mpc_cpu *m)
 			m->apicid);
 		ver = 0x10;
 	}
-	apic_version[m->apicid] = ver;
+	add_apic_version(m->apicid, ver);
 }
 
 static int __init visws_find_smp_config(unsigned int reserve)

commit 0389b4e73561c3ccf36d6c8290e9496b959f06a6
Author: Mike Travis <travis@sgi.com>
Date:   Fri Jan 16 00:22:33 2009 -0800

    kgdb: reduce static memory usage in kgdb.c
    
    Impact: reduce static memory usage.
    
    By allocating kgdb_info based on nr_cpu_ids instead of NR_CPUS,
    it will be sized big enough for the number of cpus on the running
    system.  This deals with this memory bump when NR_CPUS bumped
    from 128 to 4096:
    
       2048    65536   +63488 +3100%  kgdb_info(.bss)
    
    Signed-off-by: Mike Travis <travis@sgi.com>

diff --git a/kernel/kgdb.c b/kernel/kgdb.c
index e4dcfb2..21fde60 100644
--- a/kernel/kgdb.c
+++ b/kernel/kgdb.c
@@ -72,7 +72,7 @@ struct kgdb_state {
 static struct debuggerinfo_struct {
 	void			*debuggerinfo;
 	struct task_struct	*task;
-} kgdb_info[NR_CPUS];
+} *kgdb_info;
 
 /**
  * kgdb_connected - Is a host GDB connected to us?
@@ -1651,6 +1651,13 @@ int kgdb_register_io_module(struct kgdb_io *new_kgdb_io_ops)
 		return -EBUSY;
 	}
 
+	kgdb_info = kmalloc(nr_cpu_ids * sizeof(*kgdb_info), GFP_KERNEL);
+	if (unlikely(!kgdb_info)) {
+		spin_unlock(&kgdb_registration_lock);
+		printk(KERN_ERR "kgdb: No memory for kgdb_info\n");
+		return -ENOMEM;
+	}
+
 	if (new_kgdb_io_ops->init) {
 		err = new_kgdb_io_ops->init();
 		if (err) {
@@ -1696,6 +1703,7 @@ void kgdb_unregister_io_module(struct kgdb_io *old_kgdb_io_ops)
 
 	WARN_ON_ONCE(kgdb_io_ops != old_kgdb_io_ops);
 	kgdb_io_ops = NULL;
+	kfee(kgdb_info);
 
 	spin_unlock(&kgdb_registration_lock);
 

commit 16c4ae6a8845d6ccda26326678e0e7ec2e4b0509
Author: Mike Travis <travis@sgi.com>
Date:   Fri Jan 16 00:22:33 2009 -0800

    x86: reduce static memory usage in microcode_core.c
    
    Impact: reduce static memory usage.
    
    By allocating ucode_cpu_info based on nr_cpu_ids instead of
    NR_CPUS, it will be sized big enough for the number of cpus
    on the running system.  This deals with this memory bump
    when NR_CPUS bumped from 128 to 4096:
    
       3072    98304   +95232 +3100%  ucode_cpu_info(.bss)
    
    Signed-off-by: Mike Travis <travis@sgi.com>

diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index c882664..ca973f6 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -24,7 +24,7 @@ struct ucode_cpu_info {
 	int valid;
 	void *mc;
 };
-extern struct ucode_cpu_info ucode_cpu_info[];
+extern struct ucode_cpu_info *ucode_cpu_info;
 
 #ifdef CONFIG_MICROCODE_INTEL
 extern struct microcode_ops * __init init_intel_microcode(void);
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index 50f9e18..5a1aafc 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -104,7 +104,7 @@ static struct microcode_ops *microcode_ops;
 /* no concurrent ->write()s are allowed on /dev/cpu/microcode */
 static DEFINE_MUTEX(microcode_mutex);
 
-struct ucode_cpu_info ucode_cpu_info[NR_CPUS];
+struct ucode_cpu_info *ucode_cpu_info;
 EXPORT_SYMBOL_GPL(ucode_cpu_info);
 
 #ifdef CONFIG_MICROCODE_OLD_INTERFACE
@@ -471,6 +471,13 @@ static int __init microcode_init(void)
 {
 	struct cpuinfo_x86 *c = &cpu_data(0);
 	int error;
+	size_t size = sizeof(*ucode_cpu_info) * nr_cpu_ids;
+
+	ucode_cpu_info = kmalloc(size, GFP_KERNEL);
+	if (!ucode_cpu_info) {
+		WARN(1, "CPU: cannot allocate microcode info structure\n");
+		return -ENOMEM;
+	}
 
 	if (c->x86_vendor == X86_VENDOR_INTEL)
 		microcode_ops = init_intel_microcode();
@@ -525,6 +532,8 @@ static void __exit microcode_exit(void)
 
 	microcode_ops = NULL;
 
+	kfree(ucode_cpu_info);
+
 	printk(KERN_INFO
 	       "Microcode Update Driver: v" MICROCODE_VERSION " removed.\n");
 }

commit beec9183a43f8a42f5b790326a3b120a3b513590
Author: Mike Travis <travis@sgi.com>
Date:   Fri Jan 16 00:22:33 2009 -0800

    xen: reduce static memory usage
    
    Impact: reduce memory usage
    
    By allocating the irq_info and irq_bindcount based
    on nr_irqs instead of NR_IRQS, it will contain only
    enough entries as needed by the running system.
    
    This addresses this memory bump when NR_CPUS bumped
    from 128 to 4096:
    
      17408   132096  +114688  +658%  irq_info(.bss)
      17408   132096  +114688  +658%  irq_bindcount(.bss)
    
    This is only effective when CONFIG_SPARSE_IRQS=y.
    
    Signed-off-by: Mike Travis <travis@sgi.com>
    Tested-by: Christophe Saout <christophe@saout.de>

diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 3141e14..c8894d7 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -27,6 +27,7 @@
 #include <linux/module.h>
 #include <linux/string.h>
 #include <linux/bootmem.h>
+#include <linux/irqnr.h>
 
 #include <asm/ptrace.h>
 #include <asm/irq.h>
@@ -59,7 +60,7 @@ struct packed_irq
 	unsigned char type;
 };
 
-static struct packed_irq irq_info[NR_IRQS];
+static struct packed_irq *irq_info;
 
 /* Binding types. */
 enum {
@@ -87,7 +88,7 @@ static inline unsigned long *cpu_evtchn_mask(int cpu)
 static u8 cpu_evtchn[NR_EVENT_CHANNELS];
 
 /* Reference counts for bindings to IRQs. */
-static int irq_bindcount[NR_IRQS];
+static int *irq_bindcount;
 
 /* Xen will never allocate port zero for any purpose. */
 #define VALID_EVTCHN(chn)	((chn) != 0)
@@ -833,7 +834,10 @@ void __init xen_init_IRQ(void)
 	size_t size = nr_cpu_ids * sizeof(struct cpu_evtchn_s);
 
 	cpu_evtchn_mask_p = alloc_bootmem(size);
-	BUG_ON(cpu_evtchn_mask_p == NULL);
+
+	irq_info = alloc_bootmem(nr_irqs * sizeof(struct packed_irq));
+
+	irq_bindcount = alloc_bootmem(nr_irqs * sizeof(int));
 
 	init_evtchn_cpu_bindings();
 

commit 47c28f0a59121a7bbdfb46d0362ca319f35538dc
Author: Mike Travis <travis@sgi.com>
Date:   Thu Jan 15 17:16:55 2009 -0800

    x86: cleanup remaining cpumask_t code in microcode_core.c
    
    Impact: Reduce problem with changing current->cpus_allowed mask directly.
    
    Use "work_on_cpu" to replace instances where set_cpus_allowed_ptr was being used.
    
    Signed-off-by: Mike Travis <travis@sgi.com>

diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index c9b721b..50f9e18 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -108,29 +108,43 @@ struct ucode_cpu_info ucode_cpu_info[NR_CPUS];
 EXPORT_SYMBOL_GPL(ucode_cpu_info);
 
 #ifdef CONFIG_MICROCODE_OLD_INTERFACE
+struct do_microcode_update_args {
+	const void __user *buf;
+	size_t size;
+};
+
+static long do_microcode_update_sub(void *_args)
+{
+	struct do_microcode_update_args *args = _args;
+	long error;
+	int cpu = smp_processor_id();
+
+	error = microcode_ops->request_microcode_user(cpu, args->buf,
+						      args->size);
+	if (!error)
+		microcode_ops->apply_microcode(cpu);
+
+	return error;
+}
+
 static int do_microcode_update(const void __user *buf, size_t size)
 {
-	cpumask_t old;
+	struct do_microcode_update_args args;
 	int error = 0;
 	int cpu;
 
-	old = current->cpus_allowed;
-
+	args.buf = buf;
+	args.size = size;
 	for_each_online_cpu(cpu) {
 		struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
 
 		if (!uci->valid)
 			continue;
 
-		set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
-		error = microcode_ops->request_microcode_user(cpu, buf, size);
+		error = work_on_cpu(cpu, do_microcode_update_sub, &args);
 		if (error < 0)
-			goto out;
-		if (!error)
-			microcode_ops->apply_microcode(cpu);
+			break;
 	}
-out:
-	set_cpus_allowed_ptr(current, &old);
 	return error;
 }
 
@@ -205,6 +219,18 @@ MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
 /* fake device for request_firmware */
 static struct platform_device *microcode_pdev;
 
+static long reload_store_sub(void *unused)
+{
+	int cpu = smp_processor_id();
+	long err;
+
+	err = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev);
+	if (!err)
+		microcode_ops->apply_microcode(cpu);
+
+	return err;
+}
+
 static ssize_t reload_store(struct sys_device *dev,
 			    struct sysdev_attribute *attr,
 			    const char *buf, size_t sz)
@@ -218,20 +244,12 @@ static ssize_t reload_store(struct sys_device *dev,
 	if (end == buf)
 		return -EINVAL;
 	if (val == 1) {
-		cpumask_t old = current->cpus_allowed;
-
 		get_online_cpus();
 		if (cpu_online(cpu)) {
-			set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
 			mutex_lock(&microcode_mutex);
-			if (uci->valid) {
-				err = microcode_ops->request_microcode_fw(cpu,
-						&microcode_pdev->dev);
-				if (!err)
-					microcode_ops->apply_microcode(cpu);
-			}
+			if (uci->valid)
+				work_on_cpu(cpu, reload_store_sub, NULL);
 			mutex_unlock(&microcode_mutex);
-			set_cpus_allowed_ptr(current, &old);
 		}
 		put_online_cpus();
 	}
@@ -349,19 +367,17 @@ static void microcode_update_cpu(int cpu)
 		microcode_ops->apply_microcode(cpu);
 }
 
-static void microcode_init_cpu(int cpu)
+static long microcode_update_cpu_sub(void *unused)
 {
-	cpumask_t old = current->cpus_allowed;
-
-	set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
-	/* We should bind the task to the CPU */
-	BUG_ON(raw_smp_processor_id() != cpu);
+	microcode_update_cpu(smp_processor_id());
+	return 0;
+}
 
+static void microcode_init_cpu(int cpu)
+{
 	mutex_lock(&microcode_mutex);
-	microcode_update_cpu(cpu);
+	work_on_cpu(cpu, microcode_update_cpu_sub, NULL);
 	mutex_unlock(&microcode_mutex);
-
-	set_cpus_allowed_ptr(current, &old);
 }
 
 static int mc_sysdev_add(struct sys_device *sys_dev)

commit f766ec2751f6f7ebed571e87f5f0f20f25a116be
Author: Mike Travis <travis@sgi.com>
Date:   Thu Jan 15 16:29:16 2009 -0800

    cpumask: use work_on_cpu in acpi-cpufreq.c for drv_read and drv_write
    
    Impact: use new work_on_cpu function to reduce stack usage
    
    Replace the saving of current->cpus_allowed and set_cpus_allowed_ptr() with
    a work_on_cpu function for drv_read() and drv_write().
    
    Basically converts do_drv_{read,write} into "work_on_cpu" functions that
    are now called by drv_read and drv_write.
    
    Signed-off-by: Mike Travis <travis@sgi.com>
    Acked-by: Rusty Russell <rusty@rustcorp.com.au>
    Tested-by: Dieter Ries <clip2@gmx.de>
    Tested-by: Maciej Rutecki <maciej.rutecki@gmail.com>

diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 0192767..4b1c319 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -150,8 +150,9 @@ struct drv_cmd {
 	u32 val;
 };
 
-static void do_drv_read(struct drv_cmd *cmd)
+static long do_drv_read(void *_cmd)
 {
+	struct drv_cmd *cmd = _cmd;
 	u32 h;
 
 	switch (cmd->type) {
@@ -166,10 +167,12 @@ static void do_drv_read(struct drv_cmd *cmd)
 	default:
 		break;
 	}
+	return 0;
 }
 
-static void do_drv_write(struct drv_cmd *cmd)
+static long do_drv_write(void *_cmd)
 {
+	struct drv_cmd *cmd = _cmd;
 	u32 lo, hi;
 
 	switch (cmd->type) {
@@ -186,30 +189,23 @@ static void do_drv_write(struct drv_cmd *cmd)
 	default:
 		break;
 	}
+	return 0;
 }
 
 static void drv_read(struct drv_cmd *cmd)
 {
-	cpumask_t saved_mask = current->cpus_allowed;
 	cmd->val = 0;
 
-	set_cpus_allowed_ptr(current, cmd->mask);
-	do_drv_read(cmd);
-	set_cpus_allowed_ptr(current, &saved_mask);
+	work_on_cpu(cpumask_any(cmd->mask), do_drv_read, cmd);
 }
 
 static void drv_write(struct drv_cmd *cmd)
 {
-	cpumask_t saved_mask = current->cpus_allowed;
 	unsigned int i;
 
 	for_each_cpu(i, cmd->mask) {
-		set_cpus_allowed_ptr(current, cpumask_of(i));
-		do_drv_write(cmd);
+		work_on_cpu(i, do_drv_write, cmd);
 	}
-
-	set_cpus_allowed_ptr(current, &saved_mask);
-	return;
 }
 
 static u32 get_cur_val(const struct cpumask *mask)
@@ -367,7 +363,7 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
 	return freq;
 }
 
-static unsigned int check_freqs(const cpumask_t *mask, unsigned int freq,
+static unsigned int check_freqs(const struct cpumask *mask, unsigned int freq,
 				struct acpi_cpufreq_data *data)
 {
 	unsigned int cur_freq;

commit b758cdbee5da0b8fb7e34a68651e6ccc5310b48a
Author: Rusty Russell <rusty@rustcorp.com.au>
Date:   Thu Jan 15 16:29:16 2009 -0800

    work_on_cpu: Use our own workqueue.
    
    Impact: remove potential circular lock dependency with generic kevent workqueue
    
    Annoyingly, some places we want to use work_on_cpu are already in
    workqueues.  As per Ingo's suggestion, we create a different workqueue
    for work_on_cpu.
    
    Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
    Signed-off-by: Mike Travis <travis@sgi.com>

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index a35afdb..1f0c509 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -971,6 +971,8 @@ undo:
 }
 
 #ifdef CONFIG_SMP
+static struct workqueue_struct *work_on_cpu_wq __read_mostly;
+
 struct work_for_cpu {
 	struct work_struct work;
 	long (*fn)(void *);
@@ -1001,7 +1003,7 @@ long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg)
 	INIT_WORK(&wfc.work, do_work_for_cpu);
 	wfc.fn = fn;
 	wfc.arg = arg;
-	schedule_work_on(cpu, &wfc.work);
+	queue_work_on(cpu, work_on_cpu_wq, &wfc.work);
 	flush_work(&wfc.work);
 
 	return wfc.ret;
@@ -1019,4 +1021,8 @@ void __init init_workqueues(void)
 	hotcpu_notifier(workqueue_cpu_callback, 0);
 	keventd_wq = create_workqueue("events");
 	BUG_ON(!keventd_wq);
+#ifdef CONFIG_SMP
+	work_on_cpu_wq = create_workqueue("work_on_cpu");
+	BUG_ON(!work_on_cpu_wq);
+#endif
 }

commit 660130abaa2d26672b7670f88741e29e88552dc6
Author: Rusty Russell <rusty@rustcorp.com.au>
Date:   Thu Jan 15 16:29:16 2009 -0800

    cpumask: don't try to get_online_cpus() in work_on_cpu.
    
    Impact: remove potential circular lock dependency with cpu hotplug lock
    
    This has caused more problems than it solved, with a pile of cpu
    hotplug locking issues.
    
    Followup patches will get_online_cpus() in callers that need it, but
    if they don't do it they're no worse than before when they were using
    set_cpus_allowed without locking.
    
    Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
    Signed-off-by: Mike Travis <travis@sgi.com>

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 2f44583..a35afdb 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -991,8 +991,8 @@ static void do_work_for_cpu(struct work_struct *w)
  * @fn: the function to run
  * @arg: the function arg
  *
- * This will return -EINVAL in the cpu is not online, or the return value
- * of @fn otherwise.
+ * This will return the value @fn returns.
+ * It is up to the caller to ensure that the cpu doesn't go offline.
  */
 long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg)
 {
@@ -1001,14 +1001,8 @@ long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg)
 	INIT_WORK(&wfc.work, do_work_for_cpu);
 	wfc.fn = fn;
 	wfc.arg = arg;
-	get_online_cpus();
-	if (unlikely(!cpu_online(cpu)))
-		wfc.ret = -EINVAL;
-	else {
-		schedule_work_on(cpu, &wfc.work);
-		flush_work(&wfc.work);
-	}
-	put_online_cpus();
+	schedule_work_on(cpu, &wfc.work);
+	flush_work(&wfc.work);
 
 	return wfc.ret;
 }


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [PULL}: latest tip/cpus4096 changes
  2009-01-16  9:05 [PULL}: latest tip/cpus4096 changes Mike Travis
@ 2009-01-16  9:25 ` Ingo Molnar
  2009-01-16 17:53   ` Mike Travis
  2009-01-16  9:28 ` [PULL}: latest tip/cpus4096 changes Ingo Molnar
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 21+ messages in thread
From: Ingo Molnar @ 2009-01-16  9:25 UTC (permalink / raw)
  To: Mike Travis; +Cc: Rusty Russell, LKML

* Mike Travis <travis@sgi.com> wrote:

> 
> Hi Ingo,
> 
> Please pull the following 'fairly lightweight' changes for tip/cpus4096.

> --- a/arch/x86/kernel/apic.c
> +++ b/arch/x86/kernel/apic.c
> @@ -1562,8 +1562,69 @@ void __init init_apic_mappings(void)
>   * This initializes the IO-APIC and APIC hardware if this is
>   * a UP kernel.
>   */
> +
> +#if MAX_APICS < 256
>  int apic_version[MAX_APICS];
>  
> +#else
> +struct apic_version_info {
> +	unsigned int apicid;
> +	int version;
> +};
> +
> +struct apic_version_info _apic_version_info[CONFIG_NR_CPUS] __initdata;
> +struct apic_version_info *apic_version_info __refdata = _apic_version_info;
> +int nr_apic_version_info;
> +
> +/* can be called either during init or cpu hotplug add */
> +int __cpuinit add_apic_version(unsigned int apicid, int version)
> +{
> +	int i;
> +
> +	for (i = 0; i < nr_apic_version_info; i++)
> +		if (apicid == apic_version_info[i].apicid) {
> +			apic_version_info[i].version = version;
> +			return 0;
> +		}
> +
> +	if (likely(nr_apic_version_info < nr_cpu_ids)) {
> +		i = nr_apic_version_info++;
> +		apic_version_info[i].apicid = apicid;
> +		apic_version_info[i].version = version;
> +		return 0;
> +	}
> +	return -ENOMEM;
> +}
> +
> +/* lookup version for apic, usually first one (boot cpu) */
> +int get_apic_version(unsigned int apicid)
> +{
> +	int i;
> +
> +	for (i = 0; i < nr_apic_version_info; i++)
> +		if (apicid == apic_version_info[i].apicid)
> +			return apic_version_info[i].version;
> +
> +	return 0;
> +}
> +
> +/* allocate permanent apic_version structure */
> +void __init cleanup_apic_version(void)
> +{
> +	size_t size;
> +	int i;
> +
> +	/* allows disabled_cpus to be brought online */
> +	size = nr_cpu_ids * sizeof(*apic_version_info);
> +	apic_version_info = alloc_bootmem(size);
> +
> +	/* copy version info from initial array to permanent array */
> +	for (i = 0; i < nr_apic_version_info; i++)
> +		apic_version_info[i] = _apic_version_info[i];
> +}
> +
> +#endif /* MAX_APICS >= 256 */

this is all but 'lightweight'. A 'lightweight' patch is that which either 
is less than say a dozen lines or one that changes a provably trivial 
aspect of the kernel. This patch goes to the guts of the APIC code and is 
not only complex but also very ugly:

 - it's riddled with #ifdefs

 - it splits the testing space between <256 apics and large system - guess 
   which one will get 99% of the testing?

And why is it all done, a hundred lines of very ugly code in a fragile 
area of the x86 architecture? Because you want to shrink this array:

   int apic_version[MAX_APICS];

which can take 128K of RAM if MAX_APICs is 32K ...

Firstly, if you want to shrink the APIC version array, you might want to 
shrink it the most obvious way: by changing the 'int' to 'char' via a 
oneliner patch. APIC version values tend to be significantly below 256. 
That gives 75% of space savings already.

Secondly, you might even observe the fact that the set of systems with 
assymetric APIC versions approximates that of the nil set. Assymetric SMP 
never took off and probably never will - vendors have hard enough time 
getting the very same type of CPU working across the board.

And _even_ if there existed such systems, we already have a lot of other 
places where symmetry is assumed and _material_ APIC version assymetry to 
the boot CPU's APIC version will very likely not work anyway, on the 
physical signalling level.

So the simplest approach might as well be to turn apic_version into a 
single __read_mostly boot_apic_version variable. Maybe also a 
WARN_ONCE("whee") message if an APIC is seen with a version different from 
that of the boot CPU.

_Please_ think these changes through because these kinds of mindless 
complication patches are not acceptable at all.

	Ingo

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PULL}: latest tip/cpus4096 changes
  2009-01-16  9:25 ` Ingo Molnar
@ 2009-01-16 17:53   ` Mike Travis
  2009-01-16 22:30     ` Ingo Molnar
  0 siblings, 1 reply; 21+ messages in thread
From: Mike Travis @ 2009-01-16 17:53 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Rusty Russell, LKML

Ingo Molnar wrote:
> * Mike Travis <travis@sgi.com> wrote:
> 
>> Hi Ingo,
>>
>> Please pull the following 'fairly lightweight' changes for tip/cpus4096.
> 
>> --- a/arch/x86/kernel/apic.c
>> +++ b/arch/x86/kernel/apic.c
>> @@ -1562,8 +1562,69 @@ void __init init_apic_mappings(void)
>>   * This initializes the IO-APIC and APIC hardware if this is
>>   * a UP kernel.
>>   */
>> +
>> +#if MAX_APICS < 256
>>  int apic_version[MAX_APICS];
>>  
>> +#else
>> +struct apic_version_info {
>> +	unsigned int apicid;
>> +	int version;
>> +};
>> +
>> +struct apic_version_info _apic_version_info[CONFIG_NR_CPUS] __initdata;
>> +struct apic_version_info *apic_version_info __refdata = _apic_version_info;
>> +int nr_apic_version_info;
>> +
>> +/* can be called either during init or cpu hotplug add */
>> +int __cpuinit add_apic_version(unsigned int apicid, int version)
>> +{
>> +	int i;
>> +
>> +	for (i = 0; i < nr_apic_version_info; i++)
>> +		if (apicid == apic_version_info[i].apicid) {
>> +			apic_version_info[i].version = version;
>> +			return 0;
>> +		}
>> +
>> +	if (likely(nr_apic_version_info < nr_cpu_ids)) {
>> +		i = nr_apic_version_info++;
>> +		apic_version_info[i].apicid = apicid;
>> +		apic_version_info[i].version = version;
>> +		return 0;
>> +	}
>> +	return -ENOMEM;
>> +}
>> +
>> +/* lookup version for apic, usually first one (boot cpu) */
>> +int get_apic_version(unsigned int apicid)
>> +{
>> +	int i;
>> +
>> +	for (i = 0; i < nr_apic_version_info; i++)
>> +		if (apicid == apic_version_info[i].apicid)
>> +			return apic_version_info[i].version;
>> +
>> +	return 0;
>> +}
>> +
>> +/* allocate permanent apic_version structure */
>> +void __init cleanup_apic_version(void)
>> +{
>> +	size_t size;
>> +	int i;
>> +
>> +	/* allows disabled_cpus to be brought online */
>> +	size = nr_cpu_ids * sizeof(*apic_version_info);
>> +	apic_version_info = alloc_bootmem(size);
>> +
>> +	/* copy version info from initial array to permanent array */
>> +	for (i = 0; i < nr_apic_version_info; i++)
>> +		apic_version_info[i] = _apic_version_info[i];
>> +}
>> +
>> +#endif /* MAX_APICS >= 256 */
> 
> this is all but 'lightweight'. A 'lightweight' patch is that which either 
> is less than say a dozen lines or one that changes a provably trivial 
> aspect of the kernel. This patch goes to the guts of the APIC code and is 
> not only complex but also very ugly:
> 
>  - it's riddled with #ifdefs
> 
>  - it splits the testing space between <256 apics and large system - guess 
>    which one will get 99% of the testing?
> 
> And why is it all done, a hundred lines of very ugly code in a fragile 
> area of the x86 architecture? Because you want to shrink this array:
> 
>    int apic_version[MAX_APICS];
> 
> which can take 128K of RAM if MAX_APICs is 32K ...
> 
> Firstly, if you want to shrink the APIC version array, you might want to 
> shrink it the most obvious way: by changing the 'int' to 'char' via a 
> oneliner patch. APIC version values tend to be significantly below 256. 
> That gives 75% of space savings already.
> 
> Secondly, you might even observe the fact that the set of systems with 
> assymetric APIC versions approximates that of the nil set. Assymetric SMP 
> never took off and probably never will - vendors have hard enough time 
> getting the very same type of CPU working across the board.
> 
> And _even_ if there existed such systems, we already have a lot of other 
> places where symmetry is assumed and _material_ APIC version assymetry to 
> the boot CPU's APIC version will very likely not work anyway, on the 
> physical signalling level.
> 
> So the simplest approach might as well be to turn apic_version into a 
> single __read_mostly boot_apic_version variable. Maybe also a 
> WARN_ONCE("whee") message if an APIC is seen with a version different from 
> that of the boot CPU.
> 
> _Please_ think these changes through because these kinds of mindless 
> complication patches are not acceptable at all.
> 
> 	Ingo

Hi Ingo,

I did notice that the versions all came up the same, and that the checks
were very specific.  I was trying to be as transparent and unintrusive as
possible.  Since there's so few calls, I though this was a good approach
but apparently I was wrong.

I like the idea of collapsing the array down to one and checking to
see if all apic's have the same version, but is this really the case?
Must all apics be the same?  

Thanks,
Mike

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PULL}: latest tip/cpus4096 changes
  2009-01-16 17:53   ` Mike Travis
@ 2009-01-16 22:30     ` Ingo Molnar
  2009-01-16 23:22       ` [PATCH] x86: put trigger in to detect mismatched apic versions Mike Travis
  0 siblings, 1 reply; 21+ messages in thread
From: Ingo Molnar @ 2009-01-16 22:30 UTC (permalink / raw)
  To: Mike Travis; +Cc: Rusty Russell, LKML

* Mike Travis <travis@sgi.com> wrote:

> Hi Ingo,
> 
> I did notice that the versions all came up the same, and that the checks 
> were very specific.  I was trying to be as transparent and unintrusive 
> as possible.  Since there's so few calls, I though this was a good 
> approach but apparently I was wrong.
> 
> I like the idea of collapsing the array down to one and checking to see 
> if all apic's have the same version, but is this really the case? Must 
> all apics be the same?

Could you please send a patch that doesnt change the code, only adds a 
'boot APIC version' kind of variable as an apic_version __read_mostly 
variable and does a WARN_ONCE() if that mismatches? We can then stick that 
into -tip and see whether it triggers.

The max array size is ~128K, right? So if the WARN_ONCE() does not 
trigger, we can just drop the array and use the central apic_version 
variable ...

And even if it _does_ trigger, the version incompatibilities between APIC 
protocols are very rare. They only happen across wildly different CPU 
architectures like when going from very old external apics to integrated 
apics, or going from apics to x-apics. We wont see any mixing across those 
boundaries.

	Ingo

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH] x86: put trigger in to detect mismatched apic versions.
  2009-01-16 22:30     ` Ingo Molnar
@ 2009-01-16 23:22       ` Mike Travis
  2009-01-17  0:06         ` Mike Travis
                           ` (2 more replies)
  0 siblings, 3 replies; 21+ messages in thread
From: Mike Travis @ 2009-01-16 23:22 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: LKML

Ingo Molnar wrote:
> * Mike Travis <travis@sgi.com> wrote:
> 
>> Hi Ingo,
>>
>> I did notice that the versions all came up the same, and that the checks 
>> were very specific.  I was trying to be as transparent and unintrusive 
>> as possible.  Since there's so few calls, I though this was a good 
>> approach but apparently I was wrong.
>>
>> I like the idea of collapsing the array down to one and checking to see 
>> if all apic's have the same version, but is this really the case? Must 
>> all apics be the same?
> 
> Could you please send a patch that doesnt change the code, only adds a 
> 'boot APIC version' kind of variable as an apic_version __read_mostly 
> variable and does a WARN_ONCE() if that mismatches? We can then stick that 
> into -tip and see whether it triggers.
> 
> The max array size is ~128K, right? So if the WARN_ONCE() does not 
> trigger, we can just drop the array and use the central apic_version 
> variable ...
> 
> And even if it _does_ trigger, the version incompatibilities between APIC 
> protocols are very rare. They only happen across wildly different CPU 
> architectures like when going from very old external apics to integrated 
> apics, or going from apics to x-apics. We wont see any mixing across those 
> boundaries.
> 
> 	Ingo

Btw, I checked with our UV architect and the problem is that we need a
16 bit apic id which is what caused the MAX_APICS to be bumped to 32k.
The lower 8 bits are the normal apic id, and the upper bit relate to
the node.  This means cpu 0 on node 0 has the same apic id as cpu 0 on
node 1, etc.  I also asked about whether we could rely on always having
the same apic version, and the answer was yes, though it's really only
relevant between the cpus on a node.

Thanks,
Mike
--- 
Subject: x86: put trigger in to detect mismatched apic versions.

Fire off one message if two apic's discovered with different
apic versions.

Signed-off-by: Mike Travis <travis@sgi.com>
---
 arch/x86/kernel/apic.c |    5 +++++
 1 file changed, 5 insertions(+)

--- linux-2.6-for-ingo.orig/arch/x86/kernel/apic.c
+++ linux-2.6-for-ingo/arch/x86/kernel/apic.c
@@ -1833,6 +1833,11 @@ void __cpuinit generic_processor_info(in
 	num_processors++;
 	cpu = cpumask_next_zero(-1, cpu_present_mask);
 
+	if (version != apic_version[boot_cpu_physical_apicid])
+		WARN_ONCE(1,
+			"ACPI: apic version mismatch, bootcpu: %x cpu %d: %x\n",
+			apic_version[boot_cpu_physical_apicid], cpu, version);
+
 	physid_set(apicid, phys_cpu_present_map);
 	if (apicid == boot_cpu_physical_apicid) {
 		/*


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] x86: put trigger in to detect mismatched apic versions.
  2009-01-16 23:22       ` [PATCH] x86: put trigger in to detect mismatched apic versions Mike Travis
@ 2009-01-17  0:06         ` Mike Travis
  2009-01-17  3:07         ` Jack Steiner
  2009-01-18 19:04         ` Ingo Molnar
  2 siblings, 0 replies; 21+ messages in thread
From: Mike Travis @ 2009-01-17  0:06 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: LKML

Mike Travis wrote:
> Ingo Molnar wrote:
>> * Mike Travis <travis@sgi.com> wrote:
>>
>>> Hi Ingo,
>>>
>>> I did notice that the versions all came up the same, and that the checks 
>>> were very specific.  I was trying to be as transparent and unintrusive 
>>> as possible.  Since there's so few calls, I though this was a good 
>>> approach but apparently I was wrong.
>>>
>>> I like the idea of collapsing the array down to one and checking to see 
>>> if all apic's have the same version, but is this really the case? Must 
>>> all apics be the same?
>> Could you please send a patch that doesnt change the code, only adds a 
>> 'boot APIC version' kind of variable as an apic_version __read_mostly 
>> variable and does a WARN_ONCE() if that mismatches? We can then stick that 
>> into -tip and see whether it triggers.
>>
>> The max array size is ~128K, right? So if the WARN_ONCE() does not 
>> trigger, we can just drop the array and use the central apic_version 
>> variable ...
>>
>> And even if it _does_ trigger, the version incompatibilities between APIC 
>> protocols are very rare. They only happen across wildly different CPU 
>> architectures like when going from very old external apics to integrated 
>> apics, or going from apics to x-apics. We wont see any mixing across those 
>> boundaries.
>>
>> 	Ingo
> 
> Btw, I checked with our UV architect and the problem is that we need a
> 16 bit apic id which is what caused the MAX_APICS to be bumped to 32k.
> The lower 8 bits are the normal apic id, and the upper bit relate to
> the node.  This means cpu 0 on node 0 has the same apic id as cpu 0 on
> node 1, etc.  I also asked about whether we could rely on always having
> the same apic version, and the answer was yes, though it's really only
> relevant between the cpus on a node.
> 
> Thanks,
> Mike
> --- 
> Subject: x86: put trigger in to detect mismatched apic versions.
> 
> Fire off one message if two apic's discovered with different
> apic versions.
> 
> Signed-off-by: Mike Travis <travis@sgi.com>
> ---
>  arch/x86/kernel/apic.c |    5 +++++
>  1 file changed, 5 insertions(+)
> 
> --- linux-2.6-for-ingo.orig/arch/x86/kernel/apic.c
> +++ linux-2.6-for-ingo/arch/x86/kernel/apic.c
> @@ -1833,6 +1833,11 @@ void __cpuinit generic_processor_info(in
>  	num_processors++;
>  	cpu = cpumask_next_zero(-1, cpu_present_mask);
>  
> +	if (version != apic_version[boot_cpu_physical_apicid])
> +		WARN_ONCE(1,
> +			"ACPI: apic version mismatch, bootcpu: %x cpu %d: %x\n",
> +			apic_version[boot_cpu_physical_apicid], cpu, version);
> +
>  	physid_set(apicid, phys_cpu_present_map);
>  	if (apicid == boot_cpu_physical_apicid) {
>  		/*

I've pushed this one to my-for-you git tree as well.

(Seems awful quiet ... is everyone heading to Australia? ;-)

Thanks,
Mike
-- -

The following changes since commit 6eb714c63ed5bd663627f7dda8c4d5258f3b64ef:
  Mike Travis (1):
        cpufreq: use work_on_cpu in acpi-cpufreq.c for drv_read and drv_write

are available in the git repository at:

  ssh://master.kernel.org/pub/scm/linux/kernel/git/travis/linux-2.6-cpus4096-for-ingo.git master

Mike Travis (1):
      x86: put trigger in to detect mismatched apic versions.

 arch/x86/kernel/apic.c |    5 +++++
 1 files changed, 5 insertions(+), 0 deletions(-)



^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] x86: put trigger in to detect mismatched apic versions.
  2009-01-16 23:22       ` [PATCH] x86: put trigger in to detect mismatched apic versions Mike Travis
  2009-01-17  0:06         ` Mike Travis
@ 2009-01-17  3:07         ` Jack Steiner
  2009-01-18 19:08           ` Ingo Molnar
  2009-01-18 19:04         ` Ingo Molnar
  2 siblings, 1 reply; 21+ messages in thread
From: Jack Steiner @ 2009-01-17  3:07 UTC (permalink / raw)
  To: Mike Travis; +Cc: Ingo Molnar, LKML

On Fri, Jan 16, 2009 at 03:22:16PM -0800, Mike Travis wrote:
> Ingo Molnar wrote:
> > * Mike Travis <travis@sgi.com> wrote:
> > 
> >> Hi Ingo,
> >>
> >> I did notice that the versions all came up the same, and that the checks 
> >> were very specific.  I was trying to be as transparent and unintrusive 
> >> as possible.  Since there's so few calls, I though this was a good 
> >> approach but apparently I was wrong.
> >>
> >> I like the idea of collapsing the array down to one and checking to see 
> >> if all apic's have the same version, but is this really the case? Must 
> >> all apics be the same?
> > 
> > Could you please send a patch that doesnt change the code, only adds a 
> > 'boot APIC version' kind of variable as an apic_version __read_mostly 
> > variable and does a WARN_ONCE() if that mismatches? We can then stick that 
> > into -tip and see whether it triggers.
> > 
> > The max array size is ~128K, right? So if the WARN_ONCE() does not 
> > trigger, we can just drop the array and use the central apic_version 
> > variable ...
> > 
> > And even if it _does_ trigger, the version incompatibilities between APIC 
> > protocols are very rare. They only happen across wildly different CPU 
> > architectures like when going from very old external apics to integrated 
> > apics, or going from apics to x-apics. We wont see any mixing across those 
> > boundaries.
> > 
> > 	Ingo
> 
> Btw, I checked with our UV architect and the problem is that we need a
> 16 bit apic id which is what caused the MAX_APICS to be bumped to 32k.
> The lower 8 bits are the normal apic id, and the upper bit relate to
> the node.  This means cpu 0 on node 0 has the same apic id as cpu 0 on
> node 1, etc.  I also asked about whether we could rely on always having

Not strictly true. The apicids in the ACPI tables are always globally unique
across the entire system. Because of the size of UV systems, UV needs 16 bit
apicids. This fits in the ACPI apicid id/eid fields.

The actual processor apicid register is unfortunately only 11 bits and there are
some restrictions on the actual values loaded into the apicid register.

If we can put unique ids into the apicid register, we do. If we
can't, the function that reads the apicid will automatically supply the rest
of the bits.  Most of the kernel is unaware that the processor apicid
register may have only a subset of the bits that are in the ACPI tables.


--- jack

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] x86: put trigger in to detect mismatched apic versions.
  2009-01-17  3:07         ` Jack Steiner
@ 2009-01-18 19:08           ` Ingo Molnar
  2009-01-18 21:25             ` Jack Steiner
  0 siblings, 1 reply; 21+ messages in thread
From: Ingo Molnar @ 2009-01-18 19:08 UTC (permalink / raw)
  To: Jack Steiner
  Cc: Mike Travis, LKML, H. Peter Anvin, Thomas Gleixner, Yinghai Lu,
	Maciej W. Rozycki


* Jack Steiner <steiner@sgi.com> wrote:

> > Btw, I checked with our UV architect and the problem is that we need a 
> > 16 bit apic id which is what caused the MAX_APICS to be bumped to 32k. 
> > The lower 8 bits are the normal apic id, and the upper bit relate to 
> > the node.  This means cpu 0 on node 0 has the same apic id as cpu 0 on 
> > node 1, etc.  I also asked about whether we could rely on always 
> > having
> 
> Not strictly true. The apicids in the ACPI tables are always globally 
> unique across the entire system. Because of the size of UV systems, UV 
> needs 16 bit apicids. This fits in the ACPI apicid id/eid fields.
> 
> The actual processor apicid register is unfortunately only 11 bits and 
> there are some restrictions on the actual values loaded into the apicid 
> register.
> 
> If we can put unique ids into the apicid register, we do. If we can't, 
> the function that reads the apicid will automatically supply the rest of 
> the bits.  Most of the kernel is unaware that the processor apicid 
> register may have only a subset of the bits that are in the ACPI tables.

apicid remapping is something we need/want, so we cannot remove that 
array. But it would be nice to offload such properties to the percpu area 
instead - is there any reason why that is hard? The local apic is attached 
to a CPU in any case. Is there some early init reason that complicates 
this?

	Ingo

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] x86: put trigger in to detect mismatched apic versions.
  2009-01-18 19:08           ` Ingo Molnar
@ 2009-01-18 21:25             ` Jack Steiner
  2009-01-19 17:08               ` Mike Travis
  0 siblings, 1 reply; 21+ messages in thread
From: Jack Steiner @ 2009-01-18 21:25 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Mike Travis, LKML, H. Peter Anvin, Thomas Gleixner, Yinghai Lu,
	Maciej W. Rozycki

On Sun, Jan 18, 2009 at 08:08:49PM +0100, Ingo Molnar wrote:
> 
> * Jack Steiner <steiner@sgi.com> wrote:
> 
> > > Btw, I checked with our UV architect and the problem is that we need a 
> > > 16 bit apic id which is what caused the MAX_APICS to be bumped to 32k. 
> > > The lower 8 bits are the normal apic id, and the upper bit relate to 
> > > the node.  This means cpu 0 on node 0 has the same apic id as cpu 0 on 
> > > node 1, etc.  I also asked about whether we could rely on always 
> > > having
> > 
> > Not strictly true. The apicids in the ACPI tables are always globally 
> > unique across the entire system. Because of the size of UV systems, UV 
> > needs 16 bit apicids. This fits in the ACPI apicid id/eid fields.
> > 
> > The actual processor apicid register is unfortunately only 11 bits and 
> > there are some restrictions on the actual values loaded into the apicid 
> > register.
> > 
> > If we can put unique ids into the apicid register, we do. If we can't, 
> > the function that reads the apicid will automatically supply the rest of 
> > the bits.  Most of the kernel is unaware that the processor apicid 
> > register may have only a subset of the bits that are in the ACPI tables.
> 
> apicid remapping is something we need/want, so we cannot remove that 
> array. But it would be nice to offload such properties to the percpu area 
> instead - is there any reason why that is hard? The local apic is attached 
> to a CPU in any case. Is there some early init reason that complicates 
> this?

I can't think of any reason why it could not be moved into
the percpu data area. Mike???

--- jack

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] x86: put trigger in to detect mismatched apic versions.
  2009-01-18 21:25             ` Jack Steiner
@ 2009-01-19 17:08               ` Mike Travis
  0 siblings, 0 replies; 21+ messages in thread
From: Mike Travis @ 2009-01-19 17:08 UTC (permalink / raw)
  To: Jack Steiner
  Cc: Ingo Molnar, LKML, H. Peter Anvin, Thomas Gleixner, Yinghai Lu,
	Maciej W. Rozycki

Jack Steiner wrote:
> On Sun, Jan 18, 2009 at 08:08:49PM +0100, Ingo Molnar wrote:
>> * Jack Steiner <steiner@sgi.com> wrote:
>>
>>>> Btw, I checked with our UV architect and the problem is that we need a 
>>>> 16 bit apic id which is what caused the MAX_APICS to be bumped to 32k. 
>>>> The lower 8 bits are the normal apic id, and the upper bit relate to 
>>>> the node.  This means cpu 0 on node 0 has the same apic id as cpu 0 on 
>>>> node 1, etc.  I also asked about whether we could rely on always 
>>>> having
>>> Not strictly true. The apicids in the ACPI tables are always globally 
>>> unique across the entire system. Because of the size of UV systems, UV 
>>> needs 16 bit apicids. This fits in the ACPI apicid id/eid fields.

Ahh, I did mean to say this applied to the lower 8 bits only.
>>>
>>> The actual processor apicid register is unfortunately only 11 bits and 
>>> there are some restrictions on the actual values loaded into the apicid 
>>> register.

This is for x2apics only, yes?

>>>
>>> If we can put unique ids into the apicid register, we do. If we can't, 
>>> the function that reads the apicid will automatically supply the rest of 
>>> the bits.  Most of the kernel is unaware that the processor apicid 
>>> register may have only a subset of the bits that are in the ACPI tables.

>> apicid remapping is something we need/want, so we cannot remove that 
>> array. But it would be nice to offload such properties to the percpu area 
>> instead - is there any reason why that is hard? The local apic is attached 
>> to a CPU in any case. Is there some early init reason that complicates 
>> this?
> 
> I can't think of any reason why it could not be moved into
> the percpu data area. Mike???

WHich array?  There are two now, the x86_cpu_to_apicid and x86_bios_cpu_apicid
that are in the percpu area?  (Maybe it's time to find out why there are two. ;-)

Thanks,
Mike




^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] x86: put trigger in to detect mismatched apic versions.
  2009-01-16 23:22       ` [PATCH] x86: put trigger in to detect mismatched apic versions Mike Travis
  2009-01-17  0:06         ` Mike Travis
  2009-01-17  3:07         ` Jack Steiner
@ 2009-01-18 19:04         ` Ingo Molnar
  2 siblings, 0 replies; 21+ messages in thread
From: Ingo Molnar @ 2009-01-18 19:04 UTC (permalink / raw)
  To: Mike Travis, Maciej W. Rozycki; +Cc: LKML, H. Peter Anvin, Thomas Gleixner


* Mike Travis <travis@sgi.com> wrote:

> Ingo Molnar wrote:
> > * Mike Travis <travis@sgi.com> wrote:
> > 
> >> Hi Ingo,
> >>
> >> I did notice that the versions all came up the same, and that the checks 
> >> were very specific.  I was trying to be as transparent and unintrusive 
> >> as possible.  Since there's so few calls, I though this was a good 
> >> approach but apparently I was wrong.
> >>
> >> I like the idea of collapsing the array down to one and checking to see 
> >> if all apic's have the same version, but is this really the case? Must 
> >> all apics be the same?
> > 
> > Could you please send a patch that doesnt change the code, only adds a 
> > 'boot APIC version' kind of variable as an apic_version __read_mostly 
> > variable and does a WARN_ONCE() if that mismatches? We can then stick that 
> > into -tip and see whether it triggers.
> > 
> > The max array size is ~128K, right? So if the WARN_ONCE() does not 
> > trigger, we can just drop the array and use the central apic_version 
> > variable ...
> > 
> > And even if it _does_ trigger, the version incompatibilities between APIC 
> > protocols are very rare. They only happen across wildly different CPU 
> > architectures like when going from very old external apics to integrated 
> > apics, or going from apics to x-apics. We wont see any mixing across those 
> > boundaries.
> > 
> > 	Ingo
> 
> Btw, I checked with our UV architect and the problem is that we need a 
> 16 bit apic id which is what caused the MAX_APICS to be bumped to 32k. 
> The lower 8 bits are the normal apic id, and the upper bit relate to the 
> node.  This means cpu 0 on node 0 has the same apic id as cpu 0 on node 
> 1, etc.  I also asked about whether we could rely on always having the 
> same apic version, and the answer was yes, though it's really only 
> relevant between the cpus on a node.
> 
> Thanks,
> Mike
> --- 
> Subject: x86: put trigger in to detect mismatched apic versions.
> 
> Fire off one message if two apic's discovered with different
> apic versions.
> 
> Signed-off-by: Mike Travis <travis@sgi.com>
> ---
>  arch/x86/kernel/apic.c |    5 +++++
>  1 file changed, 5 insertions(+)

Applied to tip/x86/urgent, thanks Mike! Note: for such changes we want a 
more comprehensive changelog that explains the motivation - i wrote one 
up, see the commit below.

	Ingo

------------->
>From a3b106db3578da7cf416aa0f85fc195a68a946f2 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Fri, 16 Jan 2009 15:22:16 -0800
Subject: [PATCH] x86: put trigger in to detect mismatched apic versions

Impact: add debug warning

Fire off one message if two apic's discovered with different
apic versions. (this code is only called during CPU init)

The goal of this is to pave the way of the removal of the apic_version[]
array. We dont expect any apic version incompatibilities in the x86
landscape of systems [if so we dont handle them very well and probably
never will handle deep apic version assymetries well], but it's prudent
to have a debug check for one kernel cycle nevertheless.

Signed-off-by: Mike Travis <travis@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic.c |    5 +++++
 1 files changed, 5 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index d19aa3a..4b6df24 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -1837,6 +1837,11 @@ void __cpuinit generic_processor_info(int apicid, int version)
 	num_processors++;
 	cpu = cpumask_next_zero(-1, cpu_present_mask);
 
+	if (version != apic_version[boot_cpu_physical_apicid])
+		WARN_ONCE(1,
+			"ACPI: apic version mismatch, bootcpu: %x cpu %d: %x\n",
+			apic_version[boot_cpu_physical_apicid], cpu, version);
+
 	physid_set(apicid, phys_cpu_present_map);
 	if (apicid == boot_cpu_physical_apicid) {
 		/*

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [PULL}: latest tip/cpus4096 changes
  2009-01-16  9:05 [PULL}: latest tip/cpus4096 changes Mike Travis
  2009-01-16  9:25 ` Ingo Molnar
@ 2009-01-16  9:28 ` Ingo Molnar
  2009-01-16 17:54   ` Mike Travis
  2009-01-16  9:34 ` Ingo Molnar
  2009-01-16 14:25 ` Ingo Molnar
  3 siblings, 1 reply; 21+ messages in thread
From: Ingo Molnar @ 2009-01-16  9:28 UTC (permalink / raw)
  To: Mike Travis; +Cc: Rusty Russell, LKML


* Mike Travis <travis@sgi.com> wrote:

> diff --git a/kernel/kgdb.c b/kernel/kgdb.c
> index e4dcfb2..21fde60 100644
> --- a/kernel/kgdb.c
> +++ b/kernel/kgdb.c
> @@ -72,7 +72,7 @@ struct kgdb_state {
>  static struct debuggerinfo_struct {
>  	void			*debuggerinfo;
>  	struct task_struct	*task;
> -} kgdb_info[NR_CPUS];
> +} *kgdb_info;
>  
>  /**
>   * kgdb_connected - Is a host GDB connected to us?
> @@ -1651,6 +1651,13 @@ int kgdb_register_io_module(struct kgdb_io *new_kgdb_io_ops)
>  		return -EBUSY;
>  	}
>  
> +	kgdb_info = kmalloc(nr_cpu_ids * sizeof(*kgdb_info), GFP_KERNEL);
> +	if (unlikely(!kgdb_info)) {
> +		spin_unlock(&kgdb_registration_lock);
> +		printk(KERN_ERR "kgdb: No memory for kgdb_info\n");
> +		return -ENOMEM;
> +	}
> +
>  	if (new_kgdb_io_ops->init) {
>  		err = new_kgdb_io_ops->init();
>  		if (err) {

Look how it continues:

                        spin_unlock(&kgdb_registration_lock);
                        return err;
                }
        }

See the memory leak? This is _trivially_ broken. When you add dynamic 
allocation to any codepath you _need_ to be careul and you need to check 
all interim paths of return.

Also, please submit kgdb patches via the KGDB maintainer:

 KGDB
 P:      Jason Wessel
 M:      jason.wessel@windriver.com
 L:      kgdb-bugreport@lists.sourceforge.net
 S:      Maintained

	Ingo

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PULL}: latest tip/cpus4096 changes
  2009-01-16  9:28 ` [PULL}: latest tip/cpus4096 changes Ingo Molnar
@ 2009-01-16 17:54   ` Mike Travis
  0 siblings, 0 replies; 21+ messages in thread
From: Mike Travis @ 2009-01-16 17:54 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Rusty Russell, LKML

Ingo Molnar wrote:
> * Mike Travis <travis@sgi.com> wrote:
> 
>> diff --git a/kernel/kgdb.c b/kernel/kgdb.c
>> index e4dcfb2..21fde60 100644
>> --- a/kernel/kgdb.c
>> +++ b/kernel/kgdb.c
>> @@ -72,7 +72,7 @@ struct kgdb_state {
>>  static struct debuggerinfo_struct {
>>  	void			*debuggerinfo;
>>  	struct task_struct	*task;
>> -} kgdb_info[NR_CPUS];
>> +} *kgdb_info;
>>  
>>  /**
>>   * kgdb_connected - Is a host GDB connected to us?
>> @@ -1651,6 +1651,13 @@ int kgdb_register_io_module(struct kgdb_io *new_kgdb_io_ops)
>>  		return -EBUSY;
>>  	}
>>  
>> +	kgdb_info = kmalloc(nr_cpu_ids * sizeof(*kgdb_info), GFP_KERNEL);
>> +	if (unlikely(!kgdb_info)) {
>> +		spin_unlock(&kgdb_registration_lock);
>> +		printk(KERN_ERR "kgdb: No memory for kgdb_info\n");
>> +		return -ENOMEM;
>> +	}
>> +
>>  	if (new_kgdb_io_ops->init) {
>>  		err = new_kgdb_io_ops->init();
>>  		if (err) {
> 
> Look how it continues:
> 
>                         spin_unlock(&kgdb_registration_lock);
>                         return err;
>                 }
>         }
> 
> See the memory leak? This is _trivially_ broken. When you add dynamic 
> allocation to any codepath you _need_ to be careul and you need to check 
> all interim paths of return.
> 
> Also, please submit kgdb patches via the KGDB maintainer:
> 
>  KGDB
>  P:      Jason Wessel
>  M:      jason.wessel@windriver.com
>  L:      kgdb-bugreport@lists.sourceforge.net
>  S:      Maintained
> 
> 	Ingo

Yes, you're right I did miss that.  And I'll send it to Jason.

Thanks,
MIke


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PULL}: latest tip/cpus4096 changes
  2009-01-16  9:05 [PULL}: latest tip/cpus4096 changes Mike Travis
  2009-01-16  9:25 ` Ingo Molnar
  2009-01-16  9:28 ` [PULL}: latest tip/cpus4096 changes Ingo Molnar
@ 2009-01-16  9:34 ` Ingo Molnar
  2009-01-16 17:08   ` Jeremy Fitzhardinge
  2009-01-16 17:55   ` Mike Travis
  2009-01-16 14:25 ` Ingo Molnar
  3 siblings, 2 replies; 21+ messages in thread
From: Ingo Molnar @ 2009-01-16  9:34 UTC (permalink / raw)
  To: Mike Travis; +Cc: Rusty Russell, LKML, Jeremy Fitzhardinge


* Mike Travis <travis@sgi.com> wrote:

> --- a/arch/x86/kernel/microcode_core.c
> +++ b/arch/x86/kernel/microcode_core.c
> @@ -104,7 +104,7 @@ static struct microcode_ops *microcode_ops;
>  /* no concurrent ->write()s are allowed on /dev/cpu/microcode */
>  static DEFINE_MUTEX(microcode_mutex);
>  
> -struct ucode_cpu_info ucode_cpu_info[NR_CPUS];
> +struct ucode_cpu_info *ucode_cpu_info;
>  EXPORT_SYMBOL_GPL(ucode_cpu_info);
>  
>  #ifdef CONFIG_MICROCODE_OLD_INTERFACE
> @@ -471,6 +471,13 @@ static int __init microcode_init(void)
>  {
>  	struct cpuinfo_x86 *c = &cpu_data(0);
>  	int error;
> +	size_t size = sizeof(*ucode_cpu_info) * nr_cpu_ids;
> +
> +	ucode_cpu_info = kmalloc(size, GFP_KERNEL);
> +	if (!ucode_cpu_info) {
> +		WARN(1, "CPU: cannot allocate microcode info structure\n");
> +		return -ENOMEM;
> +	}
>  
>  	if (c->x86_vendor == X86_VENDOR_INTEL)
>  		microcode_ops = init_intel_microcode();

look how this code continues:

        else if (c->x86_vendor == X86_VENDOR_AMD)
                microcode_ops = init_amd_microcode();

        if (!microcode_ops) {
                printk(KERN_ERR "microcode: no support for this CPU vendor\n");
                return -ENODEV;
        }

see the memory leak? Again, this patch too is trivially broken.

> commit beec9183a43f8a42f5b790326a3b120a3b513590
> Author: Mike Travis <travis@sgi.com>
> Date:   Fri Jan 16 00:22:33 2009 -0800
> 
>     xen: reduce static memory usage

this one looks good in a quick check but please send it to Jeremy first or 
get his Ack.

	Ingo

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PULL}: latest tip/cpus4096 changes
  2009-01-16  9:34 ` Ingo Molnar
@ 2009-01-16 17:08   ` Jeremy Fitzhardinge
  2009-01-16 19:55     ` Mike Travis
  2009-01-16 17:55   ` Mike Travis
  1 sibling, 1 reply; 21+ messages in thread
From: Jeremy Fitzhardinge @ 2009-01-16 17:08 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Mike Travis, Rusty Russell, LKML

Ingo Molnar wrote:
>> commit beec9183a43f8a42f5b790326a3b120a3b513590
>> Author: Mike Travis <travis@sgi.com>
>> Date:   Fri Jan 16 00:22:33 2009 -0800
>>
>>     xen: reduce static memory usage
>>     
>
> this one looks good in a quick check but please send it to Jeremy first or 
> get his Ack.
>   

Yeah, send me a copy (I can't find it if you already have).

Thanks,
    J

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PULL}: latest tip/cpus4096 changes
  2009-01-16 17:08   ` Jeremy Fitzhardinge
@ 2009-01-16 19:55     ` Mike Travis
  2009-01-16 21:15       ` Jeremy Fitzhardinge
  0 siblings, 1 reply; 21+ messages in thread
From: Mike Travis @ 2009-01-16 19:55 UTC (permalink / raw)
  To: Jeremy Fitzhardinge; +Cc: Ingo Molnar, Rusty Russell, LKML

Jeremy Fitzhardinge wrote:
> Ingo Molnar wrote:
>>> commit beec9183a43f8a42f5b790326a3b120a3b513590
>>> Author: Mike Travis <travis@sgi.com>
>>> Date:   Fri Jan 16 00:22:33 2009 -0800
>>>
>>>     xen: reduce static memory usage
>>>     
>>
>> this one looks good in a quick check but please send it to Jeremy
>> first or get his Ack.
>>   
> 
> Yeah, send me a copy (I can't find it if you already have).
> 
> Thanks,
>    J

Hi Jeremy,

Here tis... Christophe was kind enough to test it for me.  (I believe
he said you were on vacation, though he did mention that you had some
changes in the queue?)

Thanks!
Mike
--- 

Subject: xen: reduce static memory usage

Impact: reduce memory usage

By allocating the irq_info and irq_bindcount based
on nr_irqs instead of NR_IRQS, it will contain only
enough entries as needed by the running system.

This addresses this memory bump when NR_CPUS bumped
from 128 to 4096:

  17408   132096  +114688  +658%  irq_info(.bss)
  17408   132096  +114688  +658%  irq_bindcount(.bss)

This is only effective when CONFIG_SPARSE_IRQS=y.

Signed-off-by: Mike Travis <travis@sgi.com>
Tested-by: Christophe Saout <christophe@saout.de>
---
 drivers/xen/events.c |   10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

--- linux-2.6-for-ingo.orig/drivers/xen/events.c
+++ linux-2.6-for-ingo/drivers/xen/events.c
@@ -27,6 +27,7 @@
 #include <linux/module.h>
 #include <linux/string.h>
 #include <linux/bootmem.h>
+#include <linux/irqnr.h>
 
 #include <asm/ptrace.h>
 #include <asm/irq.h>
@@ -59,7 +60,7 @@ struct packed_irq
 	unsigned char type;
 };
 
-static struct packed_irq irq_info[NR_IRQS];
+static struct packed_irq *irq_info;
 
 /* Binding types. */
 enum {
@@ -87,7 +88,7 @@ static inline unsigned long *cpu_evtchn_
 static u8 cpu_evtchn[NR_EVENT_CHANNELS];
 
 /* Reference counts for bindings to IRQs. */
-static int irq_bindcount[NR_IRQS];
+static int *irq_bindcount;
 
 /* Xen will never allocate port zero for any purpose. */
 #define VALID_EVTCHN(chn)	((chn) != 0)
@@ -833,7 +834,10 @@ void __init xen_init_IRQ(void)
 	size_t size = nr_cpu_ids * sizeof(struct cpu_evtchn_s);
 
 	cpu_evtchn_mask_p = alloc_bootmem(size);
-	BUG_ON(cpu_evtchn_mask_p == NULL);
+
+	irq_info = alloc_bootmem(nr_irqs * sizeof(struct packed_irq));
+
+	irq_bindcount = alloc_bootmem(nr_irqs * sizeof(int));
 
 	init_evtchn_cpu_bindings();
 

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PULL}: latest tip/cpus4096 changes
  2009-01-16 19:55     ` Mike Travis
@ 2009-01-16 21:15       ` Jeremy Fitzhardinge
  0 siblings, 0 replies; 21+ messages in thread
From: Jeremy Fitzhardinge @ 2009-01-16 21:15 UTC (permalink / raw)
  To: Mike Travis; +Cc: Ingo Molnar, Rusty Russell, LKML

Mike Travis wrote:
>
> Here tis... Christophe was kind enough to test it for me.  (I believe
> he said you were on vacation, though he did mention that you had some
> changes in the queue?)
>   

That looks OK, but it will clash with some of the pending patches.  I'll 
fix it up.

    J

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PULL}: latest tip/cpus4096 changes
  2009-01-16  9:34 ` Ingo Molnar
  2009-01-16 17:08   ` Jeremy Fitzhardinge
@ 2009-01-16 17:55   ` Mike Travis
  1 sibling, 0 replies; 21+ messages in thread
From: Mike Travis @ 2009-01-16 17:55 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Rusty Russell, LKML, Jeremy Fitzhardinge

Ingo Molnar wrote:
> * Mike Travis <travis@sgi.com> wrote:
> 
>> --- a/arch/x86/kernel/microcode_core.c
>> +++ b/arch/x86/kernel/microcode_core.c
>> @@ -104,7 +104,7 @@ static struct microcode_ops *microcode_ops;
>>  /* no concurrent ->write()s are allowed on /dev/cpu/microcode */
>>  static DEFINE_MUTEX(microcode_mutex);
>>  
>> -struct ucode_cpu_info ucode_cpu_info[NR_CPUS];
>> +struct ucode_cpu_info *ucode_cpu_info;
>>  EXPORT_SYMBOL_GPL(ucode_cpu_info);
>>  
>>  #ifdef CONFIG_MICROCODE_OLD_INTERFACE
>> @@ -471,6 +471,13 @@ static int __init microcode_init(void)
>>  {
>>  	struct cpuinfo_x86 *c = &cpu_data(0);
>>  	int error;
>> +	size_t size = sizeof(*ucode_cpu_info) * nr_cpu_ids;
>> +
>> +	ucode_cpu_info = kmalloc(size, GFP_KERNEL);
>> +	if (!ucode_cpu_info) {
>> +		WARN(1, "CPU: cannot allocate microcode info structure\n");
>> +		return -ENOMEM;
>> +	}
>>  
>>  	if (c->x86_vendor == X86_VENDOR_INTEL)
>>  		microcode_ops = init_intel_microcode();
> 
> look how this code continues:
> 
>         else if (c->x86_vendor == X86_VENDOR_AMD)
>                 microcode_ops = init_amd_microcode();
> 
>         if (!microcode_ops) {
>                 printk(KERN_ERR "microcode: no support for this CPU vendor\n");
>                 return -ENODEV;
>         }
> 
> see the memory leak? Again, this patch too is trivially broken.
> 
>> commit beec9183a43f8a42f5b790326a3b120a3b513590
>> Author: Mike Travis <travis@sgi.com>
>> Date:   Fri Jan 16 00:22:33 2009 -0800
>>
>>     xen: reduce static memory usage
> 
> this one looks good in a quick check but please send it to Jeremy first or 
> get his Ack.
> 
> 	Ingo

Ok, thanks!


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PULL}: latest tip/cpus4096 changes
  2009-01-16  9:05 [PULL}: latest tip/cpus4096 changes Mike Travis
                   ` (2 preceding siblings ...)
  2009-01-16  9:34 ` Ingo Molnar
@ 2009-01-16 14:25 ` Ingo Molnar
  2009-01-16 18:03   ` Mike Travis
  3 siblings, 1 reply; 21+ messages in thread
From: Ingo Molnar @ 2009-01-16 14:25 UTC (permalink / raw)
  To: Mike Travis; +Cc: Rusty Russell, LKML


* Mike Travis <travis@sgi.com> wrote:

> commit b758cdbee5da0b8fb7e34a68651e6ccc5310b48a
> Author: Rusty Russell <rusty@rustcorp.com.au>
> Date:   Thu Jan 15 16:29:16 2009 -0800
> 
>     work_on_cpu: Use our own workqueue.
>     
>     Impact: remove potential circular lock dependency with generic kevent workqueue
>     
>     Annoyingly, some places we want to use work_on_cpu are already in
>     workqueues.  As per Ingo's suggestion, we create a different workqueue
>     for work_on_cpu.

btw., that's a nice fix - were you able to reproduce any of the lockdep 
asserts that i got in testing, and did those go away with this patch?

If yes then that's nice and makes work_on_cpu() a lot more usable IMO.

If not then that should generally be declared in the pull request: 
"beware, different approach than before but might still trigger lockdep 
warnings".

	Ingo

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PULL}: latest tip/cpus4096 changes
  2009-01-16 14:25 ` Ingo Molnar
@ 2009-01-16 18:03   ` Mike Travis
  2009-01-16 22:32     ` Ingo Molnar
  0 siblings, 1 reply; 21+ messages in thread
From: Mike Travis @ 2009-01-16 18:03 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Rusty Russell, LKML

Ingo Molnar wrote:
> * Mike Travis <travis@sgi.com> wrote:
> 
>> commit b758cdbee5da0b8fb7e34a68651e6ccc5310b48a
>> Author: Rusty Russell <rusty@rustcorp.com.au>
>> Date:   Thu Jan 15 16:29:16 2009 -0800
>>
>>     work_on_cpu: Use our own workqueue.
>>     
>>     Impact: remove potential circular lock dependency with generic kevent workqueue
>>     
>>     Annoyingly, some places we want to use work_on_cpu are already in
>>     workqueues.  As per Ingo's suggestion, we create a different workqueue
>>     for work_on_cpu.
> 
> btw., that's a nice fix - were you able to reproduce any of the lockdep 
> asserts that i got in testing, and did those go away with this patch?
> 
> If yes then that's nice and makes work_on_cpu() a lot more usable IMO.
> 
> If not then that should generally be declared in the pull request: 
> "beware, different approach than before but might still trigger lockdep 
> warnings"
> 
> 	Ingo

Hi,

I've been trying all sorts of overlapping testing and I've not gotten the
lockdep warnings any more.  (I do occasionally get that debug object warning
that I mentioned to Thomas, though that comes and goes irregardless of any
patches.)  The two fixes that Rusty did (lose the get_online_cpus() and use
a separate work queue) seems to have relieved work_on_cpu of it's primary
problems, and I've not found any new ones to replace them yet.  ;-)

So I'll "un-push" the entire patchset, and then re-push the x86 only ones,
and send the others to their respective maintainers after fixing the problems
you mentioned.  (Posting patches to the list was way more fun, and less
"committable". ;-)

Thanks,
Mike

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PULL}: latest tip/cpus4096 changes
  2009-01-16 18:03   ` Mike Travis
@ 2009-01-16 22:32     ` Ingo Molnar
  0 siblings, 0 replies; 21+ messages in thread
From: Ingo Molnar @ 2009-01-16 22:32 UTC (permalink / raw)
  To: Mike Travis; +Cc: Rusty Russell, LKML


* Mike Travis <travis@sgi.com> wrote:

> Ingo Molnar wrote:
> > * Mike Travis <travis@sgi.com> wrote:
> > 
> >> commit b758cdbee5da0b8fb7e34a68651e6ccc5310b48a
> >> Author: Rusty Russell <rusty@rustcorp.com.au>
> >> Date:   Thu Jan 15 16:29:16 2009 -0800
> >>
> >>     work_on_cpu: Use our own workqueue.
> >>     
> >>     Impact: remove potential circular lock dependency with generic kevent workqueue
> >>     
> >>     Annoyingly, some places we want to use work_on_cpu are already in
> >>     workqueues.  As per Ingo's suggestion, we create a different workqueue
> >>     for work_on_cpu.
> > 
> > btw., that's a nice fix - were you able to reproduce any of the lockdep 
> > asserts that i got in testing, and did those go away with this patch?
> > 
> > If yes then that's nice and makes work_on_cpu() a lot more usable IMO.
> > 
> > If not then that should generally be declared in the pull request: 
> > "beware, different approach than before but might still trigger lockdep 
> > warnings"
> > 
> > 	Ingo
> 
> Hi,
> 
> I've been trying all sorts of overlapping testing and I've not gotten the
> lockdep warnings any more.  (I do occasionally get that debug object warning
> that I mentioned to Thomas, though that comes and goes irregardless of any
> patches.) [...]

That hpet warning is fixed in tip/timers/urgent [which you wont have if 
you try pure tip/cpus4096].

> [...]  The two fixes that Rusty did (lose the get_online_cpus() and use 
> a separate work queue) seems to have relieved work_on_cpu of it's 
> primary problems, and I've not found any new ones to replace them yet.  
> ;-)
> 
> So I'll "un-push" the entire patchset, and then re-push the x86 only 
> ones, and send the others to their respective maintainers after fixing 
> the problems you mentioned.  (Posting patches to the list was way more 
> fun, and less "committable". ;-)

Sounds good.

	Ingo

^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2009-01-19 17:08 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-01-16  9:05 [PULL}: latest tip/cpus4096 changes Mike Travis
2009-01-16  9:25 ` Ingo Molnar
2009-01-16 17:53   ` Mike Travis
2009-01-16 22:30     ` Ingo Molnar
2009-01-16 23:22       ` [PATCH] x86: put trigger in to detect mismatched apic versions Mike Travis
2009-01-17  0:06         ` Mike Travis
2009-01-17  3:07         ` Jack Steiner
2009-01-18 19:08           ` Ingo Molnar
2009-01-18 21:25             ` Jack Steiner
2009-01-19 17:08               ` Mike Travis
2009-01-18 19:04         ` Ingo Molnar
2009-01-16  9:28 ` [PULL}: latest tip/cpus4096 changes Ingo Molnar
2009-01-16 17:54   ` Mike Travis
2009-01-16  9:34 ` Ingo Molnar
2009-01-16 17:08   ` Jeremy Fitzhardinge
2009-01-16 19:55     ` Mike Travis
2009-01-16 21:15       ` Jeremy Fitzhardinge
2009-01-16 17:55   ` Mike Travis
2009-01-16 14:25 ` Ingo Molnar
2009-01-16 18:03   ` Mike Travis
2009-01-16 22:32     ` Ingo Molnar

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox