linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: shaohui.zheng@intel.com
To: akpm@linux-foundation.org, linux-mm@kvack.org
Cc: linux-kernel@vger.kernel.org, haicheng.li@linux.intel.com,
	lethal@linux-sh.org, ak@linux.intel.com,
	shaohui.zheng@linux.intel.com, Ingo Molnar <mingo@elte.hu>,
	Len Brown <len.brown@intel.com>, Yinghai Lu <Yinghai.Lu@Sun.COM>,
	Shaohui Zheng <shaohui.zheng@intel.com>,
	Haicheng Li <haicheng.li@intel.com>
Subject: [5/8,v3] NUMA Hotplug Emulator: support cpu probe/release in x86
Date: Wed, 17 Nov 2010 10:08:04 +0800	[thread overview]
Message-ID: <20101117021000.776651300@intel.com> (raw)
In-Reply-To: 20101117020759.016741414@intel.com

[-- Attachment #1: 005-hotplug-emulator-x86-support-cpu-probe-release-in-x86.patch --]
[-- Type: text/plain, Size: 10923 bytes --]

From: Shaohui Zheng <shaohui.zheng@intel.com>

Add a cpu probe/release interface under sysfs for x86. Users can use this
interface to emulate the cpu hot-add process; it is intended for cpu hotplug
testing. Add a kernel option, CONFIG_ARCH_CPU_PROBE_RELEASE, for this
feature.

This interface provides a mechanism to emulate cpu hotplug purely in
software, which makes cpu hotplug automation and stress testing
possible.

Usage:
*) Reserve CPUs through a grub (kernel command line) parameter like:
	maxcpus=4

The remaining CPUs will not be initialized.

*) Probe CPU
Use the probe interface to hot-add a reserved CPU to node nid:
	echo nid > /sys/devices/system/cpu/probe

*) Release a CPU
	echo cpu > /sys/devices/system/cpu/release

A reserved CPU will be hot-added to the specified node:
1) nid == 0: the CPU will be added to the real node that the CPU
should be in.
2) nid != 0: the CPU will be added to node nid, even if it is a fake node.

CC: Ingo Molnar <mingo@elte.hu>
CC: Len Brown <len.brown@intel.com>
CC: Yinghai Lu <Yinghai.Lu@Sun.COM>
Signed-off-by: Shaohui Zheng <shaohui.zheng@intel.com>
Signed-off-by: Haicheng Li <haicheng.li@intel.com>
---
Index: linux-hpe4/arch/x86/kernel/acpi/boot.c
===================================================================
--- linux-hpe4.orig/arch/x86/kernel/acpi/boot.c	2010-11-17 09:00:59.742608402 +0800
+++ linux-hpe4/arch/x86/kernel/acpi/boot.c	2010-11-17 09:01:10.202837209 +0800
@@ -647,8 +647,44 @@
 }
 EXPORT_SYMBOL(acpi_map_lsapic);
 
+#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
+static void acpi_map_cpu2node_emu(int cpu, int physid, int nid)
+{
+#ifdef CONFIG_ACPI_NUMA
+#ifdef CONFIG_X86_64
+	apicid_to_node[physid] = nid;
+	numa_set_node(cpu, nid);
+#else /* CONFIG_X86_32 */
+	apicid_2_node[physid] = nid;
+	cpu_to_node_map[cpu] = nid;
+#endif
+#endif
+}
+
+static u16 cpu_to_apicid_saved[CONFIG_NR_CPUS];
+int __ref acpi_map_lsapic_emu(int pcpu, int nid)
+{
+	/* backup cpu apicid to array cpu_to_apicid_saved */
+	if (cpu_to_apicid_saved[pcpu] == 0 &&
+		per_cpu(x86_cpu_to_apicid, pcpu) != BAD_APICID)
+		cpu_to_apicid_saved[pcpu] = per_cpu(x86_cpu_to_apicid, pcpu);
+
+	per_cpu(x86_cpu_to_apicid, pcpu) = cpu_to_apicid_saved[pcpu];
+	acpi_map_cpu2node_emu(pcpu, per_cpu(x86_cpu_to_apicid, pcpu), nid);
+
+	return pcpu;
+}
+EXPORT_SYMBOL(acpi_map_lsapic_emu);
+#endif
+
 int acpi_unmap_lsapic(int cpu)
 {
+#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
+	/* backup cpu apicid to array cpu_to_apicid_saved */
+	if (cpu_to_apicid_saved[cpu] == 0 &&
+		per_cpu(x86_cpu_to_apicid, cpu) != BAD_APICID)
+		cpu_to_apicid_saved[cpu] = per_cpu(x86_cpu_to_apicid, cpu);
+#endif
 	per_cpu(x86_cpu_to_apicid, cpu) = -1;
 	set_cpu_present(cpu, false);
 	num_processors--;
Index: linux-hpe4/arch/x86/kernel/smpboot.c
===================================================================
--- linux-hpe4.orig/arch/x86/kernel/smpboot.c	2010-11-17 09:00:59.753464132 +0800
+++ linux-hpe4/arch/x86/kernel/smpboot.c	2010-11-17 10:05:26.913464702 +0800
@@ -107,8 +107,6 @@
         mutex_unlock(&x86_cpu_hotplug_driver_mutex);
 }
 
-ssize_t arch_cpu_probe(const char *buf, size_t count) { return -1; }
-ssize_t arch_cpu_release(const char *buf, size_t count) { return -1; }
 #else
 static struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ;
 #define get_idle_for_cpu(x)      (idle_thread_array[(x)])
Index: linux-hpe4/arch/x86/kernel/topology.c
===================================================================
--- linux-hpe4.orig/arch/x86/kernel/topology.c	2010-11-17 09:01:10.192838977 +0800
+++ linux-hpe4/arch/x86/kernel/topology.c	2010-11-17 10:05:26.924085712 +0800
@@ -30,6 +30,9 @@
 #include <linux/init.h>
 #include <linux/smp.h>
 #include <asm/cpu.h>
+#include <linux/cpu.h>
+#include <linux/topology.h>
+#include <linux/acpi.h>
 
 static DEFINE_PER_CPU(struct x86_cpu, cpu_devices);
 
@@ -66,6 +69,74 @@
 	unregister_cpu(&per_cpu(cpu_devices, num).cpu);
 }
 EXPORT_SYMBOL(arch_unregister_cpu);
+
+ssize_t arch_cpu_probe(const char *buf, size_t count)
+{
+	int nid = 0;
+	int num = 0, selected = 0;
+
+	/* check parameters */
+	if (!buf || count < 2)
+		return -EPERM;
+
+	nid = simple_strtoul(buf, NULL, 0);
+	printk(KERN_DEBUG "Add a cpu to node : %d\n", nid);
+
+	if (nid < 0 || nid > nr_node_ids - 1) {
+		printk(KERN_ERR "Invalid NUMA node id: %d (0 <= nid < %d).\n",
+			nid, nr_node_ids);
+		return -EPERM;
+	}
+
+	if (!node_online(nid)) {
+		printk(KERN_ERR "NUMA node %d is not online, give up.\n", nid);
+		return -EPERM;
+	}
+
+	/* find first uninitialized cpu */
+	for_each_present_cpu(num) {
+		if (per_cpu(cpu_sys_devices, num) == NULL) {
+			selected = num;
+			break;
+		}
+	}
+
+	if (selected >= num_possible_cpus()) {
+		printk(KERN_ERR "No free cpu, give up cpu probing.\n");
+		return -EPERM;
+	}
+
+	/* register cpu */
+	arch_register_cpu_node(selected, nid);
+	acpi_map_lsapic_emu(selected, nid);
+
+	return count;
+}
+EXPORT_SYMBOL(arch_cpu_probe);
+
+ssize_t arch_cpu_release(const char *buf, size_t count)
+{
+	int cpu = 0;
+
+	cpu =  simple_strtoul(buf, NULL, 0);
+	/* cpu 0 is not hotplugable */
+	if (cpu == 0) {
+		printk(KERN_ERR "can not release cpu 0.\n");
+		return -EPERM;
+	}
+
+	if (cpu_online(cpu)) {
+		printk(KERN_DEBUG "offline cpu %d.\n", cpu);
+		cpu_down(cpu);
+	}
+
+	arch_unregister_cpu(cpu);
+	acpi_unmap_lsapic(cpu);
+
+	return count;
+}
+EXPORT_SYMBOL(arch_cpu_release);
+
 #else /* CONFIG_HOTPLUG_CPU */
 
 static int __init arch_register_cpu(int num)
@@ -83,8 +154,14 @@
 		register_one_node(i);
 #endif
 
-	for_each_present_cpu(i)
-		arch_register_cpu(i);
+	/*
+	 * when cpu hotplug emulation enabled, register the online cpu only,
+	 * the rests are reserved for cpu probe.
+	 */
+	for_each_present_cpu(i) {
+		if ((cpu_hpe_on && cpu_online(i)) || !cpu_hpe_on)
+			arch_register_cpu(i);
+	}
 
 	return 0;
 }
Index: linux-hpe4/arch/x86/mm/numa_64.c
===================================================================
--- linux-hpe4.orig/arch/x86/mm/numa_64.c	2010-11-17 09:01:10.132837502 +0800
+++ linux-hpe4/arch/x86/mm/numa_64.c	2010-11-17 09:01:10.202837209 +0800
@@ -12,6 +12,7 @@
 #include <linux/module.h>
 #include <linux/nodemask.h>
 #include <linux/sched.h>
+#include <linux/cpu.h>
 
 #include <asm/e820.h>
 #include <asm/proto.h>
@@ -915,6 +916,19 @@
 }
 #endif
 
+#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
+static __init int cpu_hpe_setup(char *opt)
+{
+	if (!opt)
+		return -EINVAL;
+
+	if (!strncmp(opt, "on", 2) || !strncmp(opt, "1", 1))
+		cpu_hpe_on = 1;
+
+	return 0;
+}
+early_param("cpu_hpe", cpu_hpe_setup);
+#endif  /* CONFIG_ARCH_CPU_PROBE_RELEASE */
 
 void __cpuinit numa_set_node(int cpu, int node)
 {
Index: linux-hpe4/drivers/acpi/processor_driver.c
===================================================================
--- linux-hpe4.orig/drivers/acpi/processor_driver.c	2010-11-17 09:00:59.765335724 +0800
+++ linux-hpe4/drivers/acpi/processor_driver.c	2010-11-17 09:01:10.212839478 +0800
@@ -530,6 +530,14 @@
 		goto err_free_cpumask;
 
 	sysdev = get_cpu_sysdev(pr->id);
+	/*
+	 * Reserve cpu for hotplug emulation, the reserved cpu can be hot-added
+	 * throu the cpu probe interface. Return directly.
+	 */
+	if (sysdev == NULL) {
+		goto out;
+	}
+
 	if (sysfs_create_link(&device->dev.kobj, &sysdev->kobj, "sysdev")) {
 		result = -EFAULT;
 		goto err_remove_fs;
@@ -570,6 +578,7 @@
 		goto err_remove_sysfs;
 	}
 
+out:
 	return 0;
 
 err_remove_sysfs:
Index: linux-hpe4/drivers/base/cpu.c
===================================================================
--- linux-hpe4.orig/drivers/base/cpu.c	2010-11-17 09:01:10.192838977 +0800
+++ linux-hpe4/drivers/base/cpu.c	2010-11-17 09:01:10.212839478 +0800
@@ -22,9 +22,15 @@
 };
 EXPORT_SYMBOL(cpu_sysdev_class);
 
-static DEFINE_PER_CPU(struct sys_device *, cpu_sys_devices);
+DEFINE_PER_CPU(struct sys_device *, cpu_sys_devices);
 
 #ifdef CONFIG_HOTPLUG_CPU
+/*
+ * cpu_hpe_on is a switch to enable/disable cpu hotplug emulation. It is
+ * disabled by default; enable it with the kernel parameter cpu_hpe=on.
+ */
+int cpu_hpe_on;
+
 static ssize_t show_online(struct sys_device *dev, struct sysdev_attribute *attr,
 			   char *buf)
 {
Index: linux-hpe4/include/linux/acpi.h
===================================================================
--- linux-hpe4.orig/include/linux/acpi.h	2010-11-17 09:00:59.772898926 +0800
+++ linux-hpe4/include/linux/acpi.h	2010-11-17 09:01:10.212839478 +0800
@@ -102,6 +102,7 @@
 #ifdef CONFIG_ACPI_HOTPLUG_CPU
 /* Arch dependent functions for cpu hotplug support */
 int acpi_map_lsapic(acpi_handle handle, int *pcpu);
+int acpi_map_lsapic_emu(int pcpu, int nid);
 int acpi_unmap_lsapic(int cpu);
 #endif /* CONFIG_ACPI_HOTPLUG_CPU */
 
Index: linux-hpe4/include/linux/cpu.h
===================================================================
--- linux-hpe4.orig/include/linux/cpu.h	2010-11-17 09:01:10.192838977 +0800
+++ linux-hpe4/include/linux/cpu.h	2010-11-17 09:01:10.212839478 +0800
@@ -30,6 +30,8 @@
 	struct sys_device sysdev;
 };
 
+DECLARE_PER_CPU(struct sys_device *, cpu_sys_devices);
+
 extern int register_cpu_node(struct cpu *cpu, int num, int nid);
 
 static inline int register_cpu(struct cpu *cpu, int num)
@@ -149,6 +151,7 @@
 #define register_hotcpu_notifier(nb)	register_cpu_notifier(nb)
 #define unregister_hotcpu_notifier(nb)	unregister_cpu_notifier(nb)
 int cpu_down(unsigned int cpu);
+extern int cpu_hpe_on;
 
 #ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
 extern void cpu_hotplug_driver_lock(void);
@@ -171,6 +174,7 @@
 /* These aren't inline functions due to a GCC bug. */
 #define register_hotcpu_notifier(nb)	({ (void)(nb); 0; })
 #define unregister_hotcpu_notifier(nb)	({ (void)(nb); })
+static int cpu_hpe_on;
 #endif		/* CONFIG_HOTPLUG_CPU */
 
 #ifdef CONFIG_PM_SLEEP_SMP
Index: linux-hpe4/mm/Kconfig
===================================================================
--- linux-hpe4.orig/mm/Kconfig	2010-11-17 09:01:10.192838977 +0800
+++ linux-hpe4/mm/Kconfig	2010-11-17 10:05:20.994710783 +0800
@@ -162,6 +162,17 @@
 	  N is the number of hidden nodes, size is the memory size per
 	  hidden node. This is only useful for debugging.
 
+config ARCH_CPU_PROBE_RELEASE
+	def_bool y
+	bool "CPU hotplug emulation"
+	depends on NUMA_HOTPLUG_EMU
+	---help---
+	  Enable cpu hotplug emulation. Reserve CPUs with the kernel parameter
+	  "maxcpus=N", where N is the initial number of CPUs; the remaining
+	  physical CPUs will not be initialized. A probe/release interface
+	  allows hot-adding/hot-removing a CPU to/from a specified node in
+	  software. This is for debugging and testing purposes only.
+
 #
 # If we have space for more page flags then we can enable additional
 # optimizations and functionality.

-- 
Thanks & Regards,
Shaohui


--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom policy in Canada: sign http://dissolvethecrtc.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2010-11-17  4:46 UTC|newest]

Thread overview: 70+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-11-17  2:07 [0/8,v3] NUMA Hotplug Emulator - Introduction & Feedbacks shaohui.zheng
2010-11-17  2:08 ` [1/8,v3] NUMA Hotplug Emulator: add function to hide memory region via e820 table shaohui.zheng
2010-11-17  8:16   ` David Rientjes
2010-11-18  9:20     ` Shaohui Zheng
2010-11-18 21:16       ` David Rientjes
2010-11-19  0:12         ` Shaohui Zheng
2010-11-21  0:45           ` David Rientjes
2010-11-21 14:00             ` Américo Wang
2010-11-21 21:33               ` David Rientjes
2010-11-17  2:08 ` [2/8,v3] NUMA Hotplug Emulator: infrastructure of NUMA hotplug emulation shaohui.zheng
2010-11-17  8:16   ` David Rientjes
2010-11-17  7:51     ` Shaohui Zheng
2010-11-17 21:10       ` David Rientjes
2010-11-18  4:14         ` Shaohui Zheng
2010-11-18  6:27           ` Paul Mundt
2010-11-18  5:27             ` Shaohui Zheng
2010-11-18 21:24               ` David Rientjes
2010-11-19  0:32                 ` Shaohui Zheng
2010-11-21  0:48                   ` David Rientjes
2010-11-21  2:28                     ` [patch 1/2] x86: add numa=possible command line option David Rientjes
2010-11-21  2:28                       ` [patch 2/2] mm: add node hotplug emulation David Rientjes
2010-11-21 17:34                         ` Greg KH
2010-11-21 21:48                           ` David Rientjes
2010-11-21 23:08                             ` [patch 2/2 v2] " David Rientjes
2010-11-22  0:56                               ` Greg KH
2010-11-28  1:52                                 ` David Rientjes
2010-11-28  5:17                                   ` Greg KH
2010-11-30  0:04                                     ` David Rientjes
2010-11-21 14:26                       ` [patch 1/2] x86: add numa=possible command line option Américo Wang
2010-11-21 21:46                         ` David Rientjes
2010-11-22 15:43                           ` Américo Wang
2010-11-21 15:14                     ` [2/8,v3] NUMA Hotplug Emulator: infrastructure of NUMA hotplug emulation Li, Haicheng
2010-11-21 21:42                       ` David Rientjes
2010-11-18 21:19           ` David Rientjes
2010-11-17  2:08 ` [3/8,v3] NUMA Hotplug Emulator: Userland interface to hotplug-add fake offlined nodes shaohui.zheng
2010-11-17  8:16   ` David Rientjes
2010-11-17  2:08 ` [4/8,v3] NUMA Hotplug Emulator: Abstract cpu register functions shaohui.zheng
2010-11-17  2:08 ` shaohui.zheng [this message]
2010-11-21 14:45   ` [5/8,v3] NUMA Hotplug Emulator: support cpu probe/release in x86 Américo Wang
2010-11-22  0:01     ` Shaohui Zheng
2010-11-22 15:51       ` Américo Wang
2010-11-22 23:29         ` Shaohui Zheng
2010-11-17  2:08 ` [6/8,v3] NUMA Hotplug Emulator: Fake CPU socket with logical CPU on x86 shaohui.zheng
2010-11-17  2:08 ` [7/8,v3] NUMA Hotplug Emulator: extend memory probe interface to support NUMA shaohui.zheng
2010-11-17 18:50   ` Dave Hansen
2010-11-17 21:18     ` David Rientjes
2010-11-17 21:55       ` Dave Hansen
2010-11-17 22:44         ` David Rientjes
2010-11-17 23:00           ` Dave Hansen
2010-11-17 23:17             ` David Rientjes
2010-11-18 16:59           ` Aaron Durbin
2010-11-18  4:48       ` Shaohui Zheng
2010-11-18  6:24         ` Paul Mundt
2010-11-18 21:28           ` David Rientjes
2010-11-18 21:31         ` David Rientjes
2010-11-18  4:36     ` Shaohui Zheng
2010-11-19  7:51     ` Shaohui Zheng
2010-11-19 16:36       ` Dave Hansen
2010-11-17  2:08 ` [8/8,v3] NUMA Hotplug Emulator: documentation shaohui.zheng
2010-11-17 23:06   ` Randy Dunlap
2010-11-18  2:31     ` Shaohui Zheng
2010-11-21 15:03   ` Américo Wang
2010-11-21 15:16     ` Li, Haicheng
2010-11-21 23:33     ` Shaohui Zheng
2010-11-22 16:04       ` Américo Wang
2010-11-22 23:23         ` Shaohui Zheng
2010-11-17  5:22 ` [0/8,v3] NUMA Hotplug Emulator - Introduction & Feedbacks Paul Mundt
2010-11-19  5:54   ` Shaohui Zheng
2010-11-17  9:26 ` Yinghai Lu
2010-11-18  2:03   ` Shaohui Zheng

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20101117021000.776651300@intel.com \
    --to=shaohui.zheng@intel.com \
    --cc=Yinghai.Lu@Sun.COM \
    --cc=ak@linux.intel.com \
    --cc=akpm@linux-foundation.org \
    --cc=haicheng.li@intel.com \
    --cc=haicheng.li@linux.intel.com \
    --cc=len.brown@intel.com \
    --cc=lethal@linux-sh.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mingo@elte.hu \
    --cc=shaohui.zheng@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).