All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC] patch [1/1] x86_64 numa aware sparsemem add_memory functinality
@ 2006-06-21  5:43 keith mannthey
  2006-06-21  6:06 ` [Lhms-devel] " KAMEZAWA Hiroyuki
  2006-06-23 17:13 ` Dave Hansen
  0 siblings, 2 replies; 8+ messages in thread
From: keith mannthey @ 2006-06-21  5:43 UTC (permalink / raw)
  To: lhms-devel; +Cc: linux-mm, konrad, Prarit Bhargava--redhat, ak

[-- Attachment #1: Type: text/plain, Size: 1179 bytes --]

Hello all,
  This patch is an attempt to add NUMA-aware add_memory functionality
to x86_64 using CONFIG_SPARSEMEM.  The add_memory function today just
grabs the pgdat from node 0 and adds the memory there.  On a NUMA system
this is functional but not optimal/correct.

  The SRAT can expose future memory locality.  This information is
already tracked by the nodes_add data structure (it keeps the
memory/node locality information) from the SRAT code.  The code in
srat.c is built around RESERVE_HOTADD.  This patch is a little subtle in
the way it uses the existing code for use with sparsemem.  Perhaps
acpi_numa_memory_affinity_init needs a larger refactor to fit both
RESERVE_HOTADD and sparsemem.  

  This patch still uses hotadd_percent as a flag to the whole SRAT parsing
code to disable and contain a broken BIOS.  Its functionality is retained,
and it also acts as an on/off switch for sparsemem hot-add.  Without
changing the safety mechanisms built into the current SRAT code, I have
provided a way for the sparsemem hot-add path to get to the nodes_add data
for use at runtime.

  This is a first run at the patch; it works with 2.6.17.

Signed-off-by:  Keith Mannthey <kmannth@us.ibm.com>

[-- Attachment #2: patch-2.6.17-nodes-add-v1.patch --]
[-- Type: text/x-patch, Size: 3550 bytes --]

diff -urN linux-2.6.17/arch/x86_64/mm/init.c linux-2.6.17-work/arch/x86_64/mm/init.c
--- linux-2.6.17/arch/x86_64/mm/init.c	2006-06-17 21:49:35.000000000 -0400
+++ linux-2.6.17-work/arch/x86_64/mm/init.c	2006-06-20 21:41:30.000000000 -0400
@@ -553,7 +553,7 @@
  */
 int add_memory(u64 start, u64 size)
 {
-	struct pglist_data *pgdat = NODE_DATA(0);
+	struct pglist_data *pgdat = NODE_DATA(new_memory_to_node(start,start+size));
 	struct zone *zone = pgdat->node_zones + MAX_NR_ZONES-2;
 	unsigned long start_pfn = start >> PAGE_SHIFT;
 	unsigned long nr_pages = size >> PAGE_SHIFT;
diff -urN linux-2.6.17/arch/x86_64/mm/srat.c linux-2.6.17-work/arch/x86_64/mm/srat.c
--- linux-2.6.17/arch/x86_64/mm/srat.c	2006-06-20 20:25:33.000000000 -0400
+++ linux-2.6.17-work/arch/x86_64/mm/srat.c	2006-06-20 21:44:54.000000000 -0400
@@ -32,10 +32,10 @@
 static nodemask_t nodes_parsed __initdata;
 static nodemask_t nodes_found __initdata;
 static struct bootnode nodes[MAX_NUMNODES] __initdata;
-static struct bootnode nodes_add[MAX_NUMNODES] __initdata;
+static struct bootnode nodes_add[MAX_NUMNODES];
 static int found_add_area __initdata;
 int hotadd_percent __initdata = 0;
-#ifndef RESERVE_HOTADD 
+#if !defined(RESERVE_HOTADD) && !defined(CONFIG_MEMORY_HOTPLUG)
 #define hotadd_percent 0	/* Ignore all settings */
 #endif
 static u8 pxm2node[256] = { [0 ... 255] = 0xff };
@@ -219,9 +219,9 @@
 	allocated += mem;
 	return 1;
 }
-
+#endif
 /*
- * It is fine to add this area to the nodes data it will be used later
+ * It is fine to add this area to the nodes_add data it will be used later
  * This code supports one contigious hot add area per node.
  */
 static int reserve_hotadd(int node, unsigned long start, unsigned long end)
@@ -247,15 +247,14 @@
 		printk(KERN_ERR "SRAT: Hotplug area has existing memory\n");
 		return -1;
 	}
-
+#ifdef RESERVE_HOTADD
 	if (!hotadd_enough_memory(&nodes_add[node]))  {
 		printk(KERN_ERR "SRAT: Hotplug area too large\n");
 		return -1;
 	}
-
+#endif 
 	/* Looks good */
 
- 	found_add_area = 1;
 	if (nd->start == nd->end) {
  		nd->start = start;
  		nd->end = end;
@@ -273,14 +272,16 @@
 			printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n");
  	}
 
- 	if ((nd->end >> PAGE_SHIFT) > end_pfn)
- 		end_pfn = nd->end >> PAGE_SHIFT;
-
 	if (changed)
 	 	printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n", nd->start, nd->end);
+#ifdef RESERVE_HOTADD	
+ 	found_add_area = 1;
+	if ((nd->end >> PAGE_SHIFT) > end_pfn)
+ 		end_pfn = nd->end >> PAGE_SHIFT;
 	return 0;
+#endif 
+	return -1;
 }
-#endif
 
 /* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
 void __init
@@ -338,7 +339,6 @@
 	printk(KERN_INFO "SRAT: Node %u PXM %u %Lx-%Lx\n", node, pxm,
 	       nd->start, nd->end);
 
-#ifdef RESERVE_HOTADD
  	if (ma->flags.hot_pluggable && reserve_hotadd(node, start, end) < 0) {
 		/* Ignore hotadd region. Undo damage */
 		printk(KERN_NOTICE "SRAT: Hotplug region ignored\n");
@@ -346,7 +346,6 @@
 		if ((nd->start | nd->end) == 0)
 			node_clear(node, nodes_parsed);
 	}
-#endif
 }
 
 /* Sanity check to catch more bad SRATs (they are amazingly common).
@@ -479,5 +478,15 @@
 	index = acpi_slit->localities * node_to_pxm(a);
 	return acpi_slit->entry[index + node_to_pxm(b)];
 }
-
 EXPORT_SYMBOL(__node_distance);
+
+int new_memory_to_node(unsigned long start, unsigned long end) {
+	int i,ret;
+	ret=0;
+	for_each_node(i){
+		if (nodes_add[i].start <= start && nodes_add[i].end >= end)
+			ret = i;		
+	}
+	return ret;
+}
+EXPORT_SYMBOL(new_memory_to_node);

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2006-06-24  2:05 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-06-21  5:43 [RFC] patch [1/1] x86_64 numa aware sparsemem add_memory functinality keith mannthey
2006-06-21  6:06 ` [Lhms-devel] " KAMEZAWA Hiroyuki
2006-06-21  6:25   ` keith mannthey
2006-06-21  6:37     ` KAMEZAWA Hiroyuki
2006-06-21  6:31   ` Yasunori Goto
2006-06-23 17:13 ` Dave Hansen
2006-06-23 17:57   ` [Lhms-devel] " keith mannthey
2006-06-24  2:05   ` [RFC] Patch [1/4] x86_64 sparsmem add- save nodes_add data for later keith mannthey

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.