From: keith mannthey <kmannth@us.ibm.com>
To: lhms-devel <lhms-devel@lists.sourceforge.net>
Cc: linux-mm <linux-mm@kvack.org>, konrad <darnok@us.ibm.com>,
Prarit Bhargava--redhat <prarit@redhat.com>,
ak@suse.de
Subject: [RFC] patch [1/1] x86_64 numa aware sparsemem add_memory functinality
Date: Tue, 20 Jun 2006 22:43:01 -0700 [thread overview]
Message-ID: <1150868581.8518.28.camel@keithlap> (raw)
[-- Attachment #1: Type: text/plain, Size: 1179 bytes --]
Hello all,
This patch is an attempt to add NUMA-aware add_memory functionality
to x86_64 using CONFIG_SPARSEMEM. The add memory function today just
grabs the pgdat from node 0 and adds the memory there. On a numa system
this is functional but not optimal/correct.
The SRAT can expose future memory locality. This information is
already tracked by the nodes_add data structure (it keeps the
memory/node locality information) from the SRAT code. The code in
srat.c is built around RESERVE_HOTADD. This patch is a little subtle in
the way it uses the existing code for use with sparsemem. Perhaps
acpi_numa_memory_affinity_init needs a larger refactor to fit both
RESERVE_HOTADD and sparsemem.
This patch still uses hotadd_percent as a flag to the whole SRAT parsing
code to disable and contain broken BIOSes. Its functionality is retained,
and it also serves as an on/off switch for sparsemem hot-add. Without
changing the safety mechanisms built into the current SRAT code I have provided a path for
the sparsemem hot-add path to get to the nodes_add data for use at
runtime.
This is a first run at the patch; it works with 2.6.17.
Signed-off-by: Keith Mannthey <kmannth@us.ibm.com>
[-- Attachment #2: patch-2.6.17-nodes-add-v1.patch --]
[-- Type: text/x-patch, Size: 3550 bytes --]
diff -urN linux-2.6.17/arch/x86_64/mm/init.c linux-2.6.17-work/arch/x86_64/mm/init.c
--- linux-2.6.17/arch/x86_64/mm/init.c 2006-06-17 21:49:35.000000000 -0400
+++ linux-2.6.17-work/arch/x86_64/mm/init.c 2006-06-20 21:41:30.000000000 -0400
@@ -553,7 +553,7 @@
*/
int add_memory(u64 start, u64 size)
{
- struct pglist_data *pgdat = NODE_DATA(0);
+ struct pglist_data *pgdat = NODE_DATA(new_memory_to_node(start,start+size));
struct zone *zone = pgdat->node_zones + MAX_NR_ZONES-2;
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
diff -urN linux-2.6.17/arch/x86_64/mm/srat.c linux-2.6.17-work/arch/x86_64/mm/srat.c
--- linux-2.6.17/arch/x86_64/mm/srat.c 2006-06-20 20:25:33.000000000 -0400
+++ linux-2.6.17-work/arch/x86_64/mm/srat.c 2006-06-20 21:44:54.000000000 -0400
@@ -32,10 +32,10 @@
static nodemask_t nodes_parsed __initdata;
static nodemask_t nodes_found __initdata;
static struct bootnode nodes[MAX_NUMNODES] __initdata;
-static struct bootnode nodes_add[MAX_NUMNODES] __initdata;
+static struct bootnode nodes_add[MAX_NUMNODES];
static int found_add_area __initdata;
int hotadd_percent __initdata = 0;
-#ifndef RESERVE_HOTADD
+#if !defined(RESERVE_HOTADD) && !defined(CONFIG_MEMORY_HOTPLUG)
#define hotadd_percent 0 /* Ignore all settings */
#endif
static u8 pxm2node[256] = { [0 ... 255] = 0xff };
@@ -219,9 +219,9 @@
allocated += mem;
return 1;
}
-
+#endif
/*
- * It is fine to add this area to the nodes data it will be used later
+ * It is fine to add this area to the nodes_add data it will be used later
* This code supports one contigious hot add area per node.
*/
static int reserve_hotadd(int node, unsigned long start, unsigned long end)
@@ -247,15 +247,14 @@
printk(KERN_ERR "SRAT: Hotplug area has existing memory\n");
return -1;
}
-
+#ifdef RESERVE_HOTADD
if (!hotadd_enough_memory(&nodes_add[node])) {
printk(KERN_ERR "SRAT: Hotplug area too large\n");
return -1;
}
-
+#endif
/* Looks good */
- found_add_area = 1;
if (nd->start == nd->end) {
nd->start = start;
nd->end = end;
@@ -273,14 +272,16 @@
printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n");
}
- if ((nd->end >> PAGE_SHIFT) > end_pfn)
- end_pfn = nd->end >> PAGE_SHIFT;
-
if (changed)
printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n", nd->start, nd->end);
+#ifdef RESERVE_HOTADD
+ found_add_area = 1;
+ if ((nd->end >> PAGE_SHIFT) > end_pfn)
+ end_pfn = nd->end >> PAGE_SHIFT;
return 0;
+#endif
+ return -1;
}
-#endif
/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
void __init
@@ -338,7 +339,6 @@
printk(KERN_INFO "SRAT: Node %u PXM %u %Lx-%Lx\n", node, pxm,
nd->start, nd->end);
-#ifdef RESERVE_HOTADD
if (ma->flags.hot_pluggable && reserve_hotadd(node, start, end) < 0) {
/* Ignore hotadd region. Undo damage */
printk(KERN_NOTICE "SRAT: Hotplug region ignored\n");
@@ -346,7 +346,6 @@
if ((nd->start | nd->end) == 0)
node_clear(node, nodes_parsed);
}
-#endif
}
/* Sanity check to catch more bad SRATs (they are amazingly common).
@@ -479,5 +478,15 @@
index = acpi_slit->localities * node_to_pxm(a);
return acpi_slit->entry[index + node_to_pxm(b)];
}
-
EXPORT_SYMBOL(__node_distance);
+
+int new_memory_to_node(unsigned long start, unsigned long end) {
+ int i,ret;
+ ret=0;
+ for_each_node(i){
+ if (nodes_add[i].start <= start && nodes_add[i].end >= end)
+ ret = i;
+ }
+ return ret;
+}
+EXPORT_SYMBOL(new_memory_to_node);
next reply other threads:[~2006-06-21 5:43 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2006-06-21 5:43 keith mannthey [this message]
2006-06-21 6:06 ` [Lhms-devel] [RFC] patch [1/1] x86_64 numa aware sparsemem add_memory functinality KAMEZAWA Hiroyuki
2006-06-21 6:25 ` keith mannthey
2006-06-21 6:37 ` KAMEZAWA Hiroyuki
2006-06-21 6:31 ` Yasunori Goto
2006-06-23 17:13 ` Dave Hansen
2006-06-23 17:57 ` [Lhms-devel] " keith mannthey
2006-06-24 2:05 ` [RFC] Patch [1/4] x86_64 sparsmem add- save nodes_add data for later keith mannthey
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1150868581.8518.28.camel@keithlap \
--to=kmannth@us.ibm.com \
--cc=ak@suse.de \
--cc=darnok@us.ibm.com \
--cc=lhms-devel@lists.sourceforge.net \
--cc=linux-mm@kvack.org \
--cc=prarit@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.