All of lore.kernel.org
 help / color / mirror / Atom feed
From: Mel Gorman <mel@csn.ul.ie>
To: akpm@osdl.org
Cc: davej@codemonkey.org.uk, tony.luck@intel.com,
	linuxppc-dev@ozlabs.org, Mel Gorman <mel@csn.ul.ie>,
	linux-kernel@vger.kernel.org, bob.picco@hp.com, ak@suse.de,
	linux-mm@kvack.org
Subject: [PATCH 3/6] Have x86 use add_active_range() and free_area_init_nodes
Date: Mon,  8 May 2006 15:11:31 +0100 (IST)	[thread overview]
Message-ID: <20060508141131.26912.92202.sendpatchset@skynet> (raw)
In-Reply-To: <20060508141030.26912.93090.sendpatchset@skynet>


Size zones and holes in an architecture independent manner for x86.


 Kconfig        |    8 +---
 kernel/setup.c |   19 +++------
 kernel/srat.c  |  100 +---------------------------------------------------
 mm/discontig.c |   65 +++++++--------------------------
 4 files changed, 25 insertions(+), 167 deletions(-)

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.17-rc3-mm1-102-powerpc_use_init_nodes/arch/i386/Kconfig linux-2.6.17-rc3-mm1-103-x86_use_init_nodes/arch/i386/Kconfig
--- linux-2.6.17-rc3-mm1-102-powerpc_use_init_nodes/arch/i386/Kconfig	2006-05-01 11:36:54.000000000 +0100
+++ linux-2.6.17-rc3-mm1-103-x86_use_init_nodes/arch/i386/Kconfig	2006-05-08 09:18:57.000000000 +0100
@@ -577,12 +577,10 @@ config ARCH_SELECT_MEMORY_MODEL
 	def_bool y
 	depends on ARCH_SPARSEMEM_ENABLE
 
-source "mm/Kconfig"
+config ARCH_POPULATES_NODE_MAP
+	def_bool y
 
-config HAVE_ARCH_EARLY_PFN_TO_NID
-	bool
-	default y
-	depends on NUMA
+source "mm/Kconfig"
 
 config HIGHPTE
 	bool "Allocate 3rd-level pagetables from highmem"
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.17-rc3-mm1-102-powerpc_use_init_nodes/arch/i386/kernel/setup.c linux-2.6.17-rc3-mm1-103-x86_use_init_nodes/arch/i386/kernel/setup.c
--- linux-2.6.17-rc3-mm1-102-powerpc_use_init_nodes/arch/i386/kernel/setup.c	2006-05-01 11:36:54.000000000 +0100
+++ linux-2.6.17-rc3-mm1-103-x86_use_init_nodes/arch/i386/kernel/setup.c	2006-05-08 09:18:57.000000000 +0100
@@ -1207,22 +1207,15 @@ static unsigned long __init setup_memory
 
 void __init zone_sizes_init(void)
 {
-	unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
-	unsigned int max_dma, low;
+	unsigned int max_dma;
+#ifndef CONFIG_HIGHMEM
+	unsigned long highend_pfn = max_low_pfn;
+#endif
 
 	max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
-	low = max_low_pfn;
 
-	if (low < max_dma)
-		zones_size[ZONE_DMA] = low;
-	else {
-		zones_size[ZONE_DMA] = max_dma;
-		zones_size[ZONE_NORMAL] = low - max_dma;
-#ifdef CONFIG_HIGHMEM
-		zones_size[ZONE_HIGHMEM] = highend_pfn - low;
-#endif
-	}
-	free_area_init(zones_size);
+	add_active_range(0, 0, highend_pfn);
+	free_area_init_nodes(max_dma, max_dma, max_low_pfn, highend_pfn);
 }
 #else
 extern unsigned long __init setup_memory(void);
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.17-rc3-mm1-102-powerpc_use_init_nodes/arch/i386/kernel/srat.c linux-2.6.17-rc3-mm1-103-x86_use_init_nodes/arch/i386/kernel/srat.c
--- linux-2.6.17-rc3-mm1-102-powerpc_use_init_nodes/arch/i386/kernel/srat.c	2006-05-01 11:36:54.000000000 +0100
+++ linux-2.6.17-rc3-mm1-103-x86_use_init_nodes/arch/i386/kernel/srat.c	2006-05-08 09:18:57.000000000 +0100
@@ -55,8 +55,6 @@ struct node_memory_chunk_s {
 static struct node_memory_chunk_s node_memory_chunk[MAXCHUNKS];
 
 static int num_memory_chunks;		/* total number of memory chunks */
-static int zholes_size_init;
-static unsigned long zholes_size[MAX_NUMNODES * MAX_NR_ZONES];
 
 extern void * boot_ioremap(unsigned long, unsigned long);
 
@@ -136,50 +134,6 @@ static void __init parse_memory_affinity
 		 "enabled and removable" : "enabled" ) );
 }
 
-#if MAX_NR_ZONES != 4
-#error "MAX_NR_ZONES != 4, chunk_to_zone requires review"
-#endif
-/* Take a chunk of pages from page frame cstart to cend and count the number
- * of pages in each zone, returned via zones[].
- */
-static __init void chunk_to_zones(unsigned long cstart, unsigned long cend, 
-		unsigned long *zones)
-{
-	unsigned long max_dma;
-	extern unsigned long max_low_pfn;
-
-	int z;
-	unsigned long rend;
-
-	/* FIXME: MAX_DMA_ADDRESS and max_low_pfn are trying to provide
-	 * similarly scoped information and should be handled in a consistant
-	 * manner.
-	 */
-	max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
-
-	/* Split the hole into the zones in which it falls.  Repeatedly
-	 * take the segment in which the remaining hole starts, round it
-	 * to the end of that zone.
-	 */
-	memset(zones, 0, MAX_NR_ZONES * sizeof(long));
-	while (cstart < cend) {
-		if (cstart < max_dma) {
-			z = ZONE_DMA;
-			rend = (cend < max_dma)? cend : max_dma;
-
-		} else if (cstart < max_low_pfn) {
-			z = ZONE_NORMAL;
-			rend = (cend < max_low_pfn)? cend : max_low_pfn;
-
-		} else {
-			z = ZONE_HIGHMEM;
-			rend = cend;
-		}
-		zones[z] += rend - cstart;
-		cstart = rend;
-	}
-}
-
 /*
  * The SRAT table always lists ascending addresses, so can always
  * assume that the first "start" address that you see is the real
@@ -224,7 +178,6 @@ static int __init acpi20_parse_srat(stru
 
 	memset(pxm_bitmap, 0, sizeof(pxm_bitmap));	/* init proximity domain bitmap */
 	memset(node_memory_chunk, 0, sizeof(node_memory_chunk));
-	memset(zholes_size, 0, sizeof(zholes_size));
 
 	num_memory_chunks = 0;
 	while (p < end) {
@@ -288,6 +241,7 @@ static int __init acpi20_parse_srat(stru
 		printk("chunk %d nid %d start_pfn %08lx end_pfn %08lx\n",
 		       j, chunk->nid, chunk->start_pfn, chunk->end_pfn);
 		node_read_chunk(chunk->nid, chunk);
+		add_active_range(chunk->nid, chunk->start_pfn, chunk->end_pfn);
 	}
  
 	for_each_online_node(nid) {
@@ -396,57 +350,7 @@ int __init get_memcfg_from_srat(void)
 		return acpi20_parse_srat((struct acpi_table_srat *)header);
 	}
 out_err:
+	remove_all_active_ranges();
 	printk("failed to get NUMA memory information from SRAT table\n");
 	return 0;
 }
-
-/* For each node run the memory list to determine whether there are
- * any memory holes.  For each hole determine which ZONE they fall
- * into.
- *
- * NOTE#1: this requires knowledge of the zone boundries and so
- * _cannot_ be performed before those are calculated in setup_memory.
- * 
- * NOTE#2: we rely on the fact that the memory chunks are ordered by
- * start pfn number during setup.
- */
-static void __init get_zholes_init(void)
-{
-	int nid;
-	int c;
-	int first;
-	unsigned long end = 0;
-
-	for_each_online_node(nid) {
-		first = 1;
-		for (c = 0; c < num_memory_chunks; c++){
-			if (node_memory_chunk[c].nid == nid) {
-				if (first) {
-					end = node_memory_chunk[c].end_pfn;
-					first = 0;
-
-				} else {
-					/* Record any gap between this chunk
-					 * and the previous chunk on this node
-					 * against the zones it spans.
-					 */
-					chunk_to_zones(end,
-						node_memory_chunk[c].start_pfn,
-						&zholes_size[nid * MAX_NR_ZONES]);
-				}
-			}
-		}
-	}
-}
-
-unsigned long * __init get_zholes_size(int nid)
-{
-	if (!zholes_size_init) {
-		zholes_size_init++;
-		get_zholes_init();
-	}
-	if (nid >= MAX_NUMNODES || !node_online(nid))
-		printk("%s: nid = %d is invalid/offline. num_online_nodes = %d",
-		       __FUNCTION__, nid, num_online_nodes());
-	return &zholes_size[nid * MAX_NR_ZONES];
-}
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.17-rc3-mm1-102-powerpc_use_init_nodes/arch/i386/mm/discontig.c linux-2.6.17-rc3-mm1-103-x86_use_init_nodes/arch/i386/mm/discontig.c
--- linux-2.6.17-rc3-mm1-102-powerpc_use_init_nodes/arch/i386/mm/discontig.c	2006-04-27 03:19:25.000000000 +0100
+++ linux-2.6.17-rc3-mm1-103-x86_use_init_nodes/arch/i386/mm/discontig.c	2006-05-08 09:18:57.000000000 +0100
@@ -157,21 +157,6 @@ static void __init find_max_pfn_node(int
 		BUG();
 }
 
-/* Find the owning node for a pfn. */
-int early_pfn_to_nid(unsigned long pfn)
-{
-	int nid;
-
-	for_each_node(nid) {
-		if (node_end_pfn[nid] == 0)
-			break;
-		if (node_start_pfn[nid] <= pfn && node_end_pfn[nid] >= pfn)
-			return nid;
-	}
-
-	return 0;
-}
-
 /* 
  * Allocate memory for the pg_data_t for this node via a crude pre-bootmem
  * method.  For node zero take this from the bottom of memory, for
@@ -227,6 +212,8 @@ static unsigned long calculate_numa_rema
 	unsigned long pfn;
 
 	for_each_online_node(nid) {
+		unsigned old_end_pfn = node_end_pfn[nid];
+
 		/*
 		 * The acpi/srat node info can show hot-add memroy zones
 		 * where memory could be added but not currently present.
@@ -276,6 +263,7 @@ static unsigned long calculate_numa_rema
 
 		node_end_pfn[nid] -= size;
 		node_remap_start_pfn[nid] = node_end_pfn[nid];
+		shrink_active_range(nid, old_end_pfn, node_end_pfn[nid]);
 	}
 	printk("Reserving total of %ld pages for numa KVA remap\n",
 			reserve_pages);
@@ -352,45 +340,20 @@ unsigned long __init setup_memory(void)
 void __init zone_sizes_init(void)
 {
 	int nid;
+	unsigned long max_dma_pfn;
 
-
-	for_each_online_node(nid) {
-		unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
-		unsigned long *zholes_size;
-		unsigned int max_dma;
-
-		unsigned long low = max_low_pfn;
-		unsigned long start = node_start_pfn[nid];
-		unsigned long high = node_end_pfn[nid];
-
-		max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
-
-		if (node_has_online_mem(nid)){
-			if (start > low) {
-#ifdef CONFIG_HIGHMEM
-				BUG_ON(start > high);
-				zones_size[ZONE_HIGHMEM] = high - start;
-#endif
-			} else {
-				if (low < max_dma)
-					zones_size[ZONE_DMA] = low;
-				else {
-					BUG_ON(max_dma > low);
-					BUG_ON(low > high);
-					zones_size[ZONE_DMA] = max_dma;
-					zones_size[ZONE_NORMAL] = low - max_dma;
-#ifdef CONFIG_HIGHMEM
-					zones_size[ZONE_HIGHMEM] = high - low;
-#endif
-				}
-			}
+	/* If SRAT has not registered memory, register it now */
+	if (find_max_pfn_with_active_regions() == 0) {
+		for_each_online_node(nid) {
+			if (node_has_online_mem(nid))
+				add_active_range(nid, node_start_pfn[nid],
+							node_end_pfn[nid]);
 		}
-
-		zholes_size = get_zholes_size(nid);
-
-		free_area_init_node(nid, NODE_DATA(nid), zones_size, start,
-				zholes_size);
 	}
+
+	max_dma_pfn = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
+	free_area_init_nodes(max_dma_pfn, max_dma_pfn,
+						max_low_pfn, highend_pfn);
 	return;
 }
 

WARNING: multiple messages have this Message-ID (diff)
From: Mel Gorman <mel@csn.ul.ie>
To: akpm@osdl.org
Cc: davej@codemonkey.org.uk, tony.luck@intel.com,
	Mel Gorman <mel@csn.ul.ie>,
	linux-kernel@vger.kernel.org, bob.picco@hp.com, ak@suse.de,
	linux-mm@kvack.org, linuxppc-dev@ozlabs.org
Subject: [PATCH 3/6] Have x86 use add_active_range() and free_area_init_nodes
Date: Mon,  8 May 2006 15:11:31 +0100 (IST)	[thread overview]
Message-ID: <20060508141131.26912.92202.sendpatchset@skynet> (raw)
In-Reply-To: <20060508141030.26912.93090.sendpatchset@skynet>


Size zones and holes in an architecture independent manner for x86.


 Kconfig        |    8 +---
 kernel/setup.c |   19 +++------
 kernel/srat.c  |  100 +---------------------------------------------------
 mm/discontig.c |   65 +++++++--------------------------
 4 files changed, 25 insertions(+), 167 deletions(-)

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.17-rc3-mm1-102-powerpc_use_init_nodes/arch/i386/Kconfig linux-2.6.17-rc3-mm1-103-x86_use_init_nodes/arch/i386/Kconfig
--- linux-2.6.17-rc3-mm1-102-powerpc_use_init_nodes/arch/i386/Kconfig	2006-05-01 11:36:54.000000000 +0100
+++ linux-2.6.17-rc3-mm1-103-x86_use_init_nodes/arch/i386/Kconfig	2006-05-08 09:18:57.000000000 +0100
@@ -577,12 +577,10 @@ config ARCH_SELECT_MEMORY_MODEL
 	def_bool y
 	depends on ARCH_SPARSEMEM_ENABLE
 
-source "mm/Kconfig"
+config ARCH_POPULATES_NODE_MAP
+	def_bool y
 
-config HAVE_ARCH_EARLY_PFN_TO_NID
-	bool
-	default y
-	depends on NUMA
+source "mm/Kconfig"
 
 config HIGHPTE
 	bool "Allocate 3rd-level pagetables from highmem"
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.17-rc3-mm1-102-powerpc_use_init_nodes/arch/i386/kernel/setup.c linux-2.6.17-rc3-mm1-103-x86_use_init_nodes/arch/i386/kernel/setup.c
--- linux-2.6.17-rc3-mm1-102-powerpc_use_init_nodes/arch/i386/kernel/setup.c	2006-05-01 11:36:54.000000000 +0100
+++ linux-2.6.17-rc3-mm1-103-x86_use_init_nodes/arch/i386/kernel/setup.c	2006-05-08 09:18:57.000000000 +0100
@@ -1207,22 +1207,15 @@ static unsigned long __init setup_memory
 
 void __init zone_sizes_init(void)
 {
-	unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
-	unsigned int max_dma, low;
+	unsigned int max_dma;
+#ifndef CONFIG_HIGHMEM
+	unsigned long highend_pfn = max_low_pfn;
+#endif
 
 	max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
-	low = max_low_pfn;
 
-	if (low < max_dma)
-		zones_size[ZONE_DMA] = low;
-	else {
-		zones_size[ZONE_DMA] = max_dma;
-		zones_size[ZONE_NORMAL] = low - max_dma;
-#ifdef CONFIG_HIGHMEM
-		zones_size[ZONE_HIGHMEM] = highend_pfn - low;
-#endif
-	}
-	free_area_init(zones_size);
+	add_active_range(0, 0, highend_pfn);
+	free_area_init_nodes(max_dma, max_dma, max_low_pfn, highend_pfn);
 }
 #else
 extern unsigned long __init setup_memory(void);
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.17-rc3-mm1-102-powerpc_use_init_nodes/arch/i386/kernel/srat.c linux-2.6.17-rc3-mm1-103-x86_use_init_nodes/arch/i386/kernel/srat.c
--- linux-2.6.17-rc3-mm1-102-powerpc_use_init_nodes/arch/i386/kernel/srat.c	2006-05-01 11:36:54.000000000 +0100
+++ linux-2.6.17-rc3-mm1-103-x86_use_init_nodes/arch/i386/kernel/srat.c	2006-05-08 09:18:57.000000000 +0100
@@ -55,8 +55,6 @@ struct node_memory_chunk_s {
 static struct node_memory_chunk_s node_memory_chunk[MAXCHUNKS];
 
 static int num_memory_chunks;		/* total number of memory chunks */
-static int zholes_size_init;
-static unsigned long zholes_size[MAX_NUMNODES * MAX_NR_ZONES];
 
 extern void * boot_ioremap(unsigned long, unsigned long);
 
@@ -136,50 +134,6 @@ static void __init parse_memory_affinity
 		 "enabled and removable" : "enabled" ) );
 }
 
-#if MAX_NR_ZONES != 4
-#error "MAX_NR_ZONES != 4, chunk_to_zone requires review"
-#endif
-/* Take a chunk of pages from page frame cstart to cend and count the number
- * of pages in each zone, returned via zones[].
- */
-static __init void chunk_to_zones(unsigned long cstart, unsigned long cend, 
-		unsigned long *zones)
-{
-	unsigned long max_dma;
-	extern unsigned long max_low_pfn;
-
-	int z;
-	unsigned long rend;
-
-	/* FIXME: MAX_DMA_ADDRESS and max_low_pfn are trying to provide
-	 * similarly scoped information and should be handled in a consistant
-	 * manner.
-	 */
-	max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
-
-	/* Split the hole into the zones in which it falls.  Repeatedly
-	 * take the segment in which the remaining hole starts, round it
-	 * to the end of that zone.
-	 */
-	memset(zones, 0, MAX_NR_ZONES * sizeof(long));
-	while (cstart < cend) {
-		if (cstart < max_dma) {
-			z = ZONE_DMA;
-			rend = (cend < max_dma)? cend : max_dma;
-
-		} else if (cstart < max_low_pfn) {
-			z = ZONE_NORMAL;
-			rend = (cend < max_low_pfn)? cend : max_low_pfn;
-
-		} else {
-			z = ZONE_HIGHMEM;
-			rend = cend;
-		}
-		zones[z] += rend - cstart;
-		cstart = rend;
-	}
-}
-
 /*
  * The SRAT table always lists ascending addresses, so can always
  * assume that the first "start" address that you see is the real
@@ -224,7 +178,6 @@ static int __init acpi20_parse_srat(stru
 
 	memset(pxm_bitmap, 0, sizeof(pxm_bitmap));	/* init proximity domain bitmap */
 	memset(node_memory_chunk, 0, sizeof(node_memory_chunk));
-	memset(zholes_size, 0, sizeof(zholes_size));
 
 	num_memory_chunks = 0;
 	while (p < end) {
@@ -288,6 +241,7 @@ static int __init acpi20_parse_srat(stru
 		printk("chunk %d nid %d start_pfn %08lx end_pfn %08lx\n",
 		       j, chunk->nid, chunk->start_pfn, chunk->end_pfn);
 		node_read_chunk(chunk->nid, chunk);
+		add_active_range(chunk->nid, chunk->start_pfn, chunk->end_pfn);
 	}
  
 	for_each_online_node(nid) {
@@ -396,57 +350,7 @@ int __init get_memcfg_from_srat(void)
 		return acpi20_parse_srat((struct acpi_table_srat *)header);
 	}
 out_err:
+	remove_all_active_ranges();
 	printk("failed to get NUMA memory information from SRAT table\n");
 	return 0;
 }
-
-/* For each node run the memory list to determine whether there are
- * any memory holes.  For each hole determine which ZONE they fall
- * into.
- *
- * NOTE#1: this requires knowledge of the zone boundries and so
- * _cannot_ be performed before those are calculated in setup_memory.
- * 
- * NOTE#2: we rely on the fact that the memory chunks are ordered by
- * start pfn number during setup.
- */
-static void __init get_zholes_init(void)
-{
-	int nid;
-	int c;
-	int first;
-	unsigned long end = 0;
-
-	for_each_online_node(nid) {
-		first = 1;
-		for (c = 0; c < num_memory_chunks; c++){
-			if (node_memory_chunk[c].nid == nid) {
-				if (first) {
-					end = node_memory_chunk[c].end_pfn;
-					first = 0;
-
-				} else {
-					/* Record any gap between this chunk
-					 * and the previous chunk on this node
-					 * against the zones it spans.
-					 */
-					chunk_to_zones(end,
-						node_memory_chunk[c].start_pfn,
-						&zholes_size[nid * MAX_NR_ZONES]);
-				}
-			}
-		}
-	}
-}
-
-unsigned long * __init get_zholes_size(int nid)
-{
-	if (!zholes_size_init) {
-		zholes_size_init++;
-		get_zholes_init();
-	}
-	if (nid >= MAX_NUMNODES || !node_online(nid))
-		printk("%s: nid = %d is invalid/offline. num_online_nodes = %d",
-		       __FUNCTION__, nid, num_online_nodes());
-	return &zholes_size[nid * MAX_NR_ZONES];
-}
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.17-rc3-mm1-102-powerpc_use_init_nodes/arch/i386/mm/discontig.c linux-2.6.17-rc3-mm1-103-x86_use_init_nodes/arch/i386/mm/discontig.c
--- linux-2.6.17-rc3-mm1-102-powerpc_use_init_nodes/arch/i386/mm/discontig.c	2006-04-27 03:19:25.000000000 +0100
+++ linux-2.6.17-rc3-mm1-103-x86_use_init_nodes/arch/i386/mm/discontig.c	2006-05-08 09:18:57.000000000 +0100
@@ -157,21 +157,6 @@ static void __init find_max_pfn_node(int
 		BUG();
 }
 
-/* Find the owning node for a pfn. */
-int early_pfn_to_nid(unsigned long pfn)
-{
-	int nid;
-
-	for_each_node(nid) {
-		if (node_end_pfn[nid] == 0)
-			break;
-		if (node_start_pfn[nid] <= pfn && node_end_pfn[nid] >= pfn)
-			return nid;
-	}
-
-	return 0;
-}
-
 /* 
  * Allocate memory for the pg_data_t for this node via a crude pre-bootmem
  * method.  For node zero take this from the bottom of memory, for
@@ -227,6 +212,8 @@ static unsigned long calculate_numa_rema
 	unsigned long pfn;
 
 	for_each_online_node(nid) {
+		unsigned old_end_pfn = node_end_pfn[nid];
+
 		/*
 		 * The acpi/srat node info can show hot-add memroy zones
 		 * where memory could be added but not currently present.
@@ -276,6 +263,7 @@ static unsigned long calculate_numa_rema
 
 		node_end_pfn[nid] -= size;
 		node_remap_start_pfn[nid] = node_end_pfn[nid];
+		shrink_active_range(nid, old_end_pfn, node_end_pfn[nid]);
 	}
 	printk("Reserving total of %ld pages for numa KVA remap\n",
 			reserve_pages);
@@ -352,45 +340,20 @@ unsigned long __init setup_memory(void)
 void __init zone_sizes_init(void)
 {
 	int nid;
+	unsigned long max_dma_pfn;
 
-
-	for_each_online_node(nid) {
-		unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
-		unsigned long *zholes_size;
-		unsigned int max_dma;
-
-		unsigned long low = max_low_pfn;
-		unsigned long start = node_start_pfn[nid];
-		unsigned long high = node_end_pfn[nid];
-
-		max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
-
-		if (node_has_online_mem(nid)){
-			if (start > low) {
-#ifdef CONFIG_HIGHMEM
-				BUG_ON(start > high);
-				zones_size[ZONE_HIGHMEM] = high - start;
-#endif
-			} else {
-				if (low < max_dma)
-					zones_size[ZONE_DMA] = low;
-				else {
-					BUG_ON(max_dma > low);
-					BUG_ON(low > high);
-					zones_size[ZONE_DMA] = max_dma;
-					zones_size[ZONE_NORMAL] = low - max_dma;
-#ifdef CONFIG_HIGHMEM
-					zones_size[ZONE_HIGHMEM] = high - low;
-#endif
-				}
-			}
+	/* If SRAT has not registered memory, register it now */
+	if (find_max_pfn_with_active_regions() == 0) {
+		for_each_online_node(nid) {
+			if (node_has_online_mem(nid))
+				add_active_range(nid, node_start_pfn[nid],
+							node_end_pfn[nid]);
 		}
-
-		zholes_size = get_zholes_size(nid);
-
-		free_area_init_node(nid, NODE_DATA(nid), zones_size, start,
-				zholes_size);
 	}
+
+	max_dma_pfn = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
+	free_area_init_nodes(max_dma_pfn, max_dma_pfn,
+						max_low_pfn, highend_pfn);
 	return;
 }
 

WARNING: multiple messages have this Message-ID (diff)
From: Mel Gorman <mel@csn.ul.ie>
To: akpm@osdl.org
Cc: davej@codemonkey.org.uk, tony.luck@intel.com,
	Mel Gorman <mel@csn.ul.ie>,
	linux-kernel@vger.kernel.org, bob.picco@hp.com, ak@suse.de,
	linux-mm@kvack.org, linuxppc-dev@ozlabs.org
Subject: [PATCH 3/6] Have x86 use add_active_range() and free_area_init_nodes
Date: Mon,  8 May 2006 15:11:31 +0100 (IST)	[thread overview]
Message-ID: <20060508141131.26912.92202.sendpatchset@skynet> (raw)
In-Reply-To: <20060508141030.26912.93090.sendpatchset@skynet>

Size zones and holes in an architecture independent manner for x86.


 Kconfig        |    8 +---
 kernel/setup.c |   19 +++------
 kernel/srat.c  |  100 +---------------------------------------------------
 mm/discontig.c |   65 +++++++--------------------------
 4 files changed, 25 insertions(+), 167 deletions(-)

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.17-rc3-mm1-102-powerpc_use_init_nodes/arch/i386/Kconfig linux-2.6.17-rc3-mm1-103-x86_use_init_nodes/arch/i386/Kconfig
--- linux-2.6.17-rc3-mm1-102-powerpc_use_init_nodes/arch/i386/Kconfig	2006-05-01 11:36:54.000000000 +0100
+++ linux-2.6.17-rc3-mm1-103-x86_use_init_nodes/arch/i386/Kconfig	2006-05-08 09:18:57.000000000 +0100
@@ -577,12 +577,10 @@ config ARCH_SELECT_MEMORY_MODEL
 	def_bool y
 	depends on ARCH_SPARSEMEM_ENABLE
 
-source "mm/Kconfig"
+config ARCH_POPULATES_NODE_MAP
+	def_bool y
 
-config HAVE_ARCH_EARLY_PFN_TO_NID
-	bool
-	default y
-	depends on NUMA
+source "mm/Kconfig"
 
 config HIGHPTE
 	bool "Allocate 3rd-level pagetables from highmem"
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.17-rc3-mm1-102-powerpc_use_init_nodes/arch/i386/kernel/setup.c linux-2.6.17-rc3-mm1-103-x86_use_init_nodes/arch/i386/kernel/setup.c
--- linux-2.6.17-rc3-mm1-102-powerpc_use_init_nodes/arch/i386/kernel/setup.c	2006-05-01 11:36:54.000000000 +0100
+++ linux-2.6.17-rc3-mm1-103-x86_use_init_nodes/arch/i386/kernel/setup.c	2006-05-08 09:18:57.000000000 +0100
@@ -1207,22 +1207,15 @@ static unsigned long __init setup_memory
 
 void __init zone_sizes_init(void)
 {
-	unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
-	unsigned int max_dma, low;
+	unsigned int max_dma;
+#ifndef CONFIG_HIGHMEM
+	unsigned long highend_pfn = max_low_pfn;
+#endif
 
 	max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
-	low = max_low_pfn;
 
-	if (low < max_dma)
-		zones_size[ZONE_DMA] = low;
-	else {
-		zones_size[ZONE_DMA] = max_dma;
-		zones_size[ZONE_NORMAL] = low - max_dma;
-#ifdef CONFIG_HIGHMEM
-		zones_size[ZONE_HIGHMEM] = highend_pfn - low;
-#endif
-	}
-	free_area_init(zones_size);
+	add_active_range(0, 0, highend_pfn);
+	free_area_init_nodes(max_dma, max_dma, max_low_pfn, highend_pfn);
 }
 #else
 extern unsigned long __init setup_memory(void);
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.17-rc3-mm1-102-powerpc_use_init_nodes/arch/i386/kernel/srat.c linux-2.6.17-rc3-mm1-103-x86_use_init_nodes/arch/i386/kernel/srat.c
--- linux-2.6.17-rc3-mm1-102-powerpc_use_init_nodes/arch/i386/kernel/srat.c	2006-05-01 11:36:54.000000000 +0100
+++ linux-2.6.17-rc3-mm1-103-x86_use_init_nodes/arch/i386/kernel/srat.c	2006-05-08 09:18:57.000000000 +0100
@@ -55,8 +55,6 @@ struct node_memory_chunk_s {
 static struct node_memory_chunk_s node_memory_chunk[MAXCHUNKS];
 
 static int num_memory_chunks;		/* total number of memory chunks */
-static int zholes_size_init;
-static unsigned long zholes_size[MAX_NUMNODES * MAX_NR_ZONES];
 
 extern void * boot_ioremap(unsigned long, unsigned long);
 
@@ -136,50 +134,6 @@ static void __init parse_memory_affinity
 		 "enabled and removable" : "enabled" ) );
 }
 
-#if MAX_NR_ZONES != 4
-#error "MAX_NR_ZONES != 4, chunk_to_zone requires review"
-#endif
-/* Take a chunk of pages from page frame cstart to cend and count the number
- * of pages in each zone, returned via zones[].
- */
-static __init void chunk_to_zones(unsigned long cstart, unsigned long cend, 
-		unsigned long *zones)
-{
-	unsigned long max_dma;
-	extern unsigned long max_low_pfn;
-
-	int z;
-	unsigned long rend;
-
-	/* FIXME: MAX_DMA_ADDRESS and max_low_pfn are trying to provide
-	 * similarly scoped information and should be handled in a consistant
-	 * manner.
-	 */
-	max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
-
-	/* Split the hole into the zones in which it falls.  Repeatedly
-	 * take the segment in which the remaining hole starts, round it
-	 * to the end of that zone.
-	 */
-	memset(zones, 0, MAX_NR_ZONES * sizeof(long));
-	while (cstart < cend) {
-		if (cstart < max_dma) {
-			z = ZONE_DMA;
-			rend = (cend < max_dma)? cend : max_dma;
-
-		} else if (cstart < max_low_pfn) {
-			z = ZONE_NORMAL;
-			rend = (cend < max_low_pfn)? cend : max_low_pfn;
-
-		} else {
-			z = ZONE_HIGHMEM;
-			rend = cend;
-		}
-		zones[z] += rend - cstart;
-		cstart = rend;
-	}
-}
-
 /*
  * The SRAT table always lists ascending addresses, so can always
  * assume that the first "start" address that you see is the real
@@ -224,7 +178,6 @@ static int __init acpi20_parse_srat(stru
 
 	memset(pxm_bitmap, 0, sizeof(pxm_bitmap));	/* init proximity domain bitmap */
 	memset(node_memory_chunk, 0, sizeof(node_memory_chunk));
-	memset(zholes_size, 0, sizeof(zholes_size));
 
 	num_memory_chunks = 0;
 	while (p < end) {
@@ -288,6 +241,7 @@ static int __init acpi20_parse_srat(stru
 		printk("chunk %d nid %d start_pfn %08lx end_pfn %08lx\n",
 		       j, chunk->nid, chunk->start_pfn, chunk->end_pfn);
 		node_read_chunk(chunk->nid, chunk);
+		add_active_range(chunk->nid, chunk->start_pfn, chunk->end_pfn);
 	}
  
 	for_each_online_node(nid) {
@@ -396,57 +350,7 @@ int __init get_memcfg_from_srat(void)
 		return acpi20_parse_srat((struct acpi_table_srat *)header);
 	}
 out_err:
+	remove_all_active_ranges();
 	printk("failed to get NUMA memory information from SRAT table\n");
 	return 0;
 }
-
-/* For each node run the memory list to determine whether there are
- * any memory holes.  For each hole determine which ZONE they fall
- * into.
- *
- * NOTE#1: this requires knowledge of the zone boundries and so
- * _cannot_ be performed before those are calculated in setup_memory.
- * 
- * NOTE#2: we rely on the fact that the memory chunks are ordered by
- * start pfn number during setup.
- */
-static void __init get_zholes_init(void)
-{
-	int nid;
-	int c;
-	int first;
-	unsigned long end = 0;
-
-	for_each_online_node(nid) {
-		first = 1;
-		for (c = 0; c < num_memory_chunks; c++){
-			if (node_memory_chunk[c].nid == nid) {
-				if (first) {
-					end = node_memory_chunk[c].end_pfn;
-					first = 0;
-
-				} else {
-					/* Record any gap between this chunk
-					 * and the previous chunk on this node
-					 * against the zones it spans.
-					 */
-					chunk_to_zones(end,
-						node_memory_chunk[c].start_pfn,
-						&zholes_size[nid * MAX_NR_ZONES]);
-				}
-			}
-		}
-	}
-}
-
-unsigned long * __init get_zholes_size(int nid)
-{
-	if (!zholes_size_init) {
-		zholes_size_init++;
-		get_zholes_init();
-	}
-	if (nid >= MAX_NUMNODES || !node_online(nid))
-		printk("%s: nid = %d is invalid/offline. num_online_nodes = %d",
-		       __FUNCTION__, nid, num_online_nodes());
-	return &zholes_size[nid * MAX_NR_ZONES];
-}
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.17-rc3-mm1-102-powerpc_use_init_nodes/arch/i386/mm/discontig.c linux-2.6.17-rc3-mm1-103-x86_use_init_nodes/arch/i386/mm/discontig.c
--- linux-2.6.17-rc3-mm1-102-powerpc_use_init_nodes/arch/i386/mm/discontig.c	2006-04-27 03:19:25.000000000 +0100
+++ linux-2.6.17-rc3-mm1-103-x86_use_init_nodes/arch/i386/mm/discontig.c	2006-05-08 09:18:57.000000000 +0100
@@ -157,21 +157,6 @@ static void __init find_max_pfn_node(int
 		BUG();
 }
 
-/* Find the owning node for a pfn. */
-int early_pfn_to_nid(unsigned long pfn)
-{
-	int nid;
-
-	for_each_node(nid) {
-		if (node_end_pfn[nid] == 0)
-			break;
-		if (node_start_pfn[nid] <= pfn && node_end_pfn[nid] >= pfn)
-			return nid;
-	}
-
-	return 0;
-}
-
 /* 
  * Allocate memory for the pg_data_t for this node via a crude pre-bootmem
  * method.  For node zero take this from the bottom of memory, for
@@ -227,6 +212,8 @@ static unsigned long calculate_numa_rema
 	unsigned long pfn;
 
 	for_each_online_node(nid) {
+		unsigned old_end_pfn = node_end_pfn[nid];
+
 		/*
 		 * The acpi/srat node info can show hot-add memroy zones
 		 * where memory could be added but not currently present.
@@ -276,6 +263,7 @@ static unsigned long calculate_numa_rema
 
 		node_end_pfn[nid] -= size;
 		node_remap_start_pfn[nid] = node_end_pfn[nid];
+		shrink_active_range(nid, old_end_pfn, node_end_pfn[nid]);
 	}
 	printk("Reserving total of %ld pages for numa KVA remap\n",
 			reserve_pages);
@@ -352,45 +340,20 @@ unsigned long __init setup_memory(void)
 void __init zone_sizes_init(void)
 {
 	int nid;
+	unsigned long max_dma_pfn;
 
-
-	for_each_online_node(nid) {
-		unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
-		unsigned long *zholes_size;
-		unsigned int max_dma;
-
-		unsigned long low = max_low_pfn;
-		unsigned long start = node_start_pfn[nid];
-		unsigned long high = node_end_pfn[nid];
-
-		max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
-
-		if (node_has_online_mem(nid)){
-			if (start > low) {
-#ifdef CONFIG_HIGHMEM
-				BUG_ON(start > high);
-				zones_size[ZONE_HIGHMEM] = high - start;
-#endif
-			} else {
-				if (low < max_dma)
-					zones_size[ZONE_DMA] = low;
-				else {
-					BUG_ON(max_dma > low);
-					BUG_ON(low > high);
-					zones_size[ZONE_DMA] = max_dma;
-					zones_size[ZONE_NORMAL] = low - max_dma;
-#ifdef CONFIG_HIGHMEM
-					zones_size[ZONE_HIGHMEM] = high - low;
-#endif
-				}
-			}
+	/* If SRAT has not registered memory, register it now */
+	if (find_max_pfn_with_active_regions() == 0) {
+		for_each_online_node(nid) {
+			if (node_has_online_mem(nid))
+				add_active_range(nid, node_start_pfn[nid],
+							node_end_pfn[nid]);
 		}
-
-		zholes_size = get_zholes_size(nid);
-
-		free_area_init_node(nid, NODE_DATA(nid), zones_size, start,
-				zholes_size);
 	}
+
+	max_dma_pfn = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
+	free_area_init_nodes(max_dma_pfn, max_dma_pfn,
+						max_low_pfn, highend_pfn);
 	return;
 }
 

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2006-05-08 14:11 UTC|newest]

Thread overview: 106+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-05-08 14:10 [PATCH 0/6] Sizing zones and holes in an architecture independent manner V6 Mel Gorman
2006-05-08 14:10 ` Mel Gorman
2006-05-08 14:10 ` Mel Gorman
2006-05-08 14:10 ` [PATCH 1/6] Introduce mechanism for registering active regions of memory Mel Gorman
2006-05-08 14:10   ` Mel Gorman
2006-05-08 14:10   ` Mel Gorman
2006-05-08 14:11 ` [PATCH 2/6] Have Power use add_active_range() and free_area_init_nodes() Mel Gorman
2006-05-08 14:11   ` Mel Gorman
2006-05-08 14:11   ` Mel Gorman
2006-05-08 14:11 ` Mel Gorman [this message]
2006-05-08 14:11   ` [PATCH 3/6] Have x86 use add_active_range() and free_area_init_nodes Mel Gorman
2006-05-08 14:11   ` Mel Gorman
2006-05-08 14:11 ` [PATCH 4/6] Have x86_64 " Mel Gorman
2006-05-08 14:11   ` Mel Gorman
2006-05-08 14:11   ` Mel Gorman
2006-05-20 20:59   ` Andrew Morton
2006-05-20 20:59     ` Andrew Morton
2006-05-20 20:59     ` Andrew Morton
2006-05-20 21:27     ` Andi Kleen
2006-05-20 21:27       ` Andi Kleen
2006-05-20 21:27       ` Andi Kleen
2006-05-20 21:40       ` Andrew Morton
2006-05-20 21:40         ` Andrew Morton
2006-05-20 21:40         ` Andrew Morton
2006-05-20 22:17         ` Andi Kleen
2006-05-20 22:17           ` Andi Kleen
2006-05-20 22:17           ` Andi Kleen
2006-05-20 22:54           ` Andrew Morton
2006-05-20 22:54             ` Andrew Morton
2006-05-20 22:54             ` Andrew Morton
2006-05-21 16:20       ` Mel Gorman
2006-05-21 16:20         ` Mel Gorman
2006-05-21 16:20         ` Mel Gorman
2006-05-21 15:50     ` Mel Gorman
2006-05-21 15:50       ` Mel Gorman
2006-05-21 15:50       ` Mel Gorman
2006-05-21 19:08       ` Andrew Morton
2006-05-21 19:08         ` Andrew Morton
2006-05-21 19:08         ` Andrew Morton
2006-05-21 22:23         ` Mel Gorman
2006-05-21 22:23           ` Mel Gorman
2006-05-21 22:23           ` Mel Gorman
2006-05-23 18:01     ` Mel Gorman
2006-05-23 18:01       ` Mel Gorman
2006-05-23 18:01       ` Mel Gorman
2006-05-08 14:12 ` [PATCH 5/6] Have ia64 " Mel Gorman
2006-05-08 14:12   ` Mel Gorman
2006-05-08 14:12   ` Mel Gorman
2006-05-15  3:31   ` Andrew Morton
2006-05-15  3:31     ` Andrew Morton
2006-05-15  3:31     ` Andrew Morton
2006-05-15  8:21     ` Andy Whitcroft
2006-05-15  8:21       ` Andy Whitcroft
2006-05-15  8:21       ` Andy Whitcroft
2006-05-15 10:00       ` Nick Piggin
2006-05-15 10:00         ` Nick Piggin
2006-05-15 10:00         ` Nick Piggin
2006-05-15 10:19         ` Andy Whitcroft
2006-05-15 10:19           ` Andy Whitcroft
2006-05-15 10:19           ` Andy Whitcroft
2006-05-15 10:29           ` KAMEZAWA Hiroyuki
2006-05-15 10:29             ` KAMEZAWA Hiroyuki
2006-05-15 10:29             ` KAMEZAWA Hiroyuki
2006-05-15 10:47             ` KAMEZAWA Hiroyuki
2006-05-15 10:47               ` KAMEZAWA Hiroyuki
2006-05-15 10:47               ` KAMEZAWA Hiroyuki
2006-05-15 11:02             ` Andy Whitcroft
2006-05-15 11:02               ` Andy Whitcroft
2006-05-15 11:02               ` Andy Whitcroft
2006-05-16  0:31             ` Nick Piggin
2006-05-16  0:31               ` Nick Piggin
2006-05-16  0:31               ` Nick Piggin
2006-05-16  1:34               ` KAMEZAWA Hiroyuki
2006-05-16  1:34                 ` KAMEZAWA Hiroyuki
2006-05-16  1:34                 ` KAMEZAWA Hiroyuki
2006-05-16  2:11                 ` Nick Piggin
2006-05-16  2:11                   ` Nick Piggin
2006-05-16  2:11                   ` Nick Piggin
2006-05-15 12:27     ` Mel Gorman
2006-05-15 12:27       ` Mel Gorman
2006-05-15 12:27       ` Mel Gorman
2006-05-15 22:44       ` Mel Gorman
2006-05-15 22:44         ` Mel Gorman
2006-05-15 22:44         ` Mel Gorman
2006-05-19 14:03     ` Mel Gorman
2006-05-19 14:03       ` Mel Gorman
2006-05-19 14:03       ` Mel Gorman
2006-05-19 14:23       ` Andy Whitcroft
2006-05-19 14:23         ` Andy Whitcroft
2006-05-19 14:23         ` Andy Whitcroft
2006-05-08 14:12 ` [PATCH 6/6] Break out memory initialisation code from page_alloc.c to mem_init.c Mel Gorman
2006-05-08 14:12   ` Mel Gorman
2006-05-08 14:12   ` Mel Gorman
2006-05-09  1:47   ` Nick Piggin
2006-05-09  1:47     ` Nick Piggin
2006-05-09  1:47     ` Nick Piggin
2006-05-09  8:24     ` Mel Gorman
2006-05-09  8:24       ` Mel Gorman
2006-05-09  8:24       ` Mel Gorman
  -- strict thread matches above, loose matches on Subject: below --
2006-08-21 13:45 [PATCH 0/6] Sizing zones and holes in an architecture independent manner V9 Mel Gorman
2006-08-21 13:46 ` [PATCH 3/6] Have x86 use add_active_range() and free_area_init_nodes Mel Gorman
2006-08-21 13:46   ` Mel Gorman
2006-08-21 13:46   ` Mel Gorman
2006-07-08 11:10 [PATCH 0/6] Sizing zones and holes in an architecture independent manner V8 Mel Gorman
2006-07-08 11:11 ` [PATCH 3/6] Have x86 use add_active_range() and free_area_init_nodes Mel Gorman
2006-07-08 11:11   ` Mel Gorman
2006-07-08 11:11   ` Mel Gorman
2006-04-11 10:39 [PATCH 0/6] [RFC] Sizing zones and holes in an architecture independent manner Mel Gorman
2006-04-11 10:40 ` [PATCH 3/6] Have x86 use add_active_range() and free_area_init_nodes Mel Gorman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20060508141131.26912.92202.sendpatchset@skynet \
    --to=mel@csn.ul.ie \
    --cc=ak@suse.de \
    --cc=akpm@osdl.org \
    --cc=bob.picco@hp.com \
    --cc=davej@codemonkey.org.uk \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linuxppc-dev@ozlabs.org \
    --cc=tony.luck@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.