linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/4] [RFC] Verification and debugging of memory initialisation
@ 2008-04-16 13:50 Mel Gorman
  2008-04-16 13:51 ` [PATCH 1/4] Add a basic debugging framework for " Mel Gorman
                   ` (4 more replies)
  0 siblings, 5 replies; 11+ messages in thread
From: Mel Gorman @ 2008-04-16 13:50 UTC (permalink / raw)
  To: linux-mm; +Cc: Mel Gorman, mingo, linux-kernel

Boot initialisation has always been a bit of a mess with a number
of ugly points. While significant amounts of the initialisation
is architecture-independent, it trusts the data received from the
architecture layer. This was a mistake in retrospect as it has resulted in
a number of difficult-to-diagnose bugs.

This patchset is an RFC to add some validation and tracing to memory
initialisation. It also introduces a few basic defensive measures and
depending on a boot parameter, will perform additional tests for errors
"that should never occur". I think this would have reduced debugging time
for some boot-related problems. The last part of the patchset is a similar
fix for the patch "[patch] mm: sparsemem memory_present() memory corruption"
that corrects a few more areas where similar errors were made.

I'm not looking to merge this as-is obviously but are there opinions on
whether this is a good idea in principle? Should it be done differently or
not at all?

-- 
Mel Gorman
Part-time Phd Student                          Linux Technology Center
University of Limerick                         IBM Dublin Software Lab

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH 1/4] Add a basic debugging framework for memory initialisation
  2008-04-16 13:50 [PATCH 0/4] [RFC] Verification and debugging of memory initialisation Mel Gorman
@ 2008-04-16 13:51 ` Mel Gorman
  2008-04-16 14:04   ` Ingo Molnar
  2008-04-16 13:51 ` [PATCH 2/4] Verify the page links and memory model Mel Gorman
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 11+ messages in thread
From: Mel Gorman @ 2008-04-16 13:51 UTC (permalink / raw)
  To: linux-mm; +Cc: Mel Gorman, mingo, linux-kernel

This patch creates a new file mm/mm_init.c which memory initialisation should
be moved to over time to avoid further polluting page_alloc.c. This patch
introduces a simple mminit_debug_printk() function and an (undocumented)
mminit_debug_level command-line parameter for setting the level of tracing
and verification that should be done.

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
---

 mm/Makefile     |    2 +-
 mm/internal.h   |    9 +++++++++
 mm/mm_init.c    |   40 ++++++++++++++++++++++++++++++++++++++++
 mm/page_alloc.c |   16 ++++++++++------
 4 files changed, 60 insertions(+), 7 deletions(-)

diff -rup -X /usr/src/patchset-0.6/bin//dontdiff linux-2.6.25-rc9-clean/mm/internal.h linux-2.6.25-rc9-0010_mminit_debug_framework/mm/internal.h
--- linux-2.6.25-rc9-clean/mm/internal.h	2008-04-11 21:32:29.000000000 +0100
+++ linux-2.6.25-rc9-0010_mminit_debug_framework/mm/internal.h	2008-04-16 14:44:19.000000000 +0100
@@ -60,4 +60,13 @@ static inline unsigned long page_order(s
 #define __paginginit __init
 #endif
 
+/* Memory initialisation debug and verification */
+enum mminit_levels {
+	MMINIT_NORMAL,
+	MMINIT_VERIFY,
+	MMINIT_TRACE
+};
+
+extern void mminit_debug_printk(unsigned int level, const char *prefix,
+				const char *fmt, ...);
 #endif
diff -rup -X /usr/src/patchset-0.6/bin//dontdiff linux-2.6.25-rc9-clean/mm/Makefile linux-2.6.25-rc9-0010_mminit_debug_framework/mm/Makefile
--- linux-2.6.25-rc9-clean/mm/Makefile	2008-04-11 21:32:29.000000000 +0100
+++ linux-2.6.25-rc9-0010_mminit_debug_framework/mm/Makefile	2008-04-16 14:44:19.000000000 +0100
@@ -11,7 +11,7 @@ obj-y			:= bootmem.o filemap.o mempool.o
 			   page_alloc.o page-writeback.o pdflush.o \
 			   readahead.o swap.o truncate.o vmscan.o \
 			   prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \
-			   page_isolation.o $(mmu-y)
+			   page_isolation.o mm_init.o $(mmu-y)
 
 obj-$(CONFIG_PROC_PAGE_MONITOR) += pagewalk.o
 obj-$(CONFIG_BOUNCE)	+= bounce.o
diff -rup -X /usr/src/patchset-0.6/bin//dontdiff linux-2.6.25-rc9-clean/mm/mm_init.c linux-2.6.25-rc9-0010_mminit_debug_framework/mm/mm_init.c
--- linux-2.6.25-rc9-clean/mm/mm_init.c	2008-04-16 10:42:54.000000000 +0100
+++ linux-2.6.25-rc9-0010_mminit_debug_framework/mm/mm_init.c	2008-04-16 14:44:19.000000000 +0100
@@ -0,0 +1,40 @@
+/*
+ * mm_init.c - Memory initialisation verification and debugging
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/init.h>
+
+int __initdata mminit_debug_level;
+
+#define MMINIT_BUF_LEN 256
+
+void __meminit mminit_debug_printk(unsigned int level, const char *prefix,
+			const char *fmt, ...)
+{
+	char s[MMINIT_BUF_LEN];
+	va_list args;
+	unsigned int len;
+
+	WARN_ON(!prefix);
+	if (level < mminit_debug_level) {
+		len = snprintf(s, MMINIT_BUF_LEN, KERN_INFO "mminit::%s ",
+								prefix);
+
+		va_start(args, fmt);
+		len += vsnprintf(&s[len], (MMINIT_BUF_LEN - len), fmt, args);
+		va_end(args);
+
+		printk(s);
+
+		WARN_ON(len < 5);
+		WARN_ON(len == MMINIT_BUF_LEN);
+	}
+}
+
+static __init int set_mminit_debug_level(char *str)
+{
+	get_option(&str, &mminit_debug_level);
+	return 0;
+}
+early_param("mminit_debug_level", set_mminit_debug_level);
diff -rup -X /usr/src/patchset-0.6/bin//dontdiff linux-2.6.25-rc9-clean/mm/page_alloc.c linux-2.6.25-rc9-0010_mminit_debug_framework/mm/page_alloc.c
--- linux-2.6.25-rc9-clean/mm/page_alloc.c	2008-04-11 21:32:29.000000000 +0100
+++ linux-2.6.25-rc9-0010_mminit_debug_framework/mm/page_alloc.c	2008-04-16 14:44:19.000000000 +0100
@@ -2958,7 +2958,8 @@ void __init sparse_memory_present_with_a
 void __init push_node_boundaries(unsigned int nid,
 		unsigned long start_pfn, unsigned long end_pfn)
 {
-	printk(KERN_DEBUG "Entering push_node_boundaries(%u, %lu, %lu)\n",
+	mminit_debug_printk(MMINIT_TRACE, "zoneboundary",
+			"Entering push_node_boundaries(%u, %lu, %lu)\n",
 			nid, start_pfn, end_pfn);
 
 	/* Initialise the boundary for this node if necessary */
@@ -2976,7 +2977,8 @@ void __init push_node_boundaries(unsigne
 static void __meminit account_node_boundary(unsigned int nid,
 		unsigned long *start_pfn, unsigned long *end_pfn)
 {
-	printk(KERN_DEBUG "Entering account_node_boundary(%u, %lu, %lu)\n",
+	mminit_debug_printk(MMINIT_TRACE, "zoneboundary",
+			"Entering account_node_boundary(%u, %lu, %lu)\n",
 			nid, *start_pfn, *end_pfn);
 
 	/* Return if boundary information has not been provided */
@@ -3350,8 +3352,8 @@ static void __paginginit free_area_init_
 		memmap_pages = (size * sizeof(struct page)) >> PAGE_SHIFT;
 		if (realsize >= memmap_pages) {
 			realsize -= memmap_pages;
-			printk(KERN_DEBUG
-				"  %s zone: %lu pages used for memmap\n",
+			mminit_debug_printk(MMINIT_TRACE, "memmap_init",
+				"%s zone: %lu pages used for memmap\n",
 				zone_names[j], memmap_pages);
 		} else
 			printk(KERN_WARNING
@@ -3361,7 +3363,8 @@ static void __paginginit free_area_init_
 		/* Account for reserved pages */
 		if (j == 0 && realsize > dma_reserve) {
 			realsize -= dma_reserve;
-			printk(KERN_DEBUG "  %s zone: %lu pages reserved\n",
+			mminit_debug_printk(MMINIT_TRACE, "memmap_init",
+					"%s zone: %lu pages reserved\n",
 					zone_names[0], dma_reserve);
 		}
 
@@ -3496,7 +3499,8 @@ void __init add_active_range(unsigned in
 {
 	int i;
 
-	printk(KERN_DEBUG "Entering add_active_range(%d, %lu, %lu) "
+	mminit_debug_printk(MMINIT_TRACE, "memory_register",
+			"Entering add_active_range(%d, %lu, %lu) "
 			  "%d entries of %d used\n",
 			  nid, start_pfn, end_pfn,
 			  nr_nodemap_entries, MAX_ACTIVE_REGIONS);

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH 2/4] Verify the page links and memory model
  2008-04-16 13:50 [PATCH 0/4] [RFC] Verification and debugging of memory initialisation Mel Gorman
  2008-04-16 13:51 ` [PATCH 1/4] Add a basic debugging framework for " Mel Gorman
@ 2008-04-16 13:51 ` Mel Gorman
  2008-04-16 19:12   ` Christoph Lameter
  2008-04-16 13:51 ` [PATCH 3/4] Print out the zonelists on request for manual verification Mel Gorman
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 11+ messages in thread
From: Mel Gorman @ 2008-04-16 13:51 UTC (permalink / raw)
  To: linux-mm; +Cc: Mel Gorman, mingo, linux-kernel

This patch prints out information on how the page flags are being used and
verifies they are correct if mminit_debug_level is MMINIT_VERIFY or higher.
When the page flags are updated with section, node and zone information, an
additional check is made to ensure the values can be retrieved correctly. The
final check made with respect to pages is that pfn_to_page() and page_to_pfn()
are returning sensible values.

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
---

 mm/internal.h   |    3 ++
 mm/mm_init.c    |   65 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 mm/page_alloc.c |    6 ++++
 3 files changed, 73 insertions(+), 1 deletion(-)

diff -rup -X /usr/src/patchset-0.6/bin//dontdiff linux-2.6.25-rc9-0010_mminit_debug_framework/mm/internal.h linux-2.6.25-rc9-0020_memmap_init_debug/mm/internal.h
--- linux-2.6.25-rc9-0010_mminit_debug_framework/mm/internal.h	2008-04-16 14:44:19.000000000 +0100
+++ linux-2.6.25-rc9-0020_memmap_init_debug/mm/internal.h	2008-04-16 14:44:32.000000000 +0100
@@ -67,6 +67,9 @@ enum mminit_levels {
 	MMINIT_TRACE
 };
 
+extern void mminit_verify_pageflags(void);
+extern void mminit_verify_page_links(struct page *page, enum zone_type zone,
+				unsigned long nid, unsigned long pfn);
 extern void mminit_debug_printk(unsigned int level, const char *prefix,
 				const char *fmt, ...);
 #endif
diff -rup -X /usr/src/patchset-0.6/bin//dontdiff linux-2.6.25-rc9-0010_mminit_debug_framework/mm/mm_init.c linux-2.6.25-rc9-0020_memmap_init_debug/mm/mm_init.c
--- linux-2.6.25-rc9-0010_mminit_debug_framework/mm/mm_init.c	2008-04-16 14:44:19.000000000 +0100
+++ linux-2.6.25-rc9-0020_memmap_init_debug/mm/mm_init.c	2008-04-16 14:44:32.000000000 +0100
@@ -4,11 +4,74 @@
  */
 #include <linux/kernel.h>
 #include <linux/init.h>
+#include "internal.h"
 
 int __initdata mminit_debug_level;
 
 #define MMINIT_BUF_LEN 256
 
+void __init mminit_verify_pageflags(void)
+{
+	unsigned long shift = 0;
+	if (mminit_debug_level < MMINIT_VERIFY)
+		return;
+
+	mminit_debug_printk(MMINIT_TRACE, "pageflags_layout_widths",
+		"Section %d Node %d Zone %d Reserved %d\n",
+		SECTIONS_WIDTH,
+		NODES_WIDTH,
+		ZONES_WIDTH,
+		FLAGS_RESERVED);
+	mminit_debug_printk(MMINIT_TRACE, "pageflags_layout_shifts",
+		"Section %d Node %d Zone %d\n",
+#ifdef SECTION_SHIFT
+		SECTIONS_SHIFT,
+#else
+		0,
+#endif
+		NODES_SHIFT,
+		ZONES_SHIFT,
+		ZONEID_SHIFT);
+	mminit_debug_printk(MMINIT_TRACE, "pageflags_layout_offsets",
+		"Section %d Node %d Zone %d\n",
+		SECTIONS_PGSHIFT,
+		NODES_PGSHIFT,
+		ZONES_PGSHIFT);
+	mminit_debug_printk(MMINIT_TRACE, "pageflags_layout_zoneid",
+		"Zone ID: %d -> %d\n",
+		ZONEID_PGOFF, ZONEID_PGOFF + ZONEID_SHIFT);
+#ifdef NODE_NOT_IN_PAGE_FLAGS
+	mminit_debug_printk(MMINIT_TRACE, "pageflags_layout_nodeflags",
+		"Node not in page flags");
+#endif
+
+	shift = 8 * sizeof(unsigned long);
+	if (SECTIONS_WIDTH) {
+		shift -= SECTIONS_WIDTH;
+		BUG_ON(shift != SECTIONS_PGSHIFT);
+	}
+	if (NODES_WIDTH) {
+		shift -= NODES_WIDTH;
+		BUG_ON(shift != NODES_PGSHIFT);
+	}
+	if (ZONES_WIDTH) {
+		shift -= ZONES_WIDTH;
+		BUG_ON(shift != ZONES_PGSHIFT);
+	}
+	BUG_ON(ZONES_MASK & NODES_MASK & SECTIONS_MASK);
+}
+
+void __meminit mminit_verify_page_links(struct page *page, enum zone_type zone,
+			unsigned long nid, unsigned long pfn)
+{
+	if (mminit_debug_level < MMINIT_VERIFY)
+		return;
+
+	BUG_ON(page_to_nid(page) != nid);
+	BUG_ON(page_zone_id(page) != zone);
+	BUG_ON(page_to_pfn(page) != pfn);
+}
+
 void __meminit mminit_debug_printk(unsigned int level, const char *prefix,
 			const char *fmt, ...)
 {
@@ -28,7 +91,7 @@ void __meminit mminit_debug_printk(unsig
 		printk(s);
 
 		WARN_ON(len < 5);
-		WARN_ON(len == MMINIT_BUF_LEN);
+		WARN_ON(len > MMINIT_BUF_LEN);
 	}
 }
 
diff -rup -X /usr/src/patchset-0.6/bin//dontdiff linux-2.6.25-rc9-0010_mminit_debug_framework/mm/page_alloc.c linux-2.6.25-rc9-0020_memmap_init_debug/mm/page_alloc.c
--- linux-2.6.25-rc9-0010_mminit_debug_framework/mm/page_alloc.c	2008-04-16 14:44:19.000000000 +0100
+++ linux-2.6.25-rc9-0020_memmap_init_debug/mm/page_alloc.c	2008-04-16 14:44:32.000000000 +0100
@@ -2533,6 +2533,7 @@ void __meminit memmap_init_zone(unsigned
 		}
 		page = pfn_to_page(pfn);
 		set_page_links(page, zone, nid, pfn);
+		mminit_verify_page_links(page, zone, nid, pfn);
 		init_page_count(page);
 		reset_page_mapcount(page);
 		SetPageReserved(page);
@@ -2829,6 +2830,10 @@ __meminit int init_currently_empty_zone(
 
 	zone->zone_start_pfn = zone_start_pfn;
 
+	mminit_debug_printk(MMINIT_TRACE, "memmap_init",
+			"Initialising map node %d zone %d pfns %lu -> %lu\n",
+			pgdat->node_id, zone_idx(zone),
+			zone_start_pfn, (zone_start_pfn + size));
 	memmap_init(size, pgdat->node_id, zone_idx(zone), zone_start_pfn);
 
 	zone_init_free_lists(zone);
@@ -3896,6 +3901,7 @@ void __init free_area_init_nodes(unsigne
 						early_node_map[i].end_pfn);
 
 	/* Initialise every node */
+	mminit_verify_pageflags();
 	setup_nr_node_ids();
 	for_each_online_node(nid) {
 		pg_data_t *pgdat = NODE_DATA(nid);

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH 3/4] Print out the zonelists on request for manual verification
  2008-04-16 13:50 [PATCH 0/4] [RFC] Verification and debugging of memory initialisation Mel Gorman
  2008-04-16 13:51 ` [PATCH 1/4] Add a basic debugging framework for " Mel Gorman
  2008-04-16 13:51 ` [PATCH 2/4] Verify the page links and memory model Mel Gorman
@ 2008-04-16 13:51 ` Mel Gorman
  2008-04-16 13:52 ` [PATCH 4/4] Make defencive checks around PFN values registered for memory usage Mel Gorman
  2008-04-16 14:00 ` [PATCH 0/4] [RFC] Verification and debugging of memory initialisation Ingo Molnar
  4 siblings, 0 replies; 11+ messages in thread
From: Mel Gorman @ 2008-04-16 13:51 UTC (permalink / raw)
  To: linux-mm; +Cc: Mel Gorman, mingo, linux-kernel

This patch prints out the zonelists during boot for manual verification by
the user. This is useful for checking if the zonelists were somehow corrupted
during initialisation.

Note that this patch will not work in -mm due to differences in how zonelists
are used. This is specific to how 2.6.25-rc9 works but a similar version for -mm
would be straight-forward enough.

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
---

 mm/internal.h   |    1 +
 mm/mm_init.c    |   40 ++++++++++++++++++++++++++++++++++++++++
 mm/page_alloc.c |    1 +
 3 files changed, 42 insertions(+)

diff -rup -X /usr/src/patchset-0.6/bin//dontdiff linux-2.6.25-rc9-0020_memmap_init_debug/mm/internal.h linux-2.6.25-rc9-0030_display_zonelist/mm/internal.h
--- linux-2.6.25-rc9-0020_memmap_init_debug/mm/internal.h	2008-04-16 14:44:32.000000000 +0100
+++ linux-2.6.25-rc9-0030_display_zonelist/mm/internal.h	2008-04-16 14:44:46.000000000 +0100
@@ -67,6 +67,7 @@ enum mminit_levels {
 	MMINIT_TRACE
 };
 
+extern void mminit_verify_zonelist(void);
 extern void mminit_verify_pageflags(void);
 extern void mminit_verify_page_links(struct page *page, enum zone_type zone,
 				unsigned long nid, unsigned long pfn);
diff -rup -X /usr/src/patchset-0.6/bin//dontdiff linux-2.6.25-rc9-0020_memmap_init_debug/mm/mm_init.c linux-2.6.25-rc9-0030_display_zonelist/mm/mm_init.c
--- linux-2.6.25-rc9-0020_memmap_init_debug/mm/mm_init.c	2008-04-16 14:44:32.000000000 +0100
+++ linux-2.6.25-rc9-0030_display_zonelist/mm/mm_init.c	2008-04-16 14:44:46.000000000 +0100
@@ -10,6 +10,46 @@ int __initdata mminit_debug_level;
 
 #define MMINIT_BUF_LEN 256
 
+/* Note that the verification of correctness is required from the user */
+void mminit_verify_zonelist(void)
+{
+	int nid;
+
+	if (mminit_debug_level < MMINIT_VERIFY)
+		return;
+
+	for_each_online_node(nid) {
+		pg_data_t *pgdat = NODE_DATA(nid);
+		struct zone *zone;
+		struct zone **z;
+		int zoneid;
+
+		for (zoneid = 0; zoneid < MAX_ZONELISTS; zoneid++) {
+			zone = &pgdat->node_zones[zoneid];
+
+			if (!populated_zone(zone))
+				continue;
+
+			printk(KERN_INFO "Zonelist %s %d:%s = ",
+				zoneid >= MAX_NR_ZONES ? "thisnode" : "general",
+				nid,
+				zone->name);
+			z = pgdat->node_zonelists[zoneid].zones;
+
+			while (*z != NULL) {
+#ifdef CONFIG_NUMA
+				printk(KERN_INFO "%d:%s ",
+						(*z)->node, (*z)->name);
+#else
+				printk(KERN_INFO "0:%s ", (*z)->name);
+#endif
+				z++;
+			}
+			printk(KERN_INFO "\n");
+		}
+	}
+}
+
 void __init mminit_verify_pageflags(void)
 {
 	unsigned long shift = 0;
diff -rup -X /usr/src/patchset-0.6/bin//dontdiff linux-2.6.25-rc9-0020_memmap_init_debug/mm/page_alloc.c linux-2.6.25-rc9-0030_display_zonelist/mm/page_alloc.c
--- linux-2.6.25-rc9-0020_memmap_init_debug/mm/page_alloc.c	2008-04-16 14:44:32.000000000 +0100
+++ linux-2.6.25-rc9-0030_display_zonelist/mm/page_alloc.c	2008-04-16 14:44:46.000000000 +0100
@@ -2353,6 +2353,7 @@ void build_all_zonelists(void)
 
 	if (system_state == SYSTEM_BOOTING) {
 		__build_all_zonelists(NULL);
+		mminit_verify_zonelist();
 		cpuset_init_current_mems_allowed();
 	} else {
 		/* we have to stop all cpus to guarantee there is no user

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH 4/4] Make defencive checks around PFN values registered for memory usage
  2008-04-16 13:50 [PATCH 0/4] [RFC] Verification and debugging of memory initialisation Mel Gorman
                   ` (2 preceding siblings ...)
  2008-04-16 13:51 ` [PATCH 3/4] Print out the zonelists on request for manual verification Mel Gorman
@ 2008-04-16 13:52 ` Mel Gorman
  2008-04-16 14:02   ` Ingo Molnar
  2008-04-16 14:00 ` [PATCH 0/4] [RFC] Verification and debugging of memory initialisation Ingo Molnar
  4 siblings, 1 reply; 11+ messages in thread
From: Mel Gorman @ 2008-04-16 13:52 UTC (permalink / raw)
  To: linux-mm; +Cc: Mel Gorman, mingo, linux-kernel

There are a number of different views of how much memory is currently
active. There is the arch-independent zone-sizing view, the bootmem allocator's
view and SPARSEMEM's view.  Architectures register this information at different
times and the views are not necessarily in sync, particularly with regard to
some SPARSEMEM limitations.

This patch introduces mminit_validate_physlimits() which is able to validate
and correct PFN ranges with respect to SPARSEMEM limitations. Ordinarily
they will be fixed silently but if mminit_debug_level is MMINIT_VERIFY or
higher, a message will be printed to dmesg.

This fixes the same problem as fixed by "[patch] mm: sparsemem
memory_present() memory corruption fix" in a slightly different way. This
patch would obviously be rebased on top of that fix.

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
---

 mm/bootmem.c    |    1 +
 mm/internal.h   |    9 +++++++++
 mm/page_alloc.c |    2 ++
 mm/sparse.c     |   24 ++++++++++++++++++++++++
 4 files changed, 36 insertions(+)

diff -rup -X /usr/src/patchset-0.6/bin//dontdiff linux-2.6.25-rc9-0030_display_zonelist/mm/bootmem.c linux-2.6.25-rc9-0040_defensive_pfn_checks/mm/bootmem.c
--- linux-2.6.25-rc9-0030_display_zonelist/mm/bootmem.c	2008-04-11 21:32:29.000000000 +0100
+++ linux-2.6.25-rc9-0040_defensive_pfn_checks/mm/bootmem.c	2008-04-16 14:45:08.000000000 +0100
@@ -91,6 +91,7 @@ static unsigned long __init init_bootmem
 	bootmem_data_t *bdata = pgdat->bdata;
 	unsigned long mapsize;
 
+	mminit_validate_physlimits(&start, &end);
 	bdata->node_bootmem_map = phys_to_virt(PFN_PHYS(mapstart));
 	bdata->node_boot_start = PFN_PHYS(start);
 	bdata->node_low_pfn = end;
diff -rup -X /usr/src/patchset-0.6/bin//dontdiff linux-2.6.25-rc9-0030_display_zonelist/mm/internal.h linux-2.6.25-rc9-0040_defensive_pfn_checks/mm/internal.h
--- linux-2.6.25-rc9-0030_display_zonelist/mm/internal.h	2008-04-16 14:44:46.000000000 +0100
+++ linux-2.6.25-rc9-0040_defensive_pfn_checks/mm/internal.h	2008-04-16 14:45:08.000000000 +0100
@@ -67,6 +67,15 @@ enum mminit_levels {
 	MMINIT_TRACE
 };
 
+#ifdef CONFIG_SPARSEMEM
+extern void mminit_validate_physlimits(unsigned long *start_pfn,
+				unsigned long *end_pfn);
+#else
+static inline void mminit_validate_physlimits(unsigned long *start_pfn,
+				unsigned long *end_pfn)
+{
+}
+#endif /* CONFIG_SPARSEMEM */
 extern void mminit_verify_zonelist(void);
 extern void mminit_verify_pageflags(void);
 extern void mminit_verify_page_links(struct page *page, enum zone_type zone,
diff -rup -X /usr/src/patchset-0.6/bin//dontdiff linux-2.6.25-rc9-0030_display_zonelist/mm/page_alloc.c linux-2.6.25-rc9-0040_defensive_pfn_checks/mm/page_alloc.c
--- linux-2.6.25-rc9-0030_display_zonelist/mm/page_alloc.c	2008-04-16 14:44:46.000000000 +0100
+++ linux-2.6.25-rc9-0040_defensive_pfn_checks/mm/page_alloc.c	2008-04-16 14:45:08.000000000 +0100
@@ -3511,6 +3511,8 @@ void __init add_active_range(unsigned in
 			  nid, start_pfn, end_pfn,
 			  nr_nodemap_entries, MAX_ACTIVE_REGIONS);
 
+	mminit_validate_physlimits(&start_pfn, &end_pfn);
+
 	/* Merge with existing active regions if possible */
 	for (i = 0; i < nr_nodemap_entries; i++) {
 		if (early_node_map[i].nid != nid)
diff -rup -X /usr/src/patchset-0.6/bin//dontdiff linux-2.6.25-rc9-0030_display_zonelist/mm/sparse.c linux-2.6.25-rc9-0040_defensive_pfn_checks/mm/sparse.c
--- linux-2.6.25-rc9-0030_display_zonelist/mm/sparse.c	2008-04-11 21:32:29.000000000 +0100
+++ linux-2.6.25-rc9-0040_defensive_pfn_checks/mm/sparse.c	2008-04-16 14:45:08.000000000 +0100
@@ -11,6 +11,7 @@
 #include <asm/dma.h>
 #include <asm/pgalloc.h>
 #include <asm/pgtable.h>
+#include "internal.h"
 
 /*
  * Permanent SPARSEMEM data:
@@ -146,12 +147,34 @@ static inline int sparse_early_nid(struc
 	return (section->section_mem_map >> SECTION_NID_SHIFT);
 }
 
+/* Validate the physical addressing limitations of the model */
+void __meminit mminit_validate_physlimits(unsigned long *start_pfn,
+						unsigned long *end_pfn)
+{
+	unsigned long max_sparsemem_pfn = 1UL << (MAX_PHYSMEM_BITS-PAGE_SHIFT);
+	if (*start_pfn > max_sparsemem_pfn) {
+		mminit_debug_printk(MMINIT_VERIFY, "pfnvalidation",
+			"Start of range %lu -> %lu exceeds SPARSEMEM max %lu\n",
+			*start_pfn, *end_pfn, max_sparsemem_pfn);
+		*start_pfn = max_sparsemem_pfn;
+		*end_pfn = max_sparsemem_pfn;
+	}
+
+	if (*end_pfn > max_sparsemem_pfn) {
+		mminit_debug_printk(MMINIT_VERIFY, "pfnvalidation",
+			"End of range %lu -> %lu exceeds SPARSEMEM max %lu\n",
+			*start_pfn, *end_pfn, max_sparsemem_pfn);
+		*end_pfn = max_sparsemem_pfn;
+	}
+}
+
 /* Record a memory area against a node. */
 void __init memory_present(int nid, unsigned long start, unsigned long end)
 {
 	unsigned long pfn;
 
 	start &= PAGE_SECTION_MASK;
+	mminit_validate_physlimits(&start, &end);
 	for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) {
 		unsigned long section = pfn_to_section_nr(pfn);
 		struct mem_section *ms;
@@ -176,6 +199,7 @@ unsigned long __init node_memmap_size_by
 	unsigned long pfn;
 	unsigned long nr_pages = 0;
 
+	mminit_validate_physlimits(&start_pfn, &end_pfn);
 	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
 		if (nid != early_pfn_to_nid(pfn))
 			continue;

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 0/4] [RFC] Verification and debugging of memory initialisation
  2008-04-16 13:50 [PATCH 0/4] [RFC] Verification and debugging of memory initialisation Mel Gorman
                   ` (3 preceding siblings ...)
  2008-04-16 13:52 ` [PATCH 4/4] Make defencive checks around PFN values registered for memory usage Mel Gorman
@ 2008-04-16 14:00 ` Ingo Molnar
  4 siblings, 0 replies; 11+ messages in thread
From: Ingo Molnar @ 2008-04-16 14:00 UTC (permalink / raw)
  To: Mel Gorman; +Cc: linux-mm, linux-kernel, Andrew Morton

* Mel Gorman <mel@csn.ul.ie> wrote:

> Boot initialisation has always been a bit of a mess with a number of 
> ugly points. While significant amounts of the initialisation is 
> architecture-independent, it trusts the data received from the 
> architecture layer. This was a mistake in retrospect as it has 
> resulted in a number of difficult-to-diagnose bugs.
> 
> This patchset is an RFC to add some validation and tracing to memory 
> initialisation. It also introduces a few basic defensive measures and 
> depending on a boot parameter, will perform additional tests for 
> errors "that should never occur". I think this would have reduced 
> debugging time for some boot-related problems. The last part of the 
> patchset is a similar fix for the patch "[patch] mm: sparsemem 
> memory_present() memory corruption" that corrects a few more areas 
> where similar errors were made.
> 
> I'm not looking to merge this as-is obviously but are there opinions 
> on whether this is a good idea in principle? Should it be done 
> differently or not at all?

very nice stuff!

  Acked-by: Ingo Molnar <mingo@elte.hu>

or rather:

  Very-Strongly-Acked-by: Ingo Molnar <mingo@elte.hu>

	Ingo

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 4/4] Make defencive checks around PFN values registered for memory usage
  2008-04-16 13:52 ` [PATCH 4/4] Make defencive checks around PFN values registered for memory usage Mel Gorman
@ 2008-04-16 14:02   ` Ingo Molnar
  0 siblings, 0 replies; 11+ messages in thread
From: Ingo Molnar @ 2008-04-16 14:02 UTC (permalink / raw)
  To: Mel Gorman; +Cc: linux-mm, linux-kernel

* Mel Gorman <mel@csn.ul.ie> wrote:

> +	if (*start_pfn > max_sparsemem_pfn) {
> +		mminit_debug_printk(MMINIT_VERIFY, "pfnvalidation",
> +			"Start of range %lu -> %lu exceeds SPARSEMEM max %lu\n",
> +			*start_pfn, *end_pfn, max_sparsemem_pfn);

small request: please emit a WARN_ON_ONCE() as well, so that 
kerneloops.org (and automated test setups) picks it up.

> +		mminit_debug_printk(MMINIT_VERIFY, "pfnvalidation",
> +			"End of range %lu -> %lu exceeds SPARSEMEM max %lu\n",
> +			*start_pfn, *end_pfn, max_sparsemem_pfn);

ditto - all errors should be fixed up and we should try to continue as 
far as possible, but emitting a WARN_ON_ONCE() will be very useful in 
making sure people notice the warning.

	Ingo

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/4] Add a basic debugging framework for memory initialisation
  2008-04-16 13:51 ` [PATCH 1/4] Add a basic debugging framework for " Mel Gorman
@ 2008-04-16 14:04   ` Ingo Molnar
  2008-04-16 19:30     ` Mel Gorman
  0 siblings, 1 reply; 11+ messages in thread
From: Ingo Molnar @ 2008-04-16 14:04 UTC (permalink / raw)
  To: Mel Gorman; +Cc: linux-mm, linux-kernel

* Mel Gorman <mel@csn.ul.ie> wrote:

> +static __init int set_mminit_debug_level(char *str)
> +{
> +	get_option(&str, &mminit_debug_level);
> +	return 0;
> +}
> +early_param("mminit_debug_level", set_mminit_debug_level);

another small suggestion: could you please also add a Kconfig method of 
enabling it, dependent on KERNEL_DEBUG, default-off (for now). The best 
would be not a numeric switch but something that gets randomized by 
"make randconfig". I.e. an on/off switch kind of thing.

	Ingo

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/4] Verify the page links and memory model
  2008-04-16 13:51 ` [PATCH 2/4] Verify the page links and memory model Mel Gorman
@ 2008-04-16 19:12   ` Christoph Lameter
  2008-04-16 20:16     ` Mel Gorman
  0 siblings, 1 reply; 11+ messages in thread
From: Christoph Lameter @ 2008-04-16 19:12 UTC (permalink / raw)
  To: Mel Gorman; +Cc: linux-mm, mingo, linux-kernel

On Wed, 16 Apr 2008, Mel Gorman wrote:

> +		FLAGS_RESERVED);

FLAGS_RESERVED no longer exists in mm. It's dynamically calculated.

It may be useful to print out NR_PAGEFLAGS instead and show the area in 
the middle of page flags that is left unused and that may be used by 
arches such as sparc64.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/4] Add a basic debugging framework for memory initialisation
  2008-04-16 14:04   ` Ingo Molnar
@ 2008-04-16 19:30     ` Mel Gorman
  0 siblings, 0 replies; 11+ messages in thread
From: Mel Gorman @ 2008-04-16 19:30 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: linux-mm, linux-kernel

On (16/04/08 16:04), Ingo Molnar didst pronounce:
> 
> * Mel Gorman <mel@csn.ul.ie> wrote:
> 
> > +static __init int set_mminit_debug_level(char *str)
> > +{
> > +	get_option(&str, &mminit_debug_level);
> > +	return 0;
> > +}
> > +early_param("mminit_debug_level", set_mminit_debug_level);
> 
> another small suggestion: could you please also add a Kconfig method of 
> enabling it, dependent on KERNEL_DEBUG, default-off (for now). The best 
> would be not a numeric switch but something that gets randomized by 
> "make randconfig". I.e. an on/off switch kind of thing.
> 

Makes sense. I've this and your other suggestions incorporated. It'll be
part of V2.

-- 
Mel Gorman
Part-time Phd Student                          Linux Technology Center
University of Limerick                         IBM Dublin Software Lab

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/4] Verify the page links and memory model
  2008-04-16 19:12   ` Christoph Lameter
@ 2008-04-16 20:16     ` Mel Gorman
  0 siblings, 0 replies; 11+ messages in thread
From: Mel Gorman @ 2008-04-16 20:16 UTC (permalink / raw)
  To: Christoph Lameter; +Cc: linux-mm, mingo, linux-kernel

On (16/04/08 12:12), Christoph Lameter didst pronounce:
> On Wed, 16 Apr 2008, Mel Gorman wrote:
> 
> > +		FLAGS_RESERVED);
> 
> FLAGS_RESERVED no longer exists in mm. It's dynamically calculated.
> 
> It may be useful to print out NR_PAGEFLAGS instead and show the area in 
> the middle of page flags that is left unused and that may be used by 
> arches such as sparc64.
> 

That's a good point. I'll do that on a version I rebase to -mm. V2 will
still be based on mainline.

Thanks.

-- 
Mel Gorman
Part-time Phd Student                          Linux Technology Center
University of Limerick                         IBM Dublin Software Lab

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2008-04-16 20:16 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-04-16 13:50 [PATCH 0/4] [RFC] Verification and debugging of memory initialisation Mel Gorman
2008-04-16 13:51 ` [PATCH 1/4] Add a basic debugging framework for " Mel Gorman
2008-04-16 14:04   ` Ingo Molnar
2008-04-16 19:30     ` Mel Gorman
2008-04-16 13:51 ` [PATCH 2/4] Verify the page links and memory model Mel Gorman
2008-04-16 19:12   ` Christoph Lameter
2008-04-16 20:16     ` Mel Gorman
2008-04-16 13:51 ` [PATCH 3/4] Print out the zonelists on request for manual verification Mel Gorman
2008-04-16 13:52 ` [PATCH 4/4] Make defencive checks around PFN values registered for memory usage Mel Gorman
2008-04-16 14:02   ` Ingo Molnar
2008-04-16 14:00 ` [PATCH 0/4] [RFC] Verification and debugging of memory initialisation Ingo Molnar

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).