* [RFC 1/3 repost to correct ML] mm: more intensive memory corruption debug
@ 2011-10-17 14:24 Stanislaw Gruszka
  2011-10-17 14:24 ` [RFC 2/3] PM / Hibernate : do not count debug pages as savable Stanislaw Gruszka
  2011-10-17 14:24 ` [RFC 3/3] slub: min order when corrupt_dbg Stanislaw Gruszka
  0 siblings, 2 replies; 4+ messages in thread
From: Stanislaw Gruszka @ 2011-10-17 14:24 UTC (permalink / raw)
  To: linux-mm
  Cc: linux-kernel, Mel Gorman, Andrea Arcangeli, Andrew Morton,
	Rafael J. Wysocki, Christoph Lameter, Stanislaw Gruszka

With CONFIG_DEBUG_PAGEALLOC configured, the CPU generates an exception
on any access (read or write) to an unallocated page, which makes it
possible to catch code that corrupts memory. However, the kernel tries
to maximize memory usage, so there are usually few free pages in the
system, and buggy code usually corrupts some crucial data instead.
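
For illustration, here is a minimal sketch (written for this
description, not part of the patch) of the class of bug the feature
catches; with CONFIG_DEBUG_PAGEALLOC the stale write faults
immediately, pointing at the buggy code, instead of corrupting
whatever the page is reused for:

#include <linux/gfp.h>
#include <linux/mm.h>

static void use_after_free_example(void)
{
	struct page *page = alloc_page(GFP_KERNEL);
	int *val;

	if (!page)
		return;

	val = page_address(page);
	*val = 42;		/* valid access */

	__free_page(page);	/* DEBUG_PAGEALLOC unmaps the page here */

	*val = 43;		/* faults with DEBUG_PAGEALLOC; silent
				 * corruption without it */
}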

This patch changes the buddy allocator to keep more free/protected
pages, and to interlace free/protected and allocated pages, to
increase the probability of catching a corruption.

When the kernel is compiled with CONFIG_DEBUG_PAGEALLOC, the
corrupt_dbg boot parameter becomes available to specify the page order
that should be kept free.

E.g.:

* corrupt_dbg=1:
  - an order-0 allocation results in 1 allocated page and 1 consecutive
    protected page
  - order > 0 allocations are not affected
* corrupt_dbg=2:
  - an order-0 allocation results in 1 allocated page and 3 consecutive
    protected pages
  - an order-1 allocation results in 2 allocated pages and 2 consecutive
    protected pages
  - order > 1 allocations are not affected
* and so on
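
To make the cost concrete, here is an illustrative helper (my sketch,
not code from the patch) computing how many pages stay protected per
allocation; the parameter is passed on the kernel command line, e.g.
corrupt_dbg=1:

/*
 * Pages left protected for one allocation of the given order.  Orders
 * below corrupt_dbg are always served by splitting a block of order
 * corrupt_dbg (lower orders never reach the free lists), so everything
 * above the allocated 2^order pages remains protected.
 */
static unsigned long protected_pages_per_alloc(unsigned int order,
					       unsigned int corrupt_dbg)
{
	if (order >= corrupt_dbg)
		return 0;
	return (1UL << corrupt_dbg) - (1UL << order);
}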

Probably the only practical setting is corrupt_dbg=1, unless someone
is really desperate about a memory corruption bug and has a huge
amount of RAM.

The patch should not change any generated code when the kernel is
compiled without CONFIG_DEBUG_PAGEALLOC, but I have not verified that
yet.

There are some known issues with the patch:

- With corrupt_dbg=1 I expect /proc/buddyinfo to always show an
  order-0 free count of 0; however, sometimes the value is 1, which I
  am unable to explain.

- When dropping caches, the system may hang at:

  RIP: 0010:[<ffffffff81266337>]  [<ffffffff81266337>] radix_tree_gang_lookup_slot+0x47/0xf0
  Call Trace:
    [<ffffffff8111ecd0>] find_get_pages+0x70/0x1b0
    [<ffffffff8111ec60>] ? find_get_pages_contig+0x180/0x180
    [<ffffffff811297b2>] pagevec_lookup+0x22/0x30
    [<ffffffff8112b404>] invalidate_mapping_pages+0x84/0x1e0
    [<ffffffff811ab797>] drop_pagecache_sb+0xb7/0xf0

 I am not sure whether this is a problem with my patch or a
 corner-case bug in pagevec_lookup.

Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
---
 include/linux/mm.h               |   12 ++++++
 include/linux/page-debug-flags.h |    4 ++-
 mm/Kconfig.debug                 |    1 +
 mm/page_alloc.c                  |   69 +++++++++++++++++++++++++++++++++----
 4 files changed, 77 insertions(+), 9 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7438071..17e3658 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1627,5 +1627,17 @@ extern void copy_user_huge_page(struct page *dst, struct page *src,
 				unsigned int pages_per_huge_page);
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */
 
+#ifdef CONFIG_DEBUG_PAGEALLOC
+extern unsigned int _corrupt_dbg;
+
+
+static inline unsigned int corrupt_dbg(void)
+{
+	return _corrupt_dbg;
+}
+#else
+static inline unsigned int corrupt_dbg(void) { return 0; }
+#endif /* CONFIG_DEBUG_PAGEALLOC */
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_MM_H */
diff --git a/include/linux/page-debug-flags.h b/include/linux/page-debug-flags.h
index b0638fd..f63c905 100644
--- a/include/linux/page-debug-flags.h
+++ b/include/linux/page-debug-flags.h
@@ -13,6 +13,7 @@
 
 enum page_debug_flags {
 	PAGE_DEBUG_FLAG_POISON,		/* Page is poisoned */
+	PAGE_DEBUG_FLAG_CORRUPT,
 };
 
 /*
@@ -21,7 +22,8 @@ enum page_debug_flags {
  */
 
 #ifdef CONFIG_WANT_PAGE_DEBUG_FLAGS
-#if !defined(CONFIG_PAGE_POISONING) \
+#if !defined(CONFIG_PAGE_POISONING) && \
+    !defined(CONFIG_DEBUG_PAGEALLOC) \
 /* && !defined(CONFIG_PAGE_DEBUG_SOMETHING_ELSE) && ... */
 #error WANT_PAGE_DEBUG_FLAGS is turned on with no debug features!
 #endif
diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug
index 8b1a477..3c554f0 100644
--- a/mm/Kconfig.debug
+++ b/mm/Kconfig.debug
@@ -4,6 +4,7 @@ config DEBUG_PAGEALLOC
 	depends on !HIBERNATION || ARCH_SUPPORTS_DEBUG_PAGEALLOC && !PPC && !SPARC
 	depends on !KMEMCHECK
 	select PAGE_POISONING if !ARCH_SUPPORTS_DEBUG_PAGEALLOC
+	select WANT_PAGE_DEBUG_FLAGS
 	---help---
 	  Unmap pages from the kernel linear mapping after free_pages().
 	  This results in a large slowdown, but helps to find certain types
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6e8ecb6..8d18ae4 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -57,6 +57,7 @@
 #include <linux/ftrace_event.h>
 #include <linux/memcontrol.h>
 #include <linux/prefetch.h>
+#include <linux/page-debug-flags.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
@@ -402,6 +403,39 @@ static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags)
 		clear_highpage(page + i);
 }
 
+#ifdef CONFIG_DEBUG_PAGEALLOC
+unsigned int _corrupt_dbg;
+
+static int __init corrupt_dbg_setup(char *buf)
+{
+	_corrupt_dbg = simple_strtoul(buf, &buf, 10);
+	/* FIXME: check range ? */
+	printk(KERN_INFO "Setting corrupt debug order to %d\n", _corrupt_dbg);
+	return 0;
+}
+__setup("corrupt_dbg=", corrupt_dbg_setup);
+
+static inline void set_page_corrupt_dbg(struct page *page)
+{
+	__set_bit(PAGE_DEBUG_FLAG_CORRUPT, &page->debug_flags);
+}
+
+static inline void clear_page_corrupt_dbg(struct page *page)
+{
+	__clear_bit(PAGE_DEBUG_FLAG_CORRUPT, &page->debug_flags);
+}
+
+static inline bool page_is_corrupt_dbg(struct page *page)
+{
+	return test_bit(PAGE_DEBUG_FLAG_CORRUPT, &page->debug_flags);
+}
+
+#else
+static inline void set_page_corrupt_dbg(struct page *page) { }
+static inline void clear_page_corrupt_dbg(struct page *page) { }
+static inline bool page_is_corrupt_dbg(struct page *page) { return false; }
+#endif
+
 static inline void set_page_order(struct page *page, int order)
 {
 	set_page_private(page, order);
@@ -459,6 +493,11 @@ static inline int page_is_buddy(struct page *page, struct page *buddy,
 	if (page_zone_id(page) != page_zone_id(buddy))
 		return 0;
 
+	if (page_is_corrupt_dbg(buddy) && page_order(buddy) == order) {
+		VM_BUG_ON(page_count(buddy) != 0);
+		return 1;
+	}
+
 	if (PageBuddy(buddy) && page_order(buddy) == order) {
 		VM_BUG_ON(page_count(buddy) != 0);
 		return 1;
@@ -517,9 +556,15 @@ static inline void __free_one_page(struct page *page,
 			break;
 
 		/* Our buddy is free, merge with it and move up one order. */
-		list_del(&buddy->lru);
-		zone->free_area[order].nr_free--;
-		rmv_page_order(buddy);
+		if (page_is_corrupt_dbg(buddy)) {
+			clear_page_corrupt_dbg(buddy);
+			set_page_private(page, 0);
+			__mod_zone_page_state(zone, NR_FREE_PAGES, 1 << order);
+		} else {
+			list_del(&buddy->lru);
+			zone->free_area[order].nr_free--;
+			rmv_page_order(buddy);
+		}
 		combined_idx = buddy_idx & page_idx;
 		page = page + (combined_idx - page_idx);
 		page_idx = combined_idx;
@@ -735,7 +780,7 @@ void __meminit __free_pages_bootmem(struct page *page, unsigned int order)
  * -- wli
  */
 static inline void expand(struct zone *zone, struct page *page,
-	int low, int high, struct free_area *area,
+	unsigned int low, unsigned int high, struct free_area *area,
 	int migratetype)
 {
 	unsigned long size = 1 << high;
@@ -745,9 +790,16 @@ static inline void expand(struct zone *zone, struct page *page,
 		high--;
 		size >>= 1;
 		VM_BUG_ON(bad_range(zone, &page[size]));
-		list_add(&page[size].lru, &area->free_list[migratetype]);
-		area->nr_free++;
-		set_page_order(&page[size], high);
+		if (high < corrupt_dbg()) {
+			INIT_LIST_HEAD(&page[size].lru);
+			set_page_corrupt_dbg(&page[size]);
+			set_page_private(&page[size], high);
+			__mod_zone_page_state(zone, NR_FREE_PAGES, -(1 << high));
+		} else {
+			set_page_order(&page[size], high);
+			list_add(&page[size].lru, &area->free_list[migratetype]);
+			area->nr_free++;
+		}
 	}
 }
 
@@ -1756,7 +1808,8 @@ void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...)
 	va_list args;
 	unsigned int filter = SHOW_MEM_FILTER_NODES;
 
-	if ((gfp_mask & __GFP_NOWARN) || !__ratelimit(&nopage_rs))
+	if ((gfp_mask & __GFP_NOWARN) || !__ratelimit(&nopage_rs) ||
+	    corrupt_dbg() > 0)
 		return;
 
 	/*
-- 
1.7.1


* [RFC 2/3] PM / Hibernate : do not count debug pages as savable
  2011-10-17 14:24 [RFC 1/3 repost to correct ML] mm: more intensive memory corruption debug Stanislaw Gruszka
@ 2011-10-17 14:24 ` Stanislaw Gruszka
  2011-10-17 22:16   ` Rafael J. Wysocki
  2011-10-17 14:24 ` [RFC 3/3] slub: min order when corrupt_dbg Stanislaw Gruszka
  1 sibling, 1 reply; 4+ messages in thread
From: Stanislaw Gruszka @ 2011-10-17 14:24 UTC (permalink / raw)
  To: linux-mm
  Cc: linux-kernel, Mel Gorman, Andrea Arcangeli, Andrew Morton,
	Rafael J. Wysocki, Christoph Lameter, Stanislaw Gruszka

When debugging memory corruption with CONFIG_DEBUG_PAGEALLOC and
corrupt_dbg > 0, we have a lot of free pages that are not marked as
such. The snapshot code accounts them as savable, which causes
hibernate memory preallocation to fail.

It is pretty hard to make a hibernate allocation succeed with
corrupt_dbg=1. This change at least makes it possible when the system
has a relatively large amount of RAM.
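
A rough back-of-the-envelope illustration (my own numbers, not a
measurement from the patch): with corrupt_dbg=1 every order-0
allocation pins one protected buddy page, so in the worst case:

  protected pages        ~= allocated order-0 pages
  savable (before patch) ~= allocated + protected  -> image ~2x real data
  free for preallocation ~= total - allocated - protected

i.e. the snapshot looks almost twice as big as the data that actually
needs saving, while the free memory to preallocate it into is reduced
by those same protected pages.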

Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
---
 include/linux/mm.h      |    7 ++++++-
 kernel/power/snapshot.c |    6 ++++++
 mm/page_alloc.c         |    6 ------
 3 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 17e3658..651785b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1630,13 +1630,18 @@ extern void copy_user_huge_page(struct page *dst, struct page *src,
 #ifdef CONFIG_DEBUG_PAGEALLOC
 extern unsigned int _corrupt_dbg;
 
-
 static inline unsigned int corrupt_dbg(void)
 {
 	return _corrupt_dbg;
 }
+
+static inline bool page_is_corrupt_dbg(struct page *page)
+{
+	return test_bit(PAGE_DEBUG_FLAG_CORRUPT, &page->debug_flags);
+}
 #else
 static inline unsigned int corrupt_dbg(void) { return 0; }
+static inline bool page_is_corrupt_dbg(struct page *page) { return false; }
 #endif /* CONFIG_DEBUG_PAGEALLOC */
 
 #endif /* __KERNEL__ */
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 06efa54..45cf1b1 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -858,6 +858,9 @@ static struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn)
 	    PageReserved(page))
 		return NULL;
 
+	if (page_is_corrupt_dbg(page))
+		return NULL;
+
 	return page;
 }
 
@@ -920,6 +923,9 @@ static struct page *saveable_page(struct zone *zone, unsigned long pfn)
 	    && (!kernel_page_present(page) || pfn_is_nosave(pfn)))
 		return NULL;
 
+	if (page_is_corrupt_dbg(page))
+		return NULL;
+
 	return page;
 }
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 8d18ae4..8a7770a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -425,15 +425,9 @@ static inline void clear_page_corrupt_dbg(struct page *page)
 	__clear_bit(PAGE_DEBUG_FLAG_CORRUPT, &page->debug_flags);
 }
 
-static inline bool page_is_corrupt_dbg(struct page *page)
-{
-	return test_bit(PAGE_DEBUG_FLAG_CORRUPT, &page->debug_flags);
-}
-
 #else
 static inline void set_page_corrupt_dbg(struct page *page) { }
 static inline void clear_page_corrupt_dbg(struct page *page) { }
-static inline bool page_is_corrupt_dbg(struct page *page) { return false; }
 #endif
 
 static inline void set_page_order(struct page *page, int order)
-- 
1.7.1


* [RFC 3/3] slub: min order when corrupt_dbg
  2011-10-17 14:24 [RFC 1/3 repost to correct ML] mm: more intensive memory corruption debug Stanislaw Gruszka
  2011-10-17 14:24 ` [RFC 2/3] PM / Hibernate : do not count debug pages as savable Stanislaw Gruszka
@ 2011-10-17 14:24 ` Stanislaw Gruszka
  1 sibling, 0 replies; 4+ messages in thread
From: Stanislaw Gruszka @ 2011-10-17 14:24 UTC (permalink / raw)
  To: linux-mm
  Cc: linux-kernel, Mel Gorman, Andrea Arcangeli, Andrew Morton,
	Rafael J. Wysocki, Christoph Lameter, Stanislaw Gruszka

Disable SLUB debug facilities and allocate slabs at the minimal order
when corrupt_dbg > 0, to increase the probability of catching random
memory corruption via a CPU exception.
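
For intuition, a small sketch of why minimal order helps (illustrative
numbers, ignoring SLUB's per-slab reserved and metadata bytes; my
example, not code from the patch):

/* Illustrative only: objects that fit in one slab of a given order. */
static unsigned int objs_per_slab(unsigned int order, unsigned long size)
{
	return (PAGE_SIZE << order) / size;
}

/*
 * For 256-byte objects on 4 KiB pages, calculate_order() may pick
 * order 1 (32 objects per 2-page slab), while get_order(256) == 0
 * gives 16 objects per 1-page slab: twice as many slab allocations,
 * and with corrupt_dbg > 0 each one is another chance for the buddy
 * allocator to place a protected page right next to live objects.
 */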

Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
---
 mm/slub.c |   10 ++++++++--
 1 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index 7c54fe8..fec88b5 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2755,7 +2755,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
 	unsigned long flags = s->flags;
 	unsigned long size = s->objsize;
 	unsigned long align = s->align;
-	int order;
+	int order, min_order;
 
 	/*
 	 * Round up object size to the next word boundary. We can only
@@ -2840,8 +2840,11 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
 	 */
 	size = ALIGN(size, align);
 	s->size = size;
+	min_order = get_order(size);
 	if (forced_order >= 0)
 		order = forced_order;
+	else if (corrupt_dbg())
+		order = min_order;
 	else
 		order = calculate_order(size, s->reserved);
 
@@ -2862,7 +2865,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
 	 * Determine the number of objects per slab
 	 */
 	s->oo = oo_make(order, size, s->reserved);
-	s->min = oo_make(get_order(size), size, s->reserved);
+	s->min = oo_make(min_order, size, s->reserved);
 	if (oo_objects(s->oo) > oo_objects(s->max))
 		s->max = s->oo;
 
@@ -3529,6 +3532,9 @@ void __init kmem_cache_init(void)
 	struct kmem_cache *temp_kmem_cache_node;
 	unsigned long kmalloc_size;
 
+	if (corrupt_dbg())
+		slub_debug = 0;
+
 	kmem_size = offsetof(struct kmem_cache, node) +
 				nr_node_ids * sizeof(struct kmem_cache_node *);
 
-- 
1.7.1


* Re: [RFC 2/3] PM / Hibernate : do not count debug pages as savable
  2011-10-17 14:24 ` [RFC 2/3] PM / Hibernate : do not count debug pages as savable Stanislaw Gruszka
@ 2011-10-17 22:16   ` Rafael J. Wysocki
  0 siblings, 0 replies; 4+ messages in thread
From: Rafael J. Wysocki @ 2011-10-17 22:16 UTC (permalink / raw)
  To: Stanislaw Gruszka
  Cc: linux-mm, linux-kernel, Mel Gorman, Andrea Arcangeli,
	Andrew Morton, Christoph Lameter

On Monday, October 17, 2011, Stanislaw Gruszka wrote:
> When debugging memory corruption with CONFIG_DEBUG_PAGEALLOC and
> corrupt_dbg > 0, we have a lot of free pages that are not marked as
> such. The snapshot code accounts them as savable, which causes
> hibernate memory preallocation to fail.
> 
> It is pretty hard to make a hibernate allocation succeed with
> corrupt_dbg=1. This change at least makes it possible when the system
> has a relatively large amount of RAM.
> 
> Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>

Acked-by: Rafael J. Wysocki <rjw@sisk.pl>


