linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Andrea Arcangeli <aarcange@redhat.com>
To: Dave Hansen <dave@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>,
	linux-mm@kvack.org, Andrew Morton <akpm@linux-foundation.org>,
	Marcelo Tosatti <mtosatti@redhat.com>,
	Adam Litke <agl@us.ibm.com>, Avi Kivity <avi@redhat.com>,
	Izik Eidus <ieidus@redhat.com>,
	Hugh Dickins <hugh.dickins@tiscali.co.uk>,
	Nick Piggin <npiggin@suse.de>, Rik van Riel <riel@redhat.com>,
	Mel Gorman <mel@csn.ul.ie>,
	Benjamin Herrenschmidt <benh@kernel.crashing.org>,
	Ingo Molnar <mingo@elte.hu>, Mike Travis <travis@sgi.com>,
	KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>,
	Christoph Lameter <cl@linux-foundation.org>,
	Chris Wright <chrisw@sous-sol.org>,
	bpicco@redhat.com,
	KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>,
	Balbir Singh <balbir@linux.vnet.ibm.com>,
	Arnd Bergmann <arnd@arndb.de>,
	"Michael S. Tsirkin" <mst@redhat.com>,
	Johannes Weiner <hannes@cmpxchg.org>,
	Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Subject: Re: [PATCH 36 of 41] remove PG_buddy
Date: Thu, 1 Apr 2010 20:15:02 +0200	[thread overview]
Message-ID: <20100401181502.GC5825@random.random> (raw)
In-Reply-To: <20100330010627.GD5825@random.random>

This is my latest version of the "remove PG_buddy" patch. I'm not resending
the whole patchset, to avoid unnecessary traffic, as only this patch changed.

Let me know if you see problems here.

------
Subject: remove PG_buddy

From: Andrea Arcangeli <aarcange@redhat.com>

PG_buddy can be converted to _mapcount == -2. So the PG_compound_lock can be
added to page->flags without overflowing (because of the sparse section bits
increasing) with CONFIG_X86_PAE=y and CONFIG_X86_PAT=y. This also has to move
the memory hotplug code from _mapcount to lru.next to avoid any risk of
clashes. We can't use lru.next for PG_buddy removal, but memory hotplug can use
lru.next instead of the mapcount, and even more easily.

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
---

diff --git a/fs/proc/page.c b/fs/proc/page.c
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -116,15 +116,17 @@ u64 stable_page_flags(struct page *page)
 	if (PageHuge(page))
 		u |= 1 << KPF_HUGE;
 
+	/*
+	 * Caveats on high order pages: page->_mapcount will only be set
+	 * to -2 on the head page; SLUB/SLQB do the same for PG_slab;
+	 * SLOB won't set PG_slab at all on compound pages.
+	 */
+	if (PageBuddy(page))
+		u |= 1 << KPF_BUDDY;
+
 	u |= kpf_copy_bit(k, KPF_LOCKED,	PG_locked);
 
-	/*
-	 * Caveats on high order pages:
-	 * PG_buddy will only be set on the head page; SLUB/SLQB do the same
-	 * for PG_slab; SLOB won't set PG_slab at all on compound pages.
-	 */
 	u |= kpf_copy_bit(k, KPF_SLAB,		PG_slab);
-	u |= kpf_copy_bit(k, KPF_BUDDY,		PG_buddy);
 
 	u |= kpf_copy_bit(k, KPF_ERROR,		PG_error);
 	u |= kpf_copy_bit(k, KPF_DIRTY,		PG_dirty);
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -13,12 +13,16 @@ struct mem_section;
 #ifdef CONFIG_MEMORY_HOTPLUG
 
 /*
- * Types for free bootmem.
- * The normal smallest mapcount is -1. Here is smaller value than it.
+ * Types for free bootmem stored in page->lru.next. These have to be in
+ * some random range in unsigned long space for debugging purposes.
  */
-#define SECTION_INFO		(-1 - 1)
-#define MIX_SECTION_INFO	(-1 - 2)
-#define NODE_INFO		(-1 - 3)
+enum {
+	MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE = 12,
+	SECTION_INFO = MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE,
+	MIX_SECTION_INFO,
+	NODE_INFO,
+	MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE = NODE_INFO,
+};
 
 /*
  * pgdat resizing functions
diff --git a/include/linux/mm.h b/include/linux/mm.h
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -358,6 +358,27 @@ static inline void init_page_count(struc
 	atomic_set(&page->_count, 1);
 }
 
+/*
+ * PageBuddy() indicates that the page is free and in the buddy system
+ * (see mm/page_alloc.c).
+ */
+static inline int PageBuddy(struct page *page)
+{
+	return atomic_read(&page->_mapcount) == -2;
+}
+
+static inline void __SetPageBuddy(struct page *page)
+{
+	VM_BUG_ON(atomic_read(&page->_mapcount) != -1);
+	atomic_set(&page->_mapcount, -2);
+}
+
+static inline void __ClearPageBuddy(struct page *page)
+{
+	VM_BUG_ON(!PageBuddy(page));
+	atomic_set(&page->_mapcount, -1);
+}
+
 void put_page(struct page *page);
 void put_pages_list(struct list_head *pages);
 
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -48,9 +48,6 @@
  * struct page (these bits with information) are always mapped into kernel
  * address space...
  *
- * PG_buddy is set to indicate that the page is free and in the buddy system
- * (see mm/page_alloc.c).
- *
  * PG_hwpoison indicates that a page got corrupted in hardware and contains
  * data with incorrect ECC bits that triggered a machine check. Accessing is
  * not safe since it may cause another machine check. Don't touch!
@@ -96,7 +93,6 @@ enum pageflags {
 	PG_swapcache,		/* Swap page: swp_entry_t in private */
 	PG_mappedtodisk,	/* Has blocks allocated on-disk */
 	PG_reclaim,		/* To be reclaimed asap */
-	PG_buddy,		/* Page is free, on buddy lists */
 	PG_swapbacked,		/* Page is backed by RAM/swap */
 	PG_unevictable,		/* Page is "unevictable"  */
 #ifdef CONFIG_MMU
@@ -235,7 +231,6 @@ PAGEFLAG(OwnerPriv1, owner_priv_1) TESTC
  * risky: they bypass page accounting.
  */
 TESTPAGEFLAG(Writeback, writeback) TESTSCFLAG(Writeback, writeback)
-__PAGEFLAG(Buddy, buddy)
 PAGEFLAG(MappedToDisk, mappedtodisk)
 
 /* PG_readahead is only used for file reads; PG_reclaim is only for writes */
@@ -430,7 +425,7 @@ static inline void ClearPageCompound(str
 #define PAGE_FLAGS_CHECK_AT_FREE \
 	(1 << PG_lru	 | 1 << PG_locked    | \
 	 1 << PG_private | 1 << PG_private_2 | \
-	 1 << PG_buddy	 | 1 << PG_writeback | 1 << PG_reserved | \
+	 1 << PG_writeback | 1 << PG_reserved | \
 	 1 << PG_slab	 | 1 << PG_swapcache | 1 << PG_active | \
 	 1 << PG_unevictable | __PG_MLOCKED | __PG_HWPOISON | \
 	 __PG_COMPOUND_LOCK)
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -65,9 +65,10 @@ static void release_memory_resource(stru
 
 #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
 #ifndef CONFIG_SPARSEMEM_VMEMMAP
-static void get_page_bootmem(unsigned long info,  struct page *page, int type)
+static void get_page_bootmem(unsigned long info,  struct page *page,
+			     unsigned long type)
 {
-	atomic_set(&page->_mapcount, type);
+	page->lru.next = (struct list_head *) type;
 	SetPagePrivate(page);
 	set_page_private(page, info);
 	atomic_inc(&page->_count);
@@ -77,15 +78,16 @@ static void get_page_bootmem(unsigned lo
  * so use __ref to tell modpost not to generate a warning */
 void __ref put_page_bootmem(struct page *page)
 {
-	int type;
+	unsigned long type;
 
-	type = atomic_read(&page->_mapcount);
-	BUG_ON(type >= -1);
+	type = (unsigned long) page->lru.next;
+	BUG_ON(type < MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE ||
+	       type > MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE);
 
 	if (atomic_dec_return(&page->_count) == 1) {
 		ClearPagePrivate(page);
 		set_page_private(page, 0);
-		reset_page_mapcount(page);
+		INIT_LIST_HEAD(&page->lru);
 		__free_pages_bootmem(page, 0);
 	}
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -426,8 +426,8 @@ __find_combined_index(unsigned long page
  * (c) a page and its buddy have the same order &&
  * (d) a page and its buddy are in the same zone.
  *
- * For recording whether a page is in the buddy system, we use PG_buddy.
- * Setting, clearing, and testing PG_buddy is serialized by zone->lock.
+ * For recording whether a page is in the buddy system, we set ->_mapcount -2.
+ * Setting, clearing, and testing _mapcount -2 is serialized by zone->lock.
  *
  * For recording page's order, we use page_private(page).
  */
@@ -460,7 +460,7 @@ static inline int page_is_buddy(struct p
  * as necessary, plus some accounting needed to play nicely with other
  * parts of the VM system.
  * At each level, we keep a list of pages, which are heads of continuous
- * free pages of length of (1 << order) and marked with PG_buddy. Page's
+ * free pages of length of (1 << order) and marked with _mapcount -2. Page's
  * order is recorded in page_private(page) field.
  * So when we are allocating or freeing one, we can derive the state of the
  * other.  That is, if we allocate a small block, and both were   
@@ -5251,7 +5251,6 @@ static struct trace_print_flags pageflag
 	{1UL << PG_swapcache,		"swapcache"	},
 	{1UL << PG_mappedtodisk,	"mappedtodisk"	},
 	{1UL << PG_reclaim,		"reclaim"	},
-	{1UL << PG_buddy,		"buddy"		},
 	{1UL << PG_swapbacked,		"swapbacked"	},
 	{1UL << PG_unevictable,		"unevictable"	},
 #ifdef CONFIG_MMU
diff --git a/mm/sparse.c b/mm/sparse.c
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -670,10 +670,10 @@ static void __kfree_section_memmap(struc
 static void free_map_bootmem(struct page *page, unsigned long nr_pages)
 {
 	unsigned long maps_section_nr, removing_section_nr, i;
-	int magic;
+	unsigned long magic;
 
 	for (i = 0; i < nr_pages; i++, page++) {
-		magic = atomic_read(&page->_mapcount);
+		magic = (unsigned long) page->lru.next;
 
 		BUG_ON(magic == NODE_INFO);
 

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  reply	other threads:[~2010-04-01 18:15 UTC|newest]

Thread overview: 57+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-03-29 18:37 [PATCH 00 of 41] Transparent Hugepage Support #16 Andrea Arcangeli
2010-03-29 18:37 ` [PATCH 01 of 41] define MADV_HUGEPAGE Andrea Arcangeli
2010-03-29 18:37 ` [PATCH 02 of 41] compound_lock Andrea Arcangeli
2010-03-29 18:37 ` [PATCH 03 of 41] alter compound get_page/put_page Andrea Arcangeli
2010-03-29 18:37 ` [PATCH 04 of 41] update futex compound knowledge Andrea Arcangeli
2010-03-29 18:37 ` [PATCH 05 of 41] fix bad_page to show the real reason the page is bad Andrea Arcangeli
2010-03-29 18:37 ` [PATCH 06 of 41] clear compound mapping Andrea Arcangeli
2010-03-29 18:37 ` [PATCH 07 of 41] add native_set_pmd_at Andrea Arcangeli
2010-03-29 18:37 ` [PATCH 08 of 41] add pmd paravirt ops Andrea Arcangeli
2010-03-29 18:37 ` [PATCH 09 of 41] no paravirt version of pmd ops Andrea Arcangeli
2010-03-29 18:37 ` [PATCH 10 of 41] export maybe_mkwrite Andrea Arcangeli
2010-03-29 18:37 ` [PATCH 11 of 41] comment reminder in destroy_compound_page Andrea Arcangeli
2010-03-29 18:37 ` [PATCH 12 of 41] config_transparent_hugepage Andrea Arcangeli
2010-03-29 18:37 ` [PATCH 13 of 41] special pmd_trans_* functions Andrea Arcangeli
2010-03-29 18:37 ` [PATCH 14 of 41] add pmd mangling generic functions Andrea Arcangeli
2010-03-29 18:37 ` [PATCH 15 of 41] add pmd mangling functions to x86 Andrea Arcangeli
2010-03-29 18:37 ` [PATCH 16 of 41] bail out gup_fast on splitting pmd Andrea Arcangeli
2010-03-29 18:37 ` [PATCH 17 of 41] pte alloc trans splitting Andrea Arcangeli
2010-03-29 18:37 ` [PATCH 18 of 41] add pmd mmu_notifier helpers Andrea Arcangeli
2010-03-29 18:37 ` [PATCH 19 of 41] clear page compound Andrea Arcangeli
2010-03-29 18:37 ` [PATCH 20 of 41] add pmd_huge_pte to mm_struct Andrea Arcangeli
2010-03-29 18:37 ` [PATCH 21 of 41] split_huge_page_mm/vma Andrea Arcangeli
2010-03-29 18:37 ` [PATCH 22 of 41] split_huge_page paging Andrea Arcangeli
2010-03-29 18:37 ` [PATCH 23 of 41] clear_copy_huge_page Andrea Arcangeli
2010-03-29 18:37 ` [PATCH 24 of 41] kvm mmu transparent hugepage support Andrea Arcangeli
2010-03-29 18:37 ` [PATCH 25 of 41] _GFP_NO_KSWAPD Andrea Arcangeli
2010-03-29 18:37 ` [PATCH 26 of 41] don't alloc harder for gfp nomemalloc even if nowait Andrea Arcangeli
2010-03-29 18:37 ` [PATCH 27 of 41] transparent hugepage core Andrea Arcangeli
2010-03-29 18:37 ` [PATCH 28 of 41] verify pmd_trans_huge isn't leaking Andrea Arcangeli
2010-03-29 18:37 ` [PATCH 29 of 41] madvise(MADV_HUGEPAGE) Andrea Arcangeli
2010-03-29 18:37 ` [PATCH 30 of 41] pmd_trans_huge migrate bugcheck Andrea Arcangeli
2010-03-29 18:37 ` [PATCH 31 of 41] memcg compound Andrea Arcangeli
2010-03-29 18:37 ` [PATCH 32 of 41] memcg huge memory Andrea Arcangeli
2010-03-29 18:37 ` [PATCH 33 of 41] transparent hugepage vmstat Andrea Arcangeli
2010-03-29 18:37 ` [PATCH 34 of 41] khugepaged Andrea Arcangeli
2010-03-29 18:37 ` [PATCH 35 of 41] skip transhuge pages in ksm for now Andrea Arcangeli
2010-03-29 18:37 ` [PATCH 36 of 41] remove PG_buddy Andrea Arcangeli
2010-03-29 18:49   ` Peter Zijlstra
2010-03-29 20:18     ` Benjamin Herrenschmidt
2010-03-29 22:17     ` Andrea Arcangeli
2010-03-29 22:30       ` Dave Hansen
2010-03-30  0:15         ` Andrea Arcangeli
2010-03-30  1:06           ` Andrea Arcangeli
2010-04-01 18:15             ` Andrea Arcangeli [this message]
2010-03-30 16:35           ` Christoph Lameter
2010-03-30 16:44             ` Andrea Arcangeli
2010-03-29 18:37 ` [PATCH 37 of 41] add x86 32bit support Andrea Arcangeli
2010-03-29 18:37 ` [PATCH 38 of 41] mincore transparent hugepage support Andrea Arcangeli
2010-03-29 18:37 ` [PATCH 39 of 41] add pmd_modify Andrea Arcangeli
2010-03-29 18:37 ` [PATCH 40 of 41] mprotect: pass vma down to page table walkers Andrea Arcangeli
2010-03-29 18:37 ` [PATCH 41 of 41] mprotect: transparent huge page support Andrea Arcangeli
2010-03-31  5:10 ` [PATCH 00 of 41] Transparent Hugepage Support #16 KAMEZAWA Hiroyuki
2010-03-31 15:33   ` Andrea Arcangeli
2010-03-31 16:24     ` Christoph Lameter
2010-03-31 16:41       ` Andrea Arcangeli
2010-03-31 18:59         ` Christoph Lameter
  -- strict thread matches above, loose matches on Subject: below --
2010-04-02  0:41 [PATCH 00 of 41] Transparent Hugepage Support #17 Andrea Arcangeli
2010-04-02  0:42 ` [PATCH 36 of 41] remove PG_buddy Andrea Arcangeli

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20100401181502.GC5825@random.random \
    --to=aarcange@redhat.com \
    --cc=agl@us.ibm.com \
    --cc=akpm@linux-foundation.org \
    --cc=arnd@arndb.de \
    --cc=avi@redhat.com \
    --cc=balbir@linux.vnet.ibm.com \
    --cc=benh@kernel.crashing.org \
    --cc=bpicco@redhat.com \
    --cc=chrisw@sous-sol.org \
    --cc=cl@linux-foundation.org \
    --cc=dave@linux.vnet.ibm.com \
    --cc=hannes@cmpxchg.org \
    --cc=hugh.dickins@tiscali.co.uk \
    --cc=ieidus@redhat.com \
    --cc=kamezawa.hiroyu@jp.fujitsu.com \
    --cc=kosaki.motohiro@jp.fujitsu.com \
    --cc=linux-mm@kvack.org \
    --cc=mel@csn.ul.ie \
    --cc=mingo@elte.hu \
    --cc=mst@redhat.com \
    --cc=mtosatti@redhat.com \
    --cc=nishimura@mxp.nes.nec.co.jp \
    --cc=npiggin@suse.de \
    --cc=peterz@infradead.org \
    --cc=riel@redhat.com \
    --cc=travis@sgi.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).