All of lore.kernel.org
 help / color / mirror / Atom feed
From: Andrea Arcangeli <aarcange@redhat.com>
To: linux-mm@kvack.org
Cc: Marcelo Tosatti <mtosatti@redhat.com>,
	Adam Litke <agl@us.ibm.com>, Avi Kivity <avi@redhat.com>,
	Izik Eidus <ieidus@redhat.com>,
	Hugh Dickins <hugh.dickins@tiscali.co.uk>,
	Nick Piggin <npiggin@suse.de>, Rik van Riel <riel@redhat.com>,
	Mel Gorman <mel@csn.ul.ie>, Andi Kleen <andi@firstfloor.org>,
	Dave Hansen <dave@linux.vnet.ibm.com>,
	Benjamin Herrenschmidt <benh@kernel.crashing.org>,
	Ingo Molnar <mingo@elte.hu>, Mike Travis <travis@sgi.com>,
	KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>,
	Christoph Lameter <cl@linux-foundation.org>,
	Chris Wright <chrisw@sous-sol.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	bpicco@redhat.com, Christoph Hellwig <chellwig@redhat.com>,
	KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Subject: [PATCH 03 of 31] alter compound get_page/put_page
Date: Tue, 26 Jan 2010 14:51:58 +0100	[thread overview]
Message-ID: <936cd613e4ae2d20c62b.1264513918@v2.random> (raw)
In-Reply-To: <patchbomb.1264513915@v2.random>

From: Andrea Arcangeli <aarcange@redhat.com>

Alter compound get_page/put_page to keep references on subpages too, in order
to allow __split_huge_page_refcount to split a hugepage even while subpages
have been pinned by one of the get_user_pages() variants.

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
---

diff --git a/arch/powerpc/mm/gup.c b/arch/powerpc/mm/gup.c
--- a/arch/powerpc/mm/gup.c
+++ b/arch/powerpc/mm/gup.c
@@ -47,6 +47,14 @@ static noinline int gup_pte_range(pmd_t 
 			put_page(page);
 			return 0;
 		}
+		if (PageTail(page)) {
+			/*
+			 * __split_huge_page_refcount() cannot run
+			 * from under us.
+			 */
+			VM_BUG_ON(atomic_read(&page->_count) < 0);
+			atomic_inc(&page->_count);
+		}
 		pages[*nr] = page;
 		(*nr)++;
 
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -128,6 +128,14 @@ static noinline int gup_huge_pmd(pmd_t p
 	do {
 		VM_BUG_ON(compound_head(page) != head);
 		pages[*nr] = page;
+		if (PageTail(page)) {
+			/*
+			 * __split_huge_page_refcount() cannot run
+			 * from under us.
+			 */
+			VM_BUG_ON(atomic_read(&page->_count) < 0);
+			atomic_inc(&page->_count);
+		}
 		(*nr)++;
 		page++;
 		refs++;
diff --git a/include/linux/mm.h b/include/linux/mm.h
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -297,12 +297,16 @@ static inline int is_vmalloc_or_module_a
 
 static inline void compound_lock(struct page *page)
 {
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	bit_spin_lock(PG_compound_lock, &page->flags);
+#endif
 }
 
 static inline void compound_unlock(struct page *page)
 {
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	bit_spin_unlock(PG_compound_lock, &page->flags);
+#endif
 }
 
 static inline struct page *compound_head(struct page *page)
@@ -319,9 +323,17 @@ static inline int page_count(struct page
 
 static inline void get_page(struct page *page)
 {
-	page = compound_head(page);
-	VM_BUG_ON(atomic_read(&page->_count) == 0);
+	VM_BUG_ON(atomic_read(&page->_count) < !PageTail(page));
 	atomic_inc(&page->_count);
+	if (unlikely(PageTail(page))) {
+		/*
+		 * This is safe only because
+		 * __split_huge_page_refcount can't run under
+		 * get_page().
+		 */
+		VM_BUG_ON(atomic_read(&page->first_page->_count) <= 0);
+		atomic_inc(&page->first_page->_count);
+	}
 }
 
 static inline struct page *virt_to_head_page(const void *x)
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -108,7 +108,9 @@ enum pageflags {
 #ifdef CONFIG_MEMORY_FAILURE
 	PG_hwpoison,		/* hardware poisoned page. Don't touch */
 #endif
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	PG_compound_lock,
+#endif
 	__NR_PAGEFLAGS,
 
 	/* Filesystems */
@@ -400,6 +402,12 @@ static inline void __ClearPageTail(struc
 #define __PG_MLOCKED		0
 #endif
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define __PG_COMPOUND_LOCK		(1 << PG_compound_lock)
+#else
+#define __PG_COMPOUND_LOCK		0
+#endif
+
 /*
  * Flags checked when a page is freed.  Pages being freed should not have
  * these flags set.  It they are, there is a problem.
@@ -409,7 +417,8 @@ static inline void __ClearPageTail(struc
 	 1 << PG_private | 1 << PG_private_2 | \
 	 1 << PG_buddy	 | 1 << PG_writeback | 1 << PG_reserved | \
 	 1 << PG_slab	 | 1 << PG_swapcache | 1 << PG_active | \
-	 1 << PG_unevictable | __PG_MLOCKED | __PG_HWPOISON)
+	 1 << PG_unevictable | __PG_MLOCKED | __PG_HWPOISON | \
+	 __PG_COMPOUND_LOCK)	/* already a mask (or 0): must not be shifted again */
 
 /*
  * Flags checked when a page is prepped for return by the page allocator.
diff --git a/mm/swap.c b/mm/swap.c
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -55,17 +55,82 @@ static void __page_cache_release(struct 
 		del_page_from_lru(zone, page);
 		spin_unlock_irqrestore(&zone->lru_lock, flags);
 	}
+}
+
+static void __put_single_page(struct page *page)
+{
+	__page_cache_release(page);
 	free_hot_page(page);
 }
 
+static void __put_compound_page(struct page *page)
+{
+	compound_page_dtor *dtor;
+
+	__page_cache_release(page);
+	dtor = get_compound_page_dtor(page);
+	(*dtor)(page);
+}
+
 static void put_compound_page(struct page *page)
 {
-	page = compound_head(page);
-	if (put_page_testzero(page)) {
-		compound_page_dtor *dtor;
-
-		dtor = get_compound_page_dtor(page);
-		(*dtor)(page);
+	if (unlikely(PageTail(page))) {
+		/* __split_huge_page_refcount can run under us */
+		struct page *page_head = page->first_page;
+		smp_rmb();
+		if (likely(PageTail(page) && get_page_unless_zero(page_head))) {
+			if (unlikely(!PageHead(page_head))) {
+				/* PageHead is cleared after PageTail */
+				smp_rmb();
+				VM_BUG_ON(PageTail(page));
+				goto out_put_head;
+			}
+			/*
+			 * Only run compound_lock on a valid PageHead,
+			 * after having it pinned with
+			 * get_page_unless_zero() above.
+			 */
+			smp_mb();
+			/* page_head wasn't a dangling pointer */
+			compound_lock(page_head);
+			if (unlikely(!PageTail(page))) {
+				/* __split_huge_page_refcount run before us */
+				compound_unlock(page_head);
+				VM_BUG_ON(PageHead(page_head));
+			out_put_head:
+				if (put_page_testzero(page_head))
+					__put_single_page(page_head);
+			out_put_single:
+				if (put_page_testzero(page))
+					__put_single_page(page);
+				return;
+			}
+			VM_BUG_ON(page_head != page->first_page);
+			/*
+			 * We can release the refcount taken by
+			 * get_page_unless_zero now that
+			 * split_huge_page_refcount is blocked on the
+			 * compound_lock.
+			 */
+			if (put_page_testzero(page_head))
+				VM_BUG_ON(1);
+			/* __split_huge_page_refcount will wait now */
+			VM_BUG_ON(atomic_read(&page->_count) <= 0);
+			atomic_dec(&page->_count);
+			VM_BUG_ON(atomic_read(&page_head->_count) <= 0);
+			compound_unlock(page_head);
+			if (put_page_testzero(page_head))
+				__put_compound_page(page_head);
+		} else {
+			/* page_head is a dangling pointer */
+			VM_BUG_ON(PageTail(page));
+			goto out_put_single;
+		}
+	} else if (put_page_testzero(page)) {
+		if (PageHead(page))
+			__put_compound_page(page);
+		else
+			__put_single_page(page);
 	}
 }
 
@@ -74,7 +139,7 @@ void put_page(struct page *page)
 	if (unlikely(PageCompound(page)))
 		put_compound_page(page);
 	else if (put_page_testzero(page))
-		__page_cache_release(page);
+		__put_single_page(page);
 }
 EXPORT_SYMBOL(put_page);
 

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2010-01-26 13:59 UTC|newest]

Thread overview: 118+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-01-26 13:51 [PATCH 00 of 31] Transparent Hugepage support #7 Andrea Arcangeli
2010-01-26 13:51 ` [PATCH 01 of 31] define MADV_HUGEPAGE Andrea Arcangeli
2010-01-26 14:49   ` Rik van Riel
2010-01-26 17:49   ` Mel Gorman
2010-01-27 16:37   ` Hugh Dickins
2010-01-27 17:13     ` Andrea Arcangeli
2010-01-27 17:29       ` Hugh Dickins
2010-01-27 17:45     ` Pekka Enberg
2010-01-27 21:44     ` Arnd Bergmann
2010-01-26 13:51 ` [PATCH 02 of 31] compound_lock Andrea Arcangeli
2010-01-26 14:51   ` Rik van Riel
2010-01-26 15:30     ` Andrea Arcangeli
2010-01-26 15:36     ` Christoph Lameter
2010-01-26 15:51       ` Andrea Arcangeli
2010-01-26 17:50   ` Mel Gorman
2010-01-26 13:51 ` Andrea Arcangeli [this message]
2010-01-26 15:00   ` [PATCH 03 of 31] alter compound get_page/put_page Rik van Riel
2010-01-26 15:31     ` Andrea Arcangeli
2010-01-26 16:00       ` Christoph Lameter
2010-01-26 18:02   ` Mel Gorman
2010-01-27 18:58     ` Andrea Arcangeli
2010-01-28 15:23       ` Mel Gorman
2010-01-26 13:51 ` [PATCH 04 of 31] update futex compound knowledge Andrea Arcangeli
2010-01-26 16:12   ` Rik van Riel
2010-01-26 17:10     ` Andrea Arcangeli
2010-01-26 19:48       ` Rik van Riel
2010-01-26 18:37   ` Mel Gorman
2010-01-27 19:45     ` Andrea Arcangeli
2010-01-28 15:33       ` Mel Gorman
2010-01-28 15:57         ` Andrea Arcangeli
2010-01-26 13:52 ` [PATCH 05 of 31] fix bad_page to show the real reason the page is bad Andrea Arcangeli
2010-01-26 16:13   ` Rik van Riel
2010-01-26 18:38   ` Mel Gorman
2010-01-26 13:52 ` [PATCH 06 of 31] clear compound mapping Andrea Arcangeli
2010-01-26 16:13   ` Rik van Riel
2010-01-26 18:39   ` Mel Gorman
2010-01-26 13:52 ` [PATCH 07 of 31] add native_set_pmd_at Andrea Arcangeli
2010-01-26 16:14   ` Rik van Riel
2010-01-26 19:10   ` Mel Gorman
2010-01-26 13:52 ` [PATCH 08 of 31] add pmd paravirt ops Andrea Arcangeli
2010-01-26 16:43   ` Rik van Riel
2010-01-26 19:13   ` Mel Gorman
2010-01-26 13:52 ` [PATCH 09 of 31] no paravirt version of pmd ops Andrea Arcangeli
2010-01-26 16:46   ` Rik van Riel
2010-01-26 19:15   ` Mel Gorman
2010-01-26 13:52 ` [PATCH 10 of 31] export maybe_mkwrite Andrea Arcangeli
2010-01-26 16:56   ` Rik van Riel
2010-01-26 19:23   ` Mel Gorman
2010-01-26 13:52 ` [PATCH 11 of 31] comment reminder in destroy_compound_page Andrea Arcangeli
2010-01-26 16:59   ` Rik van Riel
2010-01-26 19:24   ` Mel Gorman
2010-01-26 13:52 ` [PATCH 12 of 31] config_transparent_hugepage Andrea Arcangeli
2010-01-26 17:03   ` Rik van Riel
2010-01-26 19:34   ` Mel Gorman
2010-01-27 19:54     ` Andrea Arcangeli
2010-01-28 15:34       ` Mel Gorman
2010-01-26 13:52 ` [PATCH 13 of 31] add pmd mangling functions to x86 Andrea Arcangeli
2010-01-26 17:11   ` Rik van Riel
2010-01-26 19:41   ` Mel Gorman
2010-01-27 19:59     ` Andrea Arcangeli
2010-01-26 13:52 ` [PATCH 14 of 31] add pmd mangling generic functions Andrea Arcangeli
2010-01-26 17:14   ` Rik van Riel
2010-01-26 19:44   ` Mel Gorman
2010-01-28 15:57     ` Andrea Arcangeli
2010-01-26 13:52 ` [PATCH 15 of 31] special pmd_trans_* functions Andrea Arcangeli
2010-01-26 17:20   ` Rik van Riel
2010-01-26 13:52 ` [PATCH 16 of 31] bail out gup_fast on splitting pmd Andrea Arcangeli
2010-01-26 17:21   ` Rik van Riel
2010-01-26 19:50   ` Mel Gorman
2010-01-26 13:52 ` [PATCH 17 of 31] pte alloc trans splitting Andrea Arcangeli
2010-01-26 17:24   ` Rik van Riel
2010-01-26 19:53   ` Mel Gorman
2010-01-26 13:52 ` [PATCH 18 of 31] add pmd mmu_notifier helpers Andrea Arcangeli
2010-01-26 17:24   ` Rik van Riel
2010-01-26 13:52 ` [PATCH 19 of 31] clear page compound Andrea Arcangeli
2010-01-26 17:25   ` Rik van Riel
2010-01-26 19:56   ` Mel Gorman
2010-01-27 22:51     ` Andrea Arcangeli
2010-01-26 13:52 ` [PATCH 20 of 31] add pmd_huge_pte to mm_struct Andrea Arcangeli
2010-01-26 17:48   ` Rik van Riel
2010-01-26 13:52 ` [PATCH 21 of 31] split_huge_page_mm/vma Andrea Arcangeli
2010-01-26 17:34   ` Mel Gorman
2010-01-26 19:49     ` Andrea Arcangeli
2010-01-26 20:03       ` Mel Gorman
2010-01-26 17:58   ` Rik van Riel
2010-01-26 13:52 ` [PATCH 22 of 31] split_huge_page paging Andrea Arcangeli
2010-01-26 18:02   ` Rik van Riel
2010-01-27 18:43     ` Andrea Arcangeli
2010-01-26 20:08   ` Mel Gorman
2010-01-26 13:52 ` [PATCH 23 of 31] clear_copy_huge_page Andrea Arcangeli
2010-01-26 18:14   ` Rik van Riel
2010-01-26 20:10   ` Mel Gorman
2010-01-26 13:52 ` [PATCH 24 of 31] kvm mmu transparent hugepage support Andrea Arcangeli
2010-01-26 18:16   ` Rik van Riel
2010-01-26 13:52 ` [PATCH 25 of 31] transparent hugepage core Andrea Arcangeli
2010-01-26 22:34   ` Rik van Riel
2010-01-26 13:52 ` [PATCH 26 of 31] madvise(MADV_HUGEPAGE) Andrea Arcangeli
2010-01-26 22:50   ` Rik van Riel
2010-01-26 13:52 ` [PATCH 27 of 31] pmd_trans_huge migrate bugcheck Andrea Arcangeli
2010-01-26 22:51   ` Rik van Riel
2010-01-26 13:52 ` [PATCH 28 of 31] memcg compound Andrea Arcangeli
2010-01-26 22:53   ` Rik van Riel
2010-01-26 13:52 ` [PATCH 29 of 31] memcg huge memory Andrea Arcangeli
2010-01-26 22:53   ` Rik van Riel
2010-01-26 13:52 ` [PATCH 30 of 31] transparent hugepage vmstat Andrea Arcangeli
2010-01-26 22:55   ` Rik van Riel
2010-01-28  1:04     ` Andrea Arcangeli
2010-01-28  1:22       ` Rik van Riel
2010-01-26 13:52 ` [PATCH 31 of 31] khugepaged Andrea Arcangeli
2010-01-26 23:04   ` Rik van Riel
2010-01-26 17:55 ` [PATCH 00 of 31] Transparent Hugepage support #7 Michael S. Tsirkin
2010-01-27  0:00   ` Andrea Arcangeli
2010-01-27  0:32     ` Andrea Arcangeli
2010-01-27  0:47       ` Andrea Arcangeli
2010-01-27 20:20         ` Michael S. Tsirkin
2010-01-27 22:58           ` Andrea Arcangeli
  -- strict thread matches above, loose matches on Subject: below --
2010-01-28 14:33 [PATCH 00 of 31] Transparent Hugepage support #8 Andrea Arcangeli
2010-01-28 14:33 ` [PATCH 03 of 31] alter compound get_page/put_page Andrea Arcangeli
2010-01-25 17:18 [PATCH 00 of 31] Transparent Hugepage support #6 Andrea Arcangeli
2010-01-25 17:18 ` [PATCH 03 of 31] alter compound get_page/put_page Andrea Arcangeli

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=936cd613e4ae2d20c62b.1264513918@v2.random \
    --to=aarcange@redhat.com \
    --cc=agl@us.ibm.com \
    --cc=akpm@linux-foundation.org \
    --cc=andi@firstfloor.org \
    --cc=avi@redhat.com \
    --cc=benh@kernel.crashing.org \
    --cc=bpicco@redhat.com \
    --cc=chellwig@redhat.com \
    --cc=chrisw@sous-sol.org \
    --cc=cl@linux-foundation.org \
    --cc=dave@linux.vnet.ibm.com \
    --cc=hugh.dickins@tiscali.co.uk \
    --cc=ieidus@redhat.com \
    --cc=kamezawa.hiroyu@jp.fujitsu.com \
    --cc=kosaki.motohiro@jp.fujitsu.com \
    --cc=linux-mm@kvack.org \
    --cc=mel@csn.ul.ie \
    --cc=mingo@elte.hu \
    --cc=mtosatti@redhat.com \
    --cc=npiggin@suse.de \
    --cc=riel@redhat.com \
    --cc=travis@sgi.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.