All of lore.kernel.org
 help / color / mirror / Atom feed
From: aarcange@redhat.com
To: linux-mm@kvack.org
Cc: Andrea Arcangeli <aarcange@redhat.com>, Rik van Riel <riel@redhat.com>
Subject: [patch 03/35] alter compound get_page/put_page
Date: Fri, 26 Feb 2010 21:04:36 +0100	[thread overview]
Message-ID: <20100226200859.159424521@redhat.com> (raw)
In-Reply-To: 20100226200433.516502198@redhat.com

[-- Attachment #1: compound_get_put --]
[-- Type: text/plain, Size: 5618 bytes --]

From: Andrea Arcangeli <aarcange@redhat.com>

Alter compound get_page/put_page to keep references on subpages too, in order
to allow __split_huge_page_refcount to split a hugepage even while subpages
have been pinned by one of the get_user_pages() variants.

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Acked-by: Rik van Riel <riel@redhat.com>
---
 arch/powerpc/mm/gup.c |   12 +++++++
 arch/x86/mm/gup.c     |   12 +++++++
 include/linux/mm.h    |   12 ++++++-
 mm/swap.c             |   79 +++++++++++++++++++++++++++++++++++++++++++++-----
 4 files changed, 106 insertions(+), 9 deletions(-)

--- a/arch/powerpc/mm/gup.c
+++ b/arch/powerpc/mm/gup.c
@@ -16,6 +16,16 @@
 
 #ifdef __HAVE_ARCH_PTE_SPECIAL
 
+static inline void pin_huge_page_tail(struct page *page)
+{
+	/*
+	 * __split_huge_page_refcount() cannot run
+	 * from under us.
+	 */
+	VM_BUG_ON(atomic_read(&page->_count) < 0);
+	atomic_inc(&page->_count);
+}
+
 /*
  * The performance critical leaf functions are made noinline otherwise gcc
  * inlines everything into a single function which results in too much
@@ -47,6 +57,8 @@ static noinline int gup_pte_range(pmd_t 
 			put_page(page);
 			return 0;
 		}
+		if (PageTail(page))
+			pin_huge_page_tail(page);
 		pages[*nr] = page;
 		(*nr)++;
 
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -105,6 +105,16 @@ static inline void get_head_page_multipl
 	atomic_add(nr, &page->_count);
 }
 
+static inline void pin_huge_page_tail(struct page *page)
+{
+	/*
+	 * __split_huge_page_refcount() cannot run
+	 * from under us.
+	 */
+	VM_BUG_ON(atomic_read(&page->_count) < 0);
+	atomic_inc(&page->_count);
+}
+
 static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr,
 		unsigned long end, int write, struct page **pages, int *nr)
 {
@@ -128,6 +138,8 @@ static noinline int gup_huge_pmd(pmd_t p
 	do {
 		VM_BUG_ON(compound_head(page) != head);
 		pages[*nr] = page;
+		if (PageTail(page))
+			pin_huge_page_tail(page);
 		(*nr)++;
 		page++;
 		refs++;
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -325,9 +325,17 @@ static inline int page_count(struct page
 
 static inline void get_page(struct page *page)
 {
-	page = compound_head(page);
-	VM_BUG_ON(atomic_read(&page->_count) == 0);
+	VM_BUG_ON(atomic_read(&page->_count) < !PageTail(page));
 	atomic_inc(&page->_count);
+	if (unlikely(PageTail(page))) {
+		/*
+		 * This is safe only because
+		 * __split_huge_page_refcount can't run under
+		 * get_page().
+		 */
+		VM_BUG_ON(atomic_read(&page->first_page->_count) <= 0);
+		atomic_inc(&page->first_page->_count);
+	}
 }
 
 static inline struct page *virt_to_head_page(const void *x)
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -55,17 +55,82 @@ static void __page_cache_release(struct 
 		del_page_from_lru(zone, page);
 		spin_unlock_irqrestore(&zone->lru_lock, flags);
 	}
+}
+
+static void __put_single_page(struct page *page)
+{
+	__page_cache_release(page);
 	free_hot_cold_page(page, 0);
 }
 
-static void put_compound_page(struct page *page)
+static void __put_compound_page(struct page *page)
 {
-	page = compound_head(page);
-	if (put_page_testzero(page)) {
-		compound_page_dtor *dtor;
+	compound_page_dtor *dtor;
+
+	__page_cache_release(page);
+	dtor = get_compound_page_dtor(page);
+	(*dtor)(page);
+}
 
-		dtor = get_compound_page_dtor(page);
-		(*dtor)(page);
+static void put_compound_page(struct page *page)
+{
+	if (unlikely(PageTail(page))) {
+		/* __split_huge_page_refcount can run under us */
+		struct page *page_head = page->first_page;
+		smp_rmb();
+		if (likely(PageTail(page) && get_page_unless_zero(page_head))) {
+			if (unlikely(!PageHead(page_head))) {
+				/* PageHead is cleared after PageTail */
+				smp_rmb();
+				VM_BUG_ON(PageTail(page));
+				goto out_put_head;
+			}
+			/*
+			 * Only run compound_lock on a valid PageHead,
+			 * after having it pinned with
+			 * get_page_unless_zero() above.
+			 */
+			smp_mb();
+			/* page_head wasn't a dangling pointer */
+			compound_lock(page_head);
+			if (unlikely(!PageTail(page))) {
+				/* __split_huge_page_refcount run before us */
+				compound_unlock(page_head);
+				VM_BUG_ON(PageHead(page_head));
+			out_put_head:
+				if (put_page_testzero(page_head))
+					__put_single_page(page_head);
+			out_put_single:
+				if (put_page_testzero(page))
+					__put_single_page(page);
+				return;
+			}
+			VM_BUG_ON(page_head != page->first_page);
+			/*
+			 * We can release the refcount taken by
+			 * get_page_unless_zero now that
+			 * split_huge_page_refcount is blocked on the
+			 * compound_lock.
+			 */
+			if (put_page_testzero(page_head))
+				VM_BUG_ON(1);
+			/* __split_huge_page_refcount will wait now */
+			VM_BUG_ON(atomic_read(&page->_count) <= 0);
+			atomic_dec(&page->_count);
+			VM_BUG_ON(atomic_read(&page_head->_count) <= 0);
+			compound_unlock(page_head);
+			if (put_page_testzero(page_head))
+				__put_compound_page(page_head);
+		} else {
+			/* page_head is a dangling pointer */
+			VM_BUG_ON(PageTail(page));
+			goto out_put_single;
+		}
+	} else if (put_page_testzero(page)) {
+		if (PageHead(page))
+			__put_compound_page(page);
+		else
+			__put_single_page(page);
 	}
 }
 
@@ -74,7 +139,7 @@ void put_page(struct page *page)
 	if (unlikely(PageCompound(page)))
 		put_compound_page(page);
 	else if (put_page_testzero(page))
-		__page_cache_release(page);
+		__put_single_page(page);
 }
 EXPORT_SYMBOL(put_page);
 

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2010-02-26 20:09 UTC|newest]

Thread overview: 37+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-02-26 20:04 [patch 00/35] Transparent Hugepage support #12 aarcange
2010-02-26 20:04 ` [patch 01/35] define MADV_HUGEPAGE aarcange
2010-02-26 20:04 ` [patch 02/35] compound_lock aarcange
2010-02-26 20:04 ` aarcange [this message]
2010-02-26 20:04 ` [patch 04/35] update futex compound knowledge aarcange
2010-02-26 20:04 ` [patch 05/35] fix bad_page to show the real reason the page is bad aarcange
2010-02-26 20:04 ` [patch 06/35] clear compound mapping aarcange
2010-02-26 20:04 ` [patch 07/35] add native_set_pmd_at aarcange
2010-02-26 20:04 ` [patch 08/35] add pmd paravirt ops aarcange
2010-02-26 20:04 ` [patch 09/35] no paravirt version of pmd ops aarcange
2010-02-26 20:04 ` [patch 10/35] export maybe_mkwrite aarcange
2010-02-26 20:04 ` [patch 11/35] comment reminder in destroy_compound_page aarcange
2010-02-26 20:04 ` [patch 12/35] config_transparent_hugepage aarcange
2010-02-26 20:04 ` [patch 13/35] special pmd_trans_* functions aarcange
2010-02-26 20:04 ` [patch 14/35] add pmd mangling generic functions aarcange
2010-02-26 20:04 ` [patch 15/35] add pmd mangling functions to x86 aarcange
2010-02-26 20:04 ` [patch 16/35] bail out gup_fast on splitting pmd aarcange
2010-02-26 20:04 ` [patch 17/35] pte alloc trans splitting aarcange
2010-02-26 20:04 ` [patch 18/35] add pmd mmu_notifier helpers aarcange
2010-02-26 20:04 ` [patch 19/35] clear page compound aarcange
2010-02-26 20:04 ` [patch 20/35] add pmd_huge_pte to mm_struct aarcange
2010-02-26 20:04 ` [patch 21/35] split_huge_page_mm/vma aarcange
2010-02-26 20:04 ` [patch 22/35] split_huge_page paging aarcange
2010-02-26 20:04 ` [patch 23/35] clear_copy_huge_page aarcange
2010-02-26 20:04 ` [patch 24/35] kvm mmu transparent hugepage support aarcange
2010-02-26 20:04 ` [patch 25/35] _GFP_NO_KSWAPD aarcange
2010-02-26 20:04 ` [patch 26/35] dont alloc harder for gfp nomemalloc even if nowait aarcange
2010-02-26 20:05 ` [patch 27/35] transparent hugepage core aarcange
2010-02-26 20:05 ` [patch 28/35] adapt to mm_counter in -mm aarcange
2010-02-26 20:05 ` [patch 29/35] verify pmd_trans_huge isnt leaking aarcange
2010-02-26 20:05 ` [patch 30/35] madvise(MADV_HUGEPAGE) aarcange
2010-02-26 20:05 ` [patch 31/35] pmd_trans_huge migrate bugcheck aarcange
2010-02-26 20:05 ` [patch 32/35] memcg compound aarcange
2010-02-26 20:05 ` [patch 33/35] memcg huge memory aarcange
2010-02-26 20:05 ` [patch 34/35] transparent hugepage vmstat aarcange
2010-02-26 20:05 ` [patch 35/35] khugepaged aarcange
  -- strict thread matches above, loose matches on Subject: below --
2010-03-09 19:39 [patch 00/35] Transparent Hugepage support #13 aarcange
2010-03-09 19:39 ` [patch 03/35] alter compound get_page/put_page aarcange

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20100226200859.159424521@redhat.com \
    --to=aarcange@redhat.com \
    --cc=linux-mm@kvack.org \
    --cc=riel@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.