linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Mel Gorman <mel@csn.ul.ie>
To: Andrea Arcangeli <aarcange@redhat.com>
Cc: linux-mm@kvack.org, Marcelo Tosatti <mtosatti@redhat.com>,
	Adam Litke <agl@us.ibm.com>, Avi Kivity <avi@redhat.com>,
	Izik Eidus <ieidus@redhat.com>,
	Hugh Dickins <hugh.dickins@tiscali.co.uk>,
	Nick Piggin <npiggin@suse.de>, Rik van Riel <riel@redhat.com>,
	Andi Kleen <andi@firstfloor.org>,
	Dave Hansen <dave@linux.vnet.ibm.com>,
	Benjamin Herrenschmidt <benh@kernel.crashing.org>,
	Ingo Molnar <mingo@elte.hu>, Mike Travis <travis@sgi.com>,
	KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>,
	Christoph Lameter <cl@linux-foundation.org>,
	Chris Wright <chrisw@sous-sol.org>,
	Andrew Morton <akpm@linux-foundation.org>
Subject: Re: [PATCH 14 of 28] pte alloc trans splitting
Date: Fri, 18 Dec 2009 19:03:34 +0000	[thread overview]
Message-ID: <20091218190334.GF21194@csn.ul.ie> (raw)
In-Reply-To: <fb0a9a34367c5c9bf2e4.1261076417@v2.random>

On Thu, Dec 17, 2009 at 07:00:17PM -0000, Andrea Arcangeli wrote:
> From: Andrea Arcangeli <aarcange@redhat.com>
> 
> pte alloc routines must wait for split_huge_page if the pmd is not
> present and not null (i.e. pmd_trans_splitting).

More stupid questions. When a large page is about to be split, do you clear
the present bit to cause faults and hold those accesses until the split
completes? Again, no doubt this is obvious later, but a description in the
leader of the basic approach to splitting huge pages wouldn't hurt.

> The additional
> branches are optimized away at compile time by pmd_trans_splitting if
> the config option is off. However we must pass the vma down in order
> to know the anon_vma lock to wait for.
> 
> Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
> ---
> 
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -948,7 +948,8 @@ static inline int __pmd_alloc(struct mm_
>  int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address);
>  #endif
>  
> -int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address);
> +int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
> +		pmd_t *pmd, unsigned long address);
>  int __pte_alloc_kernel(pmd_t *pmd, unsigned long address);
>  
>  /*
> @@ -1017,12 +1018,14 @@ static inline void pgtable_page_dtor(str
>  	pte_unmap(pte);					\
>  } while (0)
>  
> -#define pte_alloc_map(mm, pmd, address)			\
> -	((unlikely(!pmd_present(*(pmd))) && __pte_alloc(mm, pmd, address))? \
> -		NULL: pte_offset_map(pmd, address))
> +#define pte_alloc_map(mm, vma, pmd, address)				\
> +	((unlikely(!pmd_present(*(pmd))) && __pte_alloc(mm, vma,	\
> +							pmd, address))?	\
> +	 NULL: pte_offset_map(pmd, address))
>  
>  #define pte_alloc_map_lock(mm, pmd, address, ptlp)	\
> -	((unlikely(!pmd_present(*(pmd))) && __pte_alloc(mm, pmd, address))? \
> +	((unlikely(!pmd_present(*(pmd))) && __pte_alloc(mm, NULL,	\
> +							pmd, address))?	\
>  		NULL: pte_offset_map_lock(mm, pmd, address, ptlp))
>  
>  #define pte_alloc_kernel(pmd, address)			\
> diff --git a/mm/memory.c b/mm/memory.c
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -324,9 +324,11 @@ void free_pgtables(struct mmu_gather *tl
>  	}
>  }
>  
> -int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
> +int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
> +		pmd_t *pmd, unsigned long address)
>  {
>  	pgtable_t new = pte_alloc_one(mm, address);
> +	int wait_split_huge_page;
>  	if (!new)
>  		return -ENOMEM;
>  
> @@ -346,14 +348,18 @@ int __pte_alloc(struct mm_struct *mm, pm
>  	smp_wmb(); /* Could be smp_wmb__xxx(before|after)_spin_lock */
>  
>  	spin_lock(&mm->page_table_lock);
> -	if (!pmd_present(*pmd)) {	/* Has another populated it ? */
> +	wait_split_huge_page = 0;
> +	if (likely(pmd_none(*pmd))) {	/* Has another populated it ? */
>  		mm->nr_ptes++;
>  		pmd_populate(mm, pmd, new);
>  		new = NULL;
> -	}
> +	} else if (unlikely(pmd_trans_splitting(*pmd)))
> +		wait_split_huge_page = 1;
>  	spin_unlock(&mm->page_table_lock);
>  	if (new)
>  		pte_free(mm, new);
> +	if (wait_split_huge_page)
> +		wait_split_huge_page(vma->anon_vma, pmd);
>  	return 0;
>  }
>  
> @@ -366,10 +372,11 @@ int __pte_alloc_kernel(pmd_t *pmd, unsig
>  	smp_wmb(); /* See comment in __pte_alloc */
>  
>  	spin_lock(&init_mm.page_table_lock);
> -	if (!pmd_present(*pmd)) {	/* Has another populated it ? */
> +	if (likely(pmd_none(*pmd))) {	/* Has another populated it ? */
>  		pmd_populate_kernel(&init_mm, pmd, new);
>  		new = NULL;
> -	}
> +	} else
> +		VM_BUG_ON(pmd_trans_splitting(*pmd));
>  	spin_unlock(&init_mm.page_table_lock);
>  	if (new)
>  		pte_free_kernel(&init_mm, new);
> @@ -3020,7 +3027,7 @@ int handle_mm_fault(struct mm_struct *mm
>  	pmd = pmd_alloc(mm, pud, address);
>  	if (!pmd)
>  		return VM_FAULT_OOM;
> -	pte = pte_alloc_map(mm, pmd, address);
> +	pte = pte_alloc_map(mm, vma, pmd, address);
>  	if (!pte)
>  		return VM_FAULT_OOM;
>  
> diff --git a/mm/mremap.c b/mm/mremap.c
> --- a/mm/mremap.c
> +++ b/mm/mremap.c
> @@ -48,7 +48,8 @@ static pmd_t *get_old_pmd(struct mm_stru
>  	return pmd;
>  }
>  
> -static pmd_t *alloc_new_pmd(struct mm_struct *mm, unsigned long addr)
> +static pmd_t *alloc_new_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
> +			    unsigned long addr)
>  {
>  	pgd_t *pgd;
>  	pud_t *pud;
> @@ -63,7 +64,7 @@ static pmd_t *alloc_new_pmd(struct mm_st
>  	if (!pmd)
>  		return NULL;
>  
> -	if (!pmd_present(*pmd) && __pte_alloc(mm, pmd, addr))
> +	if (!pmd_present(*pmd) && __pte_alloc(mm, vma, pmd, addr))
>  		return NULL;
>  
>  	return pmd;
> @@ -148,7 +149,7 @@ unsigned long move_page_tables(struct vm
>  		old_pmd = get_old_pmd(vma->vm_mm, old_addr);
>  		if (!old_pmd)
>  			continue;
> -		new_pmd = alloc_new_pmd(vma->vm_mm, new_addr);
> +		new_pmd = alloc_new_pmd(vma->vm_mm, vma, new_addr);
>  		if (!new_pmd)
>  			break;
>  		next = (new_addr + PMD_SIZE) & PMD_MASK;
> 

-- 
Mel Gorman
Part-time PhD Student                          Linux Technology Center
University of Limerick                         IBM Dublin Software Lab

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org"> email@kvack.org </a>

  reply	other threads:[~2009-12-18 19:03 UTC|newest]

Thread overview: 89+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-12-17 19:00 [PATCH 00 of 28] Transparent Hugepage support #2 Andrea Arcangeli
2009-12-17 19:00 ` [PATCH 01 of 28] compound_lock Andrea Arcangeli
2009-12-17 19:46   ` Christoph Lameter
2009-12-18 14:27     ` Andrea Arcangeli
2009-12-17 19:00 ` [PATCH 02 of 28] alter compound get_page/put_page Andrea Arcangeli
2009-12-17 19:50   ` Christoph Lameter
2009-12-18 14:30     ` Andrea Arcangeli
2009-12-17 19:00 ` [PATCH 03 of 28] clear compound mapping Andrea Arcangeli
2009-12-17 19:00 ` [PATCH 04 of 28] add native_set_pmd_at Andrea Arcangeli
2009-12-17 19:00 ` [PATCH 05 of 28] add pmd paravirt ops Andrea Arcangeli
2009-12-17 19:00 ` [PATCH 06 of 28] no paravirt version of pmd ops Andrea Arcangeli
2009-12-17 19:00 ` [PATCH 07 of 28] export maybe_mkwrite Andrea Arcangeli
2009-12-17 19:00 ` [PATCH 08 of 28] comment reminder in destroy_compound_page Andrea Arcangeli
2009-12-17 19:00 ` [PATCH 09 of 28] config_transparent_hugepage Andrea Arcangeli
2009-12-17 19:00 ` [PATCH 10 of 28] add pmd mangling functions to x86 Andrea Arcangeli
2009-12-18 18:56   ` Mel Gorman
2009-12-19 15:27     ` Andrea Arcangeli
2009-12-17 19:00 ` [PATCH 11 of 28] add pmd mangling generic functions Andrea Arcangeli
2009-12-17 19:00 ` [PATCH 12 of 28] special pmd_trans_* functions Andrea Arcangeli
2009-12-17 19:00 ` [PATCH 13 of 28] bail out gup_fast on freezed pmd Andrea Arcangeli
2009-12-18 18:59   ` Mel Gorman
2009-12-19 15:48     ` Andrea Arcangeli
2009-12-17 19:00 ` [PATCH 14 of 28] pte alloc trans splitting Andrea Arcangeli
2009-12-18 19:03   ` Mel Gorman [this message]
2009-12-19 15:59     ` Andrea Arcangeli
2009-12-21 19:57       ` Mel Gorman
2009-12-17 19:00 ` [PATCH 15 of 28] add pmd mmu_notifier helpers Andrea Arcangeli
2009-12-17 19:00 ` [PATCH 16 of 28] clear page compound Andrea Arcangeli
2009-12-17 19:00 ` [PATCH 17 of 28] add pmd_huge_pte to mm_struct Andrea Arcangeli
2009-12-17 19:00 ` [PATCH 18 of 28] ensure mapcount is taken on head pages Andrea Arcangeli
2009-12-17 19:00 ` [PATCH 19 of 28] split_huge_page_mm/vma Andrea Arcangeli
2009-12-17 19:00 ` [PATCH 20 of 28] split_huge_page paging Andrea Arcangeli
2009-12-17 19:00 ` [PATCH 21 of 28] pmd_trans_huge migrate bugcheck Andrea Arcangeli
2009-12-17 19:00 ` [PATCH 22 of 28] clear_huge_page fix Andrea Arcangeli
2009-12-18 19:16   ` Mel Gorman
2009-12-17 19:00 ` [PATCH 23 of 28] clear_copy_huge_page Andrea Arcangeli
2009-12-17 19:00 ` [PATCH 24 of 28] kvm mmu transparent hugepage support Andrea Arcangeli
2009-12-17 19:00 ` [PATCH 25 of 28] transparent hugepage core Andrea Arcangeli
2009-12-18 20:03   ` Mel Gorman
2009-12-19 16:41     ` Andrea Arcangeli
2009-12-21 20:31       ` Mel Gorman
2009-12-23  0:06         ` Andrea Arcangeli
2009-12-23  6:09           ` Paul Mundt
2010-01-03 18:38           ` Mel Gorman
2010-01-04 15:49             ` Andrea Arcangeli
2010-01-04 16:58             ` Christoph Lameter
2010-01-04  6:16   ` Daisuke Nishimura
2010-01-04 16:04     ` Andrea Arcangeli
2009-12-17 19:00 ` [PATCH 26 of 28] madvise(MADV_HUGEPAGE) Andrea Arcangeli
2009-12-17 19:00 ` [PATCH 27 of 28] memcg compound Andrea Arcangeli
2009-12-18  1:27   ` KAMEZAWA Hiroyuki
2009-12-18 16:02     ` Andrea Arcangeli
2009-12-17 19:00 ` [PATCH 28 of 28] memcg huge memory Andrea Arcangeli
2009-12-18  1:33   ` KAMEZAWA Hiroyuki
2009-12-18 16:04     ` Andrea Arcangeli
2009-12-18 23:06       ` KAMEZAWA Hiroyuki
2009-12-20 18:39         ` Andrea Arcangeli
2009-12-21  0:26           ` KAMEZAWA Hiroyuki
2009-12-21  1:24             ` Daisuke Nishimura
2009-12-21  3:52               ` KAMEZAWA Hiroyuki
2009-12-21  4:33                 ` Daisuke Nishimura
2009-12-25  4:17                   ` Daisuke Nishimura
2009-12-25  4:37                     ` KAMEZAWA Hiroyuki
2009-12-24 10:00   ` Balbir Singh
2009-12-24 11:40     ` Andrea Arcangeli
2009-12-24 12:07       ` Balbir Singh
2009-12-17 19:54 ` [PATCH 00 of 28] Transparent Hugepage support #2 Christoph Lameter
2009-12-17 19:58   ` Rik van Riel
2009-12-17 20:09     ` Christoph Lameter
2009-12-18  5:12       ` Ingo Molnar
2009-12-18  6:18         ` KOSAKI Motohiro
2009-12-18 18:28         ` Christoph Lameter
2009-12-18 18:41           ` Dave Hansen
2009-12-18 19:17             ` Mike Travis
2009-12-18 19:28               ` Swap on flash SSDs Dave Hansen
2009-12-18 19:38                 ` Andi Kleen
2009-12-18 19:39                 ` Ingo Molnar
2009-12-18 20:13                   ` Linus Torvalds
2009-12-18 20:31                     ` Ingo Molnar
2009-12-19 18:38                   ` Jörn Engel
2009-12-18 14:05       ` [PATCH 00 of 28] Transparent Hugepage support #2 Andrea Arcangeli
2009-12-18 18:33         ` Christoph Lameter
2009-12-19 15:09           ` Andrea Arcangeli
2009-12-17 20:47     ` Mike Travis
2009-12-18  3:28       ` Rik van Riel
2009-12-18 14:12       ` Andrea Arcangeli
2009-12-18 12:52     ` Avi Kivity
2009-12-18 18:47 ` Dave Hansen
2009-12-19 15:20   ` Andrea Arcangeli

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20091218190334.GF21194@csn.ul.ie \
    --to=mel@csn.ul.ie \
    --cc=aarcange@redhat.com \
    --cc=agl@us.ibm.com \
    --cc=akpm@linux-foundation.org \
    --cc=andi@firstfloor.org \
    --cc=avi@redhat.com \
    --cc=benh@kernel.crashing.org \
    --cc=chrisw@sous-sol.org \
    --cc=cl@linux-foundation.org \
    --cc=dave@linux.vnet.ibm.com \
    --cc=hugh.dickins@tiscali.co.uk \
    --cc=ieidus@redhat.com \
    --cc=kamezawa.hiroyu@jp.fujitsu.com \
    --cc=linux-mm@kvack.org \
    --cc=mingo@elte.hu \
    --cc=mtosatti@redhat.com \
    --cc=npiggin@suse.de \
    --cc=riel@redhat.com \
    --cc=travis@sgi.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).