public inbox for linux-arch@vger.kernel.org
 help / color / mirror / Atom feed
* Re: [PATCH 7/8] mm: reinstate ZERO_PAGE
       [not found] ` <Pine.LNX.4.64.0909072238320.15430@sister.anvils>
@ 2009-09-08  7:31   ` Nick Piggin
  2009-09-08  7:31     ` Nick Piggin
                       ` (2 more replies)
  0 siblings, 3 replies; 12+ messages in thread
From: Nick Piggin @ 2009-09-08  7:31 UTC (permalink / raw)
  To: Hugh Dickins
  Cc: Andrew Morton, KAMEZAWA Hiroyuki, KOSAKI Motohiro, Linus Torvalds,
	Rik van Riel, linux-kernel, linux-mm, linux-arch

On Mon, Sep 07, 2009 at 10:39:34PM +0100, Hugh Dickins wrote:
> KAMEZAWA Hiroyuki has observed customers of earlier kernels taking
> advantage of the ZERO_PAGE: which we stopped do_anonymous_page() from
> using in 2.6.24.  And there were a couple of regression reports on LKML.
> 
> Following suggestions from Linus, reinstate do_anonymous_page() use of
> the ZERO_PAGE; but this time avoid dirtying its struct page cacheline
> with (map)count updates - let vm_normal_page() regard it as abnormal.
> 
> Use it only on arches which __HAVE_ARCH_PTE_SPECIAL (x86, s390, sh32,
> most powerpc): that's not essential, but minimizes additional branches
> (keeping them in the unlikely pte_special case); and incidentally
> excludes mips (some models of which needed eight colours of ZERO_PAGE
> to avoid costly exceptions).

Without looking closely, why is it a big problem to have a
!HAVE PTE SPECIAL case? Couldn't it just be a check for
pfn == zero_pfn that is conditionally compiled away for pte
special architectures anyway?

If zero page is such a good idea, I don't see the logic of
limiting it like this. Your patch looks pretty clean though.

At any rate, I think it might be an idea to cc linux-arch. 

> 
> Don't be fanatical about avoiding ZERO_PAGE updates: get_user_pages()
> callers won't want to make exceptions for it, so increment its count
> there.  Changes to mlock and migration? happily seems not needed.
> 
> In most places it's quicker to check pfn than struct page address:
> prepare a __read_mostly zero_pfn for that.  Does get_dump_page()
> still need its ZERO_PAGE check? probably not, but keep it anyway.
> 
> Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
> ---
> I have not studied the performance of this at all: I'd rather it go
> into mmotm where others may decide whether it's a good thing or not.
> 
>  mm/memory.c |   53 +++++++++++++++++++++++++++++++++++++++++---------
>  1 file changed, 44 insertions(+), 9 deletions(-)
> 
> --- mm6/mm/memory.c	2009-09-07 13:16:53.000000000 +0100
> +++ mm7/mm/memory.c	2009-09-07 13:17:01.000000000 +0100
> @@ -107,6 +107,17 @@ static int __init disable_randmaps(char
>  }
>  __setup("norandmaps", disable_randmaps);
>  
> +static unsigned long zero_pfn __read_mostly;
> +
> +/*
> + * CONFIG_MMU architectures set up ZERO_PAGE in their paging_init()
> + */
> +static int __init init_zero_pfn(void)
> +{
> +	zero_pfn = page_to_pfn(ZERO_PAGE(0));
> +	return 0;
> +}
> +core_initcall(init_zero_pfn);
>  
>  /*
>   * If a p?d_bad entry is found while walking page tables, report
> @@ -499,7 +510,9 @@ struct page *vm_normal_page(struct vm_ar
>  	if (HAVE_PTE_SPECIAL) {
>  		if (likely(!pte_special(pte)))
>  			goto check_pfn;
> -		if (!(vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP)))
> +		if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP))
> +			return NULL;
> +		if (pfn != zero_pfn)
>  			print_bad_pte(vma, addr, pte, NULL);
>  		return NULL;
>  	}
> @@ -1144,9 +1157,14 @@ struct page *follow_page(struct vm_area_
>  		goto no_page;
>  	if ((flags & FOLL_WRITE) && !pte_write(pte))
>  		goto unlock;
> +
>  	page = vm_normal_page(vma, address, pte);
> -	if (unlikely(!page))
> -		goto bad_page;
> +	if (unlikely(!page)) {
> +		if ((flags & FOLL_DUMP) ||
> +		    pte_pfn(pte) != zero_pfn)
> +			goto bad_page;
> +		page = pte_page(pte);
> +	}
>  
>  	if (flags & FOLL_GET)
>  		get_page(page);
> @@ -2085,10 +2103,19 @@ gotten:
>  
>  	if (unlikely(anon_vma_prepare(vma)))
>  		goto oom;
> -	VM_BUG_ON(old_page == ZERO_PAGE(0));
> -	new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
> -	if (!new_page)
> -		goto oom;
> +
> +	if (pte_pfn(orig_pte) == zero_pfn) {
> +		new_page = alloc_zeroed_user_highpage_movable(vma, address);
> +		if (!new_page)
> +			goto oom;
> +	} else {
> +		new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
> +		if (!new_page)
> +			goto oom;
> +		cow_user_page(new_page, old_page, address, vma);
> +	}
> +	__SetPageUptodate(new_page);
> +
>  	/*
>  	 * Don't let another task, with possibly unlocked vma,
>  	 * keep the mlocked page.
> @@ -2098,8 +2125,6 @@ gotten:
>  		clear_page_mlock(old_page);
>  		unlock_page(old_page);
>  	}
> -	cow_user_page(new_page, old_page, address, vma);
> -	__SetPageUptodate(new_page);
>  
>  	if (mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))
>  		goto oom_free_new;
> @@ -2594,6 +2619,15 @@ static int do_anonymous_page(struct mm_s
>  	spinlock_t *ptl;
>  	pte_t entry;
>  
> +	if (HAVE_PTE_SPECIAL && !(flags & FAULT_FLAG_WRITE)) {
> +		entry = pte_mkspecial(pfn_pte(zero_pfn, vma->vm_page_prot));
> +		ptl = pte_lockptr(mm, pmd);
> +		spin_lock(ptl);
> +		if (!pte_none(*page_table))
> +			goto unlock;
> +		goto setpte;
> +	}
> +
>  	/* Allocate our own private page. */
>  	pte_unmap(page_table);
>  
> @@ -2617,6 +2651,7 @@ static int do_anonymous_page(struct mm_s
>  
>  	inc_mm_counter(mm, anon_rss);
>  	page_add_new_anon_rmap(page, vma, address);
> +setpte:
>  	set_pte_at(mm, address, page_table, entry);
>  
>  	/* No need to invalidate - it was non-present before */

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 7/8] mm: reinstate ZERO_PAGE
  2009-09-08  7:31   ` [PATCH 7/8] mm: reinstate ZERO_PAGE Nick Piggin
@ 2009-09-08  7:31     ` Nick Piggin
  2009-09-08 12:17     ` Hugh Dickins
  2009-09-08 14:13     ` Linus Torvalds
  2 siblings, 0 replies; 12+ messages in thread
From: Nick Piggin @ 2009-09-08  7:31 UTC (permalink / raw)
  To: Hugh Dickins
  Cc: Andrew Morton, KAMEZAWA Hiroyuki, KOSAKI Motohiro, Linus Torvalds,
	Rik van Riel, linux-kernel, linux-mm, linux-arch

On Mon, Sep 07, 2009 at 10:39:34PM +0100, Hugh Dickins wrote:
> KAMEZAWA Hiroyuki has observed customers of earlier kernels taking
> advantage of the ZERO_PAGE: which we stopped do_anonymous_page() from
> using in 2.6.24.  And there were a couple of regression reports on LKML.
> 
> Following suggestions from Linus, reinstate do_anonymous_page() use of
> the ZERO_PAGE; but this time avoid dirtying its struct page cacheline
> with (map)count updates - let vm_normal_page() regard it as abnormal.
> 
> Use it only on arches which __HAVE_ARCH_PTE_SPECIAL (x86, s390, sh32,
> most powerpc): that's not essential, but minimizes additional branches
> (keeping them in the unlikely pte_special case); and incidentally
> excludes mips (some models of which needed eight colours of ZERO_PAGE
> to avoid costly exceptions).

Without looking closely, why is it a big problem to have a
!HAVE PTE SPECIAL case? Couldn't it just be a check for
pfn == zero_pfn that is conditionally compiled away for pte
special architectures anyway?

If zero page is such a good idea, I don't see the logic of
limiting it like this. Your patch looks pretty clean though.

At any rate, I think it might be an idea to cc linux-arch. 

> 
> Don't be fanatical about avoiding ZERO_PAGE updates: get_user_pages()
> callers won't want to make exceptions for it, so increment its count
> there.  Changes to mlock and migration? happily seems not needed.
> 
> In most places it's quicker to check pfn than struct page address:
> prepare a __read_mostly zero_pfn for that.  Does get_dump_page()
> still need its ZERO_PAGE check? probably not, but keep it anyway.
> 
> Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
> ---
> I have not studied the performance of this at all: I'd rather it go
> into mmotm where others may decide whether it's a good thing or not.
> 
>  mm/memory.c |   53 +++++++++++++++++++++++++++++++++++++++++---------
>  1 file changed, 44 insertions(+), 9 deletions(-)
> 
> --- mm6/mm/memory.c	2009-09-07 13:16:53.000000000 +0100
> +++ mm7/mm/memory.c	2009-09-07 13:17:01.000000000 +0100
> @@ -107,6 +107,17 @@ static int __init disable_randmaps(char
>  }
>  __setup("norandmaps", disable_randmaps);
>  
> +static unsigned long zero_pfn __read_mostly;
> +
> +/*
> + * CONFIG_MMU architectures set up ZERO_PAGE in their paging_init()
> + */
> +static int __init init_zero_pfn(void)
> +{
> +	zero_pfn = page_to_pfn(ZERO_PAGE(0));
> +	return 0;
> +}
> +core_initcall(init_zero_pfn);
>  
>  /*
>   * If a p?d_bad entry is found while walking page tables, report
> @@ -499,7 +510,9 @@ struct page *vm_normal_page(struct vm_ar
>  	if (HAVE_PTE_SPECIAL) {
>  		if (likely(!pte_special(pte)))
>  			goto check_pfn;
> -		if (!(vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP)))
> +		if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP))
> +			return NULL;
> +		if (pfn != zero_pfn)
>  			print_bad_pte(vma, addr, pte, NULL);
>  		return NULL;
>  	}
> @@ -1144,9 +1157,14 @@ struct page *follow_page(struct vm_area_
>  		goto no_page;
>  	if ((flags & FOLL_WRITE) && !pte_write(pte))
>  		goto unlock;
> +
>  	page = vm_normal_page(vma, address, pte);
> -	if (unlikely(!page))
> -		goto bad_page;
> +	if (unlikely(!page)) {
> +		if ((flags & FOLL_DUMP) ||
> +		    pte_pfn(pte) != zero_pfn)
> +			goto bad_page;
> +		page = pte_page(pte);
> +	}
>  
>  	if (flags & FOLL_GET)
>  		get_page(page);
> @@ -2085,10 +2103,19 @@ gotten:
>  
>  	if (unlikely(anon_vma_prepare(vma)))
>  		goto oom;
> -	VM_BUG_ON(old_page == ZERO_PAGE(0));
> -	new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
> -	if (!new_page)
> -		goto oom;
> +
> +	if (pte_pfn(orig_pte) == zero_pfn) {
> +		new_page = alloc_zeroed_user_highpage_movable(vma, address);
> +		if (!new_page)
> +			goto oom;
> +	} else {
> +		new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
> +		if (!new_page)
> +			goto oom;
> +		cow_user_page(new_page, old_page, address, vma);
> +	}
> +	__SetPageUptodate(new_page);
> +
>  	/*
>  	 * Don't let another task, with possibly unlocked vma,
>  	 * keep the mlocked page.
> @@ -2098,8 +2125,6 @@ gotten:
>  		clear_page_mlock(old_page);
>  		unlock_page(old_page);
>  	}
> -	cow_user_page(new_page, old_page, address, vma);
> -	__SetPageUptodate(new_page);
>  
>  	if (mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))
>  		goto oom_free_new;
> @@ -2594,6 +2619,15 @@ static int do_anonymous_page(struct mm_s
>  	spinlock_t *ptl;
>  	pte_t entry;
>  
> +	if (HAVE_PTE_SPECIAL && !(flags & FAULT_FLAG_WRITE)) {
> +		entry = pte_mkspecial(pfn_pte(zero_pfn, vma->vm_page_prot));
> +		ptl = pte_lockptr(mm, pmd);
> +		spin_lock(ptl);
> +		if (!pte_none(*page_table))
> +			goto unlock;
> +		goto setpte;
> +	}
> +
>  	/* Allocate our own private page. */
>  	pte_unmap(page_table);
>  
> @@ -2617,6 +2651,7 @@ static int do_anonymous_page(struct mm_s
>  
>  	inc_mm_counter(mm, anon_rss);
>  	page_add_new_anon_rmap(page, vma, address);
> +setpte:
>  	set_pte_at(mm, address, page_table, entry);
>  
>  	/* No need to invalidate - it was non-present before */

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 7/8] mm: reinstate ZERO_PAGE
  2009-09-08  7:31   ` [PATCH 7/8] mm: reinstate ZERO_PAGE Nick Piggin
  2009-09-08  7:31     ` Nick Piggin
@ 2009-09-08 12:17     ` Hugh Dickins
  2009-09-08 12:17       ` Hugh Dickins
  2009-09-08 15:34       ` Nick Piggin
  2009-09-08 14:13     ` Linus Torvalds
  2 siblings, 2 replies; 12+ messages in thread
From: Hugh Dickins @ 2009-09-08 12:17 UTC (permalink / raw)
  To: Nick Piggin
  Cc: Andrew Morton, KAMEZAWA Hiroyuki, KOSAKI Motohiro, Linus Torvalds,
	Rik van Riel, linux-kernel, linux-mm, linux-arch

On Tue, 8 Sep 2009, Nick Piggin wrote:
> On Mon, Sep 07, 2009 at 10:39:34PM +0100, Hugh Dickins wrote:
> > KAMEZAWA Hiroyuki has observed customers of earlier kernels taking
> > advantage of the ZERO_PAGE: which we stopped do_anonymous_page() from
> > using in 2.6.24.  And there were a couple of regression reports on LKML.
> > 
> > Following suggestions from Linus, reinstate do_anonymous_page() use of
> > the ZERO_PAGE; but this time avoid dirtying its struct page cacheline
> > with (map)count updates - let vm_normal_page() regard it as abnormal.
> > 
> > Use it only on arches which __HAVE_ARCH_PTE_SPECIAL (x86, s390, sh32,
> > most powerpc): that's not essential, but minimizes additional branches
> > (keeping them in the unlikely pte_special case); and incidentally
> > excludes mips (some models of which needed eight colours of ZERO_PAGE
> > to avoid costly exceptions).
> 
> Without looking closely, why is it a big problem to have a
> !HAVE PTE SPECIAL case? Couldn't it just be a check for
> pfn == zero_pfn that is conditionally compiled away for pte
> special architectures anyway?

Yes, I'm uncomfortable with that restriction too: it makes for
neater looking code in a couple of places, but it's not so good
for the architectures to diverge gratuitously there.

I'll give it a try without that restriction, see how it looks:
it was Linus who proposed the "special" approach, I'm sure he'll
speak up if he doesn't like how the alternative comes out.

Tucking the test away in an asm-generic macro, we can leave
the pain of a rangetest to the one mips case.

By the way, in compiling that list of "special" architectures,
I was surprised not to find ia64 amongst them.  Not that it
matters to me, but I thought the Fujitsu guys were usually
keen on Itanium - do they realize that the special test is
excluding it, or do they have their own special patch for it?

> 
> If zero page is such a good idea, I don't see the logic of
> limiting it like this. Your patch looks pretty clean though.
> 
> At any rate, I think it might be an idea to cc linux-arch. 

Yes, thanks.

Hugh

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 7/8] mm: reinstate ZERO_PAGE
  2009-09-08 12:17     ` Hugh Dickins
@ 2009-09-08 12:17       ` Hugh Dickins
  2009-09-08 15:34       ` Nick Piggin
  1 sibling, 0 replies; 12+ messages in thread
From: Hugh Dickins @ 2009-09-08 12:17 UTC (permalink / raw)
  To: Nick Piggin
  Cc: Andrew Morton, KAMEZAWA Hiroyuki, KOSAKI Motohiro, Linus Torvalds,
	Rik van Riel, linux-kernel, linux-mm, linux-arch

On Tue, 8 Sep 2009, Nick Piggin wrote:
> On Mon, Sep 07, 2009 at 10:39:34PM +0100, Hugh Dickins wrote:
> > KAMEZAWA Hiroyuki has observed customers of earlier kernels taking
> > advantage of the ZERO_PAGE: which we stopped do_anonymous_page() from
> > using in 2.6.24.  And there were a couple of regression reports on LKML.
> > 
> > Following suggestions from Linus, reinstate do_anonymous_page() use of
> > the ZERO_PAGE; but this time avoid dirtying its struct page cacheline
> > with (map)count updates - let vm_normal_page() regard it as abnormal.
> > 
> > Use it only on arches which __HAVE_ARCH_PTE_SPECIAL (x86, s390, sh32,
> > most powerpc): that's not essential, but minimizes additional branches
> > (keeping them in the unlikely pte_special case); and incidentally
> > excludes mips (some models of which needed eight colours of ZERO_PAGE
> > to avoid costly exceptions).
> 
> Without looking closely, why is it a big problem to have a
> !HAVE PTE SPECIAL case? Couldn't it just be a check for
> pfn == zero_pfn that is conditionally compiled away for pte
> special architectures anyway?

Yes, I'm uncomfortable with that restriction too: it makes for
neater looking code in a couple of places, but it's not so good
for the architectures to diverge gratuitously there.

I'll give it a try without that restriction, see how it looks:
it was Linus who proposed the "special" approach, I'm sure he'll
speak up if he doesn't like how the alternative comes out.

Tucking the test away in an asm-generic macro, we can leave
the pain of a rangetest to the one mips case.

By the way, in compiling that list of "special" architectures,
I was surprised not to find ia64 amongst them.  Not that it
matters to me, but I thought the Fujitsu guys were usually
keen on Itanium - do they realize that the special test is
excluding it, or do they have their own special patch for it?

> 
> If zero page is such a good idea, I don't see the logic of
> limiting it like this. Your patch looks pretty clean though.
> 
> At any rate, I think it might be an idea to cc linux-arch. 

Yes, thanks.

Hugh

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 7/8] mm: reinstate ZERO_PAGE
  2009-09-08  7:31   ` [PATCH 7/8] mm: reinstate ZERO_PAGE Nick Piggin
  2009-09-08  7:31     ` Nick Piggin
  2009-09-08 12:17     ` Hugh Dickins
@ 2009-09-08 14:13     ` Linus Torvalds
  2009-09-08 14:13       ` Linus Torvalds
  2 siblings, 1 reply; 12+ messages in thread
From: Linus Torvalds @ 2009-09-08 14:13 UTC (permalink / raw)
  To: Nick Piggin
  Cc: Hugh Dickins, Andrew Morton, KAMEZAWA Hiroyuki, KOSAKI Motohiro,
	Rik van Riel, linux-kernel, linux-mm, linux-arch



On Tue, 8 Sep 2009, Nick Piggin wrote:
> 
> Without looking closely, why is it a big problem to have a
> !HAVE PTE SPECIAL case? Couldn't it just be a check for
> pfn == zero_pfn that is conditionally compiled away for pte
> special architectures anyway?

At least traditionally, there wasn't a single zero_pfn, but multiple (for 
VIPT caches that have performance issues with aliases). But yeah, we could 
check just the pfn number, and allow any architecture to do it.

			Linus

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 7/8] mm: reinstate ZERO_PAGE
  2009-09-08 14:13     ` Linus Torvalds
@ 2009-09-08 14:13       ` Linus Torvalds
  0 siblings, 0 replies; 12+ messages in thread
From: Linus Torvalds @ 2009-09-08 14:13 UTC (permalink / raw)
  To: Nick Piggin
  Cc: Hugh Dickins, Andrew Morton, KAMEZAWA Hiroyuki, KOSAKI Motohiro,
	Rik van Riel, linux-kernel, linux-mm, linux-arch



On Tue, 8 Sep 2009, Nick Piggin wrote:
> 
> Without looking closely, why is it a big problem to have a
> !HAVE PTE SPECIAL case? Couldn't it just be a check for
> pfn == zero_pfn that is conditionally compiled away for pte
> special architectures anyway?

At least traditionally, there wasn't a single zero_pfn, but multiple (for 
VIPT caches that have performance issues with aliases). But yeah, we could 
check just the pfn number, and allow any architecture to do it.

			Linus

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 7/8] mm: reinstate ZERO_PAGE
  2009-09-08 12:17     ` Hugh Dickins
  2009-09-08 12:17       ` Hugh Dickins
@ 2009-09-08 15:34       ` Nick Piggin
  2009-09-08 16:40         ` Hugh Dickins
  1 sibling, 1 reply; 12+ messages in thread
From: Nick Piggin @ 2009-09-08 15:34 UTC (permalink / raw)
  To: Hugh Dickins
  Cc: Andrew Morton, KAMEZAWA Hiroyuki, KOSAKI Motohiro, Linus Torvalds,
	Rik van Riel, linux-kernel, linux-mm, linux-arch

On Tue, Sep 08, 2009 at 01:17:01PM +0100, Hugh Dickins wrote:
> On Tue, 8 Sep 2009, Nick Piggin wrote:
> > On Mon, Sep 07, 2009 at 10:39:34PM +0100, Hugh Dickins wrote:
> > > KAMEZAWA Hiroyuki has observed customers of earlier kernels taking
> > > advantage of the ZERO_PAGE: which we stopped do_anonymous_page() from
> > > using in 2.6.24.  And there were a couple of regression reports on LKML.
> > > 
> > > Following suggestions from Linus, reinstate do_anonymous_page() use of
> > > the ZERO_PAGE; but this time avoid dirtying its struct page cacheline
> > > with (map)count updates - let vm_normal_page() regard it as abnormal.
> > > 
> > > Use it only on arches which __HAVE_ARCH_PTE_SPECIAL (x86, s390, sh32,
> > > most powerpc): that's not essential, but minimizes additional branches
> > > (keeping them in the unlikely pte_special case); and incidentally
> > > excludes mips (some models of which needed eight colours of ZERO_PAGE
> > > to avoid costly exceptions).
> > 
> > Without looking closely, why is it a big problem to have a
> > !HAVE PTE SPECIAL case? Couldn't it just be a check for
> > pfn == zero_pfn that is conditionally compiled away for pte
> > special architectures anyway?
> 
> Yes, I'm uncomfortable with that restriction too: it makes for
> neater looking code in a couple of places, but it's not so good
> for the architectures to diverge gratuitously there.
> 
> I'll give it a try without that restriction, see how it looks:
> it was Linus who proposed the "special" approach, I'm sure he'll
> speak up if he doesn't like how the alternative comes out.

I guess using special is pretty neat and doesn't require an
additional branch in vm_normal_page paths. But I think it is
important to allow other architectures at least the _option_
to have equivalent behaviour as x86 here. So it would be
great if you would look into it.

 
> Tucking the test away in an asm-generic macro, we can leave
> the pain of a rangetest to the one mips case.
> 
> By the way, in compiling that list of "special" architectures,
> I was surprised not to find ia64 amongst them.  Not that it
> matters to me, but I thought the Fujitsu guys were usually
> keen on Itanium - do they realize that the special test is
> excluding it, or do they have their own special patch for it?

I don't understand your question. Are you asking whether they
know your patch will not enable zero pages on ia64?

I guess pte special was primarily driven by gup_fast, which in
turn was driven primarily by DB2 9.5, which I think might be
only available on x86 and ibm's architectures.

But I admit to being curious as to when I'll see a gup_fast
patch come out of SGI or HP or Fujitsu :)

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 7/8] mm: reinstate ZERO_PAGE
  2009-09-08 15:34       ` Nick Piggin
@ 2009-09-08 16:40         ` Hugh Dickins
  2009-09-08 16:40           ` Hugh Dickins
  0 siblings, 1 reply; 12+ messages in thread
From: Hugh Dickins @ 2009-09-08 16:40 UTC (permalink / raw)
  To: Nick Piggin
  Cc: Andrew Morton, KAMEZAWA Hiroyuki, KOSAKI Motohiro, Linus Torvalds,
	Rik van Riel, linux-kernel, linux-mm, linux-arch

On Tue, 8 Sep 2009, Nick Piggin wrote:
> On Tue, Sep 08, 2009 at 01:17:01PM +0100, Hugh Dickins wrote:
> > By the way, in compiling that list of "special" architectures,
> > I was surprised not to find ia64 amongst them.  Not that it
> > matters to me, but I thought the Fujitsu guys were usually
> > keen on Itanium - do they realize that the special test is
> > excluding it, or do they have their own special patch for it?
> 
> I don't understand your question. Are you asking whether they
> know your patch will not enable zero pages on ia64?

That's what I was meaning to ask, yes; but wondering whether
perhaps they've already got their own patch to enable pte_special
on ia64, and just haven't got around to sending it in yet.

> 
> I guess pte special was primarily driven by gup_fast, which in
> turn was driven primarily by DB2 9.5, which I think might be
> only available on x86 and ibm's architectures.
> 
> But I admit to being a curious as to when I'll see a gup_fast
> patch come out of SGI or HP or Fujitsu :)

Yes, me too!

Hugh

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 7/8] mm: reinstate ZERO_PAGE
  2009-09-08 16:40         ` Hugh Dickins
@ 2009-09-08 16:40           ` Hugh Dickins
  0 siblings, 0 replies; 12+ messages in thread
From: Hugh Dickins @ 2009-09-08 16:40 UTC (permalink / raw)
  To: Nick Piggin
  Cc: Andrew Morton, KAMEZAWA Hiroyuki, KOSAKI Motohiro, Linus Torvalds,
	Rik van Riel, linux-kernel, linux-mm, linux-arch

On Tue, 8 Sep 2009, Nick Piggin wrote:
> On Tue, Sep 08, 2009 at 01:17:01PM +0100, Hugh Dickins wrote:
> > By the way, in compiling that list of "special" architectures,
> > I was surprised not to find ia64 amongst them.  Not that it
> > matters to me, but I thought the Fujitsu guys were usually
> > keen on Itanium - do they realize that the special test is
> > excluding it, or do they have their own special patch for it?
> 
> I don't understand your question. Are you asking whether they
> know your patch will not enable zero pages on ia64?

That's what I was meaning to ask, yes; but wondering whether
perhaps they've already got their own patch to enable pte_special
on ia64, and just haven't got around to sending it in yet.

> 
> I guess pte special was primarily driven by gup_fast, which in
> turn was driven primarily by DB2 9.5, which I think might be
> only available on x86 and ibm's architectures.
> 
> But I admit to being curious as to when I'll see a gup_fast
> patch come out of SGI or HP or Fujitsu :)

Yes, me too!

Hugh

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 3/4] mm: ZERO_PAGE without PTE_SPECIAL
       [not found] ` <Pine.LNX.4.64.0909152127240.22199@sister.anvils>
@ 2009-09-15 20:37   ` Hugh Dickins
  2009-09-15 20:37     ` Hugh Dickins
  2009-09-16  6:20     ` KAMEZAWA Hiroyuki
  0 siblings, 2 replies; 12+ messages in thread
From: Hugh Dickins @ 2009-09-15 20:37 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Ralf Baechle, KAMEZAWA Hiroyuki, KOSAKI Motohiro, Linus Torvalds,
	Nick Piggin, Rik van Riel, Mel Gorman, Minchan Kim, linux-kernel,
	linux-mm, linux-arch

Reinstate anonymous use of ZERO_PAGE to all architectures, not just
to those which __HAVE_ARCH_PTE_SPECIAL: as suggested by Nick Piggin.

Contrary to how I'd imagined it, there's nothing ugly about this, just
a zero_pfn test built into one or another block of vm_normal_page().

But the MIPS ZERO_PAGE-of-many-colours case demands is_zero_pfn() and
my_zero_pfn() inlines.  Reinstate its mremap move_pte() shuffling of
ZERO_PAGEs we did from 2.6.17 to 2.6.19?  Not unless someone shouts
for that: it would have to take vm_flags to weed out some cases.

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
---
I've not built and tested the actual MIPS case, just hacked up x86
definitions to simulate it; had to drop the static from zero_pfn.

 arch/mips/include/asm/pgtable.h |   14 +++++++++++
 mm/memory.c                     |   36 ++++++++++++++++++++----------
 2 files changed, 39 insertions(+), 11 deletions(-)

--- mm2/arch/mips/include/asm/pgtable.h	2009-09-09 23:13:59.000000000 +0100
+++ mm3/arch/mips/include/asm/pgtable.h	2009-09-15 17:32:19.000000000 +0100
@@ -76,6 +76,20 @@ extern unsigned long zero_page_mask;
 #define ZERO_PAGE(vaddr) \
 	(virt_to_page((void *)(empty_zero_page + (((unsigned long)(vaddr)) & zero_page_mask))))
 
+#define is_zero_pfn is_zero_pfn
+static inline int is_zero_pfn(unsigned long pfn)
+{
+	extern unsigned long zero_pfn;
+	unsigned long offset_from_zero_pfn = pfn - zero_pfn;
+	return offset_from_zero_pfn <= (zero_page_mask >> PAGE_SHIFT);
+}
+
+#define my_zero_pfn my_zero_pfn
+static inline unsigned long my_zero_pfn(unsigned long addr)
+{
+	return page_to_pfn(ZERO_PAGE(addr));
+}
+
 extern void paging_init(void);
 
 /*
--- mm2/mm/memory.c	2009-09-14 16:34:37.000000000 +0100
+++ mm3/mm/memory.c	2009-09-15 17:32:19.000000000 +0100
@@ -107,7 +107,7 @@ static int __init disable_randmaps(char
 }
 __setup("norandmaps", disable_randmaps);
 
-static unsigned long zero_pfn __read_mostly;
+unsigned long zero_pfn __read_mostly;
 
 /*
  * CONFIG_MMU architectures set up ZERO_PAGE in their paging_init()
@@ -455,6 +455,20 @@ static inline int is_cow_mapping(unsigne
 	return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
 }
 
+#ifndef is_zero_pfn
+static inline int is_zero_pfn(unsigned long pfn)
+{
+	return pfn == zero_pfn;
+}
+#endif
+
+#ifndef my_zero_pfn
+static inline unsigned long my_zero_pfn(unsigned long addr)
+{
+	return zero_pfn;
+}
+#endif
+
 /*
  * vm_normal_page -- This function gets the "struct page" associated with a pte.
  *
@@ -512,7 +526,7 @@ struct page *vm_normal_page(struct vm_ar
 			goto check_pfn;
 		if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP))
 			return NULL;
-		if (pfn != zero_pfn)
+		if (!is_zero_pfn(pfn))
 			print_bad_pte(vma, addr, pte, NULL);
 		return NULL;
 	}
@@ -534,6 +548,8 @@ struct page *vm_normal_page(struct vm_ar
 		}
 	}
 
+	if (is_zero_pfn(pfn))
+		return NULL;
 check_pfn:
 	if (unlikely(pfn > highest_memmap_pfn)) {
 		print_bad_pte(vma, addr, pte, NULL);
@@ -1161,7 +1177,7 @@ struct page *follow_page(struct vm_area_
 	page = vm_normal_page(vma, address, pte);
 	if (unlikely(!page)) {
 		if ((flags & FOLL_DUMP) ||
-		    pte_pfn(pte) != zero_pfn)
+		    !is_zero_pfn(pte_pfn(pte)))
 			goto bad_page;
 		page = pte_page(pte);
 	}
@@ -1444,10 +1460,6 @@ struct page *get_dump_page(unsigned long
 	if (__get_user_pages(current, current->mm, addr, 1,
 			FOLL_FORCE | FOLL_DUMP | FOLL_GET, &page, &vma) < 1)
 		return NULL;
-	if (page == ZERO_PAGE(0)) {
-		page_cache_release(page);
-		return NULL;
-	}
 	flush_cache_page(vma, addr, page_to_pfn(page));
 	return page;
 }
@@ -1630,7 +1642,8 @@ int vm_insert_mixed(struct vm_area_struc
 	 * If we don't have pte special, then we have to use the pfn_valid()
 	 * based VM_MIXEDMAP scheme (see vm_normal_page), and thus we *must*
 	 * refcount the page if pfn_valid is true (hence insert_page rather
-	 * than insert_pfn).
+	 * than insert_pfn).  If a zero_pfn were inserted into a VM_MIXEDMAP
+	 * without pte special, it would there be refcounted as a normal page.
 	 */
 	if (!HAVE_PTE_SPECIAL && pfn_valid(pfn)) {
 		struct page *page;
@@ -2098,7 +2111,7 @@ gotten:
 	if (unlikely(anon_vma_prepare(vma)))
 		goto oom;
 
-	if (pte_pfn(orig_pte) == zero_pfn) {
+	if (is_zero_pfn(pte_pfn(orig_pte))) {
 		new_page = alloc_zeroed_user_highpage_movable(vma, address);
 		if (!new_page)
 			goto oom;
@@ -2613,8 +2626,9 @@ static int do_anonymous_page(struct mm_s
 	spinlock_t *ptl;
 	pte_t entry;
 
-	if (HAVE_PTE_SPECIAL && !(flags & FAULT_FLAG_WRITE)) {
-		entry = pte_mkspecial(pfn_pte(zero_pfn, vma->vm_page_prot));
+	if (!(flags & FAULT_FLAG_WRITE)) {
+		entry = pte_mkspecial(pfn_pte(my_zero_pfn(address),
+						vma->vm_page_prot));
 		ptl = pte_lockptr(mm, pmd);
 		spin_lock(ptl);
 		if (!pte_none(*page_table))

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 3/4] mm: ZERO_PAGE without PTE_SPECIAL
  2009-09-15 20:37   ` [PATCH 3/4] mm: ZERO_PAGE without PTE_SPECIAL Hugh Dickins
@ 2009-09-15 20:37     ` Hugh Dickins
  2009-09-16  6:20     ` KAMEZAWA Hiroyuki
  1 sibling, 0 replies; 12+ messages in thread
From: Hugh Dickins @ 2009-09-15 20:37 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Ralf Baechle, KAMEZAWA Hiroyuki, KOSAKI Motohiro, Linus Torvalds,
	Nick Piggin, Rik van Riel, Mel Gorman, Minchan Kim, linux-kernel,
	linux-mm, linux-arch

Reinstate anonymous use of ZERO_PAGE to all architectures, not just
to those which __HAVE_ARCH_PTE_SPECIAL: as suggested by Nick Piggin.

Contrary to how I'd imagined it, there's nothing ugly about this, just
a zero_pfn test built into one or another block of vm_normal_page().

But the MIPS ZERO_PAGE-of-many-colours case demands is_zero_pfn() and
my_zero_pfn() inlines.  Reinstate its mremap move_pte() shuffling of
ZERO_PAGEs we did from 2.6.17 to 2.6.19?  Not unless someone shouts
for that: it would have to take vm_flags to weed out some cases.

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
---
I've not built and tested the actual MIPS case, just hacked up x86
definitions to simulate it; had to drop the static from zero_pfn.

 arch/mips/include/asm/pgtable.h |   14 +++++++++++
 mm/memory.c                     |   36 ++++++++++++++++++++----------
 2 files changed, 39 insertions(+), 11 deletions(-)

--- mm2/arch/mips/include/asm/pgtable.h	2009-09-09 23:13:59.000000000 +0100
+++ mm3/arch/mips/include/asm/pgtable.h	2009-09-15 17:32:19.000000000 +0100
@@ -76,6 +76,20 @@ extern unsigned long zero_page_mask;
 #define ZERO_PAGE(vaddr) \
 	(virt_to_page((void *)(empty_zero_page + (((unsigned long)(vaddr)) & zero_page_mask))))
 
+#define is_zero_pfn is_zero_pfn
+static inline int is_zero_pfn(unsigned long pfn)
+{
+	extern unsigned long zero_pfn;
+	unsigned long offset_from_zero_pfn = pfn - zero_pfn;
+	return offset_from_zero_pfn <= (zero_page_mask >> PAGE_SHIFT);
+}
+
+#define my_zero_pfn my_zero_pfn
+static inline unsigned long my_zero_pfn(unsigned long addr)
+{
+	return page_to_pfn(ZERO_PAGE(addr));
+}
+
 extern void paging_init(void);
 
 /*
--- mm2/mm/memory.c	2009-09-14 16:34:37.000000000 +0100
+++ mm3/mm/memory.c	2009-09-15 17:32:19.000000000 +0100
@@ -107,7 +107,7 @@ static int __init disable_randmaps(char
 }
 __setup("norandmaps", disable_randmaps);
 
-static unsigned long zero_pfn __read_mostly;
+unsigned long zero_pfn __read_mostly;
 
 /*
  * CONFIG_MMU architectures set up ZERO_PAGE in their paging_init()
@@ -455,6 +455,20 @@ static inline int is_cow_mapping(unsigne
 	return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
 }
 
+#ifndef is_zero_pfn
+static inline int is_zero_pfn(unsigned long pfn)
+{
+	return pfn == zero_pfn;
+}
+#endif
+
+#ifndef my_zero_pfn
+static inline unsigned long my_zero_pfn(unsigned long addr)
+{
+	return zero_pfn;
+}
+#endif
+
 /*
  * vm_normal_page -- This function gets the "struct page" associated with a pte.
  *
@@ -512,7 +526,7 @@ struct page *vm_normal_page(struct vm_ar
 			goto check_pfn;
 		if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP))
 			return NULL;
-		if (pfn != zero_pfn)
+		if (!is_zero_pfn(pfn))
 			print_bad_pte(vma, addr, pte, NULL);
 		return NULL;
 	}
@@ -534,6 +548,8 @@ struct page *vm_normal_page(struct vm_ar
 		}
 	}
 
+	if (is_zero_pfn(pfn))
+		return NULL;
 check_pfn:
 	if (unlikely(pfn > highest_memmap_pfn)) {
 		print_bad_pte(vma, addr, pte, NULL);
@@ -1161,7 +1177,7 @@ struct page *follow_page(struct vm_area_
 	page = vm_normal_page(vma, address, pte);
 	if (unlikely(!page)) {
 		if ((flags & FOLL_DUMP) ||
-		    pte_pfn(pte) != zero_pfn)
+		    !is_zero_pfn(pte_pfn(pte)))
 			goto bad_page;
 		page = pte_page(pte);
 	}
@@ -1444,10 +1460,6 @@ struct page *get_dump_page(unsigned long
 	if (__get_user_pages(current, current->mm, addr, 1,
 			FOLL_FORCE | FOLL_DUMP | FOLL_GET, &page, &vma) < 1)
 		return NULL;
-	if (page == ZERO_PAGE(0)) {
-		page_cache_release(page);
-		return NULL;
-	}
 	flush_cache_page(vma, addr, page_to_pfn(page));
 	return page;
 }
@@ -1630,7 +1642,8 @@ int vm_insert_mixed(struct vm_area_struc
 	 * If we don't have pte special, then we have to use the pfn_valid()
 	 * based VM_MIXEDMAP scheme (see vm_normal_page), and thus we *must*
 	 * refcount the page if pfn_valid is true (hence insert_page rather
-	 * than insert_pfn).
+	 * than insert_pfn).  If a zero_pfn were inserted into a VM_MIXEDMAP
+	 * without pte special, it would there be refcounted as a normal page.
 	 */
 	if (!HAVE_PTE_SPECIAL && pfn_valid(pfn)) {
 		struct page *page;
@@ -2098,7 +2111,7 @@ gotten:
 	if (unlikely(anon_vma_prepare(vma)))
 		goto oom;
 
-	if (pte_pfn(orig_pte) == zero_pfn) {
+	if (is_zero_pfn(pte_pfn(orig_pte))) {
 		new_page = alloc_zeroed_user_highpage_movable(vma, address);
 		if (!new_page)
 			goto oom;
@@ -2613,8 +2626,9 @@ static int do_anonymous_page(struct mm_s
 	spinlock_t *ptl;
 	pte_t entry;
 
-	if (HAVE_PTE_SPECIAL && !(flags & FAULT_FLAG_WRITE)) {
-		entry = pte_mkspecial(pfn_pte(zero_pfn, vma->vm_page_prot));
+	if (!(flags & FAULT_FLAG_WRITE)) {
+		entry = pte_mkspecial(pfn_pte(my_zero_pfn(address),
+						vma->vm_page_prot));
 		ptl = pte_lockptr(mm, pmd);
 		spin_lock(ptl);
 		if (!pte_none(*page_table))

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 3/4] mm: ZERO_PAGE without PTE_SPECIAL
  2009-09-15 20:37   ` [PATCH 3/4] mm: ZERO_PAGE without PTE_SPECIAL Hugh Dickins
  2009-09-15 20:37     ` Hugh Dickins
@ 2009-09-16  6:20     ` KAMEZAWA Hiroyuki
  1 sibling, 0 replies; 12+ messages in thread
From: KAMEZAWA Hiroyuki @ 2009-09-16  6:20 UTC (permalink / raw)
  To: Hugh Dickins
  Cc: Andrew Morton, Ralf Baechle, KOSAKI Motohiro, Linus Torvalds,
	Nick Piggin, Rik van Riel, Mel Gorman, Minchan Kim, linux-kernel,
	linux-mm, linux-arch

On Tue, 15 Sep 2009 21:37:20 +0100 (BST)
Hugh Dickins <hugh.dickins@tiscali.co.uk> wrote:

> Reinstate anonymous use of ZERO_PAGE to all architectures, not just
> to those which __HAVE_ARCH_PTE_SPECIAL: as suggested by Nick Piggin.
> 
> Contrary to how I'd imagined it, there's nothing ugly about this, just
> a zero_pfn test built into one or another block of vm_normal_page().
> 
> But the MIPS ZERO_PAGE-of-many-colours case demands is_zero_pfn() and
> my_zero_pfn() inlines.  Reinstate its mremap move_pte() shuffling of
> ZERO_PAGEs we did from 2.6.17 to 2.6.19?  Not unless someone shouts
> for that: it would have to take vm_flags to weed out some cases.
> 
> Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>

Thank you. I like this way.
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>

> ---
> I've not built and tested the actual MIPS case, just hacked up x86
> definitions to simulate it; had to drop the static from zero_pfn.
> 
>  arch/mips/include/asm/pgtable.h |   14 +++++++++++
>  mm/memory.c                     |   36 ++++++++++++++++++++----------
>  2 files changed, 39 insertions(+), 11 deletions(-)
> 
> --- mm2/arch/mips/include/asm/pgtable.h	2009-09-09 23:13:59.000000000 +0100
> +++ mm3/arch/mips/include/asm/pgtable.h	2009-09-15 17:32:19.000000000 +0100
> @@ -76,6 +76,20 @@ extern unsigned long zero_page_mask;
>  #define ZERO_PAGE(vaddr) \
>  	(virt_to_page((void *)(empty_zero_page + (((unsigned long)(vaddr)) & zero_page_mask))))
>  
> +#define is_zero_pfn is_zero_pfn
> +static inline int is_zero_pfn(unsigned long pfn)
> +{
> +	extern unsigned long zero_pfn;
> +	unsigned long offset_from_zero_pfn = pfn - zero_pfn;
> +	return offset_from_zero_pfn <= (zero_page_mask >> PAGE_SHIFT);
> +}
> +
> +#define my_zero_pfn my_zero_pfn
> +static inline unsigned long my_zero_pfn(unsigned long addr)
> +{
> +	return page_to_pfn(ZERO_PAGE(addr));
> +}
> +
>  extern void paging_init(void);
>  
>  /*
> --- mm2/mm/memory.c	2009-09-14 16:34:37.000000000 +0100
> +++ mm3/mm/memory.c	2009-09-15 17:32:19.000000000 +0100
> @@ -107,7 +107,7 @@ static int __init disable_randmaps(char
>  }
>  __setup("norandmaps", disable_randmaps);
>  
> -static unsigned long zero_pfn __read_mostly;
> +unsigned long zero_pfn __read_mostly;
>  
>  /*
>   * CONFIG_MMU architectures set up ZERO_PAGE in their paging_init()
> @@ -455,6 +455,20 @@ static inline int is_cow_mapping(unsigne
>  	return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
>  }
>  
> +#ifndef is_zero_pfn
> +static inline int is_zero_pfn(unsigned long pfn)
> +{
> +	return pfn == zero_pfn;
> +}
> +#endif
> +
> +#ifndef my_zero_pfn
> +static inline unsigned long my_zero_pfn(unsigned long addr)
> +{
> +	return zero_pfn;
> +}
> +#endif
> +
>  /*
>   * vm_normal_page -- This function gets the "struct page" associated with a pte.
>   *
> @@ -512,7 +526,7 @@ struct page *vm_normal_page(struct vm_ar
>  			goto check_pfn;
>  		if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP))
>  			return NULL;
> -		if (pfn != zero_pfn)
> +		if (!is_zero_pfn(pfn))
>  			print_bad_pte(vma, addr, pte, NULL);
>  		return NULL;
>  	}
> @@ -534,6 +548,8 @@ struct page *vm_normal_page(struct vm_ar
>  		}
>  	}
>  
> +	if (is_zero_pfn(pfn))
> +		return NULL;
>  check_pfn:
>  	if (unlikely(pfn > highest_memmap_pfn)) {
>  		print_bad_pte(vma, addr, pte, NULL);
> @@ -1161,7 +1177,7 @@ struct page *follow_page(struct vm_area_
>  	page = vm_normal_page(vma, address, pte);
>  	if (unlikely(!page)) {
>  		if ((flags & FOLL_DUMP) ||
> -		    pte_pfn(pte) != zero_pfn)
> +		    !is_zero_pfn(pte_pfn(pte)))
>  			goto bad_page;
>  		page = pte_page(pte);
>  	}
> @@ -1444,10 +1460,6 @@ struct page *get_dump_page(unsigned long
>  	if (__get_user_pages(current, current->mm, addr, 1,
>  			FOLL_FORCE | FOLL_DUMP | FOLL_GET, &page, &vma) < 1)
>  		return NULL;
> -	if (page == ZERO_PAGE(0)) {
> -		page_cache_release(page);
> -		return NULL;
> -	}
>  	flush_cache_page(vma, addr, page_to_pfn(page));
>  	return page;
>  }
> @@ -1630,7 +1642,8 @@ int vm_insert_mixed(struct vm_area_struc
>  	 * If we don't have pte special, then we have to use the pfn_valid()
>  	 * based VM_MIXEDMAP scheme (see vm_normal_page), and thus we *must*
>  	 * refcount the page if pfn_valid is true (hence insert_page rather
> -	 * than insert_pfn).
> +	 * than insert_pfn).  If a zero_pfn were inserted into a VM_MIXEDMAP
> +	 * without pte special, it would there be refcounted as a normal page.
>  	 */
>  	if (!HAVE_PTE_SPECIAL && pfn_valid(pfn)) {
>  		struct page *page;
> @@ -2098,7 +2111,7 @@ gotten:
>  	if (unlikely(anon_vma_prepare(vma)))
>  		goto oom;
>  
> -	if (pte_pfn(orig_pte) == zero_pfn) {
> +	if (is_zero_pfn(pte_pfn(orig_pte))) {
>  		new_page = alloc_zeroed_user_highpage_movable(vma, address);
>  		if (!new_page)
>  			goto oom;
> @@ -2613,8 +2626,9 @@ static int do_anonymous_page(struct mm_s
>  	spinlock_t *ptl;
>  	pte_t entry;
>  
> -	if (HAVE_PTE_SPECIAL && !(flags & FAULT_FLAG_WRITE)) {
> -		entry = pte_mkspecial(pfn_pte(zero_pfn, vma->vm_page_prot));
> +	if (!(flags & FAULT_FLAG_WRITE)) {
> +		entry = pte_mkspecial(pfn_pte(my_zero_pfn(address),
> +						vma->vm_page_prot));
>  		ptl = pte_lockptr(mm, pmd);
>  		spin_lock(ptl);
>  		if (!pte_none(*page_table))
> 

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2009-09-16  6:22 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <Pine.LNX.4.64.0909072222070.15424@sister.anvils>
     [not found] ` <Pine.LNX.4.64.0909072238320.15430@sister.anvils>
2009-09-08  7:31   ` [PATCH 7/8] mm: reinstate ZERO_PAGE Nick Piggin
2009-09-08  7:31     ` Nick Piggin
2009-09-08 12:17     ` Hugh Dickins
2009-09-08 12:17       ` Hugh Dickins
2009-09-08 15:34       ` Nick Piggin
2009-09-08 16:40         ` Hugh Dickins
2009-09-08 16:40           ` Hugh Dickins
2009-09-08 14:13     ` Linus Torvalds
2009-09-08 14:13       ` Linus Torvalds
     [not found] ` <Pine.LNX.4.64.0909152127240.22199@sister.anvils>
2009-09-15 20:37   ` [PATCH 3/4] mm: ZERO_PAGE without PTE_SPECIAL Hugh Dickins
2009-09-15 20:37     ` Hugh Dickins
2009-09-16  6:20     ` KAMEZAWA Hiroyuki

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox