xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed
From: David Vrabel <david.vrabel@citrix.com>
To: xen-devel@lists.xensource.com
Cc: David Vrabel <david.vrabel@citrix.com>,
	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>,
	"H. Peter Anvin" <hpa@zytor.com>,
	x86@kernel.org, linux-kernel@vger.kernel.org
Subject: [PATCH 1/2] x86/mm: remove arch-specific ptep_get_and_clear() function
Date: Wed, 13 Jun 2012 11:20:44 +0100	[thread overview]
Message-ID: <1339582845-25659-2-git-send-email-david.vrabel@citrix.com> (raw)
In-Reply-To: <1339582845-25659-1-git-send-email-david.vrabel@citrix.com>

From: David Vrabel <david.vrabel@citrix.com>

x86 provides a ptep_get_and_clear() function instead of using the
generic implementation so it can atomically get and clear the PTE.

It is not clear why x86 has this requirement.  PTE updates are done
while the appropriate PTE lock are held, so presumably, it is an
attempt to ensure that the accessed and dirty bits of the PTE are
saved even if they have been updated by the hardware due to a
concurrent access on another CPU.

However, the atomic exchange is not sufficient for saving the dirty
bit as if there is a TLB entry for this page on another CPU then the
dirty bit may be written by that processor after the PTE is cleared
but before the TLB entry is flushed.  It is also not clear from the
Intel SDM[1] if the processor's read of the PTE and the write to set
the accessed bit are atomic or not.

With this patch the get/clear becomes a read of the PTE and call to
pte_clear() which allows the clears to be batched by some
paravirtualized guests (such as Xen guests).  This can lead to
significant performance improvements in munmap().  e.g., for Xen, a
trap-and-emulate[2] for every page becomes a hypercall for every 31
pages.

There should be no change in the performance on native or for KVM
guests.  There may be a performance regression with lguest guests as
an optimization for avoiding calling pte_update() when doing a full
teardown of an mm is removed.

As a consequence there is a slight increase of the window where a set
(by the processor) of the accessed or dirty bit may be lost.  This
shouldn't change the behaviour of user space in any meaningful way.
Any user space application accessing a mmap()'d region that is being
munmap()'d or mremap()'d is already going to have unpredicatable
behaviour -- the access may succeed, it may fault, or the write to the
mapped file may be lost.

[1] Intel Software Developer's Manual Vol 3A section 4.8 says nothing
on this.

[2] For 32-bit guests, two traps are required for every page to update
both halves of the PTE.

Signed-off-by: David Vrabel <david.vrabel@citrix.com>
---
 arch/x86/include/asm/pgtable-2level.h |    9 --------
 arch/x86/include/asm/pgtable-3level.h |   16 --------------
 arch/x86/include/asm/pgtable.h        |   37 ---------------------------------
 arch/x86/include/asm/pgtable_64.h     |   13 -----------
 4 files changed, 0 insertions(+), 75 deletions(-)

diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h
index 98391db..be7c20e 100644
--- a/arch/x86/include/asm/pgtable-2level.h
+++ b/arch/x86/include/asm/pgtable-2level.h
@@ -38,15 +38,6 @@ static inline void native_pte_clear(struct mm_struct *mm,
 }
 
 #ifdef CONFIG_SMP
-static inline pte_t native_ptep_get_and_clear(pte_t *xp)
-{
-	return __pte(xchg(&xp->pte_low, 0));
-}
-#else
-#define native_ptep_get_and_clear(xp) native_local_ptep_get_and_clear(xp)
-#endif
-
-#ifdef CONFIG_SMP
 static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp)
 {
 	return __pmd(xchg((pmdval_t *)xp, 0));
diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
index 43876f1..42101e9 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -134,22 +134,6 @@ static inline void pud_clear(pud_t *pudp)
 }
 
 #ifdef CONFIG_SMP
-static inline pte_t native_ptep_get_and_clear(pte_t *ptep)
-{
-	pte_t res;
-
-	/* xchg acts as a barrier before the setting of the high bits */
-	res.pte_low = xchg(&ptep->pte_low, 0);
-	res.pte_high = ptep->pte_high;
-	ptep->pte_high = 0;
-
-	return res;
-}
-#else
-#define native_ptep_get_and_clear(xp) native_local_ptep_get_and_clear(xp)
-#endif
-
-#ifdef CONFIG_SMP
 union split_pmd {
 	struct {
 		u32 pmd_low;
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 49afb3f..4413bed 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -598,16 +598,6 @@ static inline int pgd_none(pgd_t pgd)
 
 extern int direct_gbpages;
 
-/* local pte updates need not use xchg for locking */
-static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep)
-{
-	pte_t res = *ptep;
-
-	/* Pure native function needs no input for mm, addr */
-	native_pte_clear(NULL, 0, ptep);
-	return res;
-}
-
 static inline pmd_t native_local_pmdp_get_and_clear(pmd_t *pmdp)
 {
 	pmd_t res = *pmdp;
@@ -668,33 +658,6 @@ extern int ptep_test_and_clear_young(struct vm_area_struct *vma,
 extern int ptep_clear_flush_young(struct vm_area_struct *vma,
 				  unsigned long address, pte_t *ptep);
 
-#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
-static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
-				       pte_t *ptep)
-{
-	pte_t pte = native_ptep_get_and_clear(ptep);
-	pte_update(mm, addr, ptep);
-	return pte;
-}
-
-#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
-static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
-					    unsigned long addr, pte_t *ptep,
-					    int full)
-{
-	pte_t pte;
-	if (full) {
-		/*
-		 * Full address destruction in progress; paravirt does not
-		 * care about updates and native needs no locking
-		 */
-		pte = native_local_ptep_get_and_clear(ptep);
-	} else {
-		pte = ptep_get_and_clear(mm, addr, ptep);
-	}
-	return pte;
-}
-
 #define __HAVE_ARCH_PTEP_SET_WRPROTECT
 static inline void ptep_set_wrprotect(struct mm_struct *mm,
 				      unsigned long addr, pte_t *ptep)
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 975f709..cc12d27 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -69,19 +69,6 @@ static inline void native_pmd_clear(pmd_t *pmd)
 	native_set_pmd(pmd, native_make_pmd(0));
 }
 
-static inline pte_t native_ptep_get_and_clear(pte_t *xp)
-{
-#ifdef CONFIG_SMP
-	return native_make_pte(xchg(&xp->pte, 0));
-#else
-	/* native_local_ptep_get_and_clear,
-	   but duplicated because of cyclic dependency */
-	pte_t ret = *xp;
-	native_pte_clear(NULL, 0, xp);
-	return ret;
-#endif
-}
-
 static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp)
 {
 #ifdef CONFIG_SMP
-- 
1.7.2.5

  reply	other threads:[~2012-06-13 10:20 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-06-13 10:20 [PATCH 0/2] x86/mm: remove arch-specific PTE/PMD get-and-clear functions David Vrabel
2012-06-13 10:20 ` David Vrabel [this message]
2012-06-15  9:41   ` [PATCH 1/2] x86/mm: remove arch-specific ptep_get_and_clear() function David Vrabel
2012-06-15 10:49     ` [Xen-devel] " Keir Fraser
2012-06-13 10:20 ` [PATCH 2/2] x86/mm: remove arch-specific pmdp_get_and_clear() function David Vrabel
2012-06-13 14:04 ` [PATCH 0/2] x86/mm: remove arch-specific PTE/PMD get-and-clear functions Konrad Rzeszutek Wilk
2012-06-13 15:00   ` David Vrabel
2012-06-14 18:29     ` Konrad Rzeszutek Wilk
2012-06-14 18:41       ` H. Peter Anvin
2012-06-18  9:13         ` Rusty Russell

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1339582845-25659-2-git-send-email-david.vrabel@citrix.com \
    --to=david.vrabel@citrix.com \
    --cc=hpa@zytor.com \
    --cc=konrad.wilk@oracle.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=x86@kernel.org \
    --cc=xen-devel@lists.xensource.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).