All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC] linux: add p[mug]d_clear_full() accessors
@ 2008-05-20 14:42 Jan Beulich
  2008-05-20 14:46 ` Keir Fraser
  2008-05-20 15:17 ` Jeremy Fitzhardinge
  0 siblings, 2 replies; 7+ messages in thread
From: Jan Beulich @ 2008-05-20 14:42 UTC (permalink / raw)
  To: xen-devel

.. to avoid using hypercalls for clearing dead (un-pinned) page tables.
This eliminates well over a quarter of a million hypercalls during
kernel builds (up to about a million depending on the configuration).

This is against 2.6.25.4, it does not apply to the 2.6.18 tree (I'd
make this effort only if the patch is desired into that tree). The
primary goal of sending this is to find out whether the required change
to include/asm-generic/pgtable.h and mm/memory.c is considered
worthwhile (in which case I'd make an attempt at getting this accepted
upstream).

Signed-off-by: Jan Beulich <jbeulich@novell.com>

Index: head-2008-05-19/include/asm-generic/pgtable.h
===================================================================
--- head-2008-05-19.orig/include/asm-generic/pgtable.h	2008-05-19 10:48:15.000000000 +0200
+++ head-2008-05-19/include/asm-generic/pgtable.h	2008-05-19 10:57:25.000000000 +0200
@@ -133,6 +133,18 @@ static inline void ptep_set_wrprotect(st
 #define move_pte(pte, prot, old_addr, new_addr)	(pte)
 #endif
 
+#ifndef __HAVE_ARCH_PMD_CLEAR_FULL
+#define pmd_clear_full(mm, addr, pmd, full) pmd_clear(pmd)
+#endif
+
+#ifndef __HAVE_ARCH_PUD_CLEAR_FULL
+#define pud_clear_full(mm, addr, pud, full) pud_clear(pud)
+#endif
+
+#ifndef __HAVE_ARCH_PGD_CLEAR_FULL
+#define pgd_clear_full(mm, addr, pgd, full) pgd_clear(pgd)
+#endif
+
 /*
  * When walking page tables, get the address of the next boundary,
  * or the end address of the range if that comes earlier.  Although no
Index: head-2008-05-19/include/asm-x86/mach-xen/asm/pgtable.h
===================================================================
--- head-2008-05-19.orig/include/asm-x86/mach-xen/asm/pgtable.h	2008-05-19 10:57:24.000000000 +0200
+++ head-2008-05-19/include/asm-x86/mach-xen/asm/pgtable.h	2008-05-19 10:57:25.000000000 +0200
@@ -397,6 +397,22 @@ static inline pte_t ptep_get_and_clear(s
 
 pte_t xen_ptep_get_and_clear_full(struct vm_area_struct *, unsigned long, pte_t *, int);
 
+#define __HAVE_ARCH_PMD_CLEAR_FULL
+#define pmd_clear_full(mm, addr, pmd, full)			\
+	(!mm_is_pinned(mm) ? __xen_pmd_clear(pmd) : xen_pmd_clear(pmd))
+
+#ifndef __PAGETABLE_PMD_FOLDED
+#define __HAVE_ARCH_PUD_CLEAR_FULL
+#define pud_clear_full(mm, addr, pud, full)			\
+	(!mm_is_pinned(mm) ? __xen_pud_clear(pud) : xen_pud_clear(pud))
+#endif
+
+#ifndef __PAGETABLE_PUD_FOLDED
+#define __HAVE_ARCH_PGD_CLEAR_FULL
+#define pgd_clear_full(mm, addr, pgd, full)			\
+	(!mm_is_pinned(mm) ? __xen_pgd_clear(pgd) : xen_pgd_clear(pgd))
+#endif
+
 #define __HAVE_ARCH_PTEP_SET_WRPROTECT
 static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
Index: head-2008-05-19/include/asm-x86/mach-xen/asm/pgtable-2level.h
===================================================================
--- head-2008-05-19.orig/include/asm-x86/mach-xen/asm/pgtable-2level.h	2008-05-19 10:49:58.000000000 +0200
+++ head-2008-05-19/include/asm-x86/mach-xen/asm/pgtable-2level.h	2008-05-19 10:57:25.000000000 +0200
@@ -35,6 +35,11 @@ static inline void xen_pmd_clear(pmd_t *
 
 #define __xen_pte_clear(ptep) xen_set_pte(ptep, __pte(0))
 
+static inline void __xen_pmd_clear(pmd_t *pmdp)
+{
+	*pmdp = __pmd(0);
+}
+
 #ifdef CONFIG_SMP
 static inline pte_t xen_ptep_get_and_clear(pte_t *xp, pte_t res)
 {
Index: head-2008-05-19/include/asm-x86/mach-xen/asm/pgtable-3level.h
===================================================================
--- head-2008-05-19.orig/include/asm-x86/mach-xen/asm/pgtable-3level.h	2008-05-19 10:49:58.000000000 +0200
+++ head-2008-05-19/include/asm-x86/mach-xen/asm/pgtable-3level.h	2008-05-19 10:57:25.000000000 +0200
@@ -76,6 +76,11 @@ static inline void xen_pmd_clear(pmd_t *
 	xen_l2_entry_update(pmd, __pmd(0));
 }
 
+static inline void __xen_pmd_clear(pmd_t *pmd)
+{
+	*pmd = __pmd(0);
+}
+
 static inline void pud_clear(pud_t *pudp)
 {
 	pgdval_t pgd;
@@ -96,6 +101,11 @@ static inline void pud_clear(pud_t *pudp
 		xen_tlb_flush();
 }
 
+static inline void __xen_pud_clear(pud_t *pudp)
+{
+	*pudp = __pud(0);
+}
+
 #define pud_page(pud) \
 ((struct page *) __va(pud_val(pud) & PAGE_MASK))
 
Index: head-2008-05-19/include/asm-x86/mach-xen/asm/pgtable_64.h
===================================================================
--- head-2008-05-19.orig/include/asm-x86/mach-xen/asm/pgtable_64.h	2008-05-19 10:57:24.000000000 +0200
+++ head-2008-05-19/include/asm-x86/mach-xen/asm/pgtable_64.h	2008-05-19 10:57:25.000000000 +0200
@@ -109,6 +109,11 @@ static inline void xen_pmd_clear(pmd_t *
 	xen_set_pmd(pmd, xen_make_pmd(0));
 }
 
+static inline void __xen_pmd_clear(pmd_t *pmd)
+{
+	*pmd = xen_make_pmd(0);
+}
+
 static inline void xen_set_pud(pud_t *pudp, pud_t pud)
 {
 	xen_l3_entry_update(pudp, pud);
@@ -119,6 +124,11 @@ static inline void xen_pud_clear(pud_t *
 	xen_set_pud(pud, xen_make_pud(0));
 }
 
+static inline void __xen_pud_clear(pud_t *pud)
+{
+	*pud = xen_make_pud(0);
+}
+
 #define __user_pgd(pgd) ((pgd) + PTRS_PER_PGD)
 
 static inline void xen_set_pgd(pgd_t *pgdp, pgd_t pgd)
@@ -132,6 +142,12 @@ static inline void xen_pgd_clear(pgd_t *
 	xen_set_pgd(__user_pgd(pgd), xen_make_pgd(0));
 }
 
+static inline void __xen_pgd_clear(pgd_t *pgd)
+{
+	*pgd = xen_make_pgd(0);
+	*__user_pgd(pgd) = xen_make_pgd(0);
+}
+
 #define pte_same(a, b)		((a).pte == (b).pte)
 
 #endif /* !__ASSEMBLY__ */
Index: head-2008-05-19/mm/memory.c
===================================================================
--- head-2008-05-19.orig/mm/memory.c	2008-05-19 10:49:46.000000000 +0200
+++ head-2008-05-19/mm/memory.c	2008-05-19 10:57:25.000000000 +0200
@@ -132,10 +132,11 @@ void pmd_clear_bad(pmd_t *pmd)
  * Note: this doesn't free the actual pages themselves. That
  * has been handled earlier when unmapping all the memory regions.
  */
-static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd)
+static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
+			   unsigned long addr)
 {
 	pgtable_t token = pmd_pgtable(*pmd);
-	pmd_clear(pmd);
+	pmd_clear_full(tlb->mm, addr, pmd, tlb->fullmm);
 	pte_free_tlb(tlb, token);
 	tlb->mm->nr_ptes--;
 }
@@ -154,7 +155,7 @@ static inline void free_pmd_range(struct
 		next = pmd_addr_end(addr, end);
 		if (pmd_none_or_clear_bad(pmd))
 			continue;
-		free_pte_range(tlb, pmd);
+		free_pte_range(tlb, pmd, addr);
 	} while (pmd++, addr = next, addr != end);
 
 	start &= PUD_MASK;
@@ -169,7 +170,7 @@ static inline void free_pmd_range(struct
 		return;
 
 	pmd = pmd_offset(pud, start);
-	pud_clear(pud);
+	pud_clear_full(tlb->mm, start, pud, tlb->fullmm);
 	pmd_free_tlb(tlb, pmd);
 }
 
@@ -202,7 +203,7 @@ static inline void free_pud_range(struct
 		return;
 
 	pud = pud_offset(pgd, start);
-	pgd_clear(pgd);
+	pgd_clear_full(tlb->mm, start, pgd, tlb->fullmm);
 	pud_free_tlb(tlb, pud);
 }
 

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [RFC] linux: add p[mug]d_clear_full() accessors
  2008-05-20 14:42 [RFC] linux: add p[mug]d_clear_full() accessors Jan Beulich
@ 2008-05-20 14:46 ` Keir Fraser
  2008-05-20 15:17 ` Jeremy Fitzhardinge
  1 sibling, 0 replies; 7+ messages in thread
From: Keir Fraser @ 2008-05-20 14:46 UTC (permalink / raw)
  To: Jan Beulich, xen-devel

We'd certainly be happy to have it in the 2.6.18 tree. But of course that is
a dead-end with respect to upstream aspirations for this patch.

 -- Keir

On 20/5/08 15:42, "Jan Beulich" <jbeulich@novell.com> wrote:

> .. to avoid using hypercalls for clearing dead (un-pinned) page tables.
> This eliminates well over a quarter of a million hypercalls during
> kernel builds (up to about a million depending on the configuration).
> 
> This is against 2.6.25.4, it does not apply to the 2.6.18 tree (I'd
> make this effort only if the patch is desired into that tree). The
> primary goal of sending this is to find out whether the required change
> to include/asm-generic/pgtable.h and mm/memory.c is considered
> worthwhile (in which case I'd make an attempt at getting this accepted
> upstream).
> 
> Signed-off-by: Jan Beulich <jbeulich@novell.com>
> 
> Index: head-2008-05-19/include/asm-generic/pgtable.h
> ===================================================================
> --- head-2008-05-19.orig/include/asm-generic/pgtable.h 2008-05-19
> 10:48:15.000000000 +0200
> +++ head-2008-05-19/include/asm-generic/pgtable.h 2008-05-19
> 10:57:25.000000000 +0200
> @@ -133,6 +133,18 @@ static inline void ptep_set_wrprotect(st
>  #define move_pte(pte, prot, old_addr, new_addr) (pte)
>  #endif
>  
> +#ifndef __HAVE_ARCH_PMD_CLEAR_FULL
> +#define pmd_clear_full(mm, addr, pmd, full) pmd_clear(pmd)
> +#endif
> +
> +#ifndef __HAVE_ARCH_PUD_CLEAR_FULL
> +#define pud_clear_full(mm, addr, pud, full) pud_clear(pud)
> +#endif
> +
> +#ifndef __HAVE_ARCH_PGD_CLEAR_FULL
> +#define pgd_clear_full(mm, addr, pgd, full) pgd_clear(pgd)
> +#endif
> +
>  /*
>   * When walking page tables, get the address of the next boundary,
>   * or the end address of the range if that comes earlier.  Although no
> Index: head-2008-05-19/include/asm-x86/mach-xen/asm/pgtable.h
> ===================================================================
> --- head-2008-05-19.orig/include/asm-x86/mach-xen/asm/pgtable.h 2008-05-19
> 10:57:24.000000000 +0200
> +++ head-2008-05-19/include/asm-x86/mach-xen/asm/pgtable.h 2008-05-19
> 10:57:25.000000000 +0200
> @@ -397,6 +397,22 @@ static inline pte_t ptep_get_and_clear(s
>  
>  pte_t xen_ptep_get_and_clear_full(struct vm_area_struct *, unsigned long,
> pte_t *, int);
>  
> +#define __HAVE_ARCH_PMD_CLEAR_FULL
> +#define pmd_clear_full(mm, addr, pmd, full)   \
> + (!mm_is_pinned(mm) ? __xen_pmd_clear(pmd) : xen_pmd_clear(pmd))
> +
> +#ifndef __PAGETABLE_PMD_FOLDED
> +#define __HAVE_ARCH_PUD_CLEAR_FULL
> +#define pud_clear_full(mm, addr, pud, full)   \
> + (!mm_is_pinned(mm) ? __xen_pud_clear(pud) : xen_pud_clear(pud))
> +#endif
> +
> +#ifndef __PAGETABLE_PUD_FOLDED
> +#define __HAVE_ARCH_PGD_CLEAR_FULL
> +#define pgd_clear_full(mm, addr, pgd, full)   \
> + (!mm_is_pinned(mm) ? __xen_pgd_clear(pgd) : xen_pgd_clear(pgd))
> +#endif
> +
>  #define __HAVE_ARCH_PTEP_SET_WRPROTECT
>  static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long
> addr, pte_t *ptep)
>  {
> Index: head-2008-05-19/include/asm-x86/mach-xen/asm/pgtable-2level.h
> ===================================================================
> --- 
> head-2008-05-19.orig/include/asm-x86/mach-xen/asm/pgtable-2level.h 2008-05-19
> 10:49:58.000000000 +0200
> +++ head-2008-05-19/include/asm-x86/mach-xen/asm/pgtable-2level.h 2008-05-19
> 10:57:25.000000000 +0200
> @@ -35,6 +35,11 @@ static inline void xen_pmd_clear(pmd_t *
>  
>  #define __xen_pte_clear(ptep) xen_set_pte(ptep, __pte(0))
>  
> +static inline void __xen_pmd_clear(pmd_t *pmdp)
> +{
> + *pmdp = __pmd(0);
> +}
> +
>  #ifdef CONFIG_SMP
>  static inline pte_t xen_ptep_get_and_clear(pte_t *xp, pte_t res)
>  {
> Index: head-2008-05-19/include/asm-x86/mach-xen/asm/pgtable-3level.h
> ===================================================================
> --- 
> head-2008-05-19.orig/include/asm-x86/mach-xen/asm/pgtable-3level.h 2008-05-19
> 10:49:58.000000000 +0200
> +++ head-2008-05-19/include/asm-x86/mach-xen/asm/pgtable-3level.h 2008-05-19
> 10:57:25.000000000 +0200
> @@ -76,6 +76,11 @@ static inline void xen_pmd_clear(pmd_t *
> xen_l2_entry_update(pmd, __pmd(0));
>  }
>  
> +static inline void __xen_pmd_clear(pmd_t *pmd)
> +{
> + *pmd = __pmd(0);
> +}
> +
>  static inline void pud_clear(pud_t *pudp)
>  {
> pgdval_t pgd;
> @@ -96,6 +101,11 @@ static inline void pud_clear(pud_t *pudp
> xen_tlb_flush();
>  }
>  
> +static inline void __xen_pud_clear(pud_t *pudp)
> +{
> + *pudp = __pud(0);
> +}
> +
>  #define pud_page(pud) \
>  ((struct page *) __va(pud_val(pud) & PAGE_MASK))
>  
> Index: head-2008-05-19/include/asm-x86/mach-xen/asm/pgtable_64.h
> ===================================================================
> --- head-2008-05-19.orig/include/asm-x86/mach-xen/asm/pgtable_64.h 2008-05-19
> 10:57:24.000000000 +0200
> +++ head-2008-05-19/include/asm-x86/mach-xen/asm/pgtable_64.h 2008-05-19
> 10:57:25.000000000 +0200
> @@ -109,6 +109,11 @@ static inline void xen_pmd_clear(pmd_t *
> xen_set_pmd(pmd, xen_make_pmd(0));
>  }
>  
> +static inline void __xen_pmd_clear(pmd_t *pmd)
> +{
> + *pmd = xen_make_pmd(0);
> +}
> +
>  static inline void xen_set_pud(pud_t *pudp, pud_t pud)
>  {
> xen_l3_entry_update(pudp, pud);
> @@ -119,6 +124,11 @@ static inline void xen_pud_clear(pud_t *
> xen_set_pud(pud, xen_make_pud(0));
>  }
>  
> +static inline void __xen_pud_clear(pud_t *pud)
> +{
> + *pud = xen_make_pud(0);
> +}
> +
>  #define __user_pgd(pgd) ((pgd) + PTRS_PER_PGD)
>  
>  static inline void xen_set_pgd(pgd_t *pgdp, pgd_t pgd)
> @@ -132,6 +142,12 @@ static inline void xen_pgd_clear(pgd_t *
> xen_set_pgd(__user_pgd(pgd), xen_make_pgd(0));
>  }
>  
> +static inline void __xen_pgd_clear(pgd_t *pgd)
> +{
> + *pgd = xen_make_pgd(0);
> + *__user_pgd(pgd) = xen_make_pgd(0);
> +}
> +
>  #define pte_same(a, b)  ((a).pte == (b).pte)
>  
>  #endif /* !__ASSEMBLY__ */
> Index: head-2008-05-19/mm/memory.c
> ===================================================================
> --- head-2008-05-19.orig/mm/memory.c 2008-05-19 10:49:46.000000000 +0200
> +++ head-2008-05-19/mm/memory.c 2008-05-19 10:57:25.000000000 +0200
> @@ -132,10 +132,11 @@ void pmd_clear_bad(pmd_t *pmd)
>   * Note: this doesn't free the actual pages themselves. That
>   * has been handled earlier when unmapping all the memory regions.
>   */
> -static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd)
> +static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
> +      unsigned long addr)
>  {
> pgtable_t token = pmd_pgtable(*pmd);
> - pmd_clear(pmd);
> + pmd_clear_full(tlb->mm, addr, pmd, tlb->fullmm);
> pte_free_tlb(tlb, token);
> tlb->mm->nr_ptes--;
>  }
> @@ -154,7 +155,7 @@ static inline void free_pmd_range(struct
> next = pmd_addr_end(addr, end);
> if (pmd_none_or_clear_bad(pmd))
> continue;
> -  free_pte_range(tlb, pmd);
> +  free_pte_range(tlb, pmd, addr);
> } while (pmd++, addr = next, addr != end);
>  
> start &= PUD_MASK;
> @@ -169,7 +170,7 @@ static inline void free_pmd_range(struct
> return;
>  
> pmd = pmd_offset(pud, start);
> - pud_clear(pud);
> + pud_clear_full(tlb->mm, start, pud, tlb->fullmm);
> pmd_free_tlb(tlb, pmd);
>  }
>  
> @@ -202,7 +203,7 @@ static inline void free_pud_range(struct
> return;
>  
> pud = pud_offset(pgd, start);
> - pgd_clear(pgd);
> + pgd_clear_full(tlb->mm, start, pgd, tlb->fullmm);
> pud_free_tlb(tlb, pud);
>  }
>  
> 
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xensource.com
> http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [RFC] linux: add p[mug]d_clear_full() accessors
  2008-05-20 14:42 [RFC] linux: add p[mug]d_clear_full() accessors Jan Beulich
  2008-05-20 14:46 ` Keir Fraser
@ 2008-05-20 15:17 ` Jeremy Fitzhardinge
  2008-05-20 15:54   ` Jan Beulich
  1 sibling, 1 reply; 7+ messages in thread
From: Jeremy Fitzhardinge @ 2008-05-20 15:17 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel

Jan Beulich wrote:
> .. to avoid using hypercalls for clearing dead (un-pinned) page tables.
> This eliminates well over a quarter of a million hypercalls during
> kernel builds (up to about a million depending on the configuration).
>   

Does it have much effect on performance?

> This is against 2.6.25.4, it does not apply to the 2.6.18 tree (I'd
> make this effort only if the patch is desired into that tree). The
> primary goal of sending this is to find out whether the required change
> to include/asm-generic/pgtable.h and mm/memory.c is considered
> worthwhile (in which case I'd make an attempt at getting this accepted
> upstream).
>   

Is this based on your forward-port of the Xen patches?  Stock 2.6.25.4 
doesn't have mm_is_pinned().

In fact, in pvops-Xen, I set PG_pinned on all pinned pagetable pages 
(not just the top level), so you can tell whether you're updating a 
pinned pgd/pud/pmd without needing an mm on hand.  So I think this 
optimisation can be implemented in current Linux entirely within the 
Xen-specific code.

    J

> Signed-off-by: Jan Beulich <jbeulich@novell.com>
>
> Index: head-2008-05-19/include/asm-generic/pgtable.h
> ===================================================================
> --- head-2008-05-19.orig/include/asm-generic/pgtable.h	2008-05-19 10:48:15.000000000 +0200
> +++ head-2008-05-19/include/asm-generic/pgtable.h	2008-05-19 10:57:25.000000000 +0200
> @@ -133,6 +133,18 @@ static inline void ptep_set_wrprotect(st
>  #define move_pte(pte, prot, old_addr, new_addr)	(pte)
>  #endif
>  
> +#ifndef __HAVE_ARCH_PMD_CLEAR_FULL
> +#define pmd_clear_full(mm, addr, pmd, full) pmd_clear(pmd)
> +#endif
> +
> +#ifndef __HAVE_ARCH_PUD_CLEAR_FULL
> +#define pud_clear_full(mm, addr, pud, full) pud_clear(pud)
> +#endif
> +
> +#ifndef __HAVE_ARCH_PGD_CLEAR_FULL
> +#define pgd_clear_full(mm, addr, pgd, full) pgd_clear(pgd)
> +#endif
> +
>  /*
>   * When walking page tables, get the address of the next boundary,
>   * or the end address of the range if that comes earlier.  Although no
> Index: head-2008-05-19/include/asm-x86/mach-xen/asm/pgtable.h
> ===================================================================
> --- head-2008-05-19.orig/include/asm-x86/mach-xen/asm/pgtable.h	2008-05-19 10:57:24.000000000 +0200
> +++ head-2008-05-19/include/asm-x86/mach-xen/asm/pgtable.h	2008-05-19 10:57:25.000000000 +0200
> @@ -397,6 +397,22 @@ static inline pte_t ptep_get_and_clear(s
>  
>  pte_t xen_ptep_get_and_clear_full(struct vm_area_struct *, unsigned long, pte_t *, int);
>  
> +#define __HAVE_ARCH_PMD_CLEAR_FULL
> +#define pmd_clear_full(mm, addr, pmd, full)			\
> +	(!mm_is_pinned(mm) ? __xen_pmd_clear(pmd) : xen_pmd_clear(pmd))
> +
> +#ifndef __PAGETABLE_PMD_FOLDED
> +#define __HAVE_ARCH_PUD_CLEAR_FULL
> +#define pud_clear_full(mm, addr, pud, full)			\
> +	(!mm_is_pinned(mm) ? __xen_pud_clear(pud) : xen_pud_clear(pud))
> +#endif
> +
> +#ifndef __PAGETABLE_PUD_FOLDED
> +#define __HAVE_ARCH_PGD_CLEAR_FULL
> +#define pgd_clear_full(mm, addr, pgd, full)			\
> +	(!mm_is_pinned(mm) ? __xen_pgd_clear(pgd) : xen_pgd_clear(pgd))
> +#endif
> +
>  #define __HAVE_ARCH_PTEP_SET_WRPROTECT
>  static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
>  {
> Index: head-2008-05-19/include/asm-x86/mach-xen/asm/pgtable-2level.h
> ===================================================================
> --- head-2008-05-19.orig/include/asm-x86/mach-xen/asm/pgtable-2level.h	2008-05-19 10:49:58.000000000 +0200
> +++ head-2008-05-19/include/asm-x86/mach-xen/asm/pgtable-2level.h	2008-05-19 10:57:25.000000000 +0200
> @@ -35,6 +35,11 @@ static inline void xen_pmd_clear(pmd_t *
>  
>  #define __xen_pte_clear(ptep) xen_set_pte(ptep, __pte(0))
>  
> +static inline void __xen_pmd_clear(pmd_t *pmdp)
> +{
> +	*pmdp = __pmd(0);
> +}
> +
>  #ifdef CONFIG_SMP
>  static inline pte_t xen_ptep_get_and_clear(pte_t *xp, pte_t res)
>  {
> Index: head-2008-05-19/include/asm-x86/mach-xen/asm/pgtable-3level.h
> ===================================================================
> --- head-2008-05-19.orig/include/asm-x86/mach-xen/asm/pgtable-3level.h	2008-05-19 10:49:58.000000000 +0200
> +++ head-2008-05-19/include/asm-x86/mach-xen/asm/pgtable-3level.h	2008-05-19 10:57:25.000000000 +0200
> @@ -76,6 +76,11 @@ static inline void xen_pmd_clear(pmd_t *
>  	xen_l2_entry_update(pmd, __pmd(0));
>  }
>  
> +static inline void __xen_pmd_clear(pmd_t *pmd)
> +{
> +	*pmd = __pmd(0);
> +}
> +
>  static inline void pud_clear(pud_t *pudp)
>  {
>  	pgdval_t pgd;
> @@ -96,6 +101,11 @@ static inline void pud_clear(pud_t *pudp
>  		xen_tlb_flush();
>  }
>  
> +static inline void __xen_pud_clear(pud_t *pudp)
> +{
> +	*pudp = __pud(0);
> +}
> +
>  #define pud_page(pud) \
>  ((struct page *) __va(pud_val(pud) & PAGE_MASK))
>  
> Index: head-2008-05-19/include/asm-x86/mach-xen/asm/pgtable_64.h
> ===================================================================
> --- head-2008-05-19.orig/include/asm-x86/mach-xen/asm/pgtable_64.h	2008-05-19 10:57:24.000000000 +0200
> +++ head-2008-05-19/include/asm-x86/mach-xen/asm/pgtable_64.h	2008-05-19 10:57:25.000000000 +0200
> @@ -109,6 +109,11 @@ static inline void xen_pmd_clear(pmd_t *
>  	xen_set_pmd(pmd, xen_make_pmd(0));
>  }
>  
> +static inline void __xen_pmd_clear(pmd_t *pmd)
> +{
> +	*pmd = xen_make_pmd(0);
> +}
> +
>  static inline void xen_set_pud(pud_t *pudp, pud_t pud)
>  {
>  	xen_l3_entry_update(pudp, pud);
> @@ -119,6 +124,11 @@ static inline void xen_pud_clear(pud_t *
>  	xen_set_pud(pud, xen_make_pud(0));
>  }
>  
> +static inline void __xen_pud_clear(pud_t *pud)
> +{
> +	*pud = xen_make_pud(0);
> +}
> +
>  #define __user_pgd(pgd) ((pgd) + PTRS_PER_PGD)
>  
>  static inline void xen_set_pgd(pgd_t *pgdp, pgd_t pgd)
> @@ -132,6 +142,12 @@ static inline void xen_pgd_clear(pgd_t *
>  	xen_set_pgd(__user_pgd(pgd), xen_make_pgd(0));
>  }
>  
> +static inline void __xen_pgd_clear(pgd_t *pgd)
> +{
> +	*pgd = xen_make_pgd(0);
> +	*__user_pgd(pgd) = xen_make_pgd(0);
> +}
> +
>  #define pte_same(a, b)		((a).pte == (b).pte)
>  
>  #endif /* !__ASSEMBLY__ */
> Index: head-2008-05-19/mm/memory.c
> ===================================================================
> --- head-2008-05-19.orig/mm/memory.c	2008-05-19 10:49:46.000000000 +0200
> +++ head-2008-05-19/mm/memory.c	2008-05-19 10:57:25.000000000 +0200
> @@ -132,10 +132,11 @@ void pmd_clear_bad(pmd_t *pmd)
>   * Note: this doesn't free the actual pages themselves. That
>   * has been handled earlier when unmapping all the memory regions.
>   */
> -static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd)
> +static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
> +			   unsigned long addr)
>  {
>  	pgtable_t token = pmd_pgtable(*pmd);
> -	pmd_clear(pmd);
> +	pmd_clear_full(tlb->mm, addr, pmd, tlb->fullmm);
>  	pte_free_tlb(tlb, token);
>  	tlb->mm->nr_ptes--;
>  }
> @@ -154,7 +155,7 @@ static inline void free_pmd_range(struct
>  		next = pmd_addr_end(addr, end);
>  		if (pmd_none_or_clear_bad(pmd))
>  			continue;
> -		free_pte_range(tlb, pmd);
> +		free_pte_range(tlb, pmd, addr);
>  	} while (pmd++, addr = next, addr != end);
>  
>  	start &= PUD_MASK;
> @@ -169,7 +170,7 @@ static inline void free_pmd_range(struct
>  		return;
>  
>  	pmd = pmd_offset(pud, start);
> -	pud_clear(pud);
> +	pud_clear_full(tlb->mm, start, pud, tlb->fullmm);
>  	pmd_free_tlb(tlb, pmd);
>  }
>  
> @@ -202,7 +203,7 @@ static inline void free_pud_range(struct
>  		return;
>  
>  	pud = pud_offset(pgd, start);
> -	pgd_clear(pgd);
> +	pgd_clear_full(tlb->mm, start, pgd, tlb->fullmm);
>  	pud_free_tlb(tlb, pud);
>  }
>  
>
>
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xensource.com
> http://lists.xensource.com/xen-devel
>   

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [RFC] linux: add p[mug]d_clear_full() accessors
  2008-05-20 15:17 ` Jeremy Fitzhardinge
@ 2008-05-20 15:54   ` Jan Beulich
  2008-05-20 20:03     ` Jeremy Fitzhardinge
  0 siblings, 1 reply; 7+ messages in thread
From: Jan Beulich @ 2008-05-20 15:54 UTC (permalink / raw)
  To: Jeremy Fitzhardinge; +Cc: xen-devel

>>> Jeremy Fitzhardinge <jeremy@goop.org> 20.05.08 17:17 >>>
>Is this based on your forward-port of the Xen patches?  Stock 2.6.25.4 
>doesn't have mm_is_pinned().

Yes.

>In fact, in pvops-Xen, I set PG_pinned on all pinned pagetable pages 
>(not just the top level), so you can tell whether you're updating a 
>pinned pgd/pud/pmd without needing an mm on hand.  So I think this 
>optimisation can be implemented in current Linux entirely within the 
>Xen-specific code.

Ah, I didn't realize that so far. However, 64-bit doesn't use a PG_*
flag for identifying pinned mm-s, because of a conflict in use of
PG_arch_1 in those older kernel versions. But as the conflict is gone
in recent kernel, I should indeed be able to synchronize 64-bit with
32-bit here, then follow your approach of marking all pinned page
tables, and finally get away without adding a new set of abstractions.

One question though - in our 2.6.23 merge (where pv-ops-Xen's
PG_pinned appeared as an alias of PG_owner_priv_1, and where
PG_arch_1 got assigned a meaning for native x86, so PG_pinned
for the traditional patches needed to change anyway) I intentionally
didn't follow pv-ops for our patches, since PG_pinned is among the
flags bad_page() checks for (in the XenSource tree, and I think this
should really also be done in upstream Linux), and hence re-using
the bit here would change behavior for other parts of the kernel.

Jan

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [RFC] linux: add p[mug]d_clear_full() accessors
  2008-05-20 15:54   ` Jan Beulich
@ 2008-05-20 20:03     ` Jeremy Fitzhardinge
  2008-05-21  6:47       ` Jan Beulich
  0 siblings, 1 reply; 7+ messages in thread
From: Jeremy Fitzhardinge @ 2008-05-20 20:03 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel

Jan Beulich wrote:
>> In fact, in pvops-Xen, I set PG_pinned on all pinned pagetable pages 
>> (not just the top level), so you can tell whether you're updating a 
>> pinned pgd/pud/pmd without needing an mm on hand.  So I think this 
>> optimisation can be implemented in current Linux entirely within the 
>> Xen-specific code.
>>     
>
> Ah, I didn't realize that so far. However, 64-bit doesn't use a PG_*
> flag for identifying pinned mm-s, because of a conflict in use of
> PG_arch_1 in those older kernel versions. But as the conflict is gone
> in recent kernel, I should indeed be able to synchronize 64-bit with
> 32-bit here, then follow your approach of marking all pinned page
> tables, and finally get away without adding a new set of abstractions.
>   

Yes, 64-bit has masses of free page flags, so we could allocate new ones 
if need be.  But owner_priv_1 is definitely available for Xen's use, so 
it may as well be common (indeed, I'd expect all the existing code to 
just work on 64-bit at the moment, though perhaps its missing some 
4th-level pagetable stuff in places, and testing).

> One question though - in our 2.6.23 merge (where pv-ops-Xen's
> PG_pinned appeared as an alias of PG_owner_priv_1, and where
> PG_arch_1 got assigned a meaning for native x86, so PG_pinned
> for the traditional patches needed to change anyway) I intentionally
> didn't follow pv-ops for our patches, since PG_pinned is among the
> flags bad_page() checks for (in the XenSource tree, and I think this
> should really also be done in upstream Linux), and hence re-using
> the bit here would change behavior for other parts of the kernel.

I don't think so.  So long as its clear by the time you free the page, 
it doesn't matter how it gets used in the meantime (after all, you 
should never free a pinned pagetable page).

In general, almost all the page flags are available for our use in 
pagetable pages.  The pages are privately owned by Xen, and don't 
participate in any of the mm/ code, so there's no conflict if we use the 
bits for something else.  For example, in my Xen save/restore patches, 
I've overloaded PG_dirty to mean pinned-for-suspend, and there's no 
conflict with any other users.

    J

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [RFC] linux: add p[mug]d_clear_full() accessors
  2008-05-20 20:03     ` Jeremy Fitzhardinge
@ 2008-05-21  6:47       ` Jan Beulich
  2008-05-21  9:02         ` Jeremy Fitzhardinge
  0 siblings, 1 reply; 7+ messages in thread
From: Jan Beulich @ 2008-05-21  6:47 UTC (permalink / raw)
  To: Jeremy Fitzhardinge; +Cc: xen-devel

>> One question though - in our 2.6.23 merge (where pv-ops-Xen's
>> PG_pinned appeared as an alias of PG_owner_priv_1, and where
>> PG_arch_1 got assigned a meaning for native x86, so PG_pinned
>> for the traditional patches needed to change anyway) I intentionally
>> didn't follow pv-ops for our patches, since PG_pinned is among the
>> flags bad_page() checks for (in the XenSource tree, and I think this
>> should really also be done in upstream Linux), and hence re-using
>> the bit here would change behavior for other parts of the kernel.
>
>I don't think so.  So long as its clear by the time you free the page, 
>it doesn't matter how it gets used in the meantime (after all, you 
>should never free a pinned pagetable page).

But isn't that one of the purposes of those page flags checks - checking
whether e.g. a page may validly get freed (free_pages_check())? All
of bad_page(), free_pages_check(), and prep_new_page() currently
add PG_pinned into the set of flags cleared/checked, and I continue to
think that that is the right thing to do.

Jan

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [RFC] linux: add p[mug]d_clear_full() accessors
  2008-05-21  6:47       ` Jan Beulich
@ 2008-05-21  9:02         ` Jeremy Fitzhardinge
  0 siblings, 0 replies; 7+ messages in thread
From: Jeremy Fitzhardinge @ 2008-05-21  9:02 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel

Jan Beulich wrote:
>>> One question though - in our 2.6.23 merge (where pv-ops-Xen's
>>> PG_pinned appeared as an alias of PG_owner_priv_1, and where
>>> PG_arch_1 got assigned a meaning for native x86, so PG_pinned
>>> for the traditional patches needed to change anyway) I intentionally
>>> didn't follow pv-ops for our patches, since PG_pinned is among the
>>> flags bad_page() checks for (in the XenSource tree, and I think this
>>> should really also be done in upstream Linux), and hence re-using
>>> the bit here would change behavior for other parts of the kernel.
>>>       
>> I don't think so.  So long as its clear by the time you free the page, 
>> it doesn't matter how it gets used in the meantime (after all, you 
>> should never free a pinned pagetable page).
>>     
>
> But isn't that one of the purposes of those page flags checks - checking
> whether e.g. a page may validly get freed (free_pages_check())? All
> of bad_page(), free_pages_check(), and prep_new_page() currently
> add PG_pinned into the set of flags cleared/checked, and I continue to
> think that that is the right thing to do.

Well, yes, it would be helpful for free_pages_check to test whether 
PG_owner_priv_1 is set on a page you're attempting to free, since that 
would definitely be a bug.  But its also a bug which would turn up 
fairly quickly anyway, since Xen would complain about any subsequent 
users of that page.  Regardless, its not actually a bug that has turned 
up, since the lifespan of pagetable pages is pretty well controlled, and 
there's only a couple of places where they get allocated and freed.

So, not a big issue either way, I think.

    J

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2008-05-21  9:02 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-05-20 14:42 [RFC] linux: add p[mug]d_clear_full() accessors Jan Beulich
2008-05-20 14:46 ` Keir Fraser
2008-05-20 15:17 ` Jeremy Fitzhardinge
2008-05-20 15:54   ` Jan Beulich
2008-05-20 20:03     ` Jeremy Fitzhardinge
2008-05-21  6:47       ` Jan Beulich
2008-05-21  9:02         ` Jeremy Fitzhardinge

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.