* [PATCH] 8/8 Create MMU 2/3 level accessors in the sub-arch layer (i386)
@ 2005-08-06 7:23 Zachary Amsden
2005-08-07 1:16 ` Chris Wright
0 siblings, 1 reply; 10+ messages in thread
From: Zachary Amsden @ 2005-08-06 7:23 UTC (permalink / raw)
To: akpm, chrisw, linux-kernel, davej, hpa, Riley, pratap, zach,
chrisl
[-- Attachment #1: Type: text/plain, Size: 1 bytes --]
[-- Attachment #2: subarch-mmu --]
[-- Type: text/plain, Size: 11102 bytes --]
i386 Transparent paravirtualization sub-arch patch #8.
Transparent paravirtualization support for MMU operations.
All operations which update live page table entries have been moved to the
sub-architecture layer. Unfortunately, this required yet another parallel set
of pgtable-Nlevel-ops.h files, but this avoids the ugliness of having to use
#ifdef's all over the code.
This is pure code motion. Anything else would be a bug.
Signed-off-by: Zachary Amsden <zach@vmware.com>
Index: linux-2.6.13/include/asm-i386/pgtable-2level.h
===================================================================
--- linux-2.6.13.orig/include/asm-i386/pgtable-2level.h 2005-08-04 13:42:31.000000000 -0700
+++ linux-2.6.13/include/asm-i386/pgtable-2level.h 2005-08-04 14:02:16.000000000 -0700
@@ -8,17 +8,6 @@
#define pgd_ERROR(e) \
printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
-/*
- * Certain architectures need to do special things when PTEs
- * within a page table are directly modified. Thus, the following
- * hook is made available.
- */
-#define set_pte(pteptr, pteval) (*(pteptr) = pteval)
-#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval)
-#define set_pte_atomic(pteptr, pteval) set_pte(pteptr,pteval)
-#define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval))
-
-#define ptep_get_and_clear(mm,addr,xp) __pte(xchg(&(xp)->pte_low, 0))
#define pte_same(a, b) ((a).pte_low == (b).pte_low)
#define pte_page(x) pfn_to_page(pte_pfn(x))
#define pte_none(x) (!(x).pte_low)
Index: linux-2.6.13/include/asm-i386/pgtable-3level.h
===================================================================
--- linux-2.6.13.orig/include/asm-i386/pgtable-3level.h 2005-08-04 13:42:31.000000000 -0700
+++ linux-2.6.13/include/asm-i386/pgtable-3level.h 2005-08-04 14:02:16.000000000 -0700
@@ -44,28 +44,6 @@
return pte_x(pte);
}
-/* Rules for using set_pte: the pte being assigned *must* be
- * either not present or in a state where the hardware will
- * not attempt to update the pte. In places where this is
- * not possible, use pte_get_and_clear to obtain the old pte
- * value and then use set_pte to update it. -ben
- */
-static inline void set_pte(pte_t *ptep, pte_t pte)
-{
- ptep->pte_high = pte.pte_high;
- smp_wmb();
- ptep->pte_low = pte.pte_low;
-}
-#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval)
-
-#define __HAVE_ARCH_SET_PTE_ATOMIC
-#define set_pte_atomic(pteptr,pteval) \
- set_64bit((unsigned long long *)(pteptr),pte_val(pteval))
-#define set_pmd(pmdptr,pmdval) \
- set_64bit((unsigned long long *)(pmdptr),pmd_val(pmdval))
-#define set_pud(pudptr,pudval) \
- (*(pudptr) = (pudval))
-
/*
* Pentium-II erratum A13: in PAE mode we explicitly have to flush
* the TLB via cr3 if the top-level pgd is changed...
@@ -90,18 +68,6 @@
#define pmd_offset(pud, address) ((pmd_t *) pud_page(*(pud)) + \
pmd_index(address))
-static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
-{
- pte_t res;
-
- /* xchg acts as a barrier before the setting of the high bits */
- res.pte_low = xchg(&ptep->pte_low, 0);
- res.pte_high = ptep->pte_high;
- ptep->pte_high = 0;
-
- return res;
-}
-
static inline int pte_same(pte_t a, pte_t b)
{
return a.pte_low == b.pte_low && a.pte_high == b.pte_high;
Index: linux-2.6.13/include/asm-i386/pgtable.h
===================================================================
--- linux-2.6.13.orig/include/asm-i386/pgtable.h 2005-08-04 13:47:07.000000000 -0700
+++ linux-2.6.13/include/asm-i386/pgtable.h 2005-08-04 14:02:29.000000000 -0700
@@ -201,11 +201,9 @@
extern unsigned long pg0[];
#define pte_present(x) ((x).pte_low & (_PAGE_PRESENT | _PAGE_PROTNONE))
-#define pte_clear(mm,addr,xp) do { set_pte_at(mm, addr, xp, __pte(0)); } while (0)
#define pmd_none(x) (!pmd_val(x))
#define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT)
-#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0)
#define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE)
@@ -243,20 +241,12 @@
#else
# include <asm/pgtable-2level.h>
#endif
+#include <pgtable-ops.h>
+
+#define set_pte_at(mm,addr,pteptr,pteval) set_pte(pteptr,pteval)
-static inline int ptep_test_and_clear_dirty(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
-{
- if (!pte_dirty(*ptep))
- return 0;
- return test_and_clear_bit(_PAGE_BIT_DIRTY, &ptep->pte_low);
-}
-
-static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
-{
- if (!pte_young(*ptep))
- return 0;
- return test_and_clear_bit(_PAGE_BIT_ACCESSED, &ptep->pte_low);
-}
+#define pte_clear(mm,addr,xp) do { set_pte_at(mm, addr, xp, __pte(0)); } while (0)
+#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0)
static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long addr, pte_t *ptep, int full)
{
@@ -270,26 +260,6 @@
return pte;
}
-static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
-{
- clear_bit(_PAGE_BIT_RW, &ptep->pte_low);
-}
-
-/*
- * clone_pgd_range(pgd_t *dst, pgd_t *src, int count);
- *
- * dst - pointer to pgd range anwhere on a pgd page
- * src - ""
- * count - the number of pgds to copy.
- *
- * dst and src can be on the same page, but the range must not overlap,
- * and must not cross a page boundary.
- */
-static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
-{
- memcpy(dst, src, count * sizeof(pgd_t));
-}
-
/*
* Macro to mark a page protection value as "uncacheable". On processors which do not support
* it, this is a no-op.
@@ -414,14 +384,6 @@
* bit at the same time.
*/
#define update_mmu_cache(vma,address,pte) do { } while (0)
-#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
-#define ptep_set_access_flags(__vma, __address, __ptep, __entry, __dirty) \
- do { \
- if (__dirty) { \
- (__ptep)->pte_low = (__entry).pte_low; \
- flush_tlb_page(__vma, __address); \
- } \
- } while (0)
#endif /* !__ASSEMBLY__ */
Index: linux-2.6.13/include/asm-i386/mach-default/pgtable-2level-ops.h
===================================================================
--- linux-2.6.13.orig/include/asm-i386/mach-default/pgtable-2level-ops.h 2005-08-04 14:02:04.000000000 -0700
+++ linux-2.6.13/include/asm-i386/mach-default/pgtable-2level-ops.h 2005-08-04 14:02:16.000000000 -0700
@@ -0,0 +1,15 @@
+#ifndef _MACH_PGTABLE_LEVEL_OPS_H
+#define _MACH_PGTABLE_LEVEL_OPS_H
+
+/*
+ * Certain architectures need to do special things when PTEs
+ * within a page table are directly modified. Thus, the following
+ * hook is made available.
+ */
+#define set_pte(pteptr, pteval) (*(pteptr) = pteval)
+#define set_pte_atomic(pteptr, pteval) set_pte(pteptr,pteval)
+#define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval))
+
+#define ptep_get_and_clear(mm,addr,xp) __pte(xchg(&(xp)->pte_low, 0))
+
+#endif /* _MACH_PGTABLE_LEVEL_OPS_H */
Index: linux-2.6.13/include/asm-i386/mach-default/pgtable-3level-ops.h
===================================================================
--- linux-2.6.13.orig/include/asm-i386/mach-default/pgtable-3level-ops.h 2005-08-04 14:02:04.000000000 -0700
+++ linux-2.6.13/include/asm-i386/mach-default/pgtable-3level-ops.h 2005-08-04 14:02:16.000000000 -0700
@@ -0,0 +1,37 @@
+#ifndef _MACH_PGTABLE_LEVEL_OPS_H
+#define _MACH_PGTABLE_LEVEL_OPS_H
+
+/* Rules for using set_pte: the pte being assigned *must* be
+ * either not present or in a state where the hardware will
+ * not attempt to update the pte. In places where this is
+ * not possible, use ptep_get_and_clear to obtain the old pte
+ * value and then use set_pte to update it. -ben
+ */
+static inline void set_pte(pte_t *ptep, pte_t pte)
+{
+ ptep->pte_high = pte.pte_high;
+ smp_wmb();
+ ptep->pte_low = pte.pte_low;
+}
+
+#define __HAVE_ARCH_SET_PTE_ATOMIC
+#define set_pte_atomic(pteptr,pteval) \
+ set_64bit((unsigned long long *)(pteptr),pte_val(pteval))
+#define set_pmd(pmdptr,pmdval) \
+ set_64bit((unsigned long long *)(pmdptr),pmd_val(pmdval))
+#define set_pud(pudptr,pudval) \
+ (*(pudptr) = (pudval))
+
+static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+{
+ pte_t res;
+
+ /* xchg acts as a barrier before the setting of the high bits */
+ res.pte_low = xchg(&ptep->pte_low, 0);
+ res.pte_high = ptep->pte_high;
+ ptep->pte_high = 0;
+
+ return res;
+}
+
+#endif /* _MACH_PGTABLE_LEVEL_OPS_H */
Index: linux-2.6.13/include/asm-i386/mach-default/pgtable-ops.h
===================================================================
--- linux-2.6.13.orig/include/asm-i386/mach-default/pgtable-ops.h 2005-08-04 14:02:04.000000000 -0700
+++ linux-2.6.13/include/asm-i386/mach-default/pgtable-ops.h 2005-08-04 14:02:44.000000000 -0700
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2005, VMware, Inc.
+ *
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Send feedback to zach@vmware.com
+ *
+ */
+
+#ifndef _PGTABLE_OPS_H
+#define _PGTABLE_OPS_H
+
+#ifdef CONFIG_X86_PAE
+# include <pgtable-3level-ops.h>
+#else
+# include <pgtable-2level-ops.h>
+#endif
+
+static inline int ptep_test_and_clear_dirty(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
+{
+ if (!pte_dirty(*ptep))
+ return 0;
+ return test_and_clear_bit(_PAGE_BIT_DIRTY, &ptep->pte_low);
+}
+
+static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
+{
+ if (!pte_young(*ptep))
+ return 0;
+ return test_and_clear_bit(_PAGE_BIT_ACCESSED, &ptep->pte_low);
+}
+
+static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+{
+ clear_bit(_PAGE_BIT_RW, &ptep->pte_low);
+}
+
+/*
+ * clone_pgd_range(pgd_t *dst, pgd_t *src, int count);
+ *
+ * dst - pointer to pgd range anywhere on a pgd page
+ * src - ""
+ * count - the number of pgds to copy.
+ *
+ * dst and src can be on the same page, but the range must not overlap,
+ * and must not cross a page boundary.
+ */
+static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
+{
+ memcpy(dst, src, count * sizeof(pgd_t));
+}
+
+#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
+#define ptep_set_access_flags(__vma, __address, __ptep, __entry, __dirty) \
+ do { \
+ if (__dirty) { \
+ (__ptep)->pte_low = (__entry).pte_low; \
+ flush_tlb_page(__vma, __address); \
+ } \
+ } while (0)
+
+#endif /* _PGTABLE_OPS_H */
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] 8/8 Create MMU 2/3 level accessors in the sub-arch layer (i386)
[not found] <42F46558.9010202@vmware.com.suse.lists.linux.kernel>
@ 2005-08-06 11:37 ` Andi Kleen
2005-08-06 11:56 ` Christoph Hellwig
0 siblings, 1 reply; 10+ messages in thread
From: Andi Kleen @ 2005-08-06 11:37 UTC (permalink / raw)
To: Zachary Amsden; +Cc: linux-kernel
Zachary Amsden <zach@vmware.com> writes:
> i386 Transparent paravirtualization sub-arch patch #8.
>
> Transparent paravirtualization support for MMU operations.
>
> All operations which update live page table entries have been moved to the
> sub-architecture layer. Unfortunately, this required yet another parallel set
> of pgtable-Nlevel-ops.h files, but this avoids the ugliness of having to use
> #ifdef's all of the code.
>
> This is pure code motion. Anything else would be a bug.
I think that patch is really ugly - it makes hacking VM on i386
even more painful than it already is because it convolutes the file
structure even more. Hope it is not applied.
-Andi
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] 8/8 Create MMU 2/3 level accessors in the sub-arch layer (i386)
2005-08-06 11:37 ` [PATCH] 8/8 Create MMU 2/3 level accessors in the sub-arch layer (i386) Andi Kleen
@ 2005-08-06 11:56 ` Christoph Hellwig
2005-08-06 11:58 ` Andi Kleen
0 siblings, 1 reply; 10+ messages in thread
From: Christoph Hellwig @ 2005-08-06 11:56 UTC (permalink / raw)
To: Andi Kleen; +Cc: Zachary Amsden, linux-kernel
On Sat, Aug 06, 2005 at 01:37:31PM +0200, Andi Kleen wrote:
> Zachary Amsden <zach@vmware.com> writes:
>
> > i386 Transparent paravirtualization sub-arch patch #8.
> >
> > Transparent paravirtualization support for MMU operations.
> >
> > All operations which update live page table entries have been moved to the
> > sub-architecture layer. Unfortunately, this required yet another parallel set
> > of pgtable-Nlevel-ops.h files, but this avoids the ugliness of having to use
> > #ifdef's all of the code.
> >
> > This is pure code motion. Anything else would be a bug.
>
> I think that patch is really ugly - it makes hacking VM on i386
> even more painful than it already is because the convolutes the file
> structure even more. Hope it is not applied.
Especially as there's been no user shown for it, similar to all the other
ugly patches from vmware.
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] 8/8 Create MMU 2/3 level accessors in the sub-arch layer (i386)
2005-08-06 11:56 ` Christoph Hellwig
@ 2005-08-06 11:58 ` Andi Kleen
2005-08-06 12:01 ` Christoph Hellwig
0 siblings, 1 reply; 10+ messages in thread
From: Andi Kleen @ 2005-08-06 11:58 UTC (permalink / raw)
To: Christoph Hellwig, Andi Kleen, Zachary Amsden, linux-kernel
> > I think that patch is really ugly - it makes hacking VM on i386
> > even more painful than it already is because the convolutes the file
> > structure even more. Hope it is not applied.
>
> Especially as there's been no user shown for it, similar to all the other
> ugly patches from vmware.
Well, some of it can be counted as cleanup or even tuning like the excellent
switch_to patch. But not that one and some of the more intrusive patches.
-Andi
>
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] 8/8 Create MMU 2/3 level accessors in the sub-arch layer (i386)
2005-08-06 11:58 ` Andi Kleen
@ 2005-08-06 12:01 ` Christoph Hellwig
2005-08-06 18:28 ` Zachary Amsden
0 siblings, 1 reply; 10+ messages in thread
From: Christoph Hellwig @ 2005-08-06 12:01 UTC (permalink / raw)
To: Andi Kleen; +Cc: Christoph Hellwig, Zachary Amsden, linux-kernel
On Sat, Aug 06, 2005 at 01:58:36PM +0200, Andi Kleen wrote:
> > > I think that patch is really ugly - it makes hacking VM on i386
> > > even more painful than it already is because the convolutes the file
> > > structure even more. Hope it is not applied.
> >
> > Especially as there's been no user shown for it, similar to all the other
> > ugly patches from vmware.
>
> Well, some of it can be counted as cleanup or even tuning like the excellent
> switch_to patch. But not that one and some of the more intrusive patches.
Yeah, I said ugly ones specifically. There's been some nice previous ones,
but most in this series (all the move of stuff to subarches) are rather
horrible and lack lots of explanation.
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] 8/8 Create MMU 2/3 level accessors in the sub-arch layer (i386)
2005-08-06 12:01 ` Christoph Hellwig
@ 2005-08-06 18:28 ` Zachary Amsden
2005-08-06 22:58 ` Andrew Morton
0 siblings, 1 reply; 10+ messages in thread
From: Zachary Amsden @ 2005-08-06 18:28 UTC (permalink / raw)
To: Christoph Hellwig
Cc: Andi Kleen, linux-kernel, Rik van Riel, Chris Wright,
Pratap Subrahmanyam
Christoph Hellwig wrote:
>On Sat, Aug 06, 2005 at 01:58:36PM +0200, Andi Kleen wrote:
>
>
>>>>I think that patch is really ugly - it makes hacking VM on i386
>>>>even more painful than it already is because the convolutes the file
>>>>structure even more. Hope it is not applied.
>>>>
>>>>
>>>Especially as there's been no user shown for it, similar to all the other
>>>ugly patches from vmware.
>>>
>>>
>>Well, some of it can be counted as cleanup or even tuning like the excellent
>>switch_to patch. But not that one and some of the more intrusive patches.
>>
>>
>
>Yeah, I said ugly ones specificly. There's been some nice previous ones,
>but most in this series (all the move of stuff to subarches) are rather
>horrible and lack lots of explanation.
>
>
All of my previous patches have been aimed at fixing bugs, improving
performance, reliability and maintainability of the i386 architecture.
If you found something that didn't fit one of those categories in my
previous patches, then it is either not well enough explained or perhaps
inadvertently slipped through from one of my more radical trees - or it
could be a bug.
There is a simple explanation for all of this series. The goal is to
move all privileged instructions, sensitive instructions, and privilege
awareness into a layer where it can be overridden by new code without
disrupting the default architecture. On x86, there are a lot of
instructions - popf, iret, sgdt, and others which behave differently
under different privilege levels, but do not trap. These architectural
features must be redefined by any architecture which virtualizes the
x86, be it Xen, UML, or an alternative approach. Similarly, certain
privileged processor data structures (page tables, descriptor tables)
must be protected and accessed in a different manner if one is to
utilize the principles of paravirtualization to achieve high performance
inside of a virtual machine. I believe this series of patches is one
almost clean solution to this. There are obvious problems with the MMU
patch, and I'm still trying to come up with a way to properly address that.
That said, I am definitely seeking any feedback you have on how to
achieve this goal while being as clean and maintainable as possible - if
the Linux community is indeed interested in adopting a
paravirtualization approach. Looking from the most general view
possible, there are a couple of ways to do this in Linux:
1) Create a new architecture. This is the UML approach, and while it
has been successful there, it is difficult to maintain closeness to the
hardware architecture without introducing a maintenance burden. This
closeness is desirable if one is to achieve high performance and take
advantage of more processor specific features.
2) Use the sub-architecture strategy of x86. This approach has a
relatively small set of code movement to allow a new virtualized
sub-architecture to redefine the privileged and sensitive operations of
the processor, as well as to implement easily defined architectural
hints which employ higher level virtualization strategies.
3) Use #ifdef'd include/asm-i386 header files. While workable, this has
flaws - it is ugly, and it causes the hypervisor header files to sneak
into include/asm-i386 rather rapidly destroying maintainability for the
native code. This has been attempted before, but if someone were to
send those patches to LKML, I would expect them to be rapidly tarred and
feathered.
4) Clone entire asm-i386 header files and replace them using an include
path, potentially in the sub-architecture level. While this avoids any
diffs at all to the native asm-i386 headers, it needlessly duplicates a
lot of code, and this creates a greater maintenance burden for
somebody. Who that body is can be determined later, but this creates a
lot of extra work for that unfortunate person that is wasted time that
could easily be spent bettering Linux!
5) Use the HAVE_SUBARCH_PTE_ACCESSOR type approach (similar to the way
generic optimizable PTE operations are defined in asm-generic). I have
not yet investigated the feasibility of this type of approach, but it
seems workable. At least for the MMU patch, some combination of this
and other techniques might help make things a lot cleaner.
Do you have ideas? I'm open to all suggestions here. The only goal I
have is to make high performance virtualization support in Linux the
least disruptive event possible for all parties. Although I'm a bit
biased towards i386 from a coding point of view, some of these ideas can
cross architecture boundaries as well, so I'm open to feedback from all
parties.
Thanks,
Zach
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] 8/8 Create MMU 2/3 level accessors in the sub-arch layer (i386)
2005-08-06 18:28 ` Zachary Amsden
@ 2005-08-06 22:58 ` Andrew Morton
2005-08-06 23:02 ` Chris Wright
2005-08-07 10:39 ` Zachary Amsden
0 siblings, 2 replies; 10+ messages in thread
From: Andrew Morton @ 2005-08-06 22:58 UTC (permalink / raw)
To: Zachary Amsden; +Cc: hch, ak, linux-kernel, riel, chrisw, pratap
Zachary Amsden <zach@vmware.com> wrote:
>
> >Yeah, I said ugly ones specificly. There's been some nice previous ones,
> >but most in this series (all the move of stuff to subarches) are rather
> >horrible and lack lots of explanation.
> >
> >
>
> All of my previous patches have been aimed at fixing bugs, improving
> performance, reliability and maintinability of the i386 architecture.
Yup, with one or two semi-exceptions, all the patches up to this series
seem to be good general cleanups - certainly it's good to move all those
open-coded asm statements into single-site inlines and macros: people keep
on screwing them up.
We do need to wake the Xen people up, make sure that these changes suit
them as well, or at least don't screw them over (hard to see how it could
though).
> If you found something that didn't fit one of those categories in my
> previous patches, then it is either not well enough explained or perhaps
> inadvertently slipped through from one of my more radical trees - or it
> could be a bug.
>
> There is a simple explanation for all of this series. The goal is to
> move all privileged instructions, sensitive instructions, and privilege
> awareness into a layer where it can be overridden by new code without
> disrupting the default architecture. On x86, there are a lot of
> instructions - popf, iret, sgdt, and others which behave differently
> under different privilege levels, but do not trap. These architectural
> features must be redefined by any architecture which virtualizes the
> x86, be it Xen, UML, or an alternative approach. Similarly, certain
> privileged processor data structures (page tables, descriptor tables)
> must be protected and accessed in a different manner if one is to
> utilize the principles of paravirtualization to achieve high performance
> inside of a virtual machine. I believe this series of patches is one
> almost clean solution to this. There are obvious problems with the MMU
> patch, and I'm still trying to come up with a way to properly address that.
Yes this later series is in a different category and will require more
review/coordination/discussion/waking-up from the Xen team.
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] 8/8 Create MMU 2/3 level accessors in the sub-arch layer (i386)
2005-08-06 22:58 ` Andrew Morton
@ 2005-08-06 23:02 ` Chris Wright
2005-08-07 10:39 ` Zachary Amsden
1 sibling, 0 replies; 10+ messages in thread
From: Chris Wright @ 2005-08-06 23:02 UTC (permalink / raw)
To: Andrew Morton; +Cc: Zachary Amsden, hch, ak, linux-kernel, riel, chrisw, pratap
* Andrew Morton (akpm@osdl.org) wrote:
> Yup, with one or two semi-exceptions, all the patches up to this series
> seem to be good general cleanups - certainly it's good to move all those
> open-coded asm statements into single-site inlines and macros: people keep
> on screwing them up.
I agree.
> We do need to wake the Xen poeple up, make sure that these changes suit
> them as well, or at least don't screw them over (hard to see how it could
> though).
I have a series of similar patches that I've done for Xen that I'll
post shortly.
thanks,
-chris
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] 8/8 Create MMU 2/3 level accessors in the sub-arch layer (i386)
2005-08-06 7:23 Zachary Amsden
@ 2005-08-07 1:16 ` Chris Wright
0 siblings, 0 replies; 10+ messages in thread
From: Chris Wright @ 2005-08-07 1:16 UTC (permalink / raw)
To: Zachary Amsden
Cc: akpm, chrisw, linux-kernel, davej, hpa, Riley, pratap, chrisl
* Zachary Amsden (zach@vmware.com) wrote:
> All operations which update live page table entries have been moved to the
> sub-architecture layer. Unfortunately, this required yet another parallel set
> of pgtable-Nlevel-ops.h files, but this avoids the ugliness of having to use
> #ifdef's all of the code.
I hit a similar issue...
--- linux-2.6.12-xen0-arch.orig/include/asm-i386/pgtable.h
+++ linux-2.6.12-xen0-arch/include/asm-i386/pgtable.h
@@ -200,15 +200,6 @@ extern unsigned long long __PAGE_KERNEL,
/* The boot page tables (all created as a single array) */
extern unsigned long pg0[];
-#define pte_present(x) ((x).pte_low & (_PAGE_PRESENT | _PAGE_PROTNONE))
-#define pte_clear(mm,addr,xp) do { set_pte_at(mm, addr, xp, __pte(0)); } while (0)
-
-#define pmd_none(x) (!pmd_val(x))
-#define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT)
-#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0)
-#define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE)
-
-
#define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT))
/*
@@ -237,6 +228,17 @@ static inline pte_t pte_mkdirty(pte_t pt
static inline pte_t pte_mkyoung(pte_t pte) { (pte).pte_low |= _PAGE_ACCESSED; return pte; }
static inline pte_t pte_mkwrite(pte_t pte) { (pte).pte_low |= _PAGE_RW; return pte; }
+#include <mach_pgtable.h>
+
+#define pte_present(x) ((x).pte_low & (_PAGE_PRESENT | _PAGE_PROTNONE))
+#define pte_clear(mm,addr,xp) do { set_pte_at(mm, addr, xp, __pte(0)); } while (0)
+
+#define pmd_none(x) (!pmd_val(x))
+#define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT)
+#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0)
+#define pmd_bad(x) mach_pmd_bad(x)
+
+
#ifdef CONFIG_X86_PAE
# include <asm/pgtable-3level.h>
#else
@@ -257,10 +259,7 @@ static inline int ptep_test_and_clear_yo
return test_and_clear_bit(_PAGE_BIT_ACCESSED, &ptep->pte_low);
}
-static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
-{
- clear_bit(_PAGE_BIT_RW, &ptep->pte_low);
-}
+#define ptep_set_wrprotect(mm,addr,ptep) mach_ptep_set_wrprotect(mm,addr,ptep)
/*
* Macro to mark a page protection value as "uncacheable". On processors which do not support
@@ -363,9 +362,9 @@ extern void noexec_setup(const char *str
#if defined(CONFIG_HIGHPTE)
#define pte_offset_map(dir, address) \
- ((pte_t *)kmap_atomic(pmd_page(*(dir)),KM_PTE0) + pte_index(address))
+ ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE0) + pte_index(address))
#define pte_offset_map_nested(dir, address) \
- ((pte_t *)kmap_atomic(pmd_page(*(dir)),KM_PTE1) + pte_index(address))
+ ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE1) + pte_index(address))
#define pte_unmap(pte) kunmap_atomic(pte, KM_PTE0)
#define pte_unmap_nested(pte) kunmap_atomic(pte, KM_PTE1)
#else
@@ -403,10 +402,10 @@ extern void noexec_setup(const char *str
#endif /* !CONFIG_DISCONTIGMEM */
#define io_remap_page_range(vma, vaddr, paddr, size, prot) \
- remap_pfn_range(vma, vaddr, (paddr) >> PAGE_SHIFT, size, prot)
+ mach_io_remap_page_range(vma, vaddr, paddr, size, prot)
#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \
- remap_pfn_range(vma, vaddr, pfn, size, prot)
+ mach_io_remap_pfn_range(vma, vaddr, pfn, size, prot)
#define MK_IOSPACE_PFN(space, pfn) (pfn)
#define GET_IOSPACE(pfn) 0
--- linux-2.6.12-xen0-arch.orig/include/asm-i386/pgtable-2level.h
+++ linux-2.6.12-xen0-arch/include/asm-i386/pgtable-2level.h
@@ -2,6 +2,7 @@
#define _I386_PGTABLE_2LEVEL_H
#include <asm-generic/pgtable-nopmd.h>
+#include <mach_pgtable-2level.h>
#define pte_ERROR(e) \
printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, (e).pte_low)
@@ -16,13 +17,13 @@
#define set_pte(pteptr, pteval) (*(pteptr) = pteval)
#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval)
#define set_pte_atomic(pteptr, pteval) set_pte(pteptr,pteval)
-#define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval))
+#define set_pmd(pmdptr, pmdval) mach_set_pmd(pmdptr, pmdval)
-#define ptep_get_and_clear(mm,addr,xp) __pte(xchg(&(xp)->pte_low, 0))
+#define ptep_get_and_clear(mm,addr,xp) mach_ptep_get_and_clear(mm,addr,xp)
#define pte_same(a, b) ((a).pte_low == (b).pte_low)
-#define pte_page(x) pfn_to_page(pte_pfn(x))
+#define pte_page(x) mach_pte_page(x)
#define pte_none(x) (!(x).pte_low)
-#define pte_pfn(x) ((unsigned long)(((x).pte_low >> PAGE_SHIFT)))
+#define pte_pfn(x) mach_pte_pfn(x)
#define pfn_pte(pfn, prot) __pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
#define pfn_pmd(pfn, prot) __pmd(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
--- linux-2.6.12-xen0-arch.orig/include/asm-i386/pgtable-3level-defs.h
+++ linux-2.6.12-xen0-arch/include/asm-i386/pgtable-3level-defs.h
@@ -1,7 +1,7 @@
#ifndef _I386_PGTABLE_3LEVEL_DEFS_H
#define _I386_PGTABLE_3LEVEL_DEFS_H
-#define HAVE_SHARED_KERNEL_PMD 1
+#include <mach_pgtable-3level-defs.h>
/*
* PGDIR_SHIFT determines what a top-level page table entry can map
--- /dev/null
+++ linux-2.6.12-xen0-arch/include/asm-i386/mach-default/mach_pgtable.h
@@ -0,0 +1,18 @@
+#ifndef __ASM_MACH_PGTABLE_H
+#define __ASM_MACH_PGTABLE_H
+
+#define mach_pmd_present(x) (pmd_val(x) & _PAGE_PRESENT)
+#define mach_pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE)
+
+static inline void mach_ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+{
+ clear_bit(_PAGE_BIT_RW, &ptep->pte_low);
+}
+
+#define mach_io_remap_page_range(vma, vaddr, paddr, size, prot) \
+ remap_pfn_range(vma, vaddr, (paddr) >> PAGE_SHIFT, size, prot)
+
+#define mach_io_remap_pfn_range(vma, vaddr, pfn, size, prot) \
+ remap_pfn_range(vma, vaddr, pfn, size, prot)
+
+#endif
--- /dev/null
+++ linux-2.6.12-xen0-arch/include/asm-i386/mach-default/mach_pgtable-2level.h
@@ -0,0 +1,10 @@
+#ifndef _ASM_MACH_PGTABLE_2LEVEL_H
+#define _ASM_MACH_PGTABLE_2LEVEL_H
+
+#define mach_set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval))
+
+#define mach_ptep_get_and_clear(mm,addr,xp) __pte(xchg(&(xp)->pte_low, 0))
+#define mach_pte_page(x) pfn_to_page(pte_pfn(x))
+#define mach_pte_pfn(x) ((unsigned long)(((x).pte_low >> PAGE_SHIFT)))
+
+#endif
--- /dev/null
+++ linux-2.6.12-xen0-arch/include/asm-i386/mach-default/mach_pgtable-3level-defs.h
@@ -0,0 +1,6 @@
+#ifndef _ASM_MACH_PGTABLE_3LEVEL_DEFS_H
+#define _ASM_MACH_PGTABLE_3LEVEL_DEFS_H
+
+#define HAVE_SHARED_KERNEL_PMD 1
+
+#endif
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] 8/8 Create MMU 2/3 level accessors in the sub-arch layer (i386)
2005-08-06 22:58 ` Andrew Morton
2005-08-06 23:02 ` Chris Wright
@ 2005-08-07 10:39 ` Zachary Amsden
1 sibling, 0 replies; 10+ messages in thread
From: Zachary Amsden @ 2005-08-07 10:39 UTC (permalink / raw)
To: Andrew Morton; +Cc: hch, ak, linux-kernel, riel, chrisw, pratap
Andrew Morton wrote:
>Zachary Amsden <zach@vmware.com> wrote:
>
>
>>>Yeah, I said ugly ones specificly. There's been some nice previous ones,
>>>
>>>
>> >but most in this series (all the move of stuff to subarches) are rather
>> >horrible and lack lots of explanation.
>> >
>> >
>>
>> All of my previous patches have been aimed at fixing bugs, improving
>> performance, reliability and maintinability of the i386 architecture.
>>
>>
>
>Yup, with one or two semi-exceptions, all the patches up to this series
>seem to be good general cleanups - certainly it's good to move all those
>open-coded asm statements into single-site inlines and macros: people keep
>on screwing them up.
>
>We do need to wake the Xen poeple up, make sure that these changes suit
>them as well, or at least don't screw them over (hard to see how it could
>though).
>
>
This patch in particular is still quite controversial. I know at least
Andi has objections (quite valid) to the way PAE/non-PAE was dissected,
and I would definitely like to address these concerns. Although I have
no objection to you committing it to the mm tree right now, please be
advised that Chris Wright and I will have to converge quite a bit on
this patch, and will likely be doing a substantial amount of rework here
to work out Xen compatibility issues as well as general cleanliness. If
it is more convenient for you to live without that churn, by all means
feel free to, and we can update the patch once everyone is happy.
Zach
^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2005-08-07 10:40 UTC | newest]
Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
[not found] <42F46558.9010202@vmware.com.suse.lists.linux.kernel>
2005-08-06 11:37 ` [PATCH] 8/8 Create MMU 2/3 level accessors in the sub-arch layer (i386) Andi Kleen
2005-08-06 11:56 ` Christoph Hellwig
2005-08-06 11:58 ` Andi Kleen
2005-08-06 12:01 ` Christoph Hellwig
2005-08-06 18:28 ` Zachary Amsden
2005-08-06 22:58 ` Andrew Morton
2005-08-06 23:02 ` Chris Wright
2005-08-07 10:39 ` Zachary Amsden
2005-08-06 7:23 Zachary Amsden
2005-08-07 1:16 ` Chris Wright
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).