Re: [PATCH] support 1gb pages in guest page table walker

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Christoph Egger <Christoph.Egger@amd.com>
To: Tim Deegan <Tim.Deegan@citrix.com>
Cc: Jan,
	"xen-devel@lists.xensource.com" <xen-devel@lists.xensource.com>,
	Beulich <JBeulich@novell.com>
Subject: Re: [PATCH] support 1gb pages in guest page table	 walker
Date: Tue, 26 Jul 2011 14:11:41 +0200	[thread overview]
Message-ID: <4E2EAEFD.2090405@amd.com> (raw)
In-Reply-To: <20110725103028.GB8970@whitby.uk.xensource.com>

[-- Attachment #1: Type: text/plain, Size: 1084 bytes --]

On 07/25/11 12:30, Tim Deegan wrote:
> At 11:25 +0100 on 25 Jul (1311593146), Tim Deegan wrote:
>> Also this function should be called from the CPUID trap handler to make
>> sure we never advertise PSE1GB when we're not going to support it.
>
> Er, not this function exactly, since CPUID should report the feature
> even when the guest's not in long_mode.  I think it needs a
> hvm_pse1G_supported that can be called from CPUID, and then
> guest_supports_1G_superpages() boils down to
> "(GUEST_PAGING_LEVEL>= 4)&&  hvm_pse1G_supported(v)"

New version attached. I removed the fake l1e calculation.
Something is still wrong: The l2 guest kernel crashes with
an unexpected null pointer dereference, but it boots w/o
this patch.

Is it a problem when the l1 guest uses 1gb pages where the host
uses 4k or 2m pages?

Christoph


-- 
---to satisfy European Law for business letters:
Advanced Micro Devices GmbH
Einsteinring 24, 85689 Dornach b. Muenchen
Geschaeftsfuehrer: Alberto Bozzo, Andrew Bowd
Sitz: Dornach, Gemeinde Aschheim, Landkreis Muenchen
Registergericht Muenchen, HRB Nr. 43632

[-- Attachment #2: xen_pagewalk.diff --]
[-- Type: text/plain, Size: 6484 bytes --]

# HG changeset patch
# User cegger
# Date 1311681725 -7200
support 1gb pages for guests

Signed-off-by: Christoph Egger <Christoph.Egger@amd.com>

diff -r 4f2c59fb28e6 -r 6d15152fb59a xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -2389,6 +2389,7 @@ void hvm_cpuid(unsigned int input, unsig
                                    unsigned int *ecx, unsigned int *edx)
 {
     struct vcpu *v = current;
+    struct domain *d = v->domain;
     unsigned int count = *ecx;
 
     if ( cpuid_viridian_leaves(input, eax, ebx, ecx, edx) )
@@ -2397,7 +2398,7 @@ void hvm_cpuid(unsigned int input, unsig
     if ( cpuid_hypervisor_leaves(input, count, eax, ebx, ecx, edx) )
         return;
 
-    domain_cpuid(v->domain, input, *ecx, eax, ebx, ecx, edx);
+    domain_cpuid(d, input, *ecx, eax, ebx, ecx, edx);
 
     switch ( input )
     {
@@ -2433,7 +2434,7 @@ void hvm_cpuid(unsigned int input, unsig
             {
                 if ( !(v->arch.xcr0 & (1ULL << sub_leaf)) )
                     continue;
-                domain_cpuid(v->domain, input, sub_leaf, &_eax, &_ebx, &_ecx, 
+                domain_cpuid(d, input, sub_leaf, &_eax, &_ebx, &_ecx, 
                              &_edx);
                 if ( (_eax + _ebx) > *ebx )
                     *ebx = _eax + _ebx;
@@ -2444,9 +2445,13 @@ void hvm_cpuid(unsigned int input, unsig
     case 0x80000001:
         /* We expose RDTSCP feature to guest only when
            tsc_mode == TSC_MODE_DEFAULT and host_tsc_is_safe() returns 1 */
-        if ( v->domain->arch.tsc_mode != TSC_MODE_DEFAULT ||
+        if ( d->arch.tsc_mode != TSC_MODE_DEFAULT ||
              !host_tsc_is_safe() )
             *edx &= ~cpufeat_mask(X86_FEATURE_RDTSCP);
+        /* Expose 1gb page feature for HVM HAP guests and hw support is
+         * available. */
+        if (hvm_pse1gb_supported(d))
+            *edx |= cpufeat_mask(X86_FEATURE_PAGE1GB);
         break;
     }
 }
diff -r 4f2c59fb28e6 -r 6d15152fb59a xen/arch/x86/mm/guest_walk.c
--- a/xen/arch/x86/mm/guest_walk.c
+++ b/xen/arch/x86/mm/guest_walk.c
@@ -134,7 +134,8 @@ guest_walk_tables(struct vcpu *v, struct
     guest_l4e_t *l4p;
 #endif
     uint32_t gflags, mflags, iflags, rc = 0;
-    int pse, smep;
+    int smep;
+    bool_t pse1G = 0, pse2M = 0;
 
     perfc_incr(guest_walk);
     memset(gw, 0, sizeof(*gw));
@@ -182,6 +183,27 @@ guest_walk_tables(struct vcpu *v, struct
     if ( rc & _PAGE_PRESENT )
         goto out;
 
+    pse1G = (guest_supports_1G_superpages(v) && 
+           (guest_l3e_get_flags(gw->l3e) & _PAGE_PSE)); 
+
+    if ( pse1G )
+    {
+        /* Shadow paging doesn't support 1gb pages so a fake
+         * shadow l1 table entry is not needed. */
+        gfn_t start = guest_l3e_get_gfn(gw->l3e);
+
+#define GUEST_L3_GFN_ALIGN (1 << (GUEST_L3_PAGETABLE_SHIFT - \
+                                  GUEST_L2_PAGETABLE_SHIFT))
+        if ( gfn_x(start) & (GUEST_L3_GFN_ALIGN - 1) & ~0x1 )
+        {
+            rc |= _PAGE_INVALID_BITS;
+        }
+#undef GUEST_L3_GFN_ALIGN
+
+        gw->l2mfn = gw->l1mfn = _mfn(INVALID_MFN);
+        goto set_ad;
+    }
+
 #else /* PAE only... */
 
     /* Get the l3e and check its flag */
@@ -219,10 +241,10 @@ guest_walk_tables(struct vcpu *v, struct
     if ( rc & _PAGE_PRESENT )
         goto out;
 
-    pse = (guest_supports_superpages(v) && 
+    pse2M = (guest_supports_superpages(v) && 
            (guest_l2e_get_flags(gw->l2e) & _PAGE_PSE)); 
 
-    if ( pse )
+    if ( pse2M )
     {
         /* Special case: this guest VA is in a PSE superpage, so there's
          * no guest l1e.  We make one up so that the propagation code
@@ -282,6 +304,9 @@ guest_walk_tables(struct vcpu *v, struct
         rc |= ((gflags & mflags) ^ mflags);
     }
 
+#if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
+set_ad:
+#endif
     /* Now re-invert the user-mode requirement for SMEP. */
     if ( smep ) 
         rc ^= _PAGE_USER;
@@ -295,17 +320,21 @@ guest_walk_tables(struct vcpu *v, struct
 #if GUEST_PAGING_LEVELS == 4 /* 64-bit only... */
         if ( set_ad_bits(l4p + guest_l4_table_offset(va), &gw->l4e, 0) )
             paging_mark_dirty(d, mfn_x(gw->l4mfn));
-        if ( set_ad_bits(l3p + guest_l3_table_offset(va), &gw->l3e, 0) )
+        if ( set_ad_bits(l3p + guest_l3_table_offset(va), &gw->l3e,
+                         (pse1G && (pfec & PFEC_write_access))) )
             paging_mark_dirty(d, mfn_x(gw->l3mfn));
 #endif
-        if ( set_ad_bits(l2p + guest_l2_table_offset(va), &gw->l2e,
-                         (pse && (pfec & PFEC_write_access))) )
-            paging_mark_dirty(d, mfn_x(gw->l2mfn));            
-        if ( !pse ) 
+        if ( !pse1G ) 
         {
-            if ( set_ad_bits(l1p + guest_l1_table_offset(va), &gw->l1e, 
-                             (pfec & PFEC_write_access)) )
-                paging_mark_dirty(d, mfn_x(gw->l1mfn));
+            if ( set_ad_bits(l2p + guest_l2_table_offset(va), &gw->l2e,
+                             (pse2M && (pfec & PFEC_write_access))) )
+                paging_mark_dirty(d, mfn_x(gw->l2mfn));            
+            if ( !pse2M ) 
+            {
+                if ( set_ad_bits(l1p + guest_l1_table_offset(va), &gw->l1e, 
+                                 (pfec & PFEC_write_access)) )
+                    paging_mark_dirty(d, mfn_x(gw->l1mfn));
+            }
         }
     }
 
diff -r 4f2c59fb28e6 -r 6d15152fb59a xen/include/asm-x86/guest_pt.h
--- a/xen/include/asm-x86/guest_pt.h
+++ b/xen/include/asm-x86/guest_pt.h
@@ -194,6 +194,17 @@ guest_supports_superpages(struct vcpu *v
 }
 
 static inline int
+guest_supports_1G_superpages(struct vcpu *v)
+{
+    if (!guest_supports_superpages(v))
+        return 0;
+
+    return (GUEST_PAGING_LEVELS >= 4
+           && hvm_pse1gb_supported(v->domain)
+           && hvm_long_mode_enabled(v));
+}
+
+static inline int
 guest_supports_nx(struct vcpu *v)
 {
     if ( GUEST_PAGING_LEVELS == 2 || !cpu_has_nx )
diff -r 4f2c59fb28e6 -r 6d15152fb59a xen/include/asm-x86/hvm/hvm.h
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -224,6 +224,9 @@ int hvm_girq_dest_2_vcpu_id(struct domai
 #define hvm_hap_has_2mb(d) \
     (hvm_funcs.hap_capabilities & HVM_HAP_SUPERPAGE_2MB)
 
+#define hvm_pse1gb_supported(d) \
+    (cpu_has_page1gb && hvm_hap_has_1gb(d) && paging_mode_hap(d))
+
 #ifdef __x86_64__
 #define hvm_long_mode_enabled(v) \
     ((v)->arch.hvm_vcpu.guest_efer & EFER_LMA)

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

next prev parent reply	other threads:[~2011-07-26 12:11 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-07-22 14:51 [PATCH] support 1gb pages in guest page table walker Christoph Egger
2011-07-22 15:43 ` Jan Beulich
2011-07-25  9:19   ` Christoph Egger
2011-07-25  9:31     ` Jan Beulich
2011-07-25  9:46       ` Christoph Egger
2011-07-25 10:25         ` Tim Deegan
2011-07-25 10:30           ` Tim Deegan
2011-07-26 12:11             ` Christoph Egger [this message]
2011-07-26 13:19               ` Tim Deegan
2011-07-27 13:15                 ` Christoph Egger
2011-07-28 12:58                   ` Tim Deegan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4E2EAEFD.2090405@amd.com \
    --to=christoph.egger@amd.com \
    --cc=JBeulich@novell.com \
    --cc=Tim.Deegan@citrix.com \
    --cc=xen-devel@lists.xensource.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.