All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v3] x86: allow 64-bit PV guest kernels to suppress user mode exposure of M2P
@ 2015-04-24 14:31 Jan Beulich
  2015-04-24 14:57 ` Andrew Cooper
  2015-04-30 11:18 ` Tim Deegan
  0 siblings, 2 replies; 7+ messages in thread
From: Jan Beulich @ 2015-04-24 14:31 UTC (permalink / raw)
  To: xen-devel
  Cc: Ian Campbell, Andrew Cooper, Keir Fraser, Ian Jackson, Tim Deegan

[-- Attachment #1: Type: text/plain, Size: 10397 bytes --]

Xen L4 entries being uniformly installed into any L4 table and 64-bit
PV kernels running in ring 3 means that user mode was able to see the
read-only M2P presented by Xen to the guests. While apparently not
really representing an exploitable information leak, this still very
certainly was never meant to be that way.

Building on the fact that these guests already have separate kernel and
user mode page tables we can allow guest kernels to tell Xen that they
don't want user mode to see this table. We can't, however, do this by
default: There is no ABI requirement that kernel and user mode page
tables be separate. Therefore introduce a new VM-assist flag allowing
the guest to control respective hypervisor behavior:
- when not set, L4 tables get created with the respective slot blank,
  and whenever the L4 table gets used as a kernel one the missing
  mapping gets inserted,
- when set, L4 tables get created with the respective slot initialized
  as before, and whenever the L4 table gets used as a user one the
  mapping gets zapped.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v3: Shadow mode changes re-written (to address the migration regression
    in v2).

--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -339,7 +339,7 @@ static int setup_compat_l4(struct vcpu *
 
     l4tab = __map_domain_page(pg);
     clear_page(l4tab);
-    init_guest_l4_table(l4tab, v->domain);
+    init_guest_l4_table(l4tab, v->domain, 1);
     unmap_domain_page(l4tab);
 
     v->arch.guest_table = pagetable_from_page(pg);
@@ -971,7 +971,11 @@ int arch_set_info_guest(
         case -EINTR:
             rc = -ERESTART;
         case -ERESTART:
+            break;
         case 0:
+            if ( !compat && !VM_ASSIST(d, m2p_strict) &&
+                 !paging_mode_refcounts(d) )
+                fill_ro_mpt(cr3_gfn);
             break;
         default:
             if ( cr3_page == current->arch.old_guest_table )
@@ -1006,7 +1010,10 @@ int arch_set_info_guest(
                 default:
                     if ( cr3_page == current->arch.old_guest_table )
                         cr3_page = NULL;
+                    break;
                 case 0:
+                    if ( VM_ASSIST(d, m2p_strict) )
+                        zap_ro_mpt(cr3_gfn);
                     break;
                 }
             }
--- a/xen/arch/x86/domain_build.c
+++ b/xen/arch/x86/domain_build.c
@@ -1205,7 +1205,7 @@ int __init construct_dom0(
         l3start = __va(mpt_alloc); mpt_alloc += PAGE_SIZE;
     }
     clear_page(l4tab);
-    init_guest_l4_table(l4tab, d);
+    init_guest_l4_table(l4tab, d, 0);
     v->arch.guest_table = pagetable_from_paddr(__pa(l4start));
     if ( is_pv_32on64_domain(d) )
         v->arch.guest_table_user = v->arch.guest_table;
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -1380,7 +1380,8 @@ static int alloc_l3_table(struct page_in
     return rc > 0 ? 0 : rc;
 }
 
-void init_guest_l4_table(l4_pgentry_t l4tab[], const struct domain *d)
+void init_guest_l4_table(l4_pgentry_t l4tab[], const struct domain *d,
+                         bool_t zap_ro_mpt)
 {
     /* Xen private mappings. */
     memcpy(&l4tab[ROOT_PAGETABLE_FIRST_XEN_SLOT],
@@ -1395,6 +1396,25 @@ void init_guest_l4_table(l4_pgentry_t l4
         l4e_from_pfn(domain_page_map_to_mfn(l4tab), __PAGE_HYPERVISOR);
     l4tab[l4_table_offset(PERDOMAIN_VIRT_START)] =
         l4e_from_page(d->arch.perdomain_l3_pg, __PAGE_HYPERVISOR);
+    if ( zap_ro_mpt || is_pv_32on64_domain(d) || paging_mode_refcounts(d) )
+        l4tab[l4_table_offset(RO_MPT_VIRT_START)] = l4e_empty();
+}
+
+void fill_ro_mpt(unsigned long mfn)
+{
+    l4_pgentry_t *l4tab = map_domain_page(mfn);
+
+    l4tab[l4_table_offset(RO_MPT_VIRT_START)] =
+        idle_pg_table[l4_table_offset(RO_MPT_VIRT_START)];
+    unmap_domain_page(l4tab);
+}
+
+void zap_ro_mpt(unsigned long mfn)
+{
+    l4_pgentry_t *l4tab = map_domain_page(mfn);
+
+    l4tab[l4_table_offset(RO_MPT_VIRT_START)] = l4e_empty();
+    unmap_domain_page(l4tab);
 }
 
 static int alloc_l4_table(struct page_info *page)
@@ -1444,7 +1464,7 @@ static int alloc_l4_table(struct page_in
         adjust_guest_l4e(pl4e[i], d);
     }
 
-    init_guest_l4_table(pl4e, d);
+    init_guest_l4_table(pl4e, d, !VM_ASSIST(d, m2p_strict));
     unmap_domain_page(pl4e);
 
     return rc > 0 ? 0 : rc;
@@ -2754,6 +2774,8 @@ int new_guest_cr3(unsigned long mfn)
 
     invalidate_shadow_ldt(curr, 0);
 
+    if ( !VM_ASSIST(d, m2p_strict) && !paging_mode_refcounts(d) )
+        fill_ro_mpt(mfn);
     curr->arch.guest_table = pagetable_from_pfn(mfn);
     update_cr3(curr);
 
@@ -3111,6 +3133,8 @@ long do_mmuext_op(
                                 op.arg1.mfn);
                     break;
                 }
+                if ( VM_ASSIST(d, m2p_strict) && !paging_mode_refcounts(d) )
+                    zap_ro_mpt(op.arg1.mfn);
             }
 
             curr->arch.guest_table_user = pagetable_from_pfn(op.arg1.mfn);
--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -1435,6 +1435,14 @@ void sh_install_xen_entries_in_l4(struct
         shadow_l4e_from_mfn(page_to_mfn(d->arch.perdomain_l3_pg),
                             __PAGE_HYPERVISOR);
 
+    if ( !shadow_mode_refcounts(d) && !is_pv_32on64_domain(d) &&
+         !VM_ASSIST(d, m2p_strict) )
+    {
+        /* zap_ro_mpt(mfn_x(sl4mfn)); */
+        sl4e[shadow_l4_table_offset(RO_MPT_VIRT_START)] = shadow_l4e_empty();
+        zap_ro_mpt(mfn_x(gl4mfn));
+    }
+
     /* Shadow linear mapping for 4-level shadows.  N.B. for 3-level
      * shadows on 64-bit xen, this linear mapping is later replaced by the
      * monitor pagetable structure, which is built in make_monitor_table
@@ -4071,6 +4079,22 @@ sh_update_cr3(struct vcpu *v, int do_loc
     if ( sh_remove_write_access(d, gmfn, 4, 0) != 0 )
         flush_tlb_mask(d->domain_dirty_cpumask);
     sh_set_toplevel_shadow(v, 0, gmfn, SH_type_l4_shadow);
+    if ( !shadow_mode_refcounts(d) && !is_pv_32on64_domain(d) )
+    {
+        mfn_t smfn = pagetable_get_mfn(v->arch.shadow_table[0]);
+
+        if ( !(v->arch.flags & TF_kernel_mode) && VM_ASSIST(d, m2p_strict) )
+        {
+            zap_ro_mpt(mfn_x(smfn));
+            zap_ro_mpt(mfn_x(gmfn));
+        }
+        else if ( (v->arch.flags & TF_kernel_mode) &&
+                  !VM_ASSIST(d, m2p_strict) )
+        {
+            fill_ro_mpt(mfn_x(smfn));
+            fill_ro_mpt(mfn_x(gmfn));
+        }
+    }
 #else
 #error This should never happen
 #endif
--- a/xen/arch/x86/x86_64/mm.c
+++ b/xen/arch/x86/x86_64/mm.c
@@ -480,7 +480,7 @@ static int setup_m2p_table(struct mem_ho
                 l2_ro_mpt += l2_table_offset(va);
             }
 
-            /* NB. Cannot be GLOBAL as shadow_mode_translate reuses this area. */
+            /* NB. Cannot be GLOBAL: guest user mode should not see it. */
             l2e_write(l2_ro_mpt, l2e_from_pfn(mfn,
                    /*_PAGE_GLOBAL|*/_PAGE_PSE|_PAGE_USER|_PAGE_PRESENT));
         }
@@ -583,7 +583,7 @@ void __init paging_init(void)
                        0x77, 1UL << L3_PAGETABLE_SHIFT);
 
                 ASSERT(!l2_table_offset(va));
-                /* NB. Cannot be GLOBAL as shadow_mode_translate reuses this area. */
+                /* NB. Cannot be GLOBAL: guest user mode should not see it. */
                 l3e_write(&l3_ro_mpt[l3_table_offset(va)],
                     l3e_from_page(l1_pg,
                         /*_PAGE_GLOBAL|*/_PAGE_PSE|_PAGE_USER|_PAGE_PRESENT));
@@ -621,7 +621,7 @@ void __init paging_init(void)
                       l3e_from_page(l2_pg, __PAGE_HYPERVISOR | _PAGE_USER));
             ASSERT(!l2_table_offset(va));
         }
-        /* NB. Cannot be GLOBAL as shadow_mode_translate reuses this area. */
+        /* NB. Cannot be GLOBAL: guest user mode should not see it. */
         if ( l1_pg )
             l2e_write(l2_ro_mpt, l2e_from_page(
                 l1_pg, /*_PAGE_GLOBAL|*/_PAGE_PSE|_PAGE_USER|_PAGE_PRESENT));
--- a/xen/include/asm-x86/config.h
+++ b/xen/include/asm-x86/config.h
@@ -346,7 +346,8 @@ extern unsigned long xen_phys_start;
 #define NATIVE_VM_ASSIST_VALID   ((1UL << VMASST_TYPE_4gb_segments)        | \
                                   (1UL << VMASST_TYPE_4gb_segments_notify) | \
                                   (1UL << VMASST_TYPE_writable_pagetables) | \
-                                  (1UL << VMASST_TYPE_pae_extended_cr3))
+                                  (1UL << VMASST_TYPE_pae_extended_cr3)    | \
+                                  (1UL << VMASST_TYPE_m2p_strict))
 #define VM_ASSIST_VALID          NATIVE_VM_ASSIST_VALID
 #define COMPAT_VM_ASSIST_VALID   (NATIVE_VM_ASSIST_VALID & \
                                   ((1UL << COMPAT_BITS_PER_LONG) - 1))
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -318,7 +318,10 @@ static inline void *__page_to_virt(const
 int free_page_type(struct page_info *page, unsigned long type,
                    int preemptible);
 
-void init_guest_l4_table(l4_pgentry_t[], const struct domain *);
+void init_guest_l4_table(l4_pgentry_t[], const struct domain *,
+                         bool_t zap_ro_mpt);
+void fill_ro_mpt(unsigned long mfn);
+void zap_ro_mpt(unsigned long mfn);
 
 int is_iomem_page(unsigned long mfn);
 
--- a/xen/include/public/xen.h
+++ b/xen/include/public/xen.h
@@ -486,6 +486,18 @@ DEFINE_XEN_GUEST_HANDLE(mmuext_op_t);
 /* x86/PAE guests: support PDPTs above 4GB. */
 #define VMASST_TYPE_pae_extended_cr3     3
 
+/*
+ * x86/64 guests: strictly hide M2P from user mode.
+ * This allows the guest to control respective hypervisor behavior:
+ * - when not set, L4 tables get created with the respective slot blank,
+ *   and whenever the L4 table gets used as a kernel one the missing
+ *   mapping gets inserted,
+ * - when set, L4 tables get created with the respective slot initialized
+ *   as before, and whenever the L4 table gets used as a user one the
+ *   mapping gets zapped.
+ */
+#define VMASST_TYPE_m2p_strict           32
+
 #if __XEN_INTERFACE_VERSION__ < 0x00040600
 #define MAX_VMASST_TYPE                  3
 #endif



[-- Attachment #2: x86-m2p-strict.patch --]
[-- Type: text/plain, Size: 10469 bytes --]

x86: allow 64-bit PV guest kernels to suppress user mode exposure of M2P

Xen L4 entries being uniformly installed into any L4 table and 64-bit
PV kernels running in ring 3 means that user mode was able to see the
read-only M2P presented by Xen to the guests. While apparently not
really representing an exploitable information leak, this still very
certainly was never meant to be that way.

Building on the fact that these guests already have separate kernel and
user mode page tables we can allow guest kernels to tell Xen that they
don't want user mode to see this table. We can't, however, do this by
default: There is no ABI requirement that kernel and user mode page
tables be separate. Therefore introduce a new VM-assist flag allowing
the guest to control respective hypervisor behavior:
- when not set, L4 tables get created with the respective slot blank,
  and whenever the L4 table gets used as a kernel one the missing
  mapping gets inserted,
- when set, L4 tables get created with the respective slot initialized
  as before, and whenever the L4 table gets used as a user one the
  mapping gets zapped.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v3: Shadow mode changes re-written (to address the migration regression
    in v2).

--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -339,7 +339,7 @@ static int setup_compat_l4(struct vcpu *
 
     l4tab = __map_domain_page(pg);
     clear_page(l4tab);
-    init_guest_l4_table(l4tab, v->domain);
+    init_guest_l4_table(l4tab, v->domain, 1);
     unmap_domain_page(l4tab);
 
     v->arch.guest_table = pagetable_from_page(pg);
@@ -971,7 +971,11 @@ int arch_set_info_guest(
         case -EINTR:
             rc = -ERESTART;
         case -ERESTART:
+            break;
         case 0:
+            if ( !compat && !VM_ASSIST(d, m2p_strict) &&
+                 !paging_mode_refcounts(d) )
+                fill_ro_mpt(cr3_gfn);
             break;
         default:
             if ( cr3_page == current->arch.old_guest_table )
@@ -1006,7 +1010,10 @@ int arch_set_info_guest(
                 default:
                     if ( cr3_page == current->arch.old_guest_table )
                         cr3_page = NULL;
+                    break;
                 case 0:
+                    if ( VM_ASSIST(d, m2p_strict) )
+                        zap_ro_mpt(cr3_gfn);
                     break;
                 }
             }
--- a/xen/arch/x86/domain_build.c
+++ b/xen/arch/x86/domain_build.c
@@ -1205,7 +1205,7 @@ int __init construct_dom0(
         l3start = __va(mpt_alloc); mpt_alloc += PAGE_SIZE;
     }
     clear_page(l4tab);
-    init_guest_l4_table(l4tab, d);
+    init_guest_l4_table(l4tab, d, 0);
     v->arch.guest_table = pagetable_from_paddr(__pa(l4start));
     if ( is_pv_32on64_domain(d) )
         v->arch.guest_table_user = v->arch.guest_table;
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -1380,7 +1380,8 @@ static int alloc_l3_table(struct page_in
     return rc > 0 ? 0 : rc;
 }
 
-void init_guest_l4_table(l4_pgentry_t l4tab[], const struct domain *d)
+void init_guest_l4_table(l4_pgentry_t l4tab[], const struct domain *d,
+                         bool_t zap_ro_mpt)
 {
     /* Xen private mappings. */
     memcpy(&l4tab[ROOT_PAGETABLE_FIRST_XEN_SLOT],
@@ -1395,6 +1396,25 @@ void init_guest_l4_table(l4_pgentry_t l4
         l4e_from_pfn(domain_page_map_to_mfn(l4tab), __PAGE_HYPERVISOR);
     l4tab[l4_table_offset(PERDOMAIN_VIRT_START)] =
         l4e_from_page(d->arch.perdomain_l3_pg, __PAGE_HYPERVISOR);
+    if ( zap_ro_mpt || is_pv_32on64_domain(d) || paging_mode_refcounts(d) )
+        l4tab[l4_table_offset(RO_MPT_VIRT_START)] = l4e_empty();
+}
+
+void fill_ro_mpt(unsigned long mfn)
+{
+    l4_pgentry_t *l4tab = map_domain_page(mfn);
+
+    l4tab[l4_table_offset(RO_MPT_VIRT_START)] =
+        idle_pg_table[l4_table_offset(RO_MPT_VIRT_START)];
+    unmap_domain_page(l4tab);
+}
+
+void zap_ro_mpt(unsigned long mfn)
+{
+    l4_pgentry_t *l4tab = map_domain_page(mfn);
+
+    l4tab[l4_table_offset(RO_MPT_VIRT_START)] = l4e_empty();
+    unmap_domain_page(l4tab);
 }
 
 static int alloc_l4_table(struct page_info *page)
@@ -1444,7 +1464,7 @@ static int alloc_l4_table(struct page_in
         adjust_guest_l4e(pl4e[i], d);
     }
 
-    init_guest_l4_table(pl4e, d);
+    init_guest_l4_table(pl4e, d, !VM_ASSIST(d, m2p_strict));
     unmap_domain_page(pl4e);
 
     return rc > 0 ? 0 : rc;
@@ -2754,6 +2774,8 @@ int new_guest_cr3(unsigned long mfn)
 
     invalidate_shadow_ldt(curr, 0);
 
+    if ( !VM_ASSIST(d, m2p_strict) && !paging_mode_refcounts(d) )
+        fill_ro_mpt(mfn);
     curr->arch.guest_table = pagetable_from_pfn(mfn);
     update_cr3(curr);
 
@@ -3111,6 +3133,8 @@ long do_mmuext_op(
                                 op.arg1.mfn);
                     break;
                 }
+                if ( VM_ASSIST(d, m2p_strict) && !paging_mode_refcounts(d) )
+                    zap_ro_mpt(op.arg1.mfn);
             }
 
             curr->arch.guest_table_user = pagetable_from_pfn(op.arg1.mfn);
--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -1435,6 +1435,14 @@ void sh_install_xen_entries_in_l4(struct
         shadow_l4e_from_mfn(page_to_mfn(d->arch.perdomain_l3_pg),
                             __PAGE_HYPERVISOR);
 
+    if ( !shadow_mode_refcounts(d) && !is_pv_32on64_domain(d) &&
+         !VM_ASSIST(d, m2p_strict) )
+    {
+        /* zap_ro_mpt(mfn_x(sl4mfn)); */
+        sl4e[shadow_l4_table_offset(RO_MPT_VIRT_START)] = shadow_l4e_empty();
+        zap_ro_mpt(mfn_x(gl4mfn));
+    }
+
     /* Shadow linear mapping for 4-level shadows.  N.B. for 3-level
      * shadows on 64-bit xen, this linear mapping is later replaced by the
      * monitor pagetable structure, which is built in make_monitor_table
@@ -4071,6 +4079,22 @@ sh_update_cr3(struct vcpu *v, int do_loc
     if ( sh_remove_write_access(d, gmfn, 4, 0) != 0 )
         flush_tlb_mask(d->domain_dirty_cpumask);
     sh_set_toplevel_shadow(v, 0, gmfn, SH_type_l4_shadow);
+    if ( !shadow_mode_refcounts(d) && !is_pv_32on64_domain(d) )
+    {
+        mfn_t smfn = pagetable_get_mfn(v->arch.shadow_table[0]);
+
+        if ( !(v->arch.flags & TF_kernel_mode) && VM_ASSIST(d, m2p_strict) )
+        {
+            zap_ro_mpt(mfn_x(smfn));
+            zap_ro_mpt(mfn_x(gmfn));
+        }
+        else if ( (v->arch.flags & TF_kernel_mode) &&
+                  !VM_ASSIST(d, m2p_strict) )
+        {
+            fill_ro_mpt(mfn_x(smfn));
+            fill_ro_mpt(mfn_x(gmfn));
+        }
+    }
 #else
 #error This should never happen
 #endif
--- a/xen/arch/x86/x86_64/mm.c
+++ b/xen/arch/x86/x86_64/mm.c
@@ -480,7 +480,7 @@ static int setup_m2p_table(struct mem_ho
                 l2_ro_mpt += l2_table_offset(va);
             }
 
-            /* NB. Cannot be GLOBAL as shadow_mode_translate reuses this area. */
+            /* NB. Cannot be GLOBAL: guest user mode should not see it. */
             l2e_write(l2_ro_mpt, l2e_from_pfn(mfn,
                    /*_PAGE_GLOBAL|*/_PAGE_PSE|_PAGE_USER|_PAGE_PRESENT));
         }
@@ -583,7 +583,7 @@ void __init paging_init(void)
                        0x77, 1UL << L3_PAGETABLE_SHIFT);
 
                 ASSERT(!l2_table_offset(va));
-                /* NB. Cannot be GLOBAL as shadow_mode_translate reuses this area. */
+                /* NB. Cannot be GLOBAL: guest user mode should not see it. */
                 l3e_write(&l3_ro_mpt[l3_table_offset(va)],
                     l3e_from_page(l1_pg,
                         /*_PAGE_GLOBAL|*/_PAGE_PSE|_PAGE_USER|_PAGE_PRESENT));
@@ -621,7 +621,7 @@ void __init paging_init(void)
                       l3e_from_page(l2_pg, __PAGE_HYPERVISOR | _PAGE_USER));
             ASSERT(!l2_table_offset(va));
         }
-        /* NB. Cannot be GLOBAL as shadow_mode_translate reuses this area. */
+        /* NB. Cannot be GLOBAL: guest user mode should not see it. */
         if ( l1_pg )
             l2e_write(l2_ro_mpt, l2e_from_page(
                 l1_pg, /*_PAGE_GLOBAL|*/_PAGE_PSE|_PAGE_USER|_PAGE_PRESENT));
--- a/xen/include/asm-x86/config.h
+++ b/xen/include/asm-x86/config.h
@@ -346,7 +346,8 @@ extern unsigned long xen_phys_start;
 #define NATIVE_VM_ASSIST_VALID   ((1UL << VMASST_TYPE_4gb_segments)        | \
                                   (1UL << VMASST_TYPE_4gb_segments_notify) | \
                                   (1UL << VMASST_TYPE_writable_pagetables) | \
-                                  (1UL << VMASST_TYPE_pae_extended_cr3))
+                                  (1UL << VMASST_TYPE_pae_extended_cr3)    | \
+                                  (1UL << VMASST_TYPE_m2p_strict))
 #define VM_ASSIST_VALID          NATIVE_VM_ASSIST_VALID
 #define COMPAT_VM_ASSIST_VALID   (NATIVE_VM_ASSIST_VALID & \
                                   ((1UL << COMPAT_BITS_PER_LONG) - 1))
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -318,7 +318,10 @@ static inline void *__page_to_virt(const
 int free_page_type(struct page_info *page, unsigned long type,
                    int preemptible);
 
-void init_guest_l4_table(l4_pgentry_t[], const struct domain *);
+void init_guest_l4_table(l4_pgentry_t[], const struct domain *,
+                         bool_t zap_ro_mpt);
+void fill_ro_mpt(unsigned long mfn);
+void zap_ro_mpt(unsigned long mfn);
 
 int is_iomem_page(unsigned long mfn);
 
--- a/xen/include/public/xen.h
+++ b/xen/include/public/xen.h
@@ -486,6 +486,18 @@ DEFINE_XEN_GUEST_HANDLE(mmuext_op_t);
 /* x86/PAE guests: support PDPTs above 4GB. */
 #define VMASST_TYPE_pae_extended_cr3     3
 
+/*
+ * x86/64 guests: strictly hide M2P from user mode.
+ * This allows the guest to control respective hypervisor behavior:
+ * - when not set, L4 tables get created with the respective slot blank,
+ *   and whenever the L4 table gets used as a kernel one the missing
+ *   mapping gets inserted,
+ * - when set, L4 tables get created with the respective slot initialized
+ *   as before, and whenever the L4 table gets used as a user one the
+ *   mapping gets zapped.
+ */
+#define VMASST_TYPE_m2p_strict           32
+
 #if __XEN_INTERFACE_VERSION__ < 0x00040600
 #define MAX_VMASST_TYPE                  3
 #endif

[-- Attachment #3: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v3] x86: allow 64-bit PV guest kernels to suppress user mode exposure of M2P
  2015-04-24 14:31 [PATCH v3] x86: allow 64-bit PV guest kernels to suppress user mode exposure of M2P Jan Beulich
@ 2015-04-24 14:57 ` Andrew Cooper
  2015-04-24 15:07   ` Jan Beulich
  2015-04-30 11:18 ` Tim Deegan
  1 sibling, 1 reply; 7+ messages in thread
From: Andrew Cooper @ 2015-04-24 14:57 UTC (permalink / raw)
  To: Jan Beulich, xen-devel; +Cc: Ian Campbell, Keir Fraser, Ian Jackson, Tim Deegan

On 24/04/15 15:31, Jan Beulich wrote:
> Xen L4 entries being uniformly installed into any L4 table and 64-bit
> PV kernels running in ring 3 means that user mode was able to see the
> read-only M2P presented by Xen to the guests. While apparently not
> really representing an exploitable information leak, this still very
> certainly was never meant to be that way.
>
> Building on the fact that these guests already have separate kernel and
> user mode page tables we can allow guest kernels to tell Xen that they
> don't want user mode to see this table. We can't, however, do this by
> default: There is no ABI requirement that kernel and user mode page
> tables be separate. Therefore introduce a new VM-assist flag allowing
> the guest to control respective hypervisor behavior:
> - when not set, L4 tables get created with the respective slot blank,
>   and whenever the L4 table gets used as a kernel one the missing
>   mapping gets inserted,
> - when set, L4 tables get created with the respective slot initialized
>   as before, and whenever the L4 table gets used as a user one the
>   mapping gets zapped.

Is this complete?

For backwards compatibility, older kernels will not have m2p_strict set,
and the m2p should unconditionally appear in all L4s.

If m2p_strict is set then the mapping should be zapped for user L4s.

~Andrew

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v3] x86: allow 64-bit PV guest kernels to suppress user mode exposure of M2P
  2015-04-24 14:57 ` Andrew Cooper
@ 2015-04-24 15:07   ` Jan Beulich
  2015-04-24 15:54     ` Andrew Cooper
  0 siblings, 1 reply; 7+ messages in thread
From: Jan Beulich @ 2015-04-24 15:07 UTC (permalink / raw)
  To: Andrew Cooper
  Cc: Ian Campbell, xen-devel, Keir Fraser, Ian Jackson, Tim Deegan

>>> On 24.04.15 at 16:57, <andrew.cooper3@citrix.com> wrote:
> On 24/04/15 15:31, Jan Beulich wrote:
>> Xen L4 entries being uniformly installed into any L4 table and 64-bit
>> PV kernels running in ring 3 means that user mode was able to see the
>> read-only M2P presented by Xen to the guests. While apparently not
>> really representing an exploitable information leak, this still very
>> certainly was never meant to be that way.
>>
>> Building on the fact that these guests already have separate kernel and
>> user mode page tables we can allow guest kernels to tell Xen that they
>> don't want user mode to see this table. We can't, however, do this by
>> default: There is no ABI requirement that kernel and user mode page
>> tables be separate. Therefore introduce a new VM-assist flag allowing
>> the guest to control respective hypervisor behavior:
>> - when not set, L4 tables get created with the respective slot blank,
>>   and whenever the L4 table gets used as a kernel one the missing
>>   mapping gets inserted,
>> - when set, L4 tables get created with the respective slot initialized
>>   as before, and whenever the L4 table gets used as a user one the
>>   mapping gets zapped.
> 
> Is this complete?

Yes.

> For backwards compatibility, older kernels will not have m2p_strict set,
> and the m2p should unconditionally appear in all L4s.

No. L4s _only_ used for user mode have no need for this entry to
be non-zero. The difference between strict and relaxed mode is as
described - in strict mode, L4s default to having the entry populated
and tables used for user mode get it stripped, while in relaxed mode
L4s default to empty and get the entry populated when used
for kernel mode. This guarantees that in non-strict mode all kernel
page tables have the entry filled, while in strict mode it guarantees
that all user page tables have it cleared.

Jan

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v3] x86: allow 64-bit PV guest kernels to suppress user mode exposure of M2P
  2015-04-24 15:07   ` Jan Beulich
@ 2015-04-24 15:54     ` Andrew Cooper
  2015-04-24 16:00       ` Jan Beulich
  0 siblings, 1 reply; 7+ messages in thread
From: Andrew Cooper @ 2015-04-24 15:54 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Ian Campbell, xen-devel, Keir Fraser, Ian Jackson, Tim Deegan

On 24/04/15 16:07, Jan Beulich wrote:
>>>> On 24.04.15 at 16:57, <andrew.cooper3@citrix.com> wrote:
>> On 24/04/15 15:31, Jan Beulich wrote:
>>> Xen L4 entries being uniformly installed into any L4 table and 64-bit
>>> PV kernels running in ring 3 means that user mode was able to see the
>>> read-only M2P presented by Xen to the guests. While apparently not
>>> really representing an exploitable information leak, this still very
>>> certainly was never meant to be that way.
>>>
>>> Building on the fact that these guests already have separate kernel and
>>> user mode page tables we can allow guest kernels to tell Xen that they
>>> don't want user mode to see this table. We can't, however, do this by
>>> default: There is no ABI requirement that kernel and user mode page
>>> tables be separate. Therefore introduce a new VM-assist flag allowing
>>> the guest to control respective hypervisor behavior:
>>> - when not set, L4 tables get created with the respective slot blank,
>>>   and whenever the L4 table gets used as a kernel one the missing
>>>   mapping gets inserted,
>>> - when set, L4 tables get created with the respective slot initialized
>>>   as before, and whenever the L4 table gets used as a user one the
>>>   mapping gets zapped.
>> Is this complete?
> Yes.
>
>> For backwards compatibility, older kernels will not have m2p_strict set,
>> and the m2p should unconditionally appear in all L4s.
> No. L4s _only_ used for user mode have no need for this entry to
> be non-zero.

There is only ever a single L4 in a particular virtual address space. 
The M2P is part of the Xen ABI range.

Previously, the M2P was present in all L4s which is why they leaked into
user context.

If we do not wish to break backwards compatibility with this change,
then in relaxed mode the M2P must remain in all tables, or Userspace
which is actually making use of mappings will suddenly start faulting
because of a change in Xen, not a kernel change (and an unknowing kernel
might not be prepared to handle this case).

By your description, in the relaxed case a newly created L4 which is
first used as user table will have the mapping clear.

Or am I missing something?

~Andrew

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v3] x86: allow 64-bit PV guest kernels to suppress user mode exposure of M2P
  2015-04-24 15:54     ` Andrew Cooper
@ 2015-04-24 16:00       ` Jan Beulich
  0 siblings, 0 replies; 7+ messages in thread
From: Jan Beulich @ 2015-04-24 16:00 UTC (permalink / raw)
  To: Andrew Cooper
  Cc: Ian Campbell, xen-devel, Keir Fraser, Ian Jackson, Tim Deegan

>>> On 24.04.15 at 17:54, <andrew.cooper3@citrix.com> wrote:
> On 24/04/15 16:07, Jan Beulich wrote:
>>>>> On 24.04.15 at 16:57, <andrew.cooper3@citrix.com> wrote:
>>> On 24/04/15 15:31, Jan Beulich wrote:
>>>> Xen L4 entries being uniformly installed into any L4 table and 64-bit
>>>> PV kernels running in ring 3 means that user mode was able to see the
>>>> read-only M2P presented by Xen to the guests. While apparently not
>>>> really representing an exploitable information leak, this still very
>>>> certainly was never meant to be that way.
>>>>
>>>> Building on the fact that these guests already have separate kernel and
>>>> user mode page tables we can allow guest kernels to tell Xen that they
>>>> don't want user mode to see this table. We can't, however, do this by
>>>> default: There is no ABI requirement that kernel and user mode page
>>>> tables be separate. Therefore introduce a new VM-assist flag allowing
>>>> the guest to control respective hypervisor behavior:
>>>> - when not set, L4 tables get created with the respective slot blank,
>>>>   and whenever the L4 table gets used as a kernel one the missing
>>>>   mapping gets inserted,
>>>> - when set, L4 tables get created with the respective slot initialized
>>>>   as before, and whenever the L4 table gets used as a user one the
>>>>   mapping gets zapped.
>>> Is this complete?
>> Yes.
>>
>>> For backwards compatibility, older kernels will not have m2p_strict set,
>>> and the m2p should unconditionally appear in all L4s.
>> No. L4s _only_ used for user mode have no need for this entry to
>> be non-zero.
> 
> There is only ever a single L4 in a particular virtual address space. 
> The M2P is part of the Xen ABI range.
> 
> Previously, the M2P was present in all L4s which is why they leaked into
> user context.
> 
> If we do not wish to break backwards compatibility with this change,
> then in relaxed mode the M2P must remain in all tables, or Userspace
> which is actually making use of mappings will suddenly start faulting
> because of a change in Xen, not a kernel change (and an unknowing kernel
> might not be prepared to handle this case).

Userspace was never supposed to access this table, and hence
revoking access to the table cannot be a problem there.

> By your description, in the relaxed case a newly created L4 which is
> first used as user table will have the mapping clear.

The ordering doesn't matter. In relaxed mode, the first time a
table gets used for kernel purposes the entry will be inserted
(and hence potentially become visible to user mode).

But anyway I'm confused that you're starting this discussion
now, when the patch had already gone in and only needed to
be reverted because of breaking migration.

Jan

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v3] x86: allow 64-bit PV guest kernels to suppress user mode exposure of M2P
  2015-04-24 14:31 [PATCH v3] x86: allow 64-bit PV guest kernels to suppress user mode exposure of M2P Jan Beulich
  2015-04-24 14:57 ` Andrew Cooper
@ 2015-04-30 11:18 ` Tim Deegan
  2015-05-04 10:33   ` Jan Beulich
  1 sibling, 1 reply; 7+ messages in thread
From: Tim Deegan @ 2015-04-30 11:18 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Ian Campbell, xen-devel, Keir Fraser, Ian Jackson, Andrew Cooper

Hi,

At 15:31 +0100 on 24 Apr (1429889471), Jan Beulich wrote:
> --- a/xen/arch/x86/mm/shadow/multi.c
> +++ b/xen/arch/x86/mm/shadow/multi.c
> @@ -1435,6 +1435,14 @@ void sh_install_xen_entries_in_l4(struct
>          shadow_l4e_from_mfn(page_to_mfn(d->arch.perdomain_l3_pg),
>                              __PAGE_HYPERVISOR);
>  
> +    if ( !shadow_mode_refcounts(d) && !is_pv_32on64_domain(d) &&

I think the right check here is !shadow_mode_external(d), i.e. that
this l4e is a mapping of the M2P and not some guest-controlled mapping.

> +         !VM_ASSIST(d, m2p_strict) )
> +    {
> +        /* zap_ro_mpt(mfn_x(sl4mfn)); */
> +        sl4e[shadow_l4_table_offset(RO_MPT_VIRT_START)] = shadow_l4e_empty();
> +        zap_ro_mpt(mfn_x(gl4mfn));

Here and below -- shouldn't the existing PV paths be taking care of
zapping/filling the guest pagetable before we get here?  It doesn't
seem right for shadow pagetable code to be making this kind of change
- especially in a mapping that's not actually "shadowed" per se.

Cheers,

Tim.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v3] x86: allow 64-bit PV guest kernels to suppress user mode exposure of M2P
  2015-04-30 11:18 ` Tim Deegan
@ 2015-05-04 10:33   ` Jan Beulich
  0 siblings, 0 replies; 7+ messages in thread
From: Jan Beulich @ 2015-05-04 10:33 UTC (permalink / raw)
  To: Tim Deegan
  Cc: Ian Campbell, Andrew Cooper, Keir Fraser, Ian Jackson, xen-devel

>>> On 30.04.15 at 13:18, <tim@xen.org> wrote:
> At 15:31 +0100 on 24 Apr (1429889471), Jan Beulich wrote:
>> --- a/xen/arch/x86/mm/shadow/multi.c
>> +++ b/xen/arch/x86/mm/shadow/multi.c
>> @@ -1435,6 +1435,14 @@ void sh_install_xen_entries_in_l4(struct
>>          shadow_l4e_from_mfn(page_to_mfn(d->arch.perdomain_l3_pg),
>>                              __PAGE_HYPERVISOR);
>>  
>> +    if ( !shadow_mode_refcounts(d) && !is_pv_32on64_domain(d) &&
> 
> I think the right check here is !shadow_mode_external(d), i.e. that
> this l4e is a mapping of the M2P and not some guest-controlled mapping.

Done. As before I'm most of the time uncertain which one to use
and hence simply matched it with the paging_mode_refcounts() use
elsewhere in the patch (which you approved already).

>> +         !VM_ASSIST(d, m2p_strict) )
>> +    {
>> +        /* zap_ro_mpt(mfn_x(sl4mfn)); */
>> +        sl4e[shadow_l4_table_offset(RO_MPT_VIRT_START)] = shadow_l4e_empty();
>> +        zap_ro_mpt(mfn_x(gl4mfn));
> 
> Here and below -- shouldn't the existing PV paths be taking care of
> zapping/filling the guest pagetable before we get here?  It doesn't
> seem right for shadow pagetable code to be making this kind of change
> - especially in a mapping that's not actually "shadowed" per se.

Right, and I actually wasn't sure - I added them just to be on the
safe side and had meant to try without, but then forgot. Will do so
now.

Jan

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2015-05-04 10:33 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-04-24 14:31 [PATCH v3] x86: allow 64-bit PV guest kernels to suppress user mode exposure of M2P Jan Beulich
2015-04-24 14:57 ` Andrew Cooper
2015-04-24 15:07   ` Jan Beulich
2015-04-24 15:54     ` Andrew Cooper
2015-04-24 16:00       ` Jan Beulich
2015-04-30 11:18 ` Tim Deegan
2015-05-04 10:33   ` Jan Beulich

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.