All of lore.kernel.org
 help / color / mirror / Atom feed
* [V11 PATCH 0/4] pvh dom0 patches...
@ 2014-05-02  1:55 Mukesh Rathor
  2014-05-02  1:55 ` [V11 PATCH 1/4] pvh dom0: construct_dom0 changes Mukesh Rathor
                   ` (3 more replies)
  0 siblings, 4 replies; 12+ messages in thread
From: Mukesh Rathor @ 2014-05-02  1:55 UTC (permalink / raw)
  To: xen-devel; +Cc: George.Dunlap, keir.xen, tim, JBeulich

Hi,

Inching even closer to the finish line, please find v11 of the pvh dom0
patches based on c/s: 9f2f129

git tree: git://oss.oracle.com/git/mrathor/xen.git  branch: dom0pvh-v11

Changed in v11 from v10:
  patch 1: none

  patch 2: none

  patch 3: 
      - add pvh fixmes in p2m.c
      - add ASSERT in atomic_write_ept_entry

  patch 4: none

thanks,
Mukesh

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [V11 PATCH 1/4] pvh dom0: construct_dom0 changes
  2014-05-02  1:55 [V11 PATCH 0/4] pvh dom0 patches Mukesh Rathor
@ 2014-05-02  1:55 ` Mukesh Rathor
  2014-05-08 12:05   ` Tim Deegan
  2014-05-02  1:55 ` [V11 PATCH 2/4] pvh dom0: Add checks and restrictions for p2m_is_foreign Mukesh Rathor
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 12+ messages in thread
From: Mukesh Rathor @ 2014-05-02  1:55 UTC (permalink / raw)
  To: xen-devel; +Cc: George.Dunlap, keir.xen, tim, JBeulich

This patch changes construct_dom0() to boot in pvh mode:
  - Make sure dom0 elf supports pvh mode.
  - Call guest_physmap_add_page for pvh rather than simple p2m setting
  - Map all non-RAM regions 1:1 upto the end region in e820 or 4GB which
    ever is higher.
  - Allocate p2m, copying calculation from toolstack.
  - Allocate shared info page from the virtual space so that dom0 PT
    can be updated. Then update p2m for it with the actual mfn.
  - Since we build the page tables for pvh same as for pv, in
    pvh_fixup_page_tables_for_hap we replace the mfns with pfns.

Signed-off-by: Mukesh Rathor <mukesh.rathor@oracle.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
---
 xen/arch/x86/domain_build.c | 249 +++++++++++++++++++++++++++++++++++++++++---
 xen/arch/x86/mm/hap/hap.c   |  12 +++
 xen/include/asm-x86/hap.h   |   1 +
 3 files changed, 245 insertions(+), 17 deletions(-)

diff --git a/xen/arch/x86/domain_build.c b/xen/arch/x86/domain_build.c
index 1eccead..38ed9f6 100644
--- a/xen/arch/x86/domain_build.c
+++ b/xen/arch/x86/domain_build.c
@@ -35,6 +35,7 @@
 #include <asm/setup.h>
 #include <asm/bzimage.h> /* for bzimage_parse */
 #include <asm/io_apic.h>
+#include <asm/hap.h>
 
 #include <public/version.h>
 
@@ -307,6 +308,158 @@ static void __init process_dom0_ioports_disable(void)
     }
 }
 
+static __init void pvh_add_mem_mapping(struct domain *d, unsigned long gfn,
+                                       unsigned long mfn, unsigned long nr_mfns)
+{
+    unsigned long i;
+    int rc;
+
+    for ( i = 0; i < nr_mfns; i++ )
+        if ( (rc = set_mmio_p2m_entry(d, gfn + i, _mfn(mfn + i))) )
+            panic("pvh_add_mem_mapping: gfn:%lx mfn:%lx i:%ld rc:%d\n",
+                  gfn, mfn, i, rc);
+}
+
+/*
+ * Set the 1:1 map for all non-RAM regions for dom 0. Thus, dom0 will have
+ * the entire io region mapped in the EPT/NPT.
+ *
+ * pvh fixme: The following doesn't map MMIO ranges when they sit above the
+ *            highest E820 covered address.
+ */
+static __init void pvh_map_all_iomem(struct domain *d)
+{
+    unsigned long start_pfn, end_pfn, end = 0, start = 0;
+    const struct e820entry *entry;
+    unsigned int i, nump;
+
+    for ( i = 0, entry = e820.map; i < e820.nr_map; i++, entry++ )
+    {
+        end = entry->addr + entry->size;
+
+        if ( entry->type == E820_RAM || entry->type == E820_UNUSABLE ||
+             i == e820.nr_map - 1 )
+        {
+            start_pfn = PFN_DOWN(start);
+
+            /* Unused RAM areas are marked UNUSABLE, so skip them too */
+            if ( entry->type == E820_RAM || entry->type == E820_UNUSABLE )
+                end_pfn = PFN_UP(entry->addr);
+            else
+                end_pfn = PFN_UP(end);
+
+            if ( start_pfn < end_pfn )
+            {
+                nump = end_pfn - start_pfn;
+                /* Add pages to the mapping */
+                pvh_add_mem_mapping(d, start_pfn, start_pfn, nump);
+            }
+            start = end;
+        }
+    }
+
+    /*
+     * Some BIOSes may not report io space above ram that is less than 4GB. So
+     * we map any non-ram upto 4GB.
+     */
+    if ( end < GB(4) )
+    {
+        start_pfn = PFN_UP(end);
+        end_pfn = (GB(4)) >> PAGE_SHIFT;
+        nump = end_pfn - start_pfn;
+        pvh_add_mem_mapping(d, start_pfn, start_pfn, nump);
+    }
+}
+
+static __init void dom0_update_physmap(struct domain *d, unsigned long pfn,
+                                   unsigned long mfn, unsigned long vphysmap_s)
+{
+    if ( is_pvh_domain(d) )
+    {
+        int rc = guest_physmap_add_page(d, pfn, mfn, 0);
+        BUG_ON(rc);
+        return;
+    }
+    if ( !is_pv_32on64_domain(d) )
+        ((unsigned long *)vphysmap_s)[pfn] = mfn;
+    else
+        ((unsigned int *)vphysmap_s)[pfn] = mfn;
+
+    set_gpfn_from_mfn(mfn, pfn);
+}
+
+/* Replace mfns with pfns in dom0 page tables */
+static __init void pvh_fixup_page_tables_for_hap(struct vcpu *v,
+                                                 unsigned long v_start,
+                                                 unsigned long v_end)
+{
+    int i, j, k;
+    l4_pgentry_t *pl4e, *l4start;
+    l3_pgentry_t *pl3e;
+    l2_pgentry_t *pl2e;
+    l1_pgentry_t *pl1e;
+    unsigned long cr3_pfn;
+
+    ASSERT(paging_mode_enabled(v->domain));
+
+    l4start = map_domain_page(pagetable_get_pfn(v->arch.guest_table));
+
+    /* Clear entries prior to guest L4 start */
+    pl4e = l4start + l4_table_offset(v_start);
+    memset(l4start, 0, (unsigned long)pl4e - (unsigned long)l4start);
+
+    for ( ; pl4e <= l4start + l4_table_offset(v_end - 1); pl4e++ )
+    {
+        pl3e = map_l3t_from_l4e(*pl4e);
+        for ( i = 0; i < PAGE_SIZE / sizeof(*pl3e); i++, pl3e++ )
+        {
+            if ( !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) )
+                continue;
+
+            pl2e = map_l2t_from_l3e(*pl3e);
+            for ( j = 0; j < PAGE_SIZE / sizeof(*pl2e); j++, pl2e++ )
+            {
+                if ( !(l2e_get_flags(*pl2e)  & _PAGE_PRESENT) )
+                    continue;
+
+                pl1e = map_l1t_from_l2e(*pl2e);
+                for ( k = 0; k < PAGE_SIZE / sizeof(*pl1e); k++, pl1e++ )
+                {
+                    if ( !(l1e_get_flags(*pl1e) & _PAGE_PRESENT) )
+                        continue;
+
+                    *pl1e = l1e_from_pfn(get_gpfn_from_mfn(l1e_get_pfn(*pl1e)),
+                                         l1e_get_flags(*pl1e));
+                }
+                unmap_domain_page(pl1e);
+                *pl2e = l2e_from_pfn(get_gpfn_from_mfn(l2e_get_pfn(*pl2e)),
+                                     l2e_get_flags(*pl2e));
+            }
+            unmap_domain_page(pl2e);
+            *pl3e = l3e_from_pfn(get_gpfn_from_mfn(l3e_get_pfn(*pl3e)),
+                                 l3e_get_flags(*pl3e));
+        }
+        unmap_domain_page(pl3e);
+        *pl4e = l4e_from_pfn(get_gpfn_from_mfn(l4e_get_pfn(*pl4e)),
+                             l4e_get_flags(*pl4e));
+    }
+
+    /* Clear entries post guest L4. */
+    if ( (unsigned long)pl4e & (PAGE_SIZE - 1) )
+        memset(pl4e, 0, PAGE_SIZE - ((unsigned long)pl4e & (PAGE_SIZE - 1)));
+
+    unmap_domain_page(l4start);
+
+    cr3_pfn = get_gpfn_from_mfn(paddr_to_pfn(v->arch.cr3));
+    v->arch.hvm_vcpu.guest_cr[3] = pfn_to_paddr(cr3_pfn);
+
+    /*
+     * Finally, we update the paging modes (hap_update_paging_modes). This will
+     * create monitor_table for us, update v->arch.cr3, and update vmcs.cr3.
+     */
+    paging_update_paging_modes(v);
+}
+
 static __init void mark_pv_pt_pages_rdonly(struct domain *d,
                                            l4_pgentry_t *l4start,
                                            unsigned long vpt_start,
@@ -516,6 +669,8 @@ int __init construct_dom0(
     l3_pgentry_t *l3tab = NULL, *l3start = NULL;
     l2_pgentry_t *l2tab = NULL, *l2start = NULL;
     l1_pgentry_t *l1tab = NULL, *l1start = NULL;
+    paddr_t shared_info_paddr = 0;
+    u32 save_pvh_pg_mode = 0;
 
     /*
      * This fully describes the memory layout of the initial domain. All 
@@ -593,12 +748,21 @@ int __init construct_dom0(
         goto out;
     }
 
-    if ( parms.elf_notes[XEN_ELFNOTE_SUPPORTED_FEATURES].type != XEN_ENT_NONE &&
-         !test_bit(XENFEAT_dom0, parms.f_supported) )
+    if ( parms.elf_notes[XEN_ELFNOTE_SUPPORTED_FEATURES].type != XEN_ENT_NONE )
     {
-        printk("Kernel does not support Dom0 operation\n");
-        rc = -EINVAL;
-        goto out;
+        if ( !test_bit(XENFEAT_dom0, parms.f_supported) )
+        {
+            printk("Kernel does not support Dom0 operation\n");
+            rc = -EINVAL;
+            goto out;
+        }
+        if ( is_pvh_domain(d) &&
+             !test_bit(XENFEAT_hvm_callback_vector, parms.f_supported) )
+        {
+            printk("Kernel does not support PVH mode\n");
+            rc = -EINVAL;
+            goto out;
+        }
     }
 
     if ( compat32 )
@@ -663,6 +827,13 @@ int __init construct_dom0(
     vstartinfo_end   = (vstartinfo_start +
                         sizeof(struct start_info) +
                         sizeof(struct dom0_vga_console_info));
+
+    if ( is_pvh_domain(d) )
+    {
+        shared_info_paddr = round_pgup(vstartinfo_end) - v_start;
+        vstartinfo_end   += PAGE_SIZE;
+    }
+
     vpt_start        = round_pgup(vstartinfo_end);
     for ( nr_pt_pages = 2; ; nr_pt_pages++ )
     {
@@ -903,6 +1074,13 @@ int __init construct_dom0(
         (void)alloc_vcpu(d, i, cpu);
     }
 
+    /*
+     * pvh: we temporarily disable d->arch.paging.mode so that we can build cr3
+     * needed to run on dom0's page tables.
+     */
+    save_pvh_pg_mode = d->arch.paging.mode;
+    d->arch.paging.mode = 0;
+
     /* Set up CR3 value for write_ptbase */
     if ( paging_mode_enabled(d) )
         paging_update_paging_modes(v);
@@ -969,6 +1147,22 @@ int __init construct_dom0(
                          nr_pages);
     }
 
+    if ( is_pvh_domain(d) )
+    {
+        unsigned long hap_pages, memkb = nr_pages * (PAGE_SIZE / 1024);
+
+        /* Copied from: libxl_get_required_shadow_memory() */
+        memkb = 4 * (256 * d->max_vcpus + 2 * (memkb / 1024));
+        hap_pages = ( (memkb + 1023) / 1024) << (20 - PAGE_SHIFT);
+        hap_set_alloc_for_pvh_dom0(d, hap_pages);
+    }
+
+    /*
+     * We enable paging mode again so guest_physmap_add_page will do the
+     * right thing for us.
+     */
+    d->arch.paging.mode = save_pvh_pg_mode;
+
     /* Write the phys->machine and machine->phys table entries. */
     for ( pfn = 0; pfn < count; pfn++ )
     {
@@ -985,11 +1179,7 @@ int __init construct_dom0(
         if ( pfn > REVERSE_START && (vinitrd_start || pfn < initrd_pfn) )
             mfn = alloc_epfn - (pfn - REVERSE_START);
 #endif
-        if ( !is_pv_32on64_domain(d) )
-            ((unsigned long *)vphysmap_start)[pfn] = mfn;
-        else
-            ((unsigned int *)vphysmap_start)[pfn] = mfn;
-        set_gpfn_from_mfn(mfn, pfn);
+        dom0_update_physmap(d, pfn, mfn, vphysmap_start);
         if (!(pfn & 0xfffff))
             process_pending_softirqs();
     }
@@ -1005,8 +1195,8 @@ int __init construct_dom0(
             if ( !page->u.inuse.type_info &&
                  !get_page_and_type(page, d, PGT_writable_page) )
                 BUG();
-            ((unsigned long *)vphysmap_start)[pfn] = mfn;
-            set_gpfn_from_mfn(mfn, pfn);
+
+            dom0_update_physmap(d, pfn, mfn, vphysmap_start);
             ++pfn;
             if (!(pfn & 0xfffff))
                 process_pending_softirqs();
@@ -1026,11 +1216,7 @@ int __init construct_dom0(
 #ifndef NDEBUG
 #define pfn (nr_pages - 1 - (pfn - (alloc_epfn - alloc_spfn)))
 #endif
-            if ( !is_pv_32on64_domain(d) )
-                ((unsigned long *)vphysmap_start)[pfn] = mfn;
-            else
-                ((unsigned int *)vphysmap_start)[pfn] = mfn;
-            set_gpfn_from_mfn(mfn, pfn);
+            dom0_update_physmap(d, pfn, mfn, vphysmap_start);
 #undef pfn
             page++; pfn++;
             if (!(pfn & 0xfffff))
@@ -1054,6 +1240,15 @@ int __init construct_dom0(
         si->console.dom0.info_size = sizeof(struct dom0_vga_console_info);
     }
 
+    /*
+     * PVH: We need to update si->shared_info while we are on dom0 page tables,
+     * but need to defer the p2m update until after we have fixed up the
+     * page tables for PVH so that the m2p for the si pte entry returns
+     * correct pfn.
+     */
+    if ( is_pvh_domain(d) )
+        si->shared_info = shared_info_paddr;
+
     if ( is_pv_32on64_domain(d) )
         xlat_start_info(si, XLAT_start_info_console_dom0);
 
@@ -1087,8 +1282,15 @@ int __init construct_dom0(
     regs->eflags = X86_EFLAGS_IF;
 
     if ( opt_dom0_shadow )
+    {
+        if ( is_pvh_domain(d) )
+        {
+            printk("Unsupported option dom0_shadow for PVH\n");
+            return -EINVAL;
+        }
         if ( paging_enable(d, PG_SH_enable) == 0 ) 
             paging_update_paging_modes(v);
+    }
 
     if ( supervisor_mode_kernel )
     {
@@ -1179,6 +1381,19 @@ int __init construct_dom0(
         printk(" Xen warning: dom0 kernel broken ELF: %s\n",
                elf_check_broken(&elf));
 
+    if ( is_pvh_domain(d) )
+    {
+        /* finally, fixup the page table, replacing mfns with pfns */
+        pvh_fixup_page_tables_for_hap(v, v_start, v_end);
+
+        /* the pt has correct pfn for si, now update the mfn in the p2m */
+        mfn = virt_to_mfn(d->shared_info);
+        pfn = shared_info_paddr >> PAGE_SHIFT;
+        dom0_update_physmap(d, pfn, mfn, 0);
+
+        pvh_map_all_iomem(d);
+    }
+
     if ( d->domain_id == hardware_domid )
         iommu_hwdom_init(d);
 
diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c
index a7593e7..fbce461 100644
--- a/xen/arch/x86/mm/hap/hap.c
+++ b/xen/arch/x86/mm/hap/hap.c
@@ -587,6 +587,18 @@ int hap_domctl(struct domain *d, xen_domctl_shadow_op_t *sc,
     }
 }
 
+void __init hap_set_alloc_for_pvh_dom0(struct domain *d,
+                                       unsigned long hap_pages)
+{
+    int rc;
+
+    paging_lock(d);
+    rc = hap_set_allocation(d, hap_pages, NULL);
+    paging_unlock(d);
+
+    BUG_ON(rc);
+}
+
 static const struct paging_mode hap_paging_real_mode;
 static const struct paging_mode hap_paging_protected_mode;
 static const struct paging_mode hap_paging_pae_mode;
diff --git a/xen/include/asm-x86/hap.h b/xen/include/asm-x86/hap.h
index e03f983..5161927 100644
--- a/xen/include/asm-x86/hap.h
+++ b/xen/include/asm-x86/hap.h
@@ -63,6 +63,7 @@ int   hap_track_dirty_vram(struct domain *d,
                            XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
 
 extern const struct paging_mode *hap_paging_get_mode(struct vcpu *);
+void hap_set_alloc_for_pvh_dom0(struct domain *d, unsigned long num_pages);
 
 #endif /* XEN_HAP_H */
 
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [V11 PATCH 2/4] pvh dom0: Add checks and restrictions for p2m_is_foreign
  2014-05-02  1:55 [V11 PATCH 0/4] pvh dom0 patches Mukesh Rathor
  2014-05-02  1:55 ` [V11 PATCH 1/4] pvh dom0: construct_dom0 changes Mukesh Rathor
@ 2014-05-02  1:55 ` Mukesh Rathor
  2014-05-02  1:55 ` [V11 PATCH 3/4] pvh dom0: Add and remove foreign pages Mukesh Rathor
  2014-05-02  1:55 ` [V11 PATCH 4/4] dom0: add opt_dom0pvh to setup.c Mukesh Rathor
  3 siblings, 0 replies; 12+ messages in thread
From: Mukesh Rathor @ 2014-05-02  1:55 UTC (permalink / raw)
  To: xen-devel; +Cc: George.Dunlap, keir.xen, tim, JBeulich

In this patch, we add some checks and restrictions in the relevant
p2m paths for p2m_is_foreign.

Signed-off-by: Mukesh Rathor <mukesh.rathor@oracle.com>
Acked-by: Tim Deegan <tim@xen.org>
---
 xen/arch/x86/mm/p2m-ept.c |  1 +
 xen/arch/x86/mm/p2m.c     | 12 +++++++++---
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/xen/arch/x86/mm/p2m-ept.c b/xen/arch/x86/mm/p2m-ept.c
index 5d19965..7926bc4 100644
--- a/xen/arch/x86/mm/p2m-ept.c
+++ b/xen/arch/x86/mm/p2m-ept.c
@@ -777,6 +777,7 @@ static void ept_change_entry_type_global(struct p2m_domain *p2m,
 
     BUG_ON(p2m_is_grant(ot) || p2m_is_grant(nt));
     BUG_ON(p2m_is_mmio(ot) || p2m_is_mmio(nt));
+    BUG_ON(p2m_is_foreign(ot) || p2m_is_foreign(nt));
 
     ept_change_entry_type_page(_mfn(ept_get_asr(ept)),
                                ept_get_wl(ept), ot, nt);
diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
index 1d0528b..5923968 100644
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -553,6 +553,10 @@ guest_physmap_add_entry(struct domain *d, unsigned long gfn,
         return 0;
     }
 
+    /* foreign pages are added thru p2m_add_foreign */
+    if ( p2m_is_foreign(t) )
+        return -EINVAL;
+
     p2m_lock(p2m);
 
     P2M_DEBUG("adding gfn=%#lx mfn=%#lx\n", gfn, mfn);
@@ -587,9 +591,9 @@ guest_physmap_add_entry(struct domain *d, unsigned long gfn,
             omfn = p2m->get_entry(p2m, gfn + i, &ot, &a, 0, NULL);
             ASSERT(!p2m_is_shared(ot));
         }
-        if ( p2m_is_grant(ot) )
+        if ( p2m_is_grant(ot) || p2m_is_foreign(ot) )
         {
-            /* Really shouldn't be unmapping grant maps this way */
+            /* Really shouldn't be unmapping grant/foreign maps this way */
             domain_crash(d);
             p2m_unlock(p2m);
             
@@ -691,6 +695,7 @@ p2m_type_t p2m_change_type(struct domain *d, unsigned long gfn,
     struct p2m_domain *p2m = p2m_get_hostp2m(d);
 
     BUG_ON(p2m_is_grant(ot) || p2m_is_grant(nt));
+    BUG_ON(p2m_is_foreign(ot) || p2m_is_foreign(nt));
 
     gfn_lock(p2m, gfn, 0);
 
@@ -715,6 +720,7 @@ void p2m_change_type_range(struct domain *d,
     struct p2m_domain *p2m = p2m_get_hostp2m(d);
 
     BUG_ON(p2m_is_grant(ot) || p2m_is_grant(nt));
+    BUG_ON(p2m_is_foreign(ot) || p2m_is_foreign(nt));
 
     p2m_lock(p2m);
     p2m->defer_nested_flush = 1;
@@ -765,7 +771,7 @@ static int set_typed_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
 
     gfn_lock(p2m, gfn, 0);
     omfn = p2m->get_entry(p2m, gfn, &ot, &a, 0, NULL);
-    if ( p2m_is_grant(ot) )
+    if ( p2m_is_grant(ot) || p2m_is_foreign(ot) )
     {
         p2m_unlock(p2m);
         domain_crash(d);
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [V11 PATCH 3/4] pvh dom0: Add and remove foreign pages
  2014-05-02  1:55 [V11 PATCH 0/4] pvh dom0 patches Mukesh Rathor
  2014-05-02  1:55 ` [V11 PATCH 1/4] pvh dom0: construct_dom0 changes Mukesh Rathor
  2014-05-02  1:55 ` [V11 PATCH 2/4] pvh dom0: Add checks and restrictions for p2m_is_foreign Mukesh Rathor
@ 2014-05-02  1:55 ` Mukesh Rathor
  2014-05-05 10:56   ` Jan Beulich
  2014-05-07 15:36   ` Roger Pau Monné
  2014-05-02  1:55 ` [V11 PATCH 4/4] dom0: add opt_dom0pvh to setup.c Mukesh Rathor
  3 siblings, 2 replies; 12+ messages in thread
From: Mukesh Rathor @ 2014-05-02  1:55 UTC (permalink / raw)
  To: xen-devel
  Cc: JBeulich, George.Dunlap, tim, eddie.dong, keir.xen, jun.nakajima

In this patch, a new function, p2m_add_foreign(), is added
to map pages from a foreign guest into dom0 for various purposes
like domU creation, running xentrace, etc... Such pages are
typed p2m_map_foreign.  Note, it is the nature of such pages
that a refcnt is held during their stay in the p2m. The
refcnt is added and released in the low level ept function
atomic_write_ept_entry. That macro is converted to a function to allow
for such refcounting, which only applies to leaf entries in the ept.

Also, get_pg_owner is changed to allow pvh to map foreign mappings,
and is made public to be used by p2m_add_foreign().

Signed-off-by: Mukesh Rathor <mukesh.rathor@oracle.com>
---
 xen/arch/x86/mm.c         |   9 ++--
 xen/arch/x86/mm/p2m-ept.c |  70 ++++++++++++++++++++------
 xen/arch/x86/mm/p2m-pt.c  |   7 +++
 xen/arch/x86/mm/p2m.c     | 122 +++++++++++++++++++++++++++++++++++++++++++---
 xen/include/asm-x86/mm.h  |   2 +
 xen/include/asm-x86/p2m.h |   7 +++
 6 files changed, 192 insertions(+), 25 deletions(-)

diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 1a8a5e0..8d12c30 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -2795,7 +2795,7 @@ int new_guest_cr3(unsigned long mfn)
     return rc;
 }
 
-static struct domain *get_pg_owner(domid_t domid)
+struct domain *get_pg_owner(domid_t domid)
 {
     struct domain *pg_owner = NULL, *curr = current->domain;
 
@@ -2811,7 +2811,7 @@ static struct domain *get_pg_owner(domid_t domid)
         goto out;
     }
 
-    if ( unlikely(paging_mode_translate(curr)) )
+    if ( !is_pvh_domain(curr) && unlikely(paging_mode_translate(curr)) )
     {
         MEM_LOG("Cannot mix foreign mappings with translated domains");
         goto out;
@@ -2838,7 +2838,7 @@ static struct domain *get_pg_owner(domid_t domid)
     return pg_owner;
 }
 
-static void put_pg_owner(struct domain *pg_owner)
+void put_pg_owner(struct domain *pg_owner)
 {
     rcu_unlock_domain(pg_owner);
 }
@@ -4584,6 +4584,9 @@ int xenmem_add_to_physmap_one(
             page = mfn_to_page(mfn);
             break;
         }
+        case XENMAPSPACE_gmfn_foreign:
+            rc = p2m_add_foreign(d, idx, gpfn, foreign_domid);
+            return rc;
         default:
             break;
     }
diff --git a/xen/arch/x86/mm/p2m-ept.c b/xen/arch/x86/mm/p2m-ept.c
index 7926bc4..eb80118 100644
--- a/xen/arch/x86/mm/p2m-ept.c
+++ b/xen/arch/x86/mm/p2m-ept.c
@@ -36,8 +36,6 @@
 
 #define atomic_read_ept_entry(__pepte)                              \
     ( (ept_entry_t) { .epte = read_atomic(&(__pepte)->epte) } )
-#define atomic_write_ept_entry(__pepte, __epte)                     \
-    write_atomic(&(__pepte)->epte, (__epte).epte)
 
 #define is_epte_present(ept_entry)      ((ept_entry)->epte & 0x7)
 #define is_epte_superpage(ept_entry)    ((ept_entry)->sp)
@@ -46,6 +44,47 @@ static inline bool_t is_epte_valid(ept_entry_t *e)
     return (e->epte != 0 && e->sa_p2mt != p2m_invalid);
 }
 
+/* returns : 0 for success, -errno otherwise */
+static int atomic_write_ept_entry(ept_entry_t *entryptr, ept_entry_t new,
+                                  int level)
+{
+    bool_t same_mfn = (new.mfn == entryptr->mfn);
+    unsigned long oldmfn = INVALID_MFN;
+
+    if ( level )
+    {
+        ASSERT(!p2m_is_foreign(new.sa_p2mt));
+        write_atomic(&entryptr->epte, new.epte);
+        return 0;
+    }
+
+    if ( unlikely(p2m_is_foreign(new.sa_p2mt)) && !same_mfn )
+    {
+        struct domain *fdom;
+
+        if ( !mfn_valid(new.mfn) )
+            return -EINVAL;
+
+        fdom = page_get_owner(mfn_to_page(new.mfn));
+        if ( fdom == NULL )
+            return -ESRCH;
+
+        /* get refcount on the page */
+        if ( !get_page(mfn_to_page(new.mfn), fdom) )
+            return -EBUSY;
+    }
+
+    if ( unlikely(p2m_is_foreign(entryptr->sa_p2mt)) && !same_mfn )
+        oldmfn = entryptr->mfn;
+
+    write_atomic(&entryptr->epte, new.epte);
+
+    if ( unlikely(oldmfn != INVALID_MFN) )
+        put_page(mfn_to_page(oldmfn));
+
+    return 0;
+}
+
 static void ept_p2m_type_to_flags(ept_entry_t *entry, p2m_type_t type, p2m_access_t access)
 {
     /* First apply type permissions */
@@ -271,7 +310,7 @@ static int ept_next_level(struct p2m_domain *p2m, bool_t read_only,
     return GUEST_TABLE_NORMAL_PAGE;
 }
 
-static bool_t ept_invalidate_emt(mfn_t mfn)
+static bool_t ept_invalidate_emt(mfn_t mfn, int level)
 {
     ept_entry_t *epte = map_domain_page(mfn_x(mfn));
     unsigned int i;
@@ -286,7 +325,7 @@ static bool_t ept_invalidate_emt(mfn_t mfn)
             continue;
 
         e.emt = MTRR_NUM_TYPES;
-        atomic_write_ept_entry(&epte[i], e);
+        atomic_write_ept_entry(&epte[i], e, level);
         changed = 1;
     }
 
@@ -341,7 +380,7 @@ bool_t ept_handle_misconfig(uint64_t gpa)
                                                _mfn(e.mfn), 0, &ipat,
                                                e.sa_p2mt == p2m_mmio_direct);
                     e.ipat = ipat;
-                    atomic_write_ept_entry(&epte[i], e);
+                    atomic_write_ept_entry(&epte[i], e, level);
                 }
             }
             else
@@ -353,7 +392,7 @@ bool_t ept_handle_misconfig(uint64_t gpa)
                 {
                     if ( ept_split_super_page(p2m, &e, level, level - 1) )
                     {
-                        atomic_write_ept_entry(&epte[i], e);
+                        atomic_write_ept_entry(&epte[i], e, level);
                         unmap_domain_page(epte);
                         mfn = e.mfn;
                         continue;
@@ -364,7 +403,7 @@ bool_t ept_handle_misconfig(uint64_t gpa)
                 }
                 e.emt = emt;
                 e.ipat = ipat;
-                atomic_write_ept_entry(&epte[i], e);
+                atomic_write_ept_entry(&epte[i], e, level);
             }
 
             okay = 1;
@@ -374,10 +413,10 @@ bool_t ept_handle_misconfig(uint64_t gpa)
         if ( e.emt == MTRR_NUM_TYPES )
         {
             ASSERT(is_epte_present(&e));
-            ept_invalidate_emt(_mfn(e.mfn));
+            ept_invalidate_emt(_mfn(e.mfn), level);
             smp_wmb();
             e.emt = 0;
-            atomic_write_ept_entry(&epte[i], e);
+            atomic_write_ept_entry(&epte[i], e, level);
             unmap_domain_page(epte);
             okay = 1;
         }
@@ -444,6 +483,7 @@ ept_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
     ASSERT((target == 2 && hvm_hap_has_1gb()) ||
            (target == 1 && hvm_hap_has_2mb()) ||
            (target == 0));
+    ASSERT(!p2m_is_foreign(p2mt) || target == 0);
 
     table = map_domain_page(pagetable_get_pfn(p2m_get_pagetable(p2m)));
 
@@ -507,7 +547,7 @@ ept_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
 
         /* now install the newly split ept sub-tree */
         /* NB: please make sure domian is paused and no in-fly VT-d DMA. */
-        atomic_write_ept_entry(ept_entry, split_ept_entry);
+        atomic_write_ept_entry(ept_entry, split_ept_entry, i);
 
         /* then move to the level we want to make real changes */
         for ( ; i > target; i-- )
@@ -546,10 +586,10 @@ ept_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
         ept_p2m_type_to_flags(&new_entry, p2mt, p2ma);
     }
 
-    atomic_write_ept_entry(ept_entry, new_entry);
+    rc = atomic_write_ept_entry(ept_entry, new_entry, i);
 
     /* Track the highest gfn for which we have ever had a valid mapping */
-    if ( p2mt != p2m_invalid &&
+    if ( rc == 0 && p2mt != p2m_invalid &&
          (gfn + (1UL << order) - 1 > p2m->max_mapped_pfn) )
         p2m->max_mapped_pfn = gfn + (1UL << order) - 1;
 
@@ -581,7 +621,7 @@ out:
        last thing we do, after the ept_sync_domain() and removal
        from the iommu tables, so as to avoid a potential
        use-after-free. */
-    if ( is_epte_present(&old_entry) )
+    if ( rc == 0 && is_epte_present(&old_entry) )
         ept_free_entry(p2m, &old_entry, target);
 
     return rc;
@@ -761,7 +801,7 @@ static void ept_change_entry_type_page(mfn_t ept_page_mfn, int ept_page_level,
 
             e.sa_p2mt = nt;
             ept_p2m_type_to_flags(&e, nt, e.access);
-            atomic_write_ept_entry(&epte[i], e);
+            atomic_write_ept_entry(&epte[i], e, ept_page_level);
         }
     }
 
@@ -792,7 +832,7 @@ static void ept_memory_type_changed(struct p2m_domain *p2m)
     if ( !mfn )
         return;
 
-    if ( ept_invalidate_emt(_mfn(mfn)) )
+    if ( ept_invalidate_emt(_mfn(mfn), ept_get_wl(&p2m->ept)) )
         ept_sync_domain(p2m);
 }
 
diff --git a/xen/arch/x86/mm/p2m-pt.c b/xen/arch/x86/mm/p2m-pt.c
index 56a1593..1bbf0fe 100644
--- a/xen/arch/x86/mm/p2m-pt.c
+++ b/xen/arch/x86/mm/p2m-pt.c
@@ -308,6 +308,13 @@ p2m_pt_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
         __trace_var(TRC_MEM_SET_P2M_ENTRY, 0, sizeof(t), &t);
     }
 
+    if ( p2m_is_foreign(p2mt) )
+    {
+        /* pvh fixme: foreign types are only supported on ept at present */
+        gdprintk(XENLOG_WARNING, "Unimplemented foreign p2m type.\n");
+        return -EINVAL;
+    }
+
     table = map_domain_page(mfn_x(pagetable_get_mfn(p2m_get_pagetable(p2m))));
     rc = p2m_next_level(p2m, &table, &gfn_remainder, gfn,
                         L4_PAGETABLE_SHIFT - PAGE_SHIFT,
diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
index 5923968..b1e23b0 100644
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -36,6 +36,7 @@
 #include <xen/event.h>
 #include <asm/hvm/nestedhvm.h>
 #include <asm/hvm/svm/amd-iommu-proto.h>
+#include <xsm/xsm.h>
 
 #include "mm-locks.h"
 
@@ -287,14 +288,20 @@ struct page_info *get_page_from_gfn_p2m(
         /* Fast path: look up and get out */
         p2m_read_lock(p2m);
         mfn = __get_gfn_type_access(p2m, gfn, t, a, 0, NULL, 0);
-        if ( (p2m_is_ram(*t) || p2m_is_grant(*t))
-             && mfn_valid(mfn)
+        if ( p2m_is_any_ram(*t) && mfn_valid(mfn)
              && !((q & P2M_UNSHARE) && p2m_is_shared(*t)) )
         {
             page = mfn_to_page(mfn);
-            if ( !get_page(page, d)
-                 /* Page could be shared */
-                 && !get_page(page, dom_cow) )
+            if ( p2m_is_foreign(*t) )
+            {
+                struct domain *fdom = page_get_owner_and_reference(page);
+                ASSERT(fdom != d);
+                if ( fdom == NULL )
+                    page = NULL;
+            }
+            else if ( !get_page(page, d)
+                      /* Page could be shared */
+                      && !get_page(page, dom_cow) )
                 page = NULL;
         }
         p2m_read_unlock(p2m);
@@ -444,6 +451,10 @@ int p2m_alloc_table(struct p2m_domain *p2m)
     return rc;
 }
 
+/*
+ * pvh fixme: when adding support for pvh non-hardware domains, this path must
+ * cleanup any foreign p2m types (release refcnts on them).
+ */
 void p2m_teardown(struct p2m_domain *p2m)
 /* Return all the p2m pages to Xen.
  * We know we don't have any extra mappings to these pages */
@@ -795,8 +806,8 @@ static int set_typed_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
 }
 
 /* Set foreign mfn in the given guest's p2m table. */
-static int __attribute__((unused))
-set_foreign_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn)
+static int set_foreign_p2m_entry(struct domain *d, unsigned long gfn,
+                                 mfn_t mfn)
 {
     return set_typed_p2m_entry(d, gfn, mfn, p2m_map_foreign);
 }
@@ -1757,6 +1768,103 @@ out_p2m_audit:
 #endif /* P2M_AUDIT */
 
 /*
+ * Add frame from foreign domain to target domain's physmap. Similar to
+ * XENMAPSPACE_gmfn but the frame is foreign being mapped into current,
+ * and is not removed from foreign domain.
+ *
+ * Usage: - libxl on pvh dom0 creating a guest and doing privcmd_ioctl_mmap.
+ *        - xentrace running on dom0 mapping xenheap pages. foreigndom would
+ *          be DOMID_XEN in such a case.
+ *        etc..
+ *
+ * Side Effect: the mfn for fgfn will be refcounted in lower level routines
+ *              so it is not lost while mapped here. The refcnt is released
+ *              via the XENMEM_remove_from_physmap path.
+ *
+ * Returns: 0 ==> success
+ */
+int p2m_add_foreign(struct domain *tdom, unsigned long fgfn,
+                    unsigned long gpfn, domid_t foreigndom)
+{
+    p2m_type_t p2mt, p2mt_prev;
+    unsigned long prev_mfn, mfn;
+    struct page_info *page;
+    int rc = -EINVAL;
+    struct domain *fdom = NULL;
+
+    ASSERT(tdom);
+    if ( foreigndom == DOMID_SELF || !is_pvh_domain(tdom) )
+        return -EINVAL;
+
+    /*
+     * pvh fixme: until support is added to p2m teardown code to cleanup any
+     * foreign entries, limit this to hardware domain only.
+     */
+    if ( !is_hardware_domain(tdom) )
+        return -EPERM;
+
+    fdom = get_pg_owner(foreigndom);
+    if ( fdom == NULL )
+        return -ESRCH;
+
+    if ( tdom == fdom )
+        goto out;
+
+    rc = xsm_map_gmfn_foreign(XSM_TARGET, tdom, fdom);
+    if ( rc )
+        goto out;
+
+    /*
+     * Take a refcnt on the mfn. NB: following supported for foreign mapping:
+     *     ram_rw | ram_logdirty | ram_ro | paging_out.
+     */
+    page = get_page_from_gfn(fdom, fgfn, &p2mt, P2M_ALLOC);
+    if ( !page ||
+         !p2m_is_ram(p2mt) || p2m_is_shared(p2mt) || p2m_is_hole(p2mt) )
+    {
+        if ( page )
+            put_page(page);
+        goto out;
+    }
+    mfn = mfn_x(page_to_mfn(page));
+
+    /* Remove previously mapped page if it is present. */
+    prev_mfn = mfn_x(get_gfn(tdom, gpfn, &p2mt_prev));
+    if ( mfn_valid(_mfn(prev_mfn)) )
+    {
+        if ( is_xen_heap_mfn(prev_mfn) )
+            /* Xen heap frames are simply unhooked from this phys slot */
+            guest_physmap_remove_page(tdom, gpfn, prev_mfn, 0);
+        else
+            /* Normal domain memory is freed, to avoid leaking memory. */
+            guest_remove_page(tdom, gpfn);
+    }
+    /*
+     * Create the new mapping. Can't use guest_physmap_add_page() because it
+     * will update the m2p table which will result in  mfn -> gpfn of dom0
+     * and not fgfn of domU.
+     */
+    rc = set_foreign_p2m_entry(tdom, gpfn, _mfn(mfn));
+    if ( rc )
+        gdprintk(XENLOG_WARNING, "set_foreign_p2m_entry failed. "
+                 "gpfn:%lx mfn:%lx fgfn:%lx td:%d fd:%d\n",
+                 gpfn, mfn, fgfn, tdom->domain_id, fdom->domain_id);
+
+    put_page(page);
+
+    /*
+     * This put_gfn for the above get_gfn for prev_mfn.  We must do this
+     * after set_foreign_p2m_entry so another cpu doesn't populate the gpfn
+     * before us.
+     */
+    put_gfn(tdom, gpfn);
+
+out:
+    if ( fdom )
+        put_pg_owner(fdom);
+    return rc;
+}
+/*
  * Local variables:
  * mode: C
  * c-file-style: "BSD"
diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
index 7059adc..6bf6e72 100644
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -358,6 +358,8 @@ int  put_old_guest_table(struct vcpu *);
 int  get_page_from_l1e(
     l1_pgentry_t l1e, struct domain *l1e_owner, struct domain *pg_owner);
 void put_page_from_l1e(l1_pgentry_t l1e, struct domain *l1e_owner);
+struct domain *get_pg_owner(domid_t domid);
+void put_pg_owner(struct domain *pg_owner);
 
 static inline void put_page_and_type(struct page_info *page)
 {
diff --git a/xen/include/asm-x86/p2m.h b/xen/include/asm-x86/p2m.h
index 86847e9..72517d2 100644
--- a/xen/include/asm-x86/p2m.h
+++ b/xen/include/asm-x86/p2m.h
@@ -183,6 +183,10 @@ typedef unsigned int p2m_query_t;
 #define p2m_is_broken(_t)   (p2m_to_mask(_t) & P2M_BROKEN_TYPES)
 #define p2m_is_foreign(_t)  (p2m_to_mask(_t) & p2m_to_mask(p2m_map_foreign))
 
+#define p2m_is_any_ram(_t)  (p2m_to_mask(_t) &                   \
+                             (P2M_RAM_TYPES | P2M_GRANT_TYPES |  \
+                              p2m_to_mask(p2m_map_foreign)))
+
 /* Per-p2m-table state */
 struct p2m_domain {
     /* Lock that protects updates to the p2m */
@@ -515,6 +519,9 @@ void p2m_memory_type_changed(struct domain *d);
 int set_mmio_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn);
 int clear_mmio_p2m_entry(struct domain *d, unsigned long gfn);
 
+/* Add foreign mapping to the guest's p2m table. */
+int p2m_add_foreign(struct domain *tdom, unsigned long fgfn,
+                    unsigned long gpfn, domid_t foreign_domid);
 
 /* 
  * Populate-on-demand
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [V11 PATCH 4/4] dom0: add opt_dom0pvh to setup.c
  2014-05-02  1:55 [V11 PATCH 0/4] pvh dom0 patches Mukesh Rathor
                   ` (2 preceding siblings ...)
  2014-05-02  1:55 ` [V11 PATCH 3/4] pvh dom0: Add and remove foreign pages Mukesh Rathor
@ 2014-05-02  1:55 ` Mukesh Rathor
  3 siblings, 0 replies; 12+ messages in thread
From: Mukesh Rathor @ 2014-05-02  1:55 UTC (permalink / raw)
  To: xen-devel; +Cc: George.Dunlap, keir.xen, tim, JBeulich

Finally last patch in the series to enable creation of pvh dom0.
A pvh dom0 is created by adding dom0pvh to grub xen command line.

Signed-off-by: Mukesh Rathor <mukesh.rathor@oracle.com>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 docs/misc/pvh-readme.txt            |  2 ++
 docs/misc/xen-command-line.markdown |  7 +++++++
 xen/arch/x86/setup.c                | 11 +++++++++--
 3 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/docs/misc/pvh-readme.txt b/docs/misc/pvh-readme.txt
index 9fea137..c5b3de4 100644
--- a/docs/misc/pvh-readme.txt
+++ b/docs/misc/pvh-readme.txt
@@ -37,6 +37,8 @@ supported. Phase I patches are broken into three parts:
    - tools changes for creating a PVH guest
    - boot of 64bit dom0 in PVH mode.
 
+To boot 64bit dom0 in PVH mode, add dom0pvh to grub xen command line.
+
 Following fixme's exist in the code:
   - arch/x86/time.c: support more tsc modes.
 
diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown
index 7dc938b..d248c62 100644
--- a/docs/misc/xen-command-line.markdown
+++ b/docs/misc/xen-command-line.markdown
@@ -494,6 +494,13 @@ Practices](http://wiki.xen.org/wiki/Xen_Best_Practices#Xen_dom0_dedicated_memory
 
 Pin dom0 vcpus to their respective pcpus
 
+### dom0pvh
+> `= <boolean>`
+
+> Default: `false`
+
+Flag that makes a 64bit dom0 boot in PVH mode. No 32bit support at present.
+
 ### e820-mtrr-clip
 > `= <boolean>`
 
diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
index 2e30701..373e236 100644
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -61,6 +61,10 @@ integer_param("maxcpus", max_cpus);
 static bool_t __initdata disable_smep;
 invbool_param("smep", disable_smep);
 
+/* Boot dom0 in pvh mode */
+static bool_t __initdata opt_dom0pvh;
+boolean_param("dom0pvh", opt_dom0pvh);
+
 /* **** Linux config option: propagated to domain0. */
 /* "acpi=off":    Sisables both ACPI table parsing and interpreter. */
 /* "acpi=force":  Override the disable blacklist.                   */
@@ -541,7 +545,7 @@ void __init noreturn __start_xen(unsigned long mbi_p)
 {
     char *memmap_type = NULL;
     char *cmdline, *kextra, *loader;
-    unsigned int initrdidx;
+    unsigned int initrdidx, domcr_flags = DOMCRF_s3_integrity;
     multiboot_info_t *mbi = __va(mbi_p);
     module_t *mod = (module_t *)__va(mbi->mods_addr);
     unsigned long nr_pages, raw_max_page, modules_headroom, *module_map;
@@ -1338,8 +1342,11 @@ void __init noreturn __start_xen(unsigned long mbi_p)
     if ( !tboot_protect_mem_regions() )
         panic("Could not protect TXT memory regions");
 
+    if ( opt_dom0pvh )
+        domcr_flags |= DOMCRF_pvh | DOMCRF_hap;
+
     /* Create initial domain 0. */
-    dom0 = domain_create(0, DOMCRF_s3_integrity, 0);
+    dom0 = domain_create(0, domcr_flags, 0);
     if ( IS_ERR(dom0) || (alloc_dom0_vcpu0(dom0) == NULL) )
         panic("Error creating domain 0");
 
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [V11 PATCH 3/4] pvh dom0: Add and remove foreign pages
  2014-05-02  1:55 ` [V11 PATCH 3/4] pvh dom0: Add and remove foreign pages Mukesh Rathor
@ 2014-05-05 10:56   ` Jan Beulich
  2014-05-06  1:21     ` Mukesh Rathor
  2014-05-07 15:36   ` Roger Pau Monné
  1 sibling, 1 reply; 12+ messages in thread
From: Jan Beulich @ 2014-05-05 10:56 UTC (permalink / raw)
  To: Mukesh Rathor
  Cc: George.Dunlap, tim, eddie.dong, keir.xen, jun.nakajima, xen-devel

>>> On 02.05.14 at 03:55, <mukesh.rathor@oracle.com> wrote:
> @@ -4584,6 +4584,9 @@ int xenmem_add_to_physmap_one(
>              page = mfn_to_page(mfn);
>              break;
>          }
> +        case XENMAPSPACE_gmfn_foreign:
> +            rc = p2m_add_foreign(d, idx, gpfn, foreign_domid);
> +            return rc;

Any reason not to simply "return p2m_add_foreign();"?

> +static int atomic_write_ept_entry(ept_entry_t *entryptr, ept_entry_t new,
> +                                  int level)
> +{
> +    bool_t same_mfn = (new.mfn == entryptr->mfn);
> +    unsigned long oldmfn = INVALID_MFN;
> +
> +    if ( level )
> +    {
> +        ASSERT(!p2m_is_foreign(new.sa_p2mt));
> +        write_atomic(&entryptr->epte, new.epte);
> +        return 0;
> +    }
> +
> +    if ( unlikely(p2m_is_foreign(new.sa_p2mt)) && !same_mfn )
> +    {
> +        struct domain *fdom;
> +
> +        if ( !mfn_valid(new.mfn) )
> +            return -EINVAL;
> +
> +        fdom = page_get_owner(mfn_to_page(new.mfn));
> +        if ( fdom == NULL )
> +            return -ESRCH;
> +
> +        /* get refcount on the page */
> +        if ( !get_page(mfn_to_page(new.mfn), fdom) )
> +            return -EBUSY;
> +    }
> +
> +    if ( unlikely(p2m_is_foreign(entryptr->sa_p2mt)) && !same_mfn )
> +        oldmfn = entryptr->mfn;

I'm afraid this "same_mfn" handling is correct only if sa_p2mt also
does not change.

> @@ -287,14 +288,20 @@ struct page_info *get_page_from_gfn_p2m(
>          /* Fast path: look up and get out */
>          p2m_read_lock(p2m);
>          mfn = __get_gfn_type_access(p2m, gfn, t, a, 0, NULL, 0);
> -        if ( (p2m_is_ram(*t) || p2m_is_grant(*t))
> -             && mfn_valid(mfn)
> +        if ( p2m_is_any_ram(*t) && mfn_valid(mfn)
>               && !((q & P2M_UNSHARE) && p2m_is_shared(*t)) )
>          {
>              page = mfn_to_page(mfn);
> -            if ( !get_page(page, d)
> -                 /* Page could be shared */
> -                 && !get_page(page, dom_cow) )
> +            if ( p2m_is_foreign(*t) )

I think here (and possible elsewhere) use of unlikely() is desirable.

> @@ -444,6 +451,10 @@ int p2m_alloc_table(struct p2m_domain *p2m)
>      return rc;
>  }
>  
> +/*
> + * pvh fixme: when adding support for pvh non-hardware domains, this path must
> + * cleanup any foreign p2m types (release refcnts on them).
> + */

And I wonder whether you shouldn't enforce this by disallowing non-
hardware domains to create foreign mappings.

> +int p2m_add_foreign(struct domain *tdom, unsigned long fgfn,
> +                    unsigned long gpfn, domid_t foreigndom)
> +{
> +    p2m_type_t p2mt, p2mt_prev;
> +    unsigned long prev_mfn, mfn;
> +    struct page_info *page;
> +    int rc = -EINVAL;
> +    struct domain *fdom = NULL;

Two pointless initializers.

Jan

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [V11 PATCH 3/4] pvh dom0: Add and remove foreign pages
  2014-05-05 10:56   ` Jan Beulich
@ 2014-05-06  1:21     ` Mukesh Rathor
  2014-05-06  7:51       ` Jan Beulich
  0 siblings, 1 reply; 12+ messages in thread
From: Mukesh Rathor @ 2014-05-06  1:21 UTC (permalink / raw)
  To: Jan Beulich
  Cc: George.Dunlap, tim, eddie.dong, keir.xen, jun.nakajima, xen-devel

On Mon, 05 May 2014 11:56:05 +0100
"Jan Beulich" <JBeulich@suse.com> wrote:

> >>> On 02.05.14 at 03:55, <mukesh.rathor@oracle.com> wrote:
> > @@ -4584,6 +4584,9 @@ int xenmem_add_to_physmap_one(
> >              page = mfn_to_page(mfn);
> >              break;
> >          }
> > +        case XENMAPSPACE_gmfn_foreign:
> > +            rc = p2m_add_foreign(d, idx, gpfn, foreign_domid);
> > +            return rc;
> 
> Any reason not to simply "return p2m_add_foreign();"?

Could, rc allows quick adding of printk for debugging. But, 
I will change it.

> > +static int atomic_write_ept_entry(ept_entry_t *entryptr,
> > ept_entry_t new,
> > +                                  int level)
> > +{
> > +    bool_t same_mfn = (new.mfn == entryptr->mfn);
> > +    unsigned long oldmfn = INVALID_MFN;
> > +
> > +    if ( level )
> > +    {
> > +        ASSERT(!p2m_is_foreign(new.sa_p2mt));
> > +        write_atomic(&entryptr->epte, new.epte);
> > +        return 0;
> > +    }
> > +
> > +    if ( unlikely(p2m_is_foreign(new.sa_p2mt)) && !same_mfn )
> > +    {
> > +        struct domain *fdom;
> > +
> > +        if ( !mfn_valid(new.mfn) )
> > +            return -EINVAL;
> > +
> > +        fdom = page_get_owner(mfn_to_page(new.mfn));
> > +        if ( fdom == NULL )
> > +            return -ESRCH;
> > +
> > +        /* get refcount on the page */
> > +        if ( !get_page(mfn_to_page(new.mfn), fdom) )
> > +            return -EBUSY;
> > +    }
> > +
> > +    if ( unlikely(p2m_is_foreign(entryptr->sa_p2mt)) && !same_mfn )
> > +        oldmfn = entryptr->mfn;
> 
> I'm afraid this "same_mfn" handling is correct only if sa_p2mt also
> does not change.

Yup, my bad. Will fix it, thanks.

> > @@ -287,14 +288,20 @@ struct page_info *get_page_from_gfn_p2m(
> >          /* Fast path: look up and get out */
> >          p2m_read_lock(p2m);
> >          mfn = __get_gfn_type_access(p2m, gfn, t, a, 0, NULL, 0);
> > -        if ( (p2m_is_ram(*t) || p2m_is_grant(*t))
> > -             && mfn_valid(mfn)
> > +        if ( p2m_is_any_ram(*t) && mfn_valid(mfn)
> >               && !((q & P2M_UNSHARE) && p2m_is_shared(*t)) )
> >          {
> >              page = mfn_to_page(mfn);
> > -            if ( !get_page(page, d)
> > -                 /* Page could be shared */
> > -                 && !get_page(page, dom_cow) )
> > +            if ( p2m_is_foreign(*t) )
> 
> I think here (and possible elsewhere) use of unlikely() is desirable.

Ok.

> > @@ -444,6 +451,10 @@ int p2m_alloc_table(struct p2m_domain *p2m)
> >      return rc;
> >  }
> >  
> > +/*
> > + * pvh fixme: when adding support for pvh non-hardware domains,
> > this path must
> > + * cleanup any foreign p2m types (release refcnts on them).
> > + */
> 
> And I wonder whether you shouldn't enforce this by disallowing non-
> hardware domains to create foreign mappings.

Hmm... Tim wanted the enforcement. That will ensure the cleanup is
implemented without falling thru the cracks.

thanks
Mukesh

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [V11 PATCH 3/4] pvh dom0: Add and remove foreign pages
  2014-05-06  1:21     ` Mukesh Rathor
@ 2014-05-06  7:51       ` Jan Beulich
  2014-05-07  1:12         ` Mukesh Rathor
  0 siblings, 1 reply; 12+ messages in thread
From: Jan Beulich @ 2014-05-06  7:51 UTC (permalink / raw)
  To: Mukesh Rathor
  Cc: George.Dunlap, tim, eddie.dong, keir.xen, jun.nakajima, xen-devel

>>> On 06.05.14 at 03:21, <mukesh.rathor@oracle.com> wrote:
> "Jan Beulich" <JBeulich@suse.com> wrote:
>> >>> On 02.05.14 at 03:55, <mukesh.rathor@oracle.com> wrote:
>> > @@ -444,6 +451,10 @@ int p2m_alloc_table(struct p2m_domain *p2m)
>> >      return rc;
>> >  }
>> >  
>> > +/*
>> > + * pvh fixme: when adding support for pvh non-hardware domains,
>> > this path must
>> > + * cleanup any foreign p2m types (release refcnts on them).
>> > + */
>> 
>> And I wonder whether you shouldn't enforce this by disallowing non-
>> hardware domains to create foreign mappings.
> 
> Hmm... Tim wanted the enforcement. That will ensure the cleanup is
> implemented without falling thru the cracks.

I'm confused - aren't you stating that Tim requested the same I did?
Or else, what "enforcement" are you referring to?

Jan

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [V11 PATCH 3/4] pvh dom0: Add and remove foreign pages
  2014-05-06  7:51       ` Jan Beulich
@ 2014-05-07  1:12         ` Mukesh Rathor
  2014-05-07  7:27           ` Jan Beulich
  0 siblings, 1 reply; 12+ messages in thread
From: Mukesh Rathor @ 2014-05-07  1:12 UTC (permalink / raw)
  To: Jan Beulich
  Cc: George.Dunlap, tim, eddie.dong, keir.xen, jun.nakajima, xen-devel

On Tue, 06 May 2014 08:51:24 +0100
"Jan Beulich" <JBeulich@suse.com> wrote:

> >>> On 06.05.14 at 03:21, <mukesh.rathor@oracle.com> wrote:
> > "Jan Beulich" <JBeulich@suse.com> wrote:
> >> >>> On 02.05.14 at 03:55, <mukesh.rathor@oracle.com> wrote:
> >> > @@ -444,6 +451,10 @@ int p2m_alloc_table(struct p2m_domain *p2m)
> >> >      return rc;
> >> >  }
> >> >  
> >> > +/*
> >> > + * pvh fixme: when adding support for pvh non-hardware domains,
> >> > this path must
> >> > + * cleanup any foreign p2m types (release refcnts on them).
> >> > + */
> >> 
> >> And I wonder whether you shouldn't enforce this by disallowing non-
> >> hardware domains to create foreign mappings.
> > 
> > Hmm... Tim wanted the enforcement. That will ensure the cleanup is
> > implemented without falling thru the cracks.
> 
> I'm confused - aren't you stating that Tim requested the same I did?
> Or else, what "enforcement" are you referring to?

Yes, Tim requested enforcement which I added in p2m_add_foreign():

+    /*
+     * pvh fixme: until support is added to p2m teardown code to cleanup any
+     * foreign entries, limit this to hardware domain only.
+     */
+    if ( !is_hardware_domain(tdom) )
+        return -EPERM;

Thus, non-hardware domains can't map foreign types. Not sure if there's
anything worth enforcing in p2m_teardown with above.

I'm not sure I understand what you think "I shouldn't enforce".

Mukesh

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [V11 PATCH 3/4] pvh dom0: Add and remove foreign pages
  2014-05-07  1:12         ` Mukesh Rathor
@ 2014-05-07  7:27           ` Jan Beulich
  0 siblings, 0 replies; 12+ messages in thread
From: Jan Beulich @ 2014-05-07  7:27 UTC (permalink / raw)
  To: Mukesh Rathor
  Cc: George.Dunlap, tim, eddie.dong, keir.xen, jun.nakajima, xen-devel

>>> On 07.05.14 at 03:12, <mukesh.rathor@oracle.com> wrote:
> On Tue, 06 May 2014 08:51:24 +0100
> "Jan Beulich" <JBeulich@suse.com> wrote:
> 
>> >>> On 06.05.14 at 03:21, <mukesh.rathor@oracle.com> wrote:
>> > "Jan Beulich" <JBeulich@suse.com> wrote:
>> >> >>> On 02.05.14 at 03:55, <mukesh.rathor@oracle.com> wrote:
>> >> > @@ -444,6 +451,10 @@ int p2m_alloc_table(struct p2m_domain *p2m)
>> >> >      return rc;
>> >> >  }
>> >> >  
>> >> > +/*
>> >> > + * pvh fixme: when adding support for pvh non-hardware domains,
>> >> > this path must
>> >> > + * cleanup any foreign p2m types (release refcnts on them).
>> >> > + */
>> >> 
>> >> And I wonder whether you shouldn't enforce this by disallowing non-
>> >> hardware domains to create foreign mappings.
>> > 
>> > Hmm... Tim wanted the enforcement. That will ensure the cleanup is
>> > implemented without falling thru the cracks.
>> 
>> I'm confused - aren't you stating that Tim requested the same I did?
>> Or else, what "enforcement" are you referring to?
> 
> Yes, Tim requested enforcement which I added in p2m_add_foreign():
> 
> +    /*
> +     * pvh fixme: until support is added to p2m teardown code to cleanup any
> +     * foreign entries, limit this to hardware domain only.
> +     */
> +    if ( !is_hardware_domain(tdom) )
> +        return -EPERM;
> 
> Thus, non-hardware domains can't map foreign types. Not sure if there's
> anything worth enforcing in p2m_teardown with above.

I must have overlooked this - this is exactly what I was after. Sorry
for the noise.

Jan

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [V11 PATCH 3/4] pvh dom0: Add and remove foreign pages
  2014-05-02  1:55 ` [V11 PATCH 3/4] pvh dom0: Add and remove foreign pages Mukesh Rathor
  2014-05-05 10:56   ` Jan Beulich
@ 2014-05-07 15:36   ` Roger Pau Monné
  1 sibling, 0 replies; 12+ messages in thread
From: Roger Pau Monné @ 2014-05-07 15:36 UTC (permalink / raw)
  To: Mukesh Rathor, xen-devel
  Cc: JBeulich, George.Dunlap, tim, eddie.dong, keir.xen, jun.nakajima

On 02/05/14 03:55, Mukesh Rathor wrote:
> +int p2m_add_foreign(struct domain *tdom, unsigned long fgfn,
> +                    unsigned long gpfn, domid_t foreigndom)
> +{
> +    p2m_type_t p2mt, p2mt_prev;
> +    unsigned long prev_mfn, mfn;
> +    struct page_info *page;
> +    int rc = -EINVAL;
> +    struct domain *fdom = NULL;
> +
> +    ASSERT(tdom);
> +    if ( foreigndom == DOMID_SELF || !is_pvh_domain(tdom) )
> +        return -EINVAL;
> +
> +    /*
> +     * pvh fixme: until support is added to p2m teardown code to cleanup any
> +     * foreign entries, limit this to hardware domain only.
> +     */
> +    if ( !is_hardware_domain(tdom) )
> +        return -EPERM;
> +
> +    fdom = get_pg_owner(foreigndom);
> +    if ( fdom == NULL )
> +        return -ESRCH;
> +
> +    if ( tdom == fdom )
> +        goto out;
> +
> +    rc = xsm_map_gmfn_foreign(XSM_TARGET, tdom, fdom);
> +    if ( rc )
> +        goto out;
> +
> +    /*
> +     * Take a refcnt on the mfn. NB: following supported for foreign mapping:
> +     *     ram_rw | ram_logdirty | ram_ro | paging_out.
> +     */
> +    page = get_page_from_gfn(fdom, fgfn, &p2mt, P2M_ALLOC);
> +    if ( !page ||
> +         !p2m_is_ram(p2mt) || p2m_is_shared(p2mt) || p2m_is_hole(p2mt) )
> +    {
> +        if ( page )
> +            put_page(page);

This is missing setting rc to an error value, something like the
following is needed:

rc = -EINVAL;

Roger.

> +        goto out;
> +    }
> +    mfn = mfn_x(page_to_mfn(page));
> +
> +    /* Remove previously mapped page if it is present. */
> +    prev_mfn = mfn_x(get_gfn(tdom, gpfn, &p2mt_prev));
> +    if ( mfn_valid(_mfn(prev_mfn)) )
> +    {
> +        if ( is_xen_heap_mfn(prev_mfn) )
> +            /* Xen heap frames are simply unhooked from this phys slot */
> +            guest_physmap_remove_page(tdom, gpfn, prev_mfn, 0);
> +        else
> +            /* Normal domain memory is freed, to avoid leaking memory. */
> +            guest_remove_page(tdom, gpfn);
> +    }
> +    /*
> +     * Create the new mapping. Can't use guest_physmap_add_page() because it
> +     * will update the m2p table which will result in  mfn -> gpfn of dom0
> +     * and not fgfn of domU.
> +     */
> +    rc = set_foreign_p2m_entry(tdom, gpfn, _mfn(mfn));
> +    if ( rc )
> +        gdprintk(XENLOG_WARNING, "set_foreign_p2m_entry failed. "
> +                 "gpfn:%lx mfn:%lx fgfn:%lx td:%d fd:%d\n",
> +                 gpfn, mfn, fgfn, tdom->domain_id, fdom->domain_id);
> +
> +    put_page(page);
> +
> +    /*
> +     * This put_gfn for the above get_gfn for prev_mfn.  We must do this
> +     * after set_foreign_p2m_entry so another cpu doesn't populate the gpfn
> +     * before us.
> +     */
> +    put_gfn(tdom, gpfn);
> +
> +out:
> +    if ( fdom )
> +        put_pg_owner(fdom);
> +    return rc;
> +}

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [V11 PATCH 1/4] pvh dom0: construct_dom0 changes
  2014-05-02  1:55 ` [V11 PATCH 1/4] pvh dom0: construct_dom0 changes Mukesh Rathor
@ 2014-05-08 12:05   ` Tim Deegan
  0 siblings, 0 replies; 12+ messages in thread
From: Tim Deegan @ 2014-05-08 12:05 UTC (permalink / raw)
  To: Mukesh Rathor; +Cc: George.Dunlap, xen-devel, keir.xen, JBeulich

At 18:55 -0700 on 01 May (1398966904), Mukesh Rathor wrote:
> This patch changes construct_dom0() to boot in pvh mode:
>   - Make sure dom0 elf supports pvh mode.
>   - Call guest_physmap_add_page for pvh rather than simple p2m setting
>   - Map all non-RAM regions 1:1 upto the end region in e820 or 4GB which
>     ever is higher.
>   - Allocate p2m, copying calculation from toolstack.
>   - Allocate shared info page from the virtual space so that dom0 PT
>     can be updated. Then update p2m for it with the actual mfn.
>   - Since we build the page tables for pvh same as for pv, in
>     pvh_fixup_page_tables_for_hap we replace the mfns with pfns.
> 
> Signed-off-by: Mukesh Rathor <mukesh.rathor@oracle.com>
> Reviewed-by: Jan Beulich <jbeulich@suse.com>

For the x86/mm changes (even though I would have preferred that to be
a paging_* API rather than a hap_* one):

Acked-by: Tim Deegan <tim@xen.org>

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2014-05-08 12:05 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-05-02  1:55 [V11 PATCH 0/4] pvh dom0 patches Mukesh Rathor
2014-05-02  1:55 ` [V11 PATCH 1/4] pvh dom0: construct_dom0 changes Mukesh Rathor
2014-05-08 12:05   ` Tim Deegan
2014-05-02  1:55 ` [V11 PATCH 2/4] pvh dom0: Add checks and restrictions for p2m_is_foreign Mukesh Rathor
2014-05-02  1:55 ` [V11 PATCH 3/4] pvh dom0: Add and remove foreign pages Mukesh Rathor
2014-05-05 10:56   ` Jan Beulich
2014-05-06  1:21     ` Mukesh Rathor
2014-05-06  7:51       ` Jan Beulich
2014-05-07  1:12         ` Mukesh Rathor
2014-05-07  7:27           ` Jan Beulich
2014-05-07 15:36   ` Roger Pau Monné
2014-05-02  1:55 ` [V11 PATCH 4/4] dom0: add opt_dom0pvh to setup.c Mukesh Rathor

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.