All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] x86: rework hypercall argument translation area setup
@ 2013-02-21 11:42 Jan Beulich
  2013-02-21 12:08 ` Keir Fraser
  0 siblings, 1 reply; 6+ messages in thread
From: Jan Beulich @ 2013-02-21 11:42 UTC (permalink / raw)
  To: xen-devel

[-- Attachment #1: Type: text/plain, Size: 8305 bytes --]

Rather than using an order-1 Xen heap allocation, use (currently 2)
individual domain heap pages to populate space in the per-domain
mapping area.

Also fix an off-by-one mistake in is_compat_arg_xlat_range().

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -642,6 +642,14 @@ void arch_domain_destroy(struct domain *
             free_domheap_page(d->arch.perdomain_l2_pg[i]);
     free_domheap_page(d->arch.perdomain_l3_pg);
 
+    if ( d->arch.arg_xlat_l1_pg )
+    {
+        for ( i = 0; i < PFN_UP(MAX_VIRT_CPUS << ARG_XLAT_VA_SHIFT); ++i )
+            if ( d->arch.arg_xlat_l1_pg[i] )
+                free_domheap_page(d->arch.arg_xlat_l1_pg[i]);
+        xfree(d->arch.arg_xlat_l1_pg);
+    }
+
     free_xenheap_page(d->shared_info);
     cleanup_domain_irq_mapping(d);
 }
--- a/xen/arch/x86/usercopy.c
+++ b/xen/arch/x86/usercopy.c
@@ -6,8 +6,8 @@
  * Copyright 2002 Andi Kleen <ak@suse.de>
  */
 
-#include <xen/config.h>
 #include <xen/lib.h>
+#include <xen/sched.h>
 #include <asm/uaccess.h>
 
 unsigned long __copy_to_user_ll(void __user *to, const void *from, unsigned n)
--- a/xen/arch/x86/x86_64/mm.c
+++ b/xen/arch/x86/x86_64/mm.c
@@ -832,27 +832,93 @@ void __init zap_low_mappings(void)
                      __PAGE_HYPERVISOR);
 }
 
-void *compat_arg_xlat_virt_base(void)
-{
-    return current->arch.compat_arg_xlat;
-}
-
 int setup_compat_arg_xlat(struct vcpu *v)
 {
-    unsigned int order = get_order_from_bytes(COMPAT_ARG_XLAT_SIZE);
+    struct domain *d = v->domain;
+    struct page_info *pg;
+    unsigned int idx, i;
+    l1_pgentry_t *l1tab;
+
+    if ( !d->arch.perdomain_l2_pg[ARG_XLAT_SLOT] )
+    {
+        l3_pgentry_t *l3tab;
+
+        pg = alloc_domheap_page(NULL, MEMF_node(domain_to_node(d)));
+        if ( !pg )
+            return -ENOMEM;
+        clear_domain_page(page_to_mfn(pg));
+        d->arch.perdomain_l2_pg[ARG_XLAT_SLOT] = pg;
+
+        l3tab = __map_domain_page(d->arch.perdomain_l3_pg);
+        l3tab[l3_table_offset(ARG_XLAT_VIRT_START)] =
+            l3e_from_page(pg, __PAGE_HYPERVISOR);
+        unmap_domain_page(l3tab);
+    }
+
+    BUILD_BUG_ON((MAX_VIRT_CPUS << ARG_XLAT_VA_SHIFT) >
+                 (PERDOMAIN_SLOT_MBYTES << 20));
+
+    if ( !d->arch.arg_xlat_l1_pg )
+        d->arch.arg_xlat_l1_pg = xzalloc_array(struct page_info *,
+                                               PFN_UP(MAX_VIRT_CPUS <<
+                                                      ARG_XLAT_VA_SHIFT));
+    if ( !d->arch.arg_xlat_l1_pg )
+        return -ENOMEM;
+
+    idx = l2_table_offset(ARG_XLAT_START(v));
+    if ( !d->arch.arg_xlat_l1_pg[idx] )
+    {
+        l2_pgentry_t *l2tab;
+
+        pg = alloc_domheap_page(NULL, MEMF_node(domain_to_node(d)));
+        if ( !pg )
+            return -ENOMEM;
+        clear_domain_page(page_to_mfn(pg));
+        d->arch.arg_xlat_l1_pg[idx] = pg;
+
+        l2tab = __map_domain_page(d->arch.perdomain_l2_pg[ARG_XLAT_SLOT]);
+        l2tab[idx] = l2e_from_page(pg, __PAGE_HYPERVISOR);
+        unmap_domain_page(l2tab);
+    }
 
-    v->arch.compat_arg_xlat = alloc_xenheap_pages(order,
-                                                  MEMF_node(vcpu_to_node(v)));
+    l1tab = __map_domain_page(d->arch.arg_xlat_l1_pg[idx]);
+    idx = l1_table_offset(ARG_XLAT_START(v));
+    BUILD_BUG_ON(COMPAT_ARG_XLAT_SIZE > (1UL << ARG_XLAT_VA_SHIFT));
+    for ( i = 0; i < PFN_UP(COMPAT_ARG_XLAT_SIZE); ++i )
+    {
+        pg = alloc_domheap_page(NULL, MEMF_node(domain_to_node(d)));
+        if ( !pg )
+            return -ENOMEM;
+        l1tab[idx + i] = l1e_from_page(pg, __PAGE_HYPERVISOR);
+    }
+    unmap_domain_page(l1tab);
 
-    return v->arch.compat_arg_xlat ? 0 : -ENOMEM;
+    return 0;
 }
 
 void free_compat_arg_xlat(struct vcpu *v)
 {
-    unsigned int order = get_order_from_bytes(COMPAT_ARG_XLAT_SIZE);
+    struct domain *d = v->domain;
+    unsigned long start = ARG_XLAT_START(v);
+    unsigned long va = start + COMPAT_ARG_XLAT_SIZE - 1;
+    l1_pgentry_t *l1tab =
+        __map_domain_page(d->arch.arg_xlat_l1_pg[l2_table_offset(start)]);
+
+    do {
+        l1_pgentry_t l1e = l1tab[l1_table_offset(va)];
+
+        if ( l1e_get_flags(l1e) & _PAGE_PRESENT )
+        {
+            struct page_info *pg = l1e_get_page(l1e);
+
+            l1tab[l1_table_offset(va)] = l1e_empty();
+            flush_tlb_one_mask(d->domain_dirty_cpumask, va);
+            free_domheap_page(pg);
+        }
+        va -= PAGE_SIZE;
+    } while ( va >= start );
 
-    free_xenheap_pages(v->arch.compat_arg_xlat, order);
-    v->arch.compat_arg_xlat = NULL;
+    unmap_domain_page(l1tab);
 }
 
 void cleanup_frame_table(struct mem_hotadd_info *info)
--- a/xen/include/asm-x86/config.h
+++ b/xen/include/asm-x86/config.h
@@ -212,7 +212,7 @@ extern unsigned char boot_edid_info[128]
 /* Slot 260: per-domain mappings (including map cache). */
 #define PERDOMAIN_VIRT_START    (PML4_ADDR(260))
 #define PERDOMAIN_SLOT_MBYTES   (PML4_ENTRY_BYTES >> (20 + PAGETABLE_ORDER))
-#define PERDOMAIN_SLOTS         2
+#define PERDOMAIN_SLOTS         3
 #define PERDOMAIN_VIRT_SLOT(s)  (PERDOMAIN_VIRT_START + (s) * \
                                  (PERDOMAIN_SLOT_MBYTES << 20))
 /* Slot 261: machine-to-phys conversion table (256GB). */
@@ -304,6 +304,14 @@ extern unsigned long xen_phys_start;
 #define LDT_VIRT_START(v)    \
     (GDT_VIRT_START(v) + (64*1024))
 
+/* Argument translation area. The 2nd to last per-domain-mapping sub-area. */
+#define ARG_XLAT_SLOT            (PERDOMAIN_SLOTS - 2)
+/* Allow for at least one guard page (COMPAT_ARG_XLAT_SIZE being 2 pages): */
+#define ARG_XLAT_VA_SHIFT        (2 + PAGE_SHIFT)
+#define ARG_XLAT_VIRT_START      PERDOMAIN_VIRT_SLOT(ARG_XLAT_SLOT)
+#define ARG_XLAT_START(v)        \
+    (ARG_XLAT_VIRT_START + ((v)->vcpu_id << ARG_XLAT_VA_SHIFT))
+
 /* map_domain_page() map cache. The last per-domain-mapping sub-area. */
 #define MAPCACHE_VCPU_ENTRIES    (CONFIG_PAGING_LEVELS * CONFIG_PAGING_LEVELS)
 #define MAPCACHE_ENTRIES         (MAX_VIRT_CPUS * MAPCACHE_VCPU_ENTRIES)
@@ -313,7 +321,7 @@ extern unsigned long xen_phys_start;
                                   MAPCACHE_ENTRIES * PAGE_SIZE)
 
 #define PDPT_L1_ENTRIES       \
-    ((PERDOMAIN_VIRT_SLOT(PERDOMAIN_SLOTS - 1) - PERDOMAIN_VIRT_START) >> PAGE_SHIFT)
+    ((PERDOMAIN_VIRT_SLOT(PERDOMAIN_SLOTS - 2) - PERDOMAIN_VIRT_START) >> PAGE_SHIFT)
 #define PDPT_L2_ENTRIES       \
     ((PDPT_L1_ENTRIES + (1 << PAGETABLE_ORDER) - 1) >> PAGETABLE_ORDER)
 
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -244,6 +244,7 @@ struct arch_domain
     void **perdomain_pts;
     struct page_info *perdomain_l2_pg[PERDOMAIN_SLOTS];
     struct page_info *perdomain_l3_pg;
+    struct page_info **arg_xlat_l1_pg;
 
     unsigned int hv_compat_vstart;
 
@@ -448,9 +449,6 @@ struct arch_vcpu
 
     /* A secondary copy of the vcpu time info. */
     XEN_GUEST_HANDLE(vcpu_time_info_t) time_info_guest;
-
-    void *compat_arg_xlat;
-
 } __cacheline_aligned;
 
 /* Shorthands to improve code legibility. */
--- a/xen/include/asm-x86/x86_64/uaccess.h
+++ b/xen/include/asm-x86/x86_64/uaccess.h
@@ -1,16 +1,15 @@
 #ifndef __X86_64_UACCESS_H
 #define __X86_64_UACCESS_H
 
-#define COMPAT_ARG_XLAT_VIRT_BASE compat_arg_xlat_virt_base()
+#define COMPAT_ARG_XLAT_VIRT_BASE ((void *)ARG_XLAT_START(current))
 #define COMPAT_ARG_XLAT_SIZE      (2*PAGE_SIZE)
 struct vcpu;
-void *compat_arg_xlat_virt_base(void);
 int setup_compat_arg_xlat(struct vcpu *v);
 void free_compat_arg_xlat(struct vcpu *v);
 #define is_compat_arg_xlat_range(addr, size) ({                               \
     unsigned long __off;                                                      \
     __off = (unsigned long)(addr) - (unsigned long)COMPAT_ARG_XLAT_VIRT_BASE; \
-    (__off <= COMPAT_ARG_XLAT_SIZE) &&                                        \
+    (__off < COMPAT_ARG_XLAT_SIZE) &&                                         \
     ((__off + (unsigned long)(size)) <= COMPAT_ARG_XLAT_SIZE);                \
 })
 



[-- Attachment #2: x86-map-arg-xlat-area.patch --]
[-- Type: text/plain, Size: 8358 bytes --]

x86: rework hypercall argument translation area setup

Rather than using an order-1 Xen heap allocation, use (currently 2)
individual domain heap pages to populate space in the per-domain
mapping area.

Also fix an off-by-one mistake in is_compat_arg_xlat_range().

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -642,6 +642,14 @@ void arch_domain_destroy(struct domain *
             free_domheap_page(d->arch.perdomain_l2_pg[i]);
     free_domheap_page(d->arch.perdomain_l3_pg);
 
+    if ( d->arch.arg_xlat_l1_pg )
+    {
+        for ( i = 0; i < PFN_UP(MAX_VIRT_CPUS << ARG_XLAT_VA_SHIFT); ++i )
+            if ( d->arch.arg_xlat_l1_pg[i] )
+                free_domheap_page(d->arch.arg_xlat_l1_pg[i]);
+        xfree(d->arch.arg_xlat_l1_pg);
+    }
+
     free_xenheap_page(d->shared_info);
     cleanup_domain_irq_mapping(d);
 }
--- a/xen/arch/x86/usercopy.c
+++ b/xen/arch/x86/usercopy.c
@@ -6,8 +6,8 @@
  * Copyright 2002 Andi Kleen <ak@suse.de>
  */
 
-#include <xen/config.h>
 #include <xen/lib.h>
+#include <xen/sched.h>
 #include <asm/uaccess.h>
 
 unsigned long __copy_to_user_ll(void __user *to, const void *from, unsigned n)
--- a/xen/arch/x86/x86_64/mm.c
+++ b/xen/arch/x86/x86_64/mm.c
@@ -832,27 +832,93 @@ void __init zap_low_mappings(void)
                      __PAGE_HYPERVISOR);
 }
 
-void *compat_arg_xlat_virt_base(void)
-{
-    return current->arch.compat_arg_xlat;
-}
-
 int setup_compat_arg_xlat(struct vcpu *v)
 {
-    unsigned int order = get_order_from_bytes(COMPAT_ARG_XLAT_SIZE);
+    struct domain *d = v->domain;
+    struct page_info *pg;
+    unsigned int idx, i;
+    l1_pgentry_t *l1tab;
+
+    if ( !d->arch.perdomain_l2_pg[ARG_XLAT_SLOT] )
+    {
+        l3_pgentry_t *l3tab;
+
+        pg = alloc_domheap_page(NULL, MEMF_node(domain_to_node(d)));
+        if ( !pg )
+            return -ENOMEM;
+        clear_domain_page(page_to_mfn(pg));
+        d->arch.perdomain_l2_pg[ARG_XLAT_SLOT] = pg;
+
+        l3tab = __map_domain_page(d->arch.perdomain_l3_pg);
+        l3tab[l3_table_offset(ARG_XLAT_VIRT_START)] =
+            l3e_from_page(pg, __PAGE_HYPERVISOR);
+        unmap_domain_page(l3tab);
+    }
+
+    BUILD_BUG_ON((MAX_VIRT_CPUS << ARG_XLAT_VA_SHIFT) >
+                 (PERDOMAIN_SLOT_MBYTES << 20));
+
+    if ( !d->arch.arg_xlat_l1_pg )
+        d->arch.arg_xlat_l1_pg = xzalloc_array(struct page_info *,
+                                               PFN_UP(MAX_VIRT_CPUS <<
+                                                      ARG_XLAT_VA_SHIFT));
+    if ( !d->arch.arg_xlat_l1_pg )
+        return -ENOMEM;
+
+    idx = l2_table_offset(ARG_XLAT_START(v));
+    if ( !d->arch.arg_xlat_l1_pg[idx] )
+    {
+        l2_pgentry_t *l2tab;
+
+        pg = alloc_domheap_page(NULL, MEMF_node(domain_to_node(d)));
+        if ( !pg )
+            return -ENOMEM;
+        clear_domain_page(page_to_mfn(pg));
+        d->arch.arg_xlat_l1_pg[idx] = pg;
+
+        l2tab = __map_domain_page(d->arch.perdomain_l2_pg[ARG_XLAT_SLOT]);
+        l2tab[idx] = l2e_from_page(pg, __PAGE_HYPERVISOR);
+        unmap_domain_page(l2tab);
+    }
 
-    v->arch.compat_arg_xlat = alloc_xenheap_pages(order,
-                                                  MEMF_node(vcpu_to_node(v)));
+    l1tab = __map_domain_page(d->arch.arg_xlat_l1_pg[idx]);
+    idx = l1_table_offset(ARG_XLAT_START(v));
+    BUILD_BUG_ON(COMPAT_ARG_XLAT_SIZE > (1UL << ARG_XLAT_VA_SHIFT));
+    for ( i = 0; i < PFN_UP(COMPAT_ARG_XLAT_SIZE); ++i )
+    {
+        pg = alloc_domheap_page(NULL, MEMF_node(domain_to_node(d)));
+        if ( !pg )
+            return -ENOMEM;
+        l1tab[idx + i] = l1e_from_page(pg, __PAGE_HYPERVISOR);
+    }
+    unmap_domain_page(l1tab);
 
-    return v->arch.compat_arg_xlat ? 0 : -ENOMEM;
+    return 0;
 }
 
 void free_compat_arg_xlat(struct vcpu *v)
 {
-    unsigned int order = get_order_from_bytes(COMPAT_ARG_XLAT_SIZE);
+    struct domain *d = v->domain;
+    unsigned long start = ARG_XLAT_START(v);
+    unsigned long va = start + COMPAT_ARG_XLAT_SIZE - 1;
+    l1_pgentry_t *l1tab =
+        __map_domain_page(d->arch.arg_xlat_l1_pg[l2_table_offset(start)]);
+
+    do {
+        l1_pgentry_t l1e = l1tab[l1_table_offset(va)];
+
+        if ( l1e_get_flags(l1e) & _PAGE_PRESENT )
+        {
+            struct page_info *pg = l1e_get_page(l1e);
+
+            l1tab[l1_table_offset(va)] = l1e_empty();
+            flush_tlb_one_mask(d->domain_dirty_cpumask, va);
+            free_domheap_page(pg);
+        }
+        va -= PAGE_SIZE;
+    } while ( va >= start );
 
-    free_xenheap_pages(v->arch.compat_arg_xlat, order);
-    v->arch.compat_arg_xlat = NULL;
+    unmap_domain_page(l1tab);
 }
 
 void cleanup_frame_table(struct mem_hotadd_info *info)
--- a/xen/include/asm-x86/config.h
+++ b/xen/include/asm-x86/config.h
@@ -212,7 +212,7 @@ extern unsigned char boot_edid_info[128]
 /* Slot 260: per-domain mappings (including map cache). */
 #define PERDOMAIN_VIRT_START    (PML4_ADDR(260))
 #define PERDOMAIN_SLOT_MBYTES   (PML4_ENTRY_BYTES >> (20 + PAGETABLE_ORDER))
-#define PERDOMAIN_SLOTS         2
+#define PERDOMAIN_SLOTS         3
 #define PERDOMAIN_VIRT_SLOT(s)  (PERDOMAIN_VIRT_START + (s) * \
                                  (PERDOMAIN_SLOT_MBYTES << 20))
 /* Slot 261: machine-to-phys conversion table (256GB). */
@@ -304,6 +304,14 @@ extern unsigned long xen_phys_start;
 #define LDT_VIRT_START(v)    \
     (GDT_VIRT_START(v) + (64*1024))
 
+/* Argument translation area. The 2nd to last per-domain-mapping sub-area. */
+#define ARG_XLAT_SLOT            (PERDOMAIN_SLOTS - 2)
+/* Allow for at least one guard page (COMPAT_ARG_XLAT_SIZE being 2 pages): */
+#define ARG_XLAT_VA_SHIFT        (2 + PAGE_SHIFT)
+#define ARG_XLAT_VIRT_START      PERDOMAIN_VIRT_SLOT(ARG_XLAT_SLOT)
+#define ARG_XLAT_START(v)        \
+    (ARG_XLAT_VIRT_START + ((v)->vcpu_id << ARG_XLAT_VA_SHIFT))
+
 /* map_domain_page() map cache. The last per-domain-mapping sub-area. */
 #define MAPCACHE_VCPU_ENTRIES    (CONFIG_PAGING_LEVELS * CONFIG_PAGING_LEVELS)
 #define MAPCACHE_ENTRIES         (MAX_VIRT_CPUS * MAPCACHE_VCPU_ENTRIES)
@@ -313,7 +321,7 @@ extern unsigned long xen_phys_start;
                                   MAPCACHE_ENTRIES * PAGE_SIZE)
 
 #define PDPT_L1_ENTRIES       \
-    ((PERDOMAIN_VIRT_SLOT(PERDOMAIN_SLOTS - 1) - PERDOMAIN_VIRT_START) >> PAGE_SHIFT)
+    ((PERDOMAIN_VIRT_SLOT(PERDOMAIN_SLOTS - 2) - PERDOMAIN_VIRT_START) >> PAGE_SHIFT)
 #define PDPT_L2_ENTRIES       \
     ((PDPT_L1_ENTRIES + (1 << PAGETABLE_ORDER) - 1) >> PAGETABLE_ORDER)
 
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -244,6 +244,7 @@ struct arch_domain
     void **perdomain_pts;
     struct page_info *perdomain_l2_pg[PERDOMAIN_SLOTS];
     struct page_info *perdomain_l3_pg;
+    struct page_info **arg_xlat_l1_pg;
 
     unsigned int hv_compat_vstart;
 
@@ -448,9 +449,6 @@ struct arch_vcpu
 
     /* A secondary copy of the vcpu time info. */
     XEN_GUEST_HANDLE(vcpu_time_info_t) time_info_guest;
-
-    void *compat_arg_xlat;
-
 } __cacheline_aligned;
 
 /* Shorthands to improve code legibility. */
--- a/xen/include/asm-x86/x86_64/uaccess.h
+++ b/xen/include/asm-x86/x86_64/uaccess.h
@@ -1,16 +1,15 @@
 #ifndef __X86_64_UACCESS_H
 #define __X86_64_UACCESS_H
 
-#define COMPAT_ARG_XLAT_VIRT_BASE compat_arg_xlat_virt_base()
+#define COMPAT_ARG_XLAT_VIRT_BASE ((void *)ARG_XLAT_START(current))
 #define COMPAT_ARG_XLAT_SIZE      (2*PAGE_SIZE)
 struct vcpu;
-void *compat_arg_xlat_virt_base(void);
 int setup_compat_arg_xlat(struct vcpu *v);
 void free_compat_arg_xlat(struct vcpu *v);
 #define is_compat_arg_xlat_range(addr, size) ({                               \
     unsigned long __off;                                                      \
     __off = (unsigned long)(addr) - (unsigned long)COMPAT_ARG_XLAT_VIRT_BASE; \
-    (__off <= COMPAT_ARG_XLAT_SIZE) &&                                        \
+    (__off < COMPAT_ARG_XLAT_SIZE) &&                                         \
     ((__off + (unsigned long)(size)) <= COMPAT_ARG_XLAT_SIZE);                \
 })
 

[-- Attachment #3: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] x86: rework hypercall argument translation area setup
  2013-02-21 11:42 [PATCH] x86: rework hypercall argument translation area setup Jan Beulich
@ 2013-02-21 12:08 ` Keir Fraser
  2013-02-21 12:26   ` Jan Beulich
  0 siblings, 1 reply; 6+ messages in thread
From: Keir Fraser @ 2013-02-21 12:08 UTC (permalink / raw)
  To: Jan Beulich, xen-devel

On 21/02/2013 11:42, "Jan Beulich" <JBeulich@suse.com> wrote:

> Rather than using an order-1 Xen heap allocation, use (currently 2)
> individual domain heap pages to populate space in the per-domain
> mapping area.
> 
> Also fix an off-by-one mistake in is_compat_arg_xlat_range().
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Well, fine in principle. This manual setup/teardown of l2/l1 pagetables in
the perdomain area is getting a bit tiresome though isn't it? Isn't there
some setup helper that could be abstracted out for all users, and perhaps
even some automated teardown-and-freeing on domain destroy? I just sighed
when I saw more skanky pagetable manipulation for what is a conceptually
simple change.

 -- Keir

> --- a/xen/arch/x86/domain.c
> +++ b/xen/arch/x86/domain.c
> @@ -642,6 +642,14 @@ void arch_domain_destroy(struct domain *
>              free_domheap_page(d->arch.perdomain_l2_pg[i]);
>      free_domheap_page(d->arch.perdomain_l3_pg);
>  
> +    if ( d->arch.arg_xlat_l1_pg )
> +    {
> +        for ( i = 0; i < PFN_UP(MAX_VIRT_CPUS << ARG_XLAT_VA_SHIFT); ++i )
> +            if ( d->arch.arg_xlat_l1_pg[i] )
> +                free_domheap_page(d->arch.arg_xlat_l1_pg[i]);
> +        xfree(d->arch.arg_xlat_l1_pg);
> +    }
> +
>      free_xenheap_page(d->shared_info);
>      cleanup_domain_irq_mapping(d);
>  }
> --- a/xen/arch/x86/usercopy.c
> +++ b/xen/arch/x86/usercopy.c
> @@ -6,8 +6,8 @@
>   * Copyright 2002 Andi Kleen <ak@suse.de>
>   */
>  
> -#include <xen/config.h>
>  #include <xen/lib.h>
> +#include <xen/sched.h>
>  #include <asm/uaccess.h>
>  
>  unsigned long __copy_to_user_ll(void __user *to, const void *from, unsigned
> n)
> --- a/xen/arch/x86/x86_64/mm.c
> +++ b/xen/arch/x86/x86_64/mm.c
> @@ -832,27 +832,93 @@ void __init zap_low_mappings(void)
>                       __PAGE_HYPERVISOR);
>  }
>  
> -void *compat_arg_xlat_virt_base(void)
> -{
> -    return current->arch.compat_arg_xlat;
> -}
> -
>  int setup_compat_arg_xlat(struct vcpu *v)
>  {
> -    unsigned int order = get_order_from_bytes(COMPAT_ARG_XLAT_SIZE);
> +    struct domain *d = v->domain;
> +    struct page_info *pg;
> +    unsigned int idx, i;
> +    l1_pgentry_t *l1tab;
> +
> +    if ( !d->arch.perdomain_l2_pg[ARG_XLAT_SLOT] )
> +    {
> +        l3_pgentry_t *l3tab;
> +
> +        pg = alloc_domheap_page(NULL, MEMF_node(domain_to_node(d)));
> +        if ( !pg )
> +            return -ENOMEM;
> +        clear_domain_page(page_to_mfn(pg));
> +        d->arch.perdomain_l2_pg[ARG_XLAT_SLOT] = pg;
> +
> +        l3tab = __map_domain_page(d->arch.perdomain_l3_pg);
> +        l3tab[l3_table_offset(ARG_XLAT_VIRT_START)] =
> +            l3e_from_page(pg, __PAGE_HYPERVISOR);
> +        unmap_domain_page(l3tab);
> +    }
> +
> +    BUILD_BUG_ON((MAX_VIRT_CPUS << ARG_XLAT_VA_SHIFT) >
> +                 (PERDOMAIN_SLOT_MBYTES << 20));
> +
> +    if ( !d->arch.arg_xlat_l1_pg )
> +        d->arch.arg_xlat_l1_pg = xzalloc_array(struct page_info *,
> +                                               PFN_UP(MAX_VIRT_CPUS <<
> +                                                      ARG_XLAT_VA_SHIFT));
> +    if ( !d->arch.arg_xlat_l1_pg )
> +        return -ENOMEM;
> +
> +    idx = l2_table_offset(ARG_XLAT_START(v));
> +    if ( !d->arch.arg_xlat_l1_pg[idx] )
> +    {
> +        l2_pgentry_t *l2tab;
> +
> +        pg = alloc_domheap_page(NULL, MEMF_node(domain_to_node(d)));
> +        if ( !pg )
> +            return -ENOMEM;
> +        clear_domain_page(page_to_mfn(pg));
> +        d->arch.arg_xlat_l1_pg[idx] = pg;
> +
> +        l2tab = __map_domain_page(d->arch.perdomain_l2_pg[ARG_XLAT_SLOT]);
> +        l2tab[idx] = l2e_from_page(pg, __PAGE_HYPERVISOR);
> +        unmap_domain_page(l2tab);
> +    }
>  
> -    v->arch.compat_arg_xlat = alloc_xenheap_pages(order,
> -                
> MEMF_node(vcpu_to_node(v)));
> +    l1tab = __map_domain_page(d->arch.arg_xlat_l1_pg[idx]);
> +    idx = l1_table_offset(ARG_XLAT_START(v));
> +    BUILD_BUG_ON(COMPAT_ARG_XLAT_SIZE > (1UL << ARG_XLAT_VA_SHIFT));
> +    for ( i = 0; i < PFN_UP(COMPAT_ARG_XLAT_SIZE); ++i )
> +    {
> +        pg = alloc_domheap_page(NULL, MEMF_node(domain_to_node(d)));
> +        if ( !pg )
> +            return -ENOMEM;
> +        l1tab[idx + i] = l1e_from_page(pg, __PAGE_HYPERVISOR);
> +    }
> +    unmap_domain_page(l1tab);
>  
> -    return v->arch.compat_arg_xlat ? 0 : -ENOMEM;
> +    return 0;
>  }
>  
>  void free_compat_arg_xlat(struct vcpu *v)
>  {
> -    unsigned int order = get_order_from_bytes(COMPAT_ARG_XLAT_SIZE);
> +    struct domain *d = v->domain;
> +    unsigned long start = ARG_XLAT_START(v);
> +    unsigned long va = start + COMPAT_ARG_XLAT_SIZE - 1;
> +    l1_pgentry_t *l1tab =
> +        __map_domain_page(d->arch.arg_xlat_l1_pg[l2_table_offset(start)]);
> +
> +    do {
> +        l1_pgentry_t l1e = l1tab[l1_table_offset(va)];
> +
> +        if ( l1e_get_flags(l1e) & _PAGE_PRESENT )
> +        {
> +            struct page_info *pg = l1e_get_page(l1e);
> +
> +            l1tab[l1_table_offset(va)] = l1e_empty();
> +            flush_tlb_one_mask(d->domain_dirty_cpumask, va);
> +            free_domheap_page(pg);
> +        }
> +        va -= PAGE_SIZE;
> +    } while ( va >= start );
>  
> -    free_xenheap_pages(v->arch.compat_arg_xlat, order);
> -    v->arch.compat_arg_xlat = NULL;
> +    unmap_domain_page(l1tab);
>  }
>  
>  void cleanup_frame_table(struct mem_hotadd_info *info)
> --- a/xen/include/asm-x86/config.h
> +++ b/xen/include/asm-x86/config.h
> @@ -212,7 +212,7 @@ extern unsigned char boot_edid_info[128]
>  /* Slot 260: per-domain mappings (including map cache). */
>  #define PERDOMAIN_VIRT_START    (PML4_ADDR(260))
>  #define PERDOMAIN_SLOT_MBYTES   (PML4_ENTRY_BYTES >> (20 + PAGETABLE_ORDER))
> -#define PERDOMAIN_SLOTS         2
> +#define PERDOMAIN_SLOTS         3
>  #define PERDOMAIN_VIRT_SLOT(s)  (PERDOMAIN_VIRT_START + (s) * \
>                                   (PERDOMAIN_SLOT_MBYTES << 20))
>  /* Slot 261: machine-to-phys conversion table (256GB). */
> @@ -304,6 +304,14 @@ extern unsigned long xen_phys_start;
>  #define LDT_VIRT_START(v)    \
>      (GDT_VIRT_START(v) + (64*1024))
>  
> +/* Argument translation area. The 2nd to last per-domain-mapping sub-area. */
> +#define ARG_XLAT_SLOT            (PERDOMAIN_SLOTS - 2)
> +/* Allow for at least one guard page (COMPAT_ARG_XLAT_SIZE being 2 pages): */
> +#define ARG_XLAT_VA_SHIFT        (2 + PAGE_SHIFT)
> +#define ARG_XLAT_VIRT_START      PERDOMAIN_VIRT_SLOT(ARG_XLAT_SLOT)
> +#define ARG_XLAT_START(v)        \
> +    (ARG_XLAT_VIRT_START + ((v)->vcpu_id << ARG_XLAT_VA_SHIFT))
> +
>  /* map_domain_page() map cache. The last per-domain-mapping sub-area. */
>  #define MAPCACHE_VCPU_ENTRIES    (CONFIG_PAGING_LEVELS *
> CONFIG_PAGING_LEVELS)
>  #define MAPCACHE_ENTRIES         (MAX_VIRT_CPUS * MAPCACHE_VCPU_ENTRIES)
> @@ -313,7 +321,7 @@ extern unsigned long xen_phys_start;
>                                    MAPCACHE_ENTRIES * PAGE_SIZE)
>  
>  #define PDPT_L1_ENTRIES       \
> -    ((PERDOMAIN_VIRT_SLOT(PERDOMAIN_SLOTS - 1) - PERDOMAIN_VIRT_START) >>
> PAGE_SHIFT)
> +    ((PERDOMAIN_VIRT_SLOT(PERDOMAIN_SLOTS - 2) - PERDOMAIN_VIRT_START) >>
> PAGE_SHIFT)
>  #define PDPT_L2_ENTRIES       \
>      ((PDPT_L1_ENTRIES + (1 << PAGETABLE_ORDER) - 1) >> PAGETABLE_ORDER)
>  
> --- a/xen/include/asm-x86/domain.h
> +++ b/xen/include/asm-x86/domain.h
> @@ -244,6 +244,7 @@ struct arch_domain
>      void **perdomain_pts;
>      struct page_info *perdomain_l2_pg[PERDOMAIN_SLOTS];
>      struct page_info *perdomain_l3_pg;
> +    struct page_info **arg_xlat_l1_pg;
>  
>      unsigned int hv_compat_vstart;
>  
> @@ -448,9 +449,6 @@ struct arch_vcpu
>  
>      /* A secondary copy of the vcpu time info. */
>      XEN_GUEST_HANDLE(vcpu_time_info_t) time_info_guest;
> -
> -    void *compat_arg_xlat;
> -
>  } __cacheline_aligned;
>  
>  /* Shorthands to improve code legibility. */
> --- a/xen/include/asm-x86/x86_64/uaccess.h
> +++ b/xen/include/asm-x86/x86_64/uaccess.h
> @@ -1,16 +1,15 @@
>  #ifndef __X86_64_UACCESS_H
>  #define __X86_64_UACCESS_H
>  
> -#define COMPAT_ARG_XLAT_VIRT_BASE compat_arg_xlat_virt_base()
> +#define COMPAT_ARG_XLAT_VIRT_BASE ((void *)ARG_XLAT_START(current))
>  #define COMPAT_ARG_XLAT_SIZE      (2*PAGE_SIZE)
>  struct vcpu;
> -void *compat_arg_xlat_virt_base(void);
>  int setup_compat_arg_xlat(struct vcpu *v);
>  void free_compat_arg_xlat(struct vcpu *v);
>  #define is_compat_arg_xlat_range(addr, size) ({
> \
>      unsigned long __off;
> \
>      __off = (unsigned long)(addr) - (unsigned long)COMPAT_ARG_XLAT_VIRT_BASE;
> \
> -    (__off <= COMPAT_ARG_XLAT_SIZE) &&
> \
> +    (__off < COMPAT_ARG_XLAT_SIZE) &&
> \
>      ((__off + (unsigned long)(size)) <= COMPAT_ARG_XLAT_SIZE);
> \
>  })
>  
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] x86: rework hypercall argument translation area setup
  2013-02-21 12:08 ` Keir Fraser
@ 2013-02-21 12:26   ` Jan Beulich
  2013-02-21 12:57     ` Keir Fraser
  0 siblings, 1 reply; 6+ messages in thread
From: Jan Beulich @ 2013-02-21 12:26 UTC (permalink / raw)
  To: Keir Fraser; +Cc: xen-devel

>>> On 21.02.13 at 13:08, Keir Fraser <keir@xen.org> wrote:
> On 21/02/2013 11:42, "Jan Beulich" <JBeulich@suse.com> wrote:
> 
>> Rather than using an order-1 Xen heap allocation, use (currently 2)
>> individual domain heap pages to populate space in the per-domain
>> mapping area.
>> 
>> Also fix an off-by-one mistake in is_compat_arg_xlat_range().
>> 
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> 
> Well, fine in principle. This manual setup/teardown of l2/l1 pagetables in
> the perdomain area is getting a bit tiresome though isn't it? Isn't there
> some setup helper that could be abstracted out for all users, and perhaps
> even some automated teardown-and-freeing on domain destroy? I just sighed
> when I saw more skanky pagetable manipulation for what is a conceptually
> simple change.

I tried to spot pieces worth folding into helper functions, but
didn't really find anything worthwhile (the purposes of the
mappings are just too different). Perhaps we could have a
map_pages_to_guest(), taking care of all intermediate page
table levels? I can't see much else, and the main problem is that
of tracking pages used for page tables here anyway.

Are we concerned about the performance of guest GDT/LDT page
(un)mappings?

Jan

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] x86: rework hypercall argument translation area setup
  2013-02-21 12:26   ` Jan Beulich
@ 2013-02-21 12:57     ` Keir Fraser
  2013-02-21 13:05       ` Jan Beulich
  0 siblings, 1 reply; 6+ messages in thread
From: Keir Fraser @ 2013-02-21 12:57 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel

On 21/02/2013 12:26, "Jan Beulich" <JBeulich@suse.com> wrote:

>> Well, fine in principle. This manual setup/teardown of l2/l1 pagetables in
>> the perdomain area is getting a bit tiresome though isn't it? Isn't there
>> some setup helper that could be abstracted out for all users, and perhaps
>> even some automated teardown-and-freeing on domain destroy? I just sighed
>> when I saw more skanky pagetable manipulation for what is a conceptually
>> simple change.
> 
> I tried to spot pieces worth folding into helper functions, but
> didn't really find anything worthwhile (the purposes of the
> mappings are just too different). Perhaps we could have a
> map_pages_to_guest(), taking care of all intermediate page
> table levels? I can't see much else, and the main problem is that
> of tracking pages used for page tables here anyway.

Well, possibly even that might be an improvement.

> Are we concerned about the performance of guest GDT/LDT page
> (un)mappings?

LDT must often change on guest process context switch? So I would think yes
we do.

 -- Keir

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] x86: rework hypercall argument translation area setup
  2013-02-21 12:57     ` Keir Fraser
@ 2013-02-21 13:05       ` Jan Beulich
  2013-02-21 13:12         ` Keir Fraser
  0 siblings, 1 reply; 6+ messages in thread
From: Jan Beulich @ 2013-02-21 13:05 UTC (permalink / raw)
  To: Keir Fraser; +Cc: xen-devel

>>> On 21.02.13 at 13:57, Keir Fraser <keir.xen@gmail.com> wrote:
> On 21/02/2013 12:26, "Jan Beulich" <JBeulich@suse.com> wrote:
>> Are we concerned about the performance of guest GDT/LDT page
>> (un)mappings?
> 
> LDT must often change on guest process context switch? So I would think yes
> we do.

Linux doesn't normally use an LDT (i.e. only exotic processes
would have one of non-zero size).

Jan

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] x86: rework hypercall argument translation area setup
  2013-02-21 13:05       ` Jan Beulich
@ 2013-02-21 13:12         ` Keir Fraser
  0 siblings, 0 replies; 6+ messages in thread
From: Keir Fraser @ 2013-02-21 13:12 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel

On 21/02/2013 13:05, "Jan Beulich" <JBeulich@suse.com> wrote:

>>>> On 21.02.13 at 13:57, Keir Fraser <keir.xen@gmail.com> wrote:
>> On 21/02/2013 12:26, "Jan Beulich" <JBeulich@suse.com> wrote:
>>> Are we concerned about the performance of guest GDT/LDT page
>>> (un)mappings?
>> 
>> LDT must often change on guest process context switch? So I would think yes
>> we do.
> 
> Linux doesn't normally use an LDT (i.e. only exotic processes
> would have one of non-zero size).

That being the case, we probably don't care so much.

 -- Keir

> Jan
> 

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2013-02-21 13:12 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2013-02-21 11:42 [PATCH] x86: rework hypercall argument translation area setup Jan Beulich
2013-02-21 12:08 ` Keir Fraser
2013-02-21 12:26   ` Jan Beulich
2013-02-21 12:57     ` Keir Fraser
2013-02-21 13:05       ` Jan Beulich
2013-02-21 13:12         ` Keir Fraser

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.