xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] x86/VMX: sanitize VM86 TSS handling
@ 2017-02-13 13:19 Jan Beulich
  2017-02-13 13:37 ` Jan Beulich
                   ` (2 more replies)
  0 siblings, 3 replies; 18+ messages in thread
From: Jan Beulich @ 2017-02-13 13:19 UTC (permalink / raw)
  To: xen-devel
  Cc: Kevin Tian, Stefano Stabellini, Wei Liu, George Dunlap,
	Andrew Cooper, Ian Jackson, Tim Deegan, Jun Nakajima,
	Roger Pau Monne

[-- Attachment #1: Type: text/plain, Size: 8692 bytes --]

The present way of setting this up is flawed: Leaving the I/O bitmap
pointer at zero means that the interrupt redirection bitmap lives
outside (ahead of) the allocated space of the TSS. Similarly setting a
TSS limit of 255 when only 128 bytes get allocated means that 128 extra
bytes may be accessed by the CPU during I/O port access processing.

Introduce a new HVM param to set the allocated size of the TSS, and
have the hypervisor actually take care of setting namely the I/O bitmap
pointer. Both this and the segment limit now take the allocated size
into account.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
TBD: Do we really want to re-init the TSS every time we are about to
     use it? This can happen quite often during boot, especially while
     grub is running.

--- a/tools/firmware/hvmloader/hvmloader.c
+++ b/tools/firmware/hvmloader/hvmloader.c
@@ -177,18 +177,30 @@ static void cmos_write_memory_size(void)
 }
 
 /*
- * Set up an empty TSS area for virtual 8086 mode to use. 
- * The only important thing is that it musn't have any bits set 
- * in the interrupt redirection bitmap, so all zeros will do.
+ * Set up an empty TSS area for virtual 8086 mode to use. Its content is
+ * going to be managed by Xen, but zero fill it just in case.
  */
 static void init_vm86_tss(void)
 {
+/*
+ * Have the TSS cover the ISA port range, which makes it
+ * - 104 bytes base structure
+ * - 32 bytes interrupt redirection bitmap
+ * - 128 bytes I/O bitmap
+ * - one trailing byte
 * or a total of 265 bytes. As it needs to be a power of two for
+ * now (or else the alignment parameter to mem_alloc() needs adjustment),
+ * this ends up requiring 512 bytes.
+ */
+#define TSS_SIZE 512
     void *tss;
 
-    tss = mem_alloc(128, 128);
-    memset(tss, 0, 128);
+    tss = mem_alloc(TSS_SIZE, TSS_SIZE);
+    memset(tss, 0, TSS_SIZE);
     hvm_param_set(HVM_PARAM_VM86_TSS, virt_to_phys(tss));
+    hvm_param_set(HVM_PARAM_VM86_TSS_SIZE, TSS_SIZE);
     printf("vm86 TSS at %08lx\n", virt_to_phys(tss));
+#undef TSS_SIZE
 }
 
 static void apic_setup(void)
--- a/tools/libxc/xc_sr_save_x86_hvm.c
+++ b/tools/libxc/xc_sr_save_x86_hvm.c
@@ -68,6 +68,7 @@ static int write_hvm_params(struct xc_sr
         HVM_PARAM_MONITOR_RING_PFN,
         HVM_PARAM_SHARING_RING_PFN,
         HVM_PARAM_VM86_TSS,
+        HVM_PARAM_VM86_TSS_SIZE,
         HVM_PARAM_CONSOLE_PFN,
         HVM_PARAM_ACPI_IOPORTS_LOCATION,
         HVM_PARAM_VIRIDIAN,
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -2850,6 +2850,43 @@ static int hvm_load_segment_selector(
     return 1;
 }
 
+struct tss32 {
+    uint16_t back_link, :16;
+    uint32_t esp0;
+    uint16_t ss0, :16;
+    uint32_t esp1;
+    uint16_t ss1, :16;
+    uint32_t esp2;
+    uint16_t ss2, :16;
+    uint32_t cr3, eip, eflags, eax, ecx, edx, ebx, esp, ebp, esi, edi;
+    uint16_t es, :16, cs, :16, ss, :16, ds, :16, fs, :16, gs, :16, ldt, :16;
+    uint16_t trace /* :1 */, iomap;
+};
+
+void hvm_prepare_vm86_tss(struct vcpu *v, uint32_t base, uint32_t limit)
+{
+    /*
+     * If the provided area is large enough to cover at least the ISA port
+     * range, keep the bitmaps outside the base structure. For rather small
+     * areas (namely relevant for guests having been migrated from older
+     * Xen versions), maximize interrupt vector and port coverage by pointing
+     * the I/O bitmap at 0x20 (which puts the interrupt redirection bitmap
+     * right at zero), accepting accesses to port 0x235 (represented by bit 5
+     * of byte 0x46) to trigger #GP (which will simply result in the access
+     * being handled by the emulator via a slightly different path than it
+     * would be anyway). Be sure to include one extra byte at the end of the
+     * I/O bitmap (hence the missing "- 1" in the comparison is not an
+     * off-by-one mistake), which we deliberately don't fill with all ones.
+     */
+    uint16_t iomap = (limit >= sizeof(struct tss32) + (0x100 / 8) + (0x400 / 8)
+                      ? sizeof(struct tss32) : 0) + (0x100 / 8);
+
+    ASSERT(limit >= sizeof(struct tss32) - 1);
+    hvm_copy_to_guest_phys(base, NULL, limit + 1, v);
+    hvm_copy_to_guest_phys(base + offsetof(struct tss32, iomap),
+                           &iomap, sizeof(iomap), v);
+}
+
 void hvm_task_switch(
     uint16_t tss_sel, enum hvm_task_switch_reason taskswitch_reason,
     int32_t errcode)
@@ -2862,18 +2899,7 @@ void hvm_task_switch(
     unsigned int eflags;
     pagefault_info_t pfinfo;
     int exn_raised, rc;
-    struct {
-        u16 back_link,__blh;
-        u32 esp0;
-        u16 ss0, _0;
-        u32 esp1;
-        u16 ss1, _1;
-        u32 esp2;
-        u16 ss2, _2;
-        u32 cr3, eip, eflags, eax, ecx, edx, ebx, esp, ebp, esi, edi;
-        u16 es, _3, cs, _4, ss, _5, ds, _6, fs, _7, gs, _8, ldt, _9;
-        u16 trace, iomap;
-    } tss;
+    struct tss32 tss;
 
     hvm_get_segment_register(v, x86_seg_gdtr, &gdt);
     hvm_get_segment_register(v, x86_seg_tr, &prev_tr);
@@ -4276,6 +4302,7 @@ static int hvm_allow_set_param(struct do
     /* The following parameters can be set by the guest. */
     case HVM_PARAM_CALLBACK_IRQ:
     case HVM_PARAM_VM86_TSS:
+    case HVM_PARAM_VM86_TSS_SIZE:
     case HVM_PARAM_ACPI_IOPORTS_LOCATION:
     case HVM_PARAM_VM_GENERATION_ID_ADDR:
     case HVM_PARAM_STORE_EVTCHN:
@@ -4484,6 +4511,31 @@ static int hvmop_set_param(
         }
         d->arch.x87_fip_width = a.value;
         break;
+
+    case HVM_PARAM_VM86_TSS:
+        /* Hardware would silently truncate high bits. */
+        if ( a.value != (uint32_t)a.value )
+        {
+            if ( d == curr_d )
+                domain_crash(d);
+            rc = -EINVAL;
+        }
+        break;
+
+    case HVM_PARAM_VM86_TSS_SIZE:
+        if ( a.value < sizeof(struct tss32) )
+        {
+            if ( d == curr_d )
+                domain_crash(d);
+            rc = -EINVAL;
+        }
+        /*
+         * Cap at the theoretically useful maximum (base structure plus
+         * 256 bits interrupt redirection bitmap + 64k bits I/O bitmap).
+         */
+        else if ( a.value > sizeof(struct tss32) + (0x100 / 8) + (0x10000 / 8) )
+            a.value = sizeof(struct tss32) + (0x100 / 8) + (0x10000 / 8);
+        break;
     }
 
     if ( rc != 0 )
@@ -4513,6 +4565,7 @@ static int hvm_allow_get_param(struct do
     /* The following parameters can be read by the guest. */
     case HVM_PARAM_CALLBACK_IRQ:
     case HVM_PARAM_VM86_TSS:
+    case HVM_PARAM_VM86_TSS_SIZE:
     case HVM_PARAM_ACPI_IOPORTS_LOCATION:
     case HVM_PARAM_VM_GENERATION_ID_ADDR:
     case HVM_PARAM_STORE_PFN:
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -1120,12 +1120,20 @@ static void vmx_set_segment_register(str
         
         if ( seg == x86_seg_tr ) 
         {
-            if ( v->domain->arch.hvm_domain.params[HVM_PARAM_VM86_TSS] )
+            const struct domain *d = v->domain;
+
+            if ( d->arch.hvm_domain.params[HVM_PARAM_VM86_TSS] )
             {
                 sel = 0;
                 attr = vm86_tr_attr;
-                limit = 0xff;
-                base = v->domain->arch.hvm_domain.params[HVM_PARAM_VM86_TSS];
+                /*
+                 * Old hvmloader binaries hardcode the size to 128 bytes,
+                 * without setting HVM_PARAM_VM86_TSS_SIZE.
+                 */
+                limit = (d->arch.hvm_domain.params[HVM_PARAM_VM86_TSS_SIZE]
+                         ?: 0x80) - 1;
+                base = d->arch.hvm_domain.params[HVM_PARAM_VM86_TSS];
+                hvm_prepare_vm86_tss(v, base, limit);
                 v->arch.hvm_vmx.vm86_segment_mask &= ~(1u << seg);
             }
             else
--- a/xen/include/asm-x86/hvm/support.h
+++ b/xen/include/asm-x86/hvm/support.h
@@ -111,6 +111,8 @@ int hvm_do_hypercall(struct cpu_user_reg
 void hvm_hlt(unsigned int eflags);
 void hvm_triple_fault(void);
 
+void hvm_prepare_vm86_tss(struct vcpu *v, uint32_t base, uint32_t limit);
+
 void hvm_rdtsc_intercept(struct cpu_user_regs *regs);
 
 int __must_check hvm_handle_xsetbv(u32 index, u64 new_bv);
--- a/xen/include/public/hvm/params.h
+++ b/xen/include/public/hvm/params.h
@@ -253,6 +253,9 @@
  */
 #define HVM_PARAM_X87_FIP_WIDTH 36
 
-#define HVM_NR_PARAMS 37
+/* Size of TSS used on Intel when CR0.PE=0. */
+#define HVM_PARAM_VM86_TSS_SIZE 37
+
+#define HVM_NR_PARAMS 38
 
 #endif /* __XEN_PUBLIC_HVM_PARAMS_H__ */



[-- Attachment #2: x86-HVM-VM86-TSS.patch --]
[-- Type: text/plain, Size: 8727 bytes --]

x86/VMX: sanitize VM86 TSS handling

The present way of setting this up is flawed: Leaving the I/O bitmap
pointer at zero means that the interrupt redirection bitmap lives
outside (ahead of) the allocated space of the TSS. Similarly setting a
TSS limit of 255 when only 128 bytes get allocated means that 128 extra
bytes may be accessed by the CPU during I/O port access processing.

Introduce a new HVM param to set the allocated size of the TSS, and
have the hypervisor actually take care of setting namely the I/O bitmap
pointer. Both this and the segment limit now take the allocated size
into account.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
TBD: Do we really want to re-init the TSS every time we are about to
     use it? This can happen quite often during boot, especially while
     grub is running.

--- a/tools/firmware/hvmloader/hvmloader.c
+++ b/tools/firmware/hvmloader/hvmloader.c
@@ -177,18 +177,30 @@ static void cmos_write_memory_size(void)
 }
 
 /*
- * Set up an empty TSS area for virtual 8086 mode to use. 
- * The only important thing is that it musn't have any bits set 
- * in the interrupt redirection bitmap, so all zeros will do.
+ * Set up an empty TSS area for virtual 8086 mode to use. Its content is
+ * going to be managed by Xen, but zero fill it just in case.
  */
 static void init_vm86_tss(void)
 {
+/*
+ * Have the TSS cover the ISA port range, which makes it
+ * - 104 bytes base structure
+ * - 32 bytes interrupt redirection bitmap
+ * - 128 bytes I/O bitmap
+ * - one trailing byte
 * or a total of 265 bytes. As it needs to be a power of two for
+ * now (or else the alignment parameter to mem_alloc() needs adjustment),
+ * this ends up requiring 512 bytes.
+ */
+#define TSS_SIZE 512
     void *tss;
 
-    tss = mem_alloc(128, 128);
-    memset(tss, 0, 128);
+    tss = mem_alloc(TSS_SIZE, TSS_SIZE);
+    memset(tss, 0, TSS_SIZE);
     hvm_param_set(HVM_PARAM_VM86_TSS, virt_to_phys(tss));
+    hvm_param_set(HVM_PARAM_VM86_TSS_SIZE, TSS_SIZE);
     printf("vm86 TSS at %08lx\n", virt_to_phys(tss));
+#undef TSS_SIZE
 }
 
 static void apic_setup(void)
--- a/tools/libxc/xc_sr_save_x86_hvm.c
+++ b/tools/libxc/xc_sr_save_x86_hvm.c
@@ -68,6 +68,7 @@ static int write_hvm_params(struct xc_sr
         HVM_PARAM_MONITOR_RING_PFN,
         HVM_PARAM_SHARING_RING_PFN,
         HVM_PARAM_VM86_TSS,
+        HVM_PARAM_VM86_TSS_SIZE,
         HVM_PARAM_CONSOLE_PFN,
         HVM_PARAM_ACPI_IOPORTS_LOCATION,
         HVM_PARAM_VIRIDIAN,
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -2850,6 +2850,43 @@ static int hvm_load_segment_selector(
     return 1;
 }
 
+struct tss32 {
+    uint16_t back_link, :16;
+    uint32_t esp0;
+    uint16_t ss0, :16;
+    uint32_t esp1;
+    uint16_t ss1, :16;
+    uint32_t esp2;
+    uint16_t ss2, :16;
+    uint32_t cr3, eip, eflags, eax, ecx, edx, ebx, esp, ebp, esi, edi;
+    uint16_t es, :16, cs, :16, ss, :16, ds, :16, fs, :16, gs, :16, ldt, :16;
+    uint16_t trace /* :1 */, iomap;
+};
+
+void hvm_prepare_vm86_tss(struct vcpu *v, uint32_t base, uint32_t limit)
+{
+    /*
+     * If the provided area is large enough to cover at least the ISA port
+     * range, keep the bitmaps outside the base structure. For rather small
+     * areas (namely relevant for guests having been migrated from older
+     * Xen versions), maximize interrupt vector and port coverage by pointing
+     * the I/O bitmap at 0x20 (which puts the interrupt redirection bitmap
+     * right at zero), accepting accesses to port 0x235 (represented by bit 5
+     * of byte 0x46) to trigger #GP (which will simply result in the access
+     * being handled by the emulator via a slightly different path than it
+     * would be anyway). Be sure to include one extra byte at the end of the
+     * I/O bitmap (hence the missing "- 1" in the comparison is not an
+     * off-by-one mistake), which we deliberately don't fill with all ones.
+     */
+    uint16_t iomap = (limit >= sizeof(struct tss32) + (0x100 / 8) + (0x400 / 8)
+                      ? sizeof(struct tss32) : 0) + (0x100 / 8);
+
+    ASSERT(limit >= sizeof(struct tss32) - 1);
+    hvm_copy_to_guest_phys(base, NULL, limit + 1, v);
+    hvm_copy_to_guest_phys(base + offsetof(struct tss32, iomap),
+                           &iomap, sizeof(iomap), v);
+}
+
 void hvm_task_switch(
     uint16_t tss_sel, enum hvm_task_switch_reason taskswitch_reason,
     int32_t errcode)
@@ -2862,18 +2899,7 @@ void hvm_task_switch(
     unsigned int eflags;
     pagefault_info_t pfinfo;
     int exn_raised, rc;
-    struct {
-        u16 back_link,__blh;
-        u32 esp0;
-        u16 ss0, _0;
-        u32 esp1;
-        u16 ss1, _1;
-        u32 esp2;
-        u16 ss2, _2;
-        u32 cr3, eip, eflags, eax, ecx, edx, ebx, esp, ebp, esi, edi;
-        u16 es, _3, cs, _4, ss, _5, ds, _6, fs, _7, gs, _8, ldt, _9;
-        u16 trace, iomap;
-    } tss;
+    struct tss32 tss;
 
     hvm_get_segment_register(v, x86_seg_gdtr, &gdt);
     hvm_get_segment_register(v, x86_seg_tr, &prev_tr);
@@ -4276,6 +4302,7 @@ static int hvm_allow_set_param(struct do
     /* The following parameters can be set by the guest. */
     case HVM_PARAM_CALLBACK_IRQ:
     case HVM_PARAM_VM86_TSS:
+    case HVM_PARAM_VM86_TSS_SIZE:
     case HVM_PARAM_ACPI_IOPORTS_LOCATION:
     case HVM_PARAM_VM_GENERATION_ID_ADDR:
     case HVM_PARAM_STORE_EVTCHN:
@@ -4484,6 +4511,31 @@ static int hvmop_set_param(
         }
         d->arch.x87_fip_width = a.value;
         break;
+
+    case HVM_PARAM_VM86_TSS:
+        /* Hardware would silently truncate high bits. */
+        if ( a.value != (uint32_t)a.value )
+        {
+            if ( d == curr_d )
+                domain_crash(d);
+            rc = -EINVAL;
+        }
+        break;
+
+    case HVM_PARAM_VM86_TSS_SIZE:
+        if ( a.value < sizeof(struct tss32) )
+        {
+            if ( d == curr_d )
+                domain_crash(d);
+            rc = -EINVAL;
+        }
+        /*
+         * Cap at the theoretically useful maximum (base structure plus
+         * 256 bits interrupt redirection bitmap + 64k bits I/O bitmap).
+         */
+        else if ( a.value > sizeof(struct tss32) + (0x100 / 8) + (0x10000 / 8) )
+            a.value = sizeof(struct tss32) + (0x100 / 8) + (0x10000 / 8);
+        break;
     }
 
     if ( rc != 0 )
@@ -4513,6 +4565,7 @@ static int hvm_allow_get_param(struct do
     /* The following parameters can be read by the guest. */
     case HVM_PARAM_CALLBACK_IRQ:
     case HVM_PARAM_VM86_TSS:
+    case HVM_PARAM_VM86_TSS_SIZE:
     case HVM_PARAM_ACPI_IOPORTS_LOCATION:
     case HVM_PARAM_VM_GENERATION_ID_ADDR:
     case HVM_PARAM_STORE_PFN:
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -1120,12 +1120,20 @@ static void vmx_set_segment_register(str
         
         if ( seg == x86_seg_tr ) 
         {
-            if ( v->domain->arch.hvm_domain.params[HVM_PARAM_VM86_TSS] )
+            const struct domain *d = v->domain;
+
+            if ( d->arch.hvm_domain.params[HVM_PARAM_VM86_TSS] )
             {
                 sel = 0;
                 attr = vm86_tr_attr;
-                limit = 0xff;
-                base = v->domain->arch.hvm_domain.params[HVM_PARAM_VM86_TSS];
+                /*
+                 * Old hvmloader binaries hardcode the size to 128 bytes,
+                 * without setting HVM_PARAM_VM86_TSS_SIZE.
+                 */
+                limit = (d->arch.hvm_domain.params[HVM_PARAM_VM86_TSS_SIZE]
+                         ?: 0x80) - 1;
+                base = d->arch.hvm_domain.params[HVM_PARAM_VM86_TSS];
+                hvm_prepare_vm86_tss(v, base, limit);
                 v->arch.hvm_vmx.vm86_segment_mask &= ~(1u << seg);
             }
             else
--- a/xen/include/asm-x86/hvm/support.h
+++ b/xen/include/asm-x86/hvm/support.h
@@ -111,6 +111,8 @@ int hvm_do_hypercall(struct cpu_user_reg
 void hvm_hlt(unsigned int eflags);
 void hvm_triple_fault(void);
 
+void hvm_prepare_vm86_tss(struct vcpu *v, uint32_t base, uint32_t limit);
+
 void hvm_rdtsc_intercept(struct cpu_user_regs *regs);
 
 int __must_check hvm_handle_xsetbv(u32 index, u64 new_bv);
--- a/xen/include/public/hvm/params.h
+++ b/xen/include/public/hvm/params.h
@@ -253,6 +253,9 @@
  */
 #define HVM_PARAM_X87_FIP_WIDTH 36
 
-#define HVM_NR_PARAMS 37
+/* Size of TSS used on Intel when CR0.PE=0. */
+#define HVM_PARAM_VM86_TSS_SIZE 37
+
+#define HVM_NR_PARAMS 38
 
 #endif /* __XEN_PUBLIC_HVM_PARAMS_H__ */

[-- Attachment #3: Type: text/plain, Size: 127 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] x86/VMX: sanitize VM86 TSS handling
  2017-02-13 13:19 [PATCH] x86/VMX: sanitize VM86 TSS handling Jan Beulich
@ 2017-02-13 13:37 ` Jan Beulich
  2017-02-13 13:45   ` Jan Beulich
  2017-02-14 17:35   ` Tim Deegan
  2017-02-13 18:26 ` Andrew Cooper
  2017-02-14 17:33 ` Tim Deegan
  2 siblings, 2 replies; 18+ messages in thread
From: Jan Beulich @ 2017-02-13 13:37 UTC (permalink / raw)
  To: xen-devel
  Cc: Kevin Tian, Stefano Stabellini, Wei Liu, George Dunlap,
	Andrew Cooper, Ian Jackson, Tim Deegan, Jun Nakajima,
	Roger Pau Monne

>>> On 13.02.17 at 14:19, <JBeulich@suse.com> wrote:
> --- a/tools/firmware/hvmloader/hvmloader.c
> +++ b/tools/firmware/hvmloader/hvmloader.c
> @@ -177,18 +177,30 @@ static void cmos_write_memory_size(void)
>  }
>  
>  /*
> - * Set up an empty TSS area for virtual 8086 mode to use. 
> - * The only important thing is that it musn't have any bits set 
> - * in the interrupt redirection bitmap, so all zeros will do.
> + * Set up an empty TSS area for virtual 8086 mode to use. Its content is
> + * going to be managed by Xen, but zero fill it just in case.
>   */
>  static void init_vm86_tss(void)
>  {
> +/*
> + * Have the TSS cover the ISA port range, which makes it
> + * - 104 bytes base structure
> + * - 32 bytes interrupt redirection bitmap
> + * - 128 bytes I/O bitmap
> + * - one trailing byte
> + * or a total of to 265 bytes. As it needs to be be a power of two for
> + * now (or else the alignment parameter to mem_alloc() needs adjustment),
> + * this ends up requiring 512 bytes.
> + */
> +#define TSS_SIZE 512
>      void *tss;
>  
> -    tss = mem_alloc(128, 128);
> -    memset(tss, 0, 128);
> +    tss = mem_alloc(TSS_SIZE, TSS_SIZE);

    tss = mem_alloc(TSS_SIZE, 128);

is sufficient here, as I've noticed (only) while reviewing Roger's
series v4 of which did trigger the creation of this patch. I've made
the change locally for now.

Jan


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] x86/VMX: sanitize VM86 TSS handling
  2017-02-13 13:37 ` Jan Beulich
@ 2017-02-13 13:45   ` Jan Beulich
  2017-02-14 17:35   ` Tim Deegan
  1 sibling, 0 replies; 18+ messages in thread
From: Jan Beulich @ 2017-02-13 13:45 UTC (permalink / raw)
  To: xen-devel
  Cc: Kevin Tian, Stefano Stabellini, Wei Liu, George Dunlap,
	Andrew Cooper, Ian Jackson, Tim Deegan, Jun Nakajima,
	Roger Pau Monne

>>> On 13.02.17 at 14:37, <JBeulich@suse.com> wrote:
>>>> On 13.02.17 at 14:19, <JBeulich@suse.com> wrote:
>> --- a/tools/firmware/hvmloader/hvmloader.c
>> +++ b/tools/firmware/hvmloader/hvmloader.c
>> @@ -177,18 +177,30 @@ static void cmos_write_memory_size(void)
>>  }
>>  
>>  /*
>> - * Set up an empty TSS area for virtual 8086 mode to use. 
>> - * The only important thing is that it musn't have any bits set 
>> - * in the interrupt redirection bitmap, so all zeros will do.
>> + * Set up an empty TSS area for virtual 8086 mode to use. Its content is
>> + * going to be managed by Xen, but zero fill it just in case.
>>   */
>>  static void init_vm86_tss(void)
>>  {
>> +/*
>> + * Have the TSS cover the ISA port range, which makes it
>> + * - 104 bytes base structure
>> + * - 32 bytes interrupt redirection bitmap
>> + * - 128 bytes I/O bitmap
>> + * - one trailing byte
>> + * or a total of to 265 bytes. As it needs to be be a power of two for
>> + * now (or else the alignment parameter to mem_alloc() needs adjustment),
>> + * this ends up requiring 512 bytes.
>> + */
>> +#define TSS_SIZE 512
>>      void *tss;
>>  
>> -    tss = mem_alloc(128, 128);
>> -    memset(tss, 0, 128);
>> +    tss = mem_alloc(TSS_SIZE, TSS_SIZE);
> 
>     tss = mem_alloc(TSS_SIZE, 128);
> 
> is sufficient here, as I've noticed (only) while reviewing Roger's
> series v4 of which did trigger the creation of this patch. I've made
> the change locally for now.

Which in turn means the size can also be reduced to 384, and then
the comment needs adjustment. Resulting hunk:

@@ -177,18 +177,29 @@ static void cmos_write_memory_size(void)
 }
 
 /*
- * Set up an empty TSS area for virtual 8086 mode to use. 
- * The only important thing is that it musn't have any bits set 
- * in the interrupt redirection bitmap, so all zeros will do.
+ * Set up an empty TSS area for virtual 8086 mode to use. Its content is
+ * going to be managed by Xen, but zero fill it just in case.
  */
 static void init_vm86_tss(void)
 {
+/*
+ * Have the TSS cover the ISA port range, which makes it
+ * - 104 bytes base structure
+ * - 32 bytes interrupt redirection bitmap
+ * - 128 bytes I/O bitmap
+ * - one trailing byte
+ * or a total of 265 bytes. As it needs to be a multiple of the requested
+ * alignment, this ends up requiring 384 bytes.
+ */
+#define TSS_SIZE (3 * 128)
     void *tss;
 
-    tss = mem_alloc(128, 128);
-    memset(tss, 0, 128);
+    tss = mem_alloc(TSS_SIZE, 128);
+    memset(tss, 0, TSS_SIZE);
     hvm_param_set(HVM_PARAM_VM86_TSS, virt_to_phys(tss));
+    hvm_param_set(HVM_PARAM_VM86_TSS_SIZE, TSS_SIZE);
     printf("vm86 TSS at %08lx\n", virt_to_phys(tss));
+#undef TSS_SIZE
 }
 
 static void apic_setup(void)

Jan


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] x86/VMX: sanitize VM86 TSS handling
  2017-02-13 13:19 [PATCH] x86/VMX: sanitize VM86 TSS handling Jan Beulich
  2017-02-13 13:37 ` Jan Beulich
@ 2017-02-13 18:26 ` Andrew Cooper
  2017-02-14  8:55   ` Jan Beulich
  2017-02-14 17:33 ` Tim Deegan
  2 siblings, 1 reply; 18+ messages in thread
From: Andrew Cooper @ 2017-02-13 18:26 UTC (permalink / raw)
  To: Jan Beulich, xen-devel
  Cc: Kevin Tian, Stefano Stabellini, Wei Liu, George Dunlap,
	Tim Deegan, Ian Jackson, Jun Nakajima, Roger Pau Monne

On 13/02/17 13:19, Jan Beulich wrote:
> The present way of setting this up is flawed: Leaving the I/O bitmap
> pointer at zero means that the interrupt redirection bitmap lives
> outside (ahead of) the allocated space of the TSS. Similarly setting a
> TSS limit of 255 when only 128 bytes get allocated means that 128 extra
> bytes may be accessed by the CPU during I/O port access processing.
>
> Introduce a new HVM param to set the allocated size of the TSS, and
> have the hypervisor actually take care of setting namely the I/O bitmap
> pointer. Both this and the segment limit now take the allocated size
> into account.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> ---
> TBD: Do we really want to re-init the TSS every time we are about to
>      use it? This can happen quite often during boot, especially while
>      grub is running.

The only case we need worry about is if the guest manually writes to the
area covered by the vm86 tss.  I think, looking at the logic, that we
properly restore the old %tr when re-entering protected mode, so we
aren't at risk of having the vm86 tss on the leaving-side of a hardware
task switch.

We have lasted thus far without, but given the issues recently
identified, the only safe conclusion to be drawn is that this
functionality hasn't been used in anger.

For sensible performance, we need to keep the IO bitmap clear to avoid
taking the #GP emulation path.

For correct behaviour, we need the entire software bitmap to be 0.

As this functionality exists only for first-gen VT-x lacking
unrestricted_guest, I can't claim to care too much about the performance
impact.

> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -2850,6 +2850,43 @@ static int hvm_load_segment_selector(
>      return 1;
>  }
>  
> +struct tss32 {
> +    uint16_t back_link, :16;
> +    uint32_t esp0;
> +    uint16_t ss0, :16;
> +    uint32_t esp1;
> +    uint16_t ss1, :16;
> +    uint32_t esp2;
> +    uint16_t ss2, :16;
> +    uint32_t cr3, eip, eflags, eax, ecx, edx, ebx, esp, ebp, esi, edi;
> +    uint16_t es, :16, cs, :16, ss, :16, ds, :16, fs, :16, gs, :16, ldt, :16;
> +    uint16_t trace /* :1 */, iomap;
> +};
> +
> +void hvm_prepare_vm86_tss(struct vcpu *v, uint32_t base, uint32_t limit)
> +{
> +    /*
> +     * If the provided area is large enough to cover at least the ISA port
> +     * range, keep the bitmaps outside the base structure. For rather small
> +     * areas (namely relevant for guests having been migrated from older
> +     * Xen versions), maximize interrupt vector and port coverage by pointing
> +     * the I/O bitmap at 0x20 (which puts the interrupt redirection bitmap
> +     * right at zero), accepting accesses to port 0x235 (represented by bit 5
> +     * of byte 0x46) to trigger #GP (which will simply result in the access
> +     * being handled by the emulator via a slightly different path than it
> +     * would be anyway). Be sure to include one extra byte at the end of the
> +     * I/O bitmap (hence the missing "- 1" in the comparison is not an
> +     * off-by-one mistake), which we deliberately don't fill with all ones.
> +     */
> +    uint16_t iomap = (limit >= sizeof(struct tss32) + (0x100 / 8) + (0x400 / 8)
> +                      ? sizeof(struct tss32) : 0) + (0x100 / 8);
> +
> +    ASSERT(limit >= sizeof(struct tss32) - 1);
> +    hvm_copy_to_guest_phys(base, NULL, limit + 1, v);
> +    hvm_copy_to_guest_phys(base + offsetof(struct tss32, iomap),
> +                           &iomap, sizeof(iomap), v);

This should be linear rather than phys.

In practice it will only make a difference if the guest manages to
corrupt its IDENT_PT (which is why I suggested that we move the IDENT_PT
entirely outside of the guest control), but in such a case, we will
re-enter the guest with the vm86 tss pointing to something other than
what we have just initialised.

> +}
> +
>  void hvm_task_switch(
>      uint16_t tss_sel, enum hvm_task_switch_reason taskswitch_reason,
>      int32_t errcode)
> @@ -2862,18 +2899,7 @@ void hvm_task_switch(
>      unsigned int eflags;
>      pagefault_info_t pfinfo;
>      int exn_raised, rc;
> -    struct {
> -        u16 back_link,__blh;
> -        u32 esp0;
> -        u16 ss0, _0;
> -        u32 esp1;
> -        u16 ss1, _1;
> -        u32 esp2;
> -        u16 ss2, _2;
> -        u32 cr3, eip, eflags, eax, ecx, edx, ebx, esp, ebp, esi, edi;
> -        u16 es, _3, cs, _4, ss, _5, ds, _6, fs, _7, gs, _8, ldt, _9;
> -        u16 trace, iomap;
> -    } tss;
> +    struct tss32 tss;
>  
>      hvm_get_segment_register(v, x86_seg_gdtr, &gdt);
>      hvm_get_segment_register(v, x86_seg_tr, &prev_tr);
> @@ -4276,6 +4302,7 @@ static int hvm_allow_set_param(struct do
>      /* The following parameters can be set by the guest. */
>      case HVM_PARAM_CALLBACK_IRQ:
>      case HVM_PARAM_VM86_TSS:
> +    case HVM_PARAM_VM86_TSS_SIZE:
>      case HVM_PARAM_ACPI_IOPORTS_LOCATION:
>      case HVM_PARAM_VM_GENERATION_ID_ADDR:
>      case HVM_PARAM_STORE_EVTCHN:
> @@ -4484,6 +4511,31 @@ static int hvmop_set_param(
>          }
>          d->arch.x87_fip_width = a.value;
>          break;
> +
> +    case HVM_PARAM_VM86_TSS:
> +        /* Hardware would silently truncate high bits. */

Does it?  I'd have thought it would be a vmentry failure.

~Andrew

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] x86/VMX: sanitize VM86 TSS handling
  2017-02-13 18:26 ` Andrew Cooper
@ 2017-02-14  8:55   ` Jan Beulich
  2017-02-14 15:48     ` Andrew Cooper
  0 siblings, 1 reply; 18+ messages in thread
From: Jan Beulich @ 2017-02-14  8:55 UTC (permalink / raw)
  To: Andrew Cooper
  Cc: Kevin Tian, Stefano Stabellini, Wei Liu, George Dunlap, Tim Deegan,
	Ian Jackson, Jun Nakajima, xen-devel, Roger Pau Monne

>>> On 13.02.17 at 19:26, <andrew.cooper3@citrix.com> wrote:
> On 13/02/17 13:19, Jan Beulich wrote:
>> ---
>> TBD: Do we really want to re-init the TSS every time we are about to
>>      use it? This can happen quite often during boot, especially while
>>      grub is running.
> 
> The only case we need worry about is if the guest manually writes to the
> area covered by the vm86 tss.  I think, looking at the logic, that we
> properly restore the old %tr when re-entering protected mode, so we
> aren't at risk of having the vm86 tss on the leaving-side of a hardware
> task switch.

Yes. But that's the whole point of the question: The guest could
equally well write to the TSS manually right after we've initialized
it. And it only harms itself by doing so, hence we could as well
init the TSS on a less frequently executed path.

> We have lasted thus far without, but given the issues recently
> identified, the only safe conclusion to be drawn is that this
> functionality hasn't been used in anger.
> 
> For sensible performance, we need to keep the IO bitmap clear to avoid
> taking the #GP emulation path.
> 
> For correct behaviour, we need the entire software bitmap to be 0.

This one is just for reasonable performance too, afaict. If faulting,
just like with port accesses, we'd emulate the INT $nn.

>> +void hvm_prepare_vm86_tss(struct vcpu *v, uint32_t base, uint32_t limit)
>> +{
>> +    /*
>> +     * If the provided area is large enough to cover at least the ISA port
>> +     * range, keep the bitmaps outside the base structure. For rather small
>> +     * areas (namely relevant for guests having been migrated from older
>> +     * Xen versions), maximize interrupt vector and port coverage by pointing
>> +     * the I/O bitmap at 0x20 (which puts the interrupt redirection bitmap
>> +     * right at zero), accepting accesses to port 0x235 (represented by bit 5
>> +     * of byte 0x46) to trigger #GP (which will simply result in the access
>> +     * being handled by the emulator via a slightly different path than it
>> +     * would be anyway). Be sure to include one extra byte at the end of the
>> +     * I/O bitmap (hence the missing "- 1" in the comparison is not an
>> +     * off-by-one mistake), which we deliberately don't fill with all ones.
>> +     */
>> +    uint16_t iomap = (limit >= sizeof(struct tss32) + (0x100 / 8) + (0x400 / 8)
>> +                      ? sizeof(struct tss32) : 0) + (0x100 / 8);
>> +
>> +    ASSERT(limit >= sizeof(struct tss32) - 1);
>> +    hvm_copy_to_guest_phys(base, NULL, limit + 1, v);
>> +    hvm_copy_to_guest_phys(base + offsetof(struct tss32, iomap),
>> +                           &iomap, sizeof(iomap), v);
> 
> This should be linear rather than phys.

Strictly speaking yes, but since we're running with an ident map,
it doesn't matter. And I'd prefer not to have to introduce a vcpu
parameter to the linear copy function, as that would mean quite
a few changes elsewhere. Would you be okay with me just
attaching a respective comment here?

> In practice it will only make a difference if the guest manages to
> corrupt its IDENT_PT (which is why I suggested that we move the IDENT_PT
> entirely outside of the guest control), but in such a case, we will
> re-enter the guest with the vm86 tss pointing to something other than
> what we have just initialised.

Right, but again the guest would only harm itself.

>> @@ -4484,6 +4511,31 @@ static int hvmop_set_param(
>>          }
>>          d->arch.x87_fip_width = a.value;
>>          break;
>> +
>> +    case HVM_PARAM_VM86_TSS:
>> +        /* Hardware would silently truncate high bits. */
> 
> Does it?  I'd have thought it would be a vmentry failure.

I'm pretty sure I've tried it out, as I didn't have it that way initially.
I think a VM entry failure would result only if the address was
non-canonical.

Jan


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] x86/VMX: sanitize VM86 TSS handling
  2017-02-14  8:55   ` Jan Beulich
@ 2017-02-14 15:48     ` Andrew Cooper
  2017-02-15  8:24       ` Jan Beulich
  0 siblings, 1 reply; 18+ messages in thread
From: Andrew Cooper @ 2017-02-14 15:48 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Kevin Tian, Stefano Stabellini, Wei Liu, George Dunlap, Tim Deegan,
	Ian Jackson, Jun Nakajima, xen-devel, Roger Pau Monne

On 14/02/17 08:55, Jan Beulich wrote:
>>>> On 13.02.17 at 19:26, <andrew.cooper3@citrix.com> wrote:
>> On 13/02/17 13:19, Jan Beulich wrote:
>>> ---
>>> TBD: Do we really want to re-init the TSS every time we are about to
>>>      use it? This can happen quite often during boot, especially while
>>>      grub is running.
>> The only case we need worry about is if the guest manually writes to the
>> area covered by the vm86 tss.  I think, looking at the logic, that we
>> properly restore the old %tr when re-entering protected mode, so we
>> aren't at risk of having the vm86 tss on the leaving-side of a hardware
>> task switch.
> Yes. But that's the whole point of the question: The guest could
> equally well write to the TSS manually right after we've initialized
> it. And it only harms itself by doing so, hence we could as well
> init the TSS on a less frequently executed path.

Per this patch (I think) we initialise it each time the guest tries to
clear CR0.PE

Unless the VM86_TSS location is below the 1MB boundary, the guest can't
actually clobber it.


As an alternative, if you don't reinitialise each time, when would you
do so?

>
>> We have lasted thus far without, but given the issues recently
>> identified, the only safe conclusion to be drawn is that this
>> functionality hasn't been used in anger.
>>
>> For sensible performance, we need to keep the IO bitmap clear to avoid
>> taking the #GP emulation path.
>>
>> For correct behaviour, we need the entire software bitmap to be 0.
> This one is just for reasonable performance too, afaict. If faulting,
> just like with port accesses, we'd emulate the INT $nn.

Does that actually work?  Upon re-injection of the event, won't hardware
follow the same bad path which caused the #GP fault to start with?

>
>>> +void hvm_prepare_vm86_tss(struct vcpu *v, uint32_t base, uint32_t limit)
>>> +{
>>> +    /*
>>> +     * If the provided area is large enough to cover at least the ISA port
>>> +     * range, keep the bitmaps outside the base structure. For rather small
>>> +     * areas (namely relevant for guests having been migrated from older
>>> +     * Xen versions), maximize interrupt vector and port coverage by pointing
>>> +     * the I/O bitmap at 0x20 (which puts the interrupt redirection bitmap
>>> +     * right at zero), accepting accesses to port 0x235 (represented by bit 5
>>> +     * of byte 0x46) to trigger #GP (which will simply result in the access
>>> +     * being handled by the emulator via a slightly different path than it
>>> +     * would be anyway). Be sure to include one extra byte at the end of the
>>> +     * I/O bitmap (hence the missing "- 1" in the comparison is not an
>>> +     * off-by-one mistake), which we deliberately don't fill with all ones.
>>> +     */
>>> +    uint16_t iomap = (limit >= sizeof(struct tss32) + (0x100 / 8) + (0x400 / 8)
>>> +                      ? sizeof(struct tss32) : 0) + (0x100 / 8);
>>> +
>>> +    ASSERT(limit >= sizeof(struct tss32) - 1);
>>> +    hvm_copy_to_guest_phys(base, NULL, limit + 1, v);
>>> +    hvm_copy_to_guest_phys(base + offsetof(struct tss32, iomap),
>>> +                           &iomap, sizeof(iomap), v);
>> This should be linear rather than phys.
> Strictly speaking yes, but since we're running with an ident map,
> it doesn't matter. And I'd prefer not to have to introduce a vcpu
> parameter to the linear copy function, as that would mean quite
> a few changes elsewhere. Would you be okay with me just
> attaching a respective comment here?

Ok.

~Andrew

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] x86/VMX: sanitize VM86 TSS handling
  2017-02-13 13:19 [PATCH] x86/VMX: sanitize VM86 TSS handling Jan Beulich
  2017-02-13 13:37 ` Jan Beulich
  2017-02-13 18:26 ` Andrew Cooper
@ 2017-02-14 17:33 ` Tim Deegan
  2017-02-15  8:18   ` Jan Beulich
  2 siblings, 1 reply; 18+ messages in thread
From: Tim Deegan @ 2017-02-14 17:33 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Kevin Tian, Stefano Stabellini, Wei Liu, George Dunlap,
	Andrew Cooper, Ian Jackson, Jun Nakajima, xen-devel,
	Roger Pau Monne

Hi,

At 06:19 -0700 on 13 Feb (1486966797), Jan Beulich wrote:
> The present way of setting this up is flawed: Leaving the I/O bitmap
> pointer at zero means that the interrupt redirection bitmap lives
> outside (ahead of) the allocated space of the TSS. Similarly setting a
> TSS limit of 255 when only 128 bytes get allocated means that 128 extra
> bytes may be accessed by the CPU during I/O port access processing.
> 
> Introduce a new HVM param to set the allocated size of the TSS, and
> have the hypervisor actually take care of setting namely the I/O bitmap
> pointer. Both this and the segment limit now take the allocated size
> into account.
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

This looks pretty good to me.  Once the question below is resolved,
Reviewed-by: Tim Deegan <tim@xen.org>

> ---
> TBD: Do we really want to re-init the TSS every time we are about to
>      use it?

No - I think we should init it when the guest writes the param(s) and
leave it at that.  Hvmloader marks it as reserved so the guest should
know better than to write to it, and we can't protect it against all
the possible ways the guest could break itself.

If you do want to re-init it more often, then I think it would still
be better to apply legacy guests' (lack of a) size limit once, when the guest
writes the base param.

Cheers,

Tim.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] x86/VMX: sanitize VM86 TSS handling
  2017-02-13 13:37 ` Jan Beulich
  2017-02-13 13:45   ` Jan Beulich
@ 2017-02-14 17:35   ` Tim Deegan
  2017-02-15  8:13     ` Jan Beulich
  1 sibling, 1 reply; 18+ messages in thread
From: Tim Deegan @ 2017-02-14 17:35 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Kevin Tian, Stefano Stabellini, Wei Liu, George Dunlap,
	Andrew Cooper, Ian Jackson, Jun Nakajima, xen-devel,
	Roger Pau Monne

At 06:37 -0700 on 13 Feb (1486967832), Jan Beulich wrote:
> >>> On 13.02.17 at 14:19, <JBeulich@suse.com> wrote:
> > -    tss = mem_alloc(128, 128);
> > -    memset(tss, 0, 128);
> > +    tss = mem_alloc(TSS_SIZE, TSS_SIZE);
> 
>     tss = mem_alloc(TSS_SIZE, 128);
> 
> is sufficient here, as I've noticed (only) while reviewing Roger's
> series v4 of which did trigger the creation of this patch. I've made
> the change locally for now.

Should Xen check the alignment when the param gets written?

Tim.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] x86/VMX: sanitize VM86 TSS handling
  2017-02-14 17:35   ` Tim Deegan
@ 2017-02-15  8:13     ` Jan Beulich
  2017-02-15 10:54       ` Tim Deegan
  0 siblings, 1 reply; 18+ messages in thread
From: Jan Beulich @ 2017-02-15  8:13 UTC (permalink / raw)
  To: Tim Deegan
  Cc: Kevin Tian, Stefano Stabellini, Wei Liu, George Dunlap,
	Andrew Cooper, Ian Jackson, Jun Nakajima, xen-devel,
	Roger Pau Monne

>>> On 14.02.17 at 18:35, <tim@xen.org> wrote:
> At 06:37 -0700 on 13 Feb (1486967832), Jan Beulich wrote:
>> >>> On 13.02.17 at 14:19, <JBeulich@suse.com> wrote:
>> > -    tss = mem_alloc(128, 128);
>> > -    memset(tss, 0, 128);
>> > +    tss = mem_alloc(TSS_SIZE, TSS_SIZE);
>> 
>>     tss = mem_alloc(TSS_SIZE, 128);
>> 
>> is sufficient here, as I've noticed (only) while reviewing Roger's
>> series v4 of which did trigger the creation of this patch. I've made
>> the change locally for now.
> 
> Should Xen check the alignment when the param gets written?

I did think about this too, but then decided not to, since the guest
would only shoot itself in the foot (the more that in non-root mode
no actual task switching by the hardware occurs, so the alignment
requirement is pretty theoretical anyway).

Jan


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] x86/VMX: sanitize VM86 TSS handling
  2017-02-14 17:33 ` Tim Deegan
@ 2017-02-15  8:18   ` Jan Beulich
  2017-02-15 11:21     ` Tim Deegan
  0 siblings, 1 reply; 18+ messages in thread
From: Jan Beulich @ 2017-02-15  8:18 UTC (permalink / raw)
  To: Tim Deegan
  Cc: Kevin Tian, Stefano Stabellini, Wei Liu, George Dunlap,
	Andrew Cooper, Ian Jackson, Jun Nakajima, xen-devel,
	Roger Pau Monne

>>> On 14.02.17 at 18:33, <tim@xen.org> wrote:
> Hi,
> 
> At 06:19 -0700 on 13 Feb (1486966797), Jan Beulich wrote:
>> The present way of setting this up is flawed: Leaving the I/O bitmap
>> pointer at zero means that the interrupt redirection bitmap lives
>> outside (ahead of) the allocated space of the TSS. Similarly setting a
>> TSS limit of 255 when only 128 bytes get allocated means that 128 extra
>> bytes may be accessed by the CPU during I/O port access processing.
>> 
>> Introduce a new HVM param to set the allocated size of the TSS, and
>> have the hypervisor actually take care of setting namely the I/O bitmap
>> pointer. Both this and the segment limit now take the allocated size
>> into account.
>> 
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> 
> This looks pretty good to me.  Once the question below is resolved,
> Reviewed-by: Tim Deegan <tim@xen.org>
> 
>> ---
>> TBD: Do we really want to re-init the TSS every time we are about to
>>      use it?
> 
> No - I think we should init it when the guest writes the param(s) and
> leave it at that.  Hvmloader marks it as reserved so the guest should
> know better than to write to it, and we can't protect it against all
> the possible ways the guest could break itself.
> 
> If you do want to re-init it more often, then I think it would still
> be better to legacy guests' (lack of a) size limit once, when the guest
> writes the base param.

The only problem with this being that at the time the base gets
written we don't know the size yet (nor whether the guest is
going to write it), and hence we don't know how much space to
initialize. The lower limit we enforce on the size (if being set) is
below the 128 byte default for old guests.

Jan


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] x86/VMX: sanitize VM86 TSS handling
  2017-02-14 15:48     ` Andrew Cooper
@ 2017-02-15  8:24       ` Jan Beulich
  0 siblings, 0 replies; 18+ messages in thread
From: Jan Beulich @ 2017-02-15  8:24 UTC (permalink / raw)
  To: Andrew Cooper
  Cc: Kevin Tian, Stefano Stabellini, Wei Liu, George Dunlap, Tim Deegan,
	Ian Jackson, Jun Nakajima, xen-devel, Roger Pau Monne

>>> On 14.02.17 at 16:48, <andrew.cooper3@citrix.com> wrote:
> On 14/02/17 08:55, Jan Beulich wrote:
>>>>> On 13.02.17 at 19:26, <andrew.cooper3@citrix.com> wrote:
>>> On 13/02/17 13:19, Jan Beulich wrote:
>>>> ---
>>>> TBD: Do we really want to re-init the TSS every time we are about to
>>>>      use it? This can happen quite often during boot, especially while
>>>>      grub is running.
>>> The only case we need worry about is if the guest manually writes to the
>>> area covered by the vm86 tss.  I think, looking at the logic, that we
>>> properly restore the old %tr when re-entering protected mode, so we
>>> aren't at risk of having the vm86 tss on the leaving-side of a hardware
>>> task switch.
>> Yes. But that's the whole point of the question: The guest could
>> equally well write to the TSS manually right after we've initialized
>> it. And it only harms itself by doing so, hence we could as well
>> init the TSS on a less frequently executed path.
> 
> Per this patch (I think) we initialise it each time the guest tries to
> clear CR0.PE
> 
> Unless the VM86_TSS location is below the 1MB boundary, the guest can't
> actually clobber it.

Is that the case? Don't we mimic big real mode (by using the
emulator for all insns)?

> As an alternative, if you don't reinitialise each time, when would you
> do so?

Well, ideally when the params get set, but see the reply just sent
to Tim's question in that direction. Alternatively we could track
whether either parameter changed, and gate the call to
hvm_prepare_vm86_tss() on that flag.

>>> We have lasted thus far without, but given the issues recently
>>> identified, the only safe conclusion to be drawn is that this
>>> functionality hasn't been used in anger.
>>>
>>> For sensible performance, we need to keep the IO bitmap clear to avoid
>>> taking the #GP emulation path.
>>>
>>> For correct behaviour, we need the entire software bitmap to be 0.
>> This one is just for reasonable performance too, afaict. If faulting,
>> just like with port accesses, we'd emulate the INT $nn.
> 
> Does that actually work?  Upon re-injection of the event, won't hardware
> follow the same bad path which caused the #GP fault to start with?

realmode_deliver_exception() does everything manually, so I don't
think there's any re-injection.

Jan


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] x86/VMX: sanitize VM86 TSS handling
  2017-02-15  8:13     ` Jan Beulich
@ 2017-02-15 10:54       ` Tim Deegan
  0 siblings, 0 replies; 18+ messages in thread
From: Tim Deegan @ 2017-02-15 10:54 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Kevin Tian, Stefano Stabellini, Wei Liu, George Dunlap,
	Andrew Cooper, Ian Jackson, Jun Nakajima, xen-devel,
	Roger Pau Monne

At 01:13 -0700 on 15 Feb (1487121231), Jan Beulich wrote:
> >>> On 14.02.17 at 18:35, <tim@xen.org> wrote:
> > At 06:37 -0700 on 13 Feb (1486967832), Jan Beulich wrote:
> >> >>> On 13.02.17 at 14:19, <JBeulich@suse.com> wrote:
> >> > -    tss = mem_alloc(128, 128);
> >> > -    memset(tss, 0, 128);
> >> > +    tss = mem_alloc(TSS_SIZE, TSS_SIZE);
> >> 
> >>     tss = mem_alloc(TSS_SIZE, 128);
> >> 
> >> is sufficient here, as I've noticed (only) while reviewing Roger's
> >> series v4 of which did trigger the creation of this patch. I've made
> >> the change locally for now.
> > 
> > Should Xen check the alignment when the param gets written?
> 
> I did think about this too, but then decided not to, since the guest
> would only shoot itself in the foot (the more that in non-root mode
> no actual task switching by the hardware occurs, so the alignment
> requirement is pretty theoretical anyway).

Righto.

Tim.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] x86/VMX: sanitize VM86 TSS handling
  2017-02-15  8:18   ` Jan Beulich
@ 2017-02-15 11:21     ` Tim Deegan
  2017-02-15 11:35       ` Jan Beulich
  2017-02-16 11:14       ` Jan Beulich
  0 siblings, 2 replies; 18+ messages in thread
From: Tim Deegan @ 2017-02-15 11:21 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Kevin Tian, Stefano Stabellini, Wei Liu, George Dunlap,
	Andrew Cooper, Ian Jackson, Jun Nakajima, xen-devel,
	Roger Pau Monne

At 01:18 -0700 on 15 Feb (1487121525), Jan Beulich wrote:
> >>> On 14.02.17 at 18:33, <tim@xen.org> wrote:
> >> TBD: Do we really want to re-init the TSS every time we are about to
> >>      use it?
> > 
> > No - I think we should init it when the guest writes the param(s) and
> > leave it at that.  Hvmloader marks it as reserved so the guest should
> > know better than to write to it, and we can't protect it against all
> > the possible ways the guest could break itself.
> > 
> > If you do want to re-init it more often, then I think it would still
> > be better to legacy guests' (lack of a) size limit once, when the guest
> > writes the base param.
> 
> The only problem with this being that at the time the base gets
> written we don't know the size yet (nor whether the guest is
> going to write it), and hence we don't know how must space to
> initialize. The lower limit we enforce on the size (if being set) is
> below the 128 byte default for old guests.

Why not make the lower limit 128?  I'd happily exchange simpler
hypervisor code for the theoretical case of a guest that needs to run
realmode code and cares about those few bytes.

Cheers,

Tim.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] x86/VMX: sanitize VM86 TSS handling
  2017-02-15 11:21     ` Tim Deegan
@ 2017-02-15 11:35       ` Jan Beulich
  2017-02-16 11:14       ` Jan Beulich
  1 sibling, 0 replies; 18+ messages in thread
From: Jan Beulich @ 2017-02-15 11:35 UTC (permalink / raw)
  To: Tim Deegan
  Cc: Kevin Tian, Stefano Stabellini, Wei Liu, George Dunlap,
	Andrew Cooper, Ian Jackson, Jun Nakajima, xen-devel,
	Roger Pau Monne

>>> On 15.02.17 at 12:21, <tim@xen.org> wrote:
> At 01:18 -0700 on 15 Feb (1487121525), Jan Beulich wrote:
>> >>> On 14.02.17 at 18:33, <tim@xen.org> wrote:
>> >> TBD: Do we really want to re-init the TSS every time we are about to
>> >>      use it?
>> > 
>> > No - I think we should init it when the guest writes the param(s) and
>> > leave it at that.  Hvmloader marks it as reserved so the guest should
>> > know better than to write to it, and we can't protect it against all
>> > the possible ways the guest could break itself.
>> > 
>> > If you do want to re-init it more often, then I think it would still
>> > be better to legacy guests' (lack of a) size limit once, when the guest
>> > writes the base param.
>> 
>> The only problem with this being that at the time the base gets
>> written we don't know the size yet (nor whether the guest is
>> going to write it), and hence we don't know how must space to
>> initialize. The lower limit we enforce on the size (if being set) is
>> below the 128 byte default for old guests.
> 
> Why not make the lower limit 128?  I'd happily exchange simpler
> hypervisor code for the theoretical case of a guest that needs to run
> realmode code and cares about those few bytes.

Well, this would seem a pretty arbitrary limit to me, which I
dislike (but could probably live with).

Jan


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] x86/VMX: sanitize VM86 TSS handling
  2017-02-15 11:21     ` Tim Deegan
  2017-02-15 11:35       ` Jan Beulich
@ 2017-02-16 11:14       ` Jan Beulich
  2017-02-16 11:49         ` Jan Beulich
  1 sibling, 1 reply; 18+ messages in thread
From: Jan Beulich @ 2017-02-16 11:14 UTC (permalink / raw)
  To: Andrew Cooper, Tim Deegan
  Cc: Kevin Tian, Stefano Stabellini, Wei Liu, George Dunlap,
	Ian Jackson, Jun Nakajima, xen-devel, Roger Pau Monne

>>> On 15.02.17 at 12:21, <tim@xen.org> wrote:
> At 01:18 -0700 on 15 Feb (1487121525), Jan Beulich wrote:
>> >>> On 14.02.17 at 18:33, <tim@xen.org> wrote:
>> >> TBD: Do we really want to re-init the TSS every time we are about to
>> >>      use it?
>> > 
>> > No - I think we should init it when the guest writes the param(s) and
>> > leave it at that.  Hvmloader marks it as reserved so the guest should
>> > know better than to write to it, and we can't protect it against all
>> > the possible ways the guest could break itself.
>> > 
>> > If you do want to re-init it more often, then I think it would still
>> > be better to legacy guests' (lack of a) size limit once, when the guest
>> > writes the base param.
>> 
>> The only problem with this being that at the time the base gets
>> written we don't know the size yet (nor whether the guest is
>> going to write it), and hence we don't know how must space to
>> initialize. The lower limit we enforce on the size (if being set) is
>> below the 128 byte default for old guests.
> 
> Why not make the lower limit 128?  I'd happily exchange simpler
> hypervisor code for the theoretical case of a guest that needs to run
> realmode code and cares about those few bytes.

Actually there's one more issue with doing it when the parameter is
being set: If a guest wanted to move the TSS (the parameter isn't
one-shot after all), we can't use the old value of the other parameter
when the first of the two is being changed. Of course we could make
both parameters one-shot, but this would again seem arbitrary. So I
think the better model is to record when either parameter changed,
and do the initialization just once after that.

Jan


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] x86/VMX: sanitize VM86 TSS handling
  2017-02-16 11:14       ` Jan Beulich
@ 2017-02-16 11:49         ` Jan Beulich
  2017-02-17 10:40           ` Roger Pau Monne
  2017-02-20 10:40           ` Tim Deegan
  0 siblings, 2 replies; 18+ messages in thread
From: Jan Beulich @ 2017-02-16 11:49 UTC (permalink / raw)
  To: Andrew Cooper, Tim Deegan
  Cc: Kevin Tian, Stefano Stabellini, Wei Liu, George Dunlap,
	Ian Jackson, Jun Nakajima, xen-devel, Roger Pau Monne

>>> On 16.02.17 at 12:14, <JBeulich@suse.com> wrote:
>>>> On 15.02.17 at 12:21, <tim@xen.org> wrote:
>> At 01:18 -0700 on 15 Feb (1487121525), Jan Beulich wrote:
>>> >>> On 14.02.17 at 18:33, <tim@xen.org> wrote:
>>> >> TBD: Do we really want to re-init the TSS every time we are about to
>>> >>      use it?
>>> > 
>>> > No - I think we should init it when the guest writes the param(s) and
>>> > leave it at that.  Hvmloader marks it as reserved so the guest should
>>> > know better than to write to it, and we can't protect it against all
>>> > the possible ways the guest could break itself.
>>> > 
>>> > If you do want to re-init it more often, then I think it would still
>>> > be better to legacy guests' (lack of a) size limit once, when the guest
>>> > writes the base param.
>>> 
>>> The only problem with this being that at the time the base gets
>>> written we don't know the size yet (nor whether the guest is
>>> going to write it), and hence we don't know how must space to
>>> initialize. The lower limit we enforce on the size (if being set) is
>>> below the 128 byte default for old guests.
>> 
>> Why not make the lower limit 128?  I'd happily exchange simpler
>> hypervisor code for the theoretical case of a guest that needs to run
>> realmode code and cares about those few bytes.
> 
> Actually there's one more issue with doing it when the parameter is
> being set: If a guest wanted to move the TSS (the parameter isn't
> one-shot after all), we can't use the old value of the other parameter
> when the first of the two is being changed. Of course we could make
> both parameters one-shot, but this would again seem arbitrary. So I
> think the better model is to record when either parameter changed,
> and do the initialization just once after that.

Actually no, this wouldn't work either, for the same reason (we might
again be using an inconsistent pair of parameters). Which makes me
come back to my original plan (not mentioned anywhere so far): 
Instead of a new size param, we need one which allows setting both
size and address at the same time. Since the address needs to be
below 4Gb anyway, we could simply use the high half of the 64-bit
value to hold the size.

Jan


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] x86/VMX: sanitize VM86 TSS handling
  2017-02-16 11:49         ` Jan Beulich
@ 2017-02-17 10:40           ` Roger Pau Monne
  2017-02-20 10:40           ` Tim Deegan
  1 sibling, 0 replies; 18+ messages in thread
From: Roger Pau Monne @ 2017-02-17 10:40 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Kevin Tian, Stefano Stabellini, Wei Liu, George Dunlap,
	Andrew Cooper, Ian Jackson, Tim Deegan, Jun Nakajima, xen-devel

On Thu, Feb 16, 2017 at 04:49:18AM -0700, Jan Beulich wrote:
> >>> On 16.02.17 at 12:14, <JBeulich@suse.com> wrote:
> >>>> On 15.02.17 at 12:21, <tim@xen.org> wrote:
> >> At 01:18 -0700 on 15 Feb (1487121525), Jan Beulich wrote:
> >>> >>> On 14.02.17 at 18:33, <tim@xen.org> wrote:
> >>> >> TBD: Do we really want to re-init the TSS every time we are about to
> >>> >>      use it?
> >>> > 
> >>> > No - I think we should init it when the guest writes the param(s) and
> >>> > leave it at that.  Hvmloader marks it as reserved so the guest should
> >>> > know better than to write to it, and we can't protect it against all
> >>> > the possible ways the guest could break itself.
> >>> > 
> >>> > If you do want to re-init it more often, then I think it would still
> >>> > be better to legacy guests' (lack of a) size limit once, when the guest
> >>> > writes the base param.
> >>> 
> >>> The only problem with this being that at the time the base gets
> >>> written we don't know the size yet (nor whether the guest is
> >>> going to write it), and hence we don't know how must space to
> >>> initialize. The lower limit we enforce on the size (if being set) is
> >>> below the 128 byte default for old guests.
> >> 
> >> Why not make the lower limit 128?  I'd happily exchange simpler
> >> hypervisor code for the theoretical case of a guest that needs to run
> >> realmode code and cares about those few bytes.
> > 
> > Actually there's one more issue with doing it when the parameter is
> > being set: If a guest wanted to move the TSS (the parameter isn't
> > one-shot after all), we can't use the old value of the other parameter
> > when the first of the two is being changed. Of course we could make
> > both parameters one-shot, but this would again seem arbitrary. So I
> > think the better model is to record when either parameter changed,
> > and do the initialization just once after that.
> 
> Actually no, this wouldn't work either, for the same reason (we might
> again be using an inconsistent pair of parameters). Which makes me
> come back to my original plan (not mentioned anywhere so far): 
> Instead of a new size param, we need one which allows setting both
> size and address at the same time. Since the address needs to be
> below 4Gb anyway, we could simply use the high half of the 64-bit
> value to hold the size.

IMHO this looks sensible. I've rebased my PVHv2 Dom0 series on top of this (and
fixed the TSS allocation in Dom0 build), and it seems to work fine.

Roger.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] x86/VMX: sanitize VM86 TSS handling
  2017-02-16 11:49         ` Jan Beulich
  2017-02-17 10:40           ` Roger Pau Monne
@ 2017-02-20 10:40           ` Tim Deegan
  1 sibling, 0 replies; 18+ messages in thread
From: Tim Deegan @ 2017-02-20 10:40 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Kevin Tian, Stefano Stabellini, Wei Liu, George Dunlap,
	Andrew Cooper, Ian Jackson, Jun Nakajima, xen-devel,
	Roger Pau Monne

At 04:49 -0700 on 16 Feb (1487220558), Jan Beulich wrote:
> >>> On 16.02.17 at 12:14, <JBeulich@suse.com> wrote:
> >>>> On 15.02.17 at 12:21, <tim@xen.org> wrote:
> >> At 01:18 -0700 on 15 Feb (1487121525), Jan Beulich wrote:
> >>> >>> On 14.02.17 at 18:33, <tim@xen.org> wrote:
> >>> >> TBD: Do we really want to re-init the TSS every time we are about to
> >>> >>      use it?
> >>> > 
> >>> > No - I think we should init it when the guest writes the param(s) and
> >>> > leave it at that.  Hvmloader marks it as reserved so the guest should
> >>> > know better than to write to it, and we can't protect it against all
> >>> > the possible ways the guest could break itself.
> >>> > 
> >>> > If you do want to re-init it more often, then I think it would still
> >>> > be better to legacy guests' (lack of a) size limit once, when the guest
> >>> > writes the base param.
> >>> 
> >>> The only problem with this being that at the time the base gets
> >>> written we don't know the size yet (nor whether the guest is
> >>> going to write it), and hence we don't know how must space to
> >>> initialize. The lower limit we enforce on the size (if being set) is
> >>> below the 128 byte default for old guests.
> >> 
> >> Why not make the lower limit 128?  I'd happily exchange simpler
> >> hypervisor code for the theoretical case of a guest that needs to run
> >> realmode code and cares about those few bytes.
> > 
> > Actually there's one more issue with doing it when the parameter is
> > being set: If a guest wanted to move the TSS (the parameter isn't
> > one-shot after all), we can't use the old value of the other parameter
> > when the first of the two is being changed. Of course we could make
> > both parameters one-shot, but this would again seem arbitrary. So I
> > think the better model is to record when either parameter changed,
> > and do the initialization just once after that.
> 
> Actually no, this wouldn't work either, for the same reason (we might
> again be using an inconsistent pair of parameters). Which makes me
> come back to my original plan (not mentioned anywhere so far): 
> Instead of a new size param, we need one which allows setting both
> size and address at the same time. Since the address needs to be
> below 4Gb anyway, we could simply use the high half of the 64-bit
> value to hold the size.

Sure, that seems like it avoids a lot of potential pitfalls.

Tim.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2017-02-20 10:40 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-02-13 13:19 [PATCH] x86/VMX: sanitize VM86 TSS handling Jan Beulich
2017-02-13 13:37 ` Jan Beulich
2017-02-13 13:45   ` Jan Beulich
2017-02-14 17:35   ` Tim Deegan
2017-02-15  8:13     ` Jan Beulich
2017-02-15 10:54       ` Tim Deegan
2017-02-13 18:26 ` Andrew Cooper
2017-02-14  8:55   ` Jan Beulich
2017-02-14 15:48     ` Andrew Cooper
2017-02-15  8:24       ` Jan Beulich
2017-02-14 17:33 ` Tim Deegan
2017-02-15  8:18   ` Jan Beulich
2017-02-15 11:21     ` Tim Deegan
2017-02-15 11:35       ` Jan Beulich
2017-02-16 11:14       ` Jan Beulich
2017-02-16 11:49         ` Jan Beulich
2017-02-17 10:40           ` Roger Pau Monne
2017-02-20 10:40           ` Tim Deegan

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).