All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/3] Fixes to NMI shootdown
@ 2015-02-10 17:12 Andrew Cooper
  2015-02-10 17:12 ` [PATCH 1/3] x86/traps: Export the exception_table[] function pointer table to C Andrew Cooper
                   ` (2 more replies)
  0 siblings, 3 replies; 9+ messages in thread
From: Andrew Cooper @ 2015-02-10 17:12 UTC (permalink / raw)
  To: Xen-devel; +Cc: Andrew Cooper

This series fixes an issue introduced by c/s 7dd3b06ff

On my testbox I can observe the cpu which doesn't get shot down to still be
executing after the crash kernel has finished running.

[   25.806178] sd 2:0:0:0: [sda] Synchronizing SCSI cache
[   25.812038] Restarting system.
[   25.815121] reboot: machine restart
(XEN) [  192.854548] vmx.c:3030:d1v0 Bad vmexit (reason 0x3)
(XEN) [  192.859861] domain_crash called from vmx.c:3031
(XEN) [  192.864900] Domain 1 (vcpu#0) crashed on cpu#1:
(XEN) [  192.869967] ----[ Xen-4.4.1  x86_64  debug=y  Not tainted ]----
(XEN) [  192.8
PXELINUX 3.70 2008-06-30  Copyright (C) 1994-2008 H. Peter Anvin
boot:
Booting from local disk...

where exit reason 3 is for receving an INIT.

Andrew Cooper (3):
  x86/traps: Export the exception_table[] function pointer table to C
  x86/traps: Use write_atomic() when updating potentially-live
    descriptors
  x86/nmi: Fix shootdown of pcpus running in VMX non-root mode

 xen/arch/x86/crash.c            |   59 ++++++++++++++++-----------------------
 xen/arch/x86/hvm/vmx/vmx.c      |    2 +-
 xen/arch/x86/x86_64/entry.S     |   14 ++--------
 xen/include/asm-x86/desc.h      |    3 +-
 xen/include/asm-x86/processor.h |   11 ++++++--
 5 files changed, 38 insertions(+), 51 deletions(-)

-- 
1.7.10.4

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH 1/3] x86/traps: Export the exception_table[] function pointer table to C
  2015-02-10 17:12 [PATCH 0/3] Fixes to NMI shootdown Andrew Cooper
@ 2015-02-10 17:12 ` Andrew Cooper
  2015-02-10 17:12 ` [PATCH 2/3] x86/traps: Use write_atomic() when updating potentially-live descriptors Andrew Cooper
  2015-02-10 17:12 ` [PATCH 3/3] x86/nmi: Fix shootdown of pcpus running in VMX non-root mode Andrew Cooper
  2 siblings, 0 replies; 9+ messages in thread
From: Andrew Cooper @ 2015-02-10 17:12 UTC (permalink / raw)
  To: Xen-devel; +Cc: Andrew Cooper, Tim Deegan, Jan Beulich

and use it in preference to the direct call to do_nmi() in vmx.c

The value 'TRAP_last_reserved' was only used where 'TRAP_nr' would be more
appropriate, so is replaced.

No functional change

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
CC: Jan Beulich <JBeulich@suse.com>
CC: Tim Deegan <tim@xen.org>

---
Diffing the dissassembly confirms no change:

 @@ -236036,10 +236036,10 @@
 ffff82d0801e1a9d:      48 3d 00 02 00 00       cmp    $0x200,%rax
 ffff82d0801e1aa3:      0f 85 84 fe ff ff       jne    ffff82d0801e192d <vmx_vmexit_handler+0x10d>
 ffff82d0801e1aa9:      48 89 ef                mov    %rbp,%rdi
-ffff82d0801e1aac:      e8 4f c3 fa ff          callq  ffff82d08018de00 <do_nmi>
-ffff82d0801e1ab1:      e8 ea 7f 04 00          callq  ffff82d080229aa0 <enable_nmis>
-ffff82d0801e1ab6:      e9 72 fe ff ff          jmpq   ffff82d0801e192d <vmx_vmexit_handler+0x10d>
-ffff82d0801e1abb:      0f 1f 44 00 00          nopl   0x0(%rax,%rax,1)
+ffff82d0801e1aac:      ff 15 be 62 05 00       callq  *0x562be(%rip)
+ffff82d0801e1ab2:      e8 e9 7f 04 00          callq  ffff82d080229aa0 <enable_nmis>
+ffff82d0801e1ab7:      e9 71 fe ff ff          jmpq   ffff82d0801e192d <vmx_vmexit_handler+0x10d>
+ffff82d0801e1abc:      0f 1f 40 00             nopl   0x0(%rax)
 ffff82d0801e1ac0:      f6 83 c3 03 00 00 80    testb  $0x80,0x3c3(%rbx)
 ffff82d0801e1ac7:      0f 84 07 fe ff ff       je     ffff82d0801e18d4 <vmx_vmexit_handler+0xb4>
 ffff82d0801e1acd:      48 8b 83 08 04 00 00    mov    0x408(%rbx),%rax
---
 xen/arch/x86/hvm/vmx/vmx.c      |    2 +-
 xen/arch/x86/x86_64/entry.S     |    5 +++--
 xen/include/asm-x86/processor.h |    5 ++++-
 3 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 88b7821..357ef6c 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -2701,7 +2701,7 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
              && ((intr_info & INTR_INFO_INTR_TYPE_MASK) ==
                  (X86_EVENTTYPE_NMI << 8)) )
         {
-            do_nmi(regs);
+            exception_table[TRAP_nmi](regs);
             enable_nmis();
         }
         break;
diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
index b3d6e32..062c690 100644
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -728,9 +728,10 @@ ENTRY(exception_table)
         .quad do_alignment_check
         .quad do_machine_check
         .quad do_simd_coprocessor_error
-        .rept TRAP_last_reserved + 1 - ((. - exception_table) / 8)
+        .rept TRAP_nr - ((. - exception_table) / 8)
         .quad do_reserved_trap /* Architecturally reserved exceptions. */
         .endr
+        .size exception_table, . - exception_table
 
 ENTRY(hypercall_table)
         .quad do_set_trap_table     /*  0 */
@@ -857,7 +858,7 @@ autogen_stubs: /* Automatically generated stubs. */
         entrypoint 1b
 
         /* Reserved exceptions, heading towards do_reserved_trap(). */
-        .elseif vec == TRAP_copro_seg || vec == TRAP_spurious_int || (vec > TRAP_simd_error && vec <= TRAP_last_reserved)
+        .elseif vec == TRAP_copro_seg || vec == TRAP_spurious_int || (vec > TRAP_simd_error && vec < TRAP_nr)
 
 1:      test  $8,%spl        /* 64bit exception frames are 16 byte aligned, but the word */
         jz    2f             /* size is 8 bytes.  Check whether the processor gave us an */
diff --git a/xen/include/asm-x86/processor.h b/xen/include/asm-x86/processor.h
index 20eade6..2773ea8 100644
--- a/xen/include/asm-x86/processor.h
+++ b/xen/include/asm-x86/processor.h
@@ -114,7 +114,7 @@
 #define TRAP_machine_check    18
 #define TRAP_simd_error       19
 #define TRAP_virtualisation   20
-#define TRAP_last_reserved    31
+#define TRAP_nr               32
 
 /* Set for entry via SYSCALL. Informs return code to use SYSRETQ not IRETQ. */
 /* NB. Same as VGCF_in_syscall. No bits in common with any other TRAP_ defn. */
@@ -492,6 +492,9 @@ extern void mtrr_bp_init(void);
 
 void mcheck_init(struct cpuinfo_x86 *c, bool_t bsp);
 
+/* Dispatch table for exceptions */
+extern void (* const exception_table[TRAP_nr])(struct cpu_user_regs *regs);
+
 #define DECLARE_TRAP_HANDLER(_name)                    \
     void _name(void);                                  \
     void do_ ## _name(struct cpu_user_regs *regs)
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 2/3] x86/traps: Use write_atomic() when updating potentially-live descriptors
  2015-02-10 17:12 [PATCH 0/3] Fixes to NMI shootdown Andrew Cooper
  2015-02-10 17:12 ` [PATCH 1/3] x86/traps: Export the exception_table[] function pointer table to C Andrew Cooper
@ 2015-02-10 17:12 ` Andrew Cooper
  2015-02-11 10:46   ` Jan Beulich
  2015-02-10 17:12 ` [PATCH 3/3] x86/nmi: Fix shootdown of pcpus running in VMX non-root mode Andrew Cooper
  2 siblings, 1 reply; 9+ messages in thread
From: Andrew Cooper @ 2015-02-10 17:12 UTC (permalink / raw)
  To: Xen-devel; +Cc: Andrew Cooper, Tim Deegan, Jan Beulich

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
CC: Jan Beulich <JBeulich@suse.com>
CC: Tim Deegan <tim@xen.org>

---

atomic.h currently can't be included in either location because of dependency
issues.  As this is part of a bugfix series which needs backporting, I opted
to opencode write_atomic() rather than something more invasive to the header
files.
---
 xen/include/asm-x86/desc.h      |    3 ++-
 xen/include/asm-x86/processor.h |    5 ++++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/xen/include/asm-x86/desc.h b/xen/include/asm-x86/desc.h
index 20c47d2..30f1a7a 100644
--- a/xen/include/asm-x86/desc.h
+++ b/xen/include/asm-x86/desc.h
@@ -116,7 +116,8 @@ static inline void _write_gate_lower(volatile idt_entry_t *gate,
                                      const idt_entry_t *new)
 {
     ASSERT(gate->b == new->b);
-    gate->a = new->a;
+    /* TODO: untangle the #include hierachy and use write_atomic() here. */
+    asm volatile ("movq %1,%0" : "=m" (gate->a) : "r" (new->a));
 }
 
 #define _set_gate(gate_addr,type,dpl,addr)               \
diff --git a/xen/include/asm-x86/processor.h b/xen/include/asm-x86/processor.h
index 2773ea8..c703581 100644
--- a/xen/include/asm-x86/processor.h
+++ b/xen/include/asm-x86/processor.h
@@ -444,9 +444,12 @@ struct __packed __cacheline_aligned tss_struct {
  * descriptor table entry. */
 static always_inline void set_ist(idt_entry_t *idt, unsigned long ist)
 {
+    u64 new_a = (idt->a & ~(7UL << 32)) | (ist << 32);
+
     /* IST is a 3 bit field, 32 bits into the IDT entry. */
     ASSERT(ist <= IST_MAX);
-    idt->a = (idt->a & ~(7UL << 32)) | (ist << 32);
+    /* TODO: untangle the #include hierachy and use write_atomic() here. */
+    asm volatile ("movq %1,%0" : "=m" (idt->a) : "r" (new_a));
 }
 
 #define IDT_ENTRIES 256
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 3/3] x86/nmi: Fix shootdown of pcpus running in VMX non-root mode
  2015-02-10 17:12 [PATCH 0/3] Fixes to NMI shootdown Andrew Cooper
  2015-02-10 17:12 ` [PATCH 1/3] x86/traps: Export the exception_table[] function pointer table to C Andrew Cooper
  2015-02-10 17:12 ` [PATCH 2/3] x86/traps: Use write_atomic() when updating potentially-live descriptors Andrew Cooper
@ 2015-02-10 17:12 ` Andrew Cooper
  2015-02-11 11:02   ` Jan Beulich
  2 siblings, 1 reply; 9+ messages in thread
From: Andrew Cooper @ 2015-02-10 17:12 UTC (permalink / raw)
  To: Xen-devel; +Cc: Andrew Cooper, Tim Deegan, Jan Beulich

c/s 7dd3b06ff "vmx: fix handling of NMI VMEXIT" fixed one issue but
inadvertently introduced a regression when it came to the NMI shootdown.  The
shootdown code worked by patching vector 2 in each IDT, but the introduced
direct call to do_nmi() bypassed this.

Instead of patching each IDT, take a different approach by updating the
existing dispatch table.  This allows for the removal of the remote IDT
patching and the removal of the nmi_crash() entry point.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
CC: Jan Beulich <JBeulich@suse.com>
CC: Tim Deegan <tim@xen.org>
---
 xen/arch/x86/crash.c            |   59 ++++++++++++++++-----------------------
 xen/arch/x86/x86_64/entry.S     |    9 ------
 xen/include/asm-x86/processor.h |    1 -
 3 files changed, 24 insertions(+), 45 deletions(-)

diff --git a/xen/arch/x86/crash.c b/xen/arch/x86/crash.c
index c0b83df..e175871 100644
--- a/xen/arch/x86/crash.c
+++ b/xen/arch/x86/crash.c
@@ -36,9 +36,11 @@ static unsigned int crashing_cpu;
 static DEFINE_PER_CPU_READ_MOSTLY(bool_t, crash_save_done);
 
 /* This becomes the NMI handler for non-crashing CPUs, when Xen is crashing. */
-void do_nmi_crash(struct cpu_user_regs *regs)
+static void noreturn do_nmi_crash(const struct cpu_user_regs *regs)
 {
-    int cpu = smp_processor_id();
+    unsigned int cpu = smp_processor_id();
+
+    stac();
 
     /* nmi_shootdown_cpus() should ensure that this assertion is correct. */
     ASSERT(cpu != crashing_cpu);
@@ -113,11 +115,10 @@ void do_nmi_crash(struct cpu_user_regs *regs)
         halt();
 }
 
-void nmi_crash(void);
 static void nmi_shootdown_cpus(void)
 {
     unsigned long msecs;
-    int i, cpu = smp_processor_id();
+    unsigned int cpu = smp_processor_id();
 
     disable_lapic_nmi_watchdog();
     local_irq_disable();
@@ -127,38 +128,26 @@ static void nmi_shootdown_cpus(void)
 
     cpumask_andnot(&waiting_to_crash, &cpu_online_map, cpumask_of(cpu));
 
-    /* Change NMI trap handlers.  Non-crashing pcpus get nmi_crash which
-     * invokes do_nmi_crash (above), which cause them to write state and
-     * fall into a loop.  The crashing pcpu gets the nop handler to
-     * cause it to return to this function ASAP.
+    /*
+     * Disable IST for MCEs to avoid stack corruption race conditions, and
+     * change the NMI hanlder to a nop to avoid deviation from this codepath.
      */
-    for ( i = 0; i < nr_cpu_ids; i++ )
-    {
-        if ( idt_tables[i] == NULL )
-            continue;
-
-        if ( i == cpu )
-        {
-            /*
-             * Disable the interrupt stack tables for this cpu's MCE and NMI 
-             * handlers, and alter the NMI handler to have no operation.  
-             * Disabling the stack tables prevents stack corruption race 
-             * conditions, while changing the handler helps prevent cascading 
-             * faults; we are certainly going to crash by this point.
-             *
-             * This update is safe from a security point of view, as this pcpu 
-             * is never going to try to sysret back to a PV vcpu.
-             */
-            _set_gate_lower(&idt_tables[i][TRAP_nmi],
-                            SYS_DESC_irq_gate, 0, &trap_nop);
-            set_ist(&idt_tables[i][TRAP_machine_check], IST_NONE);
-        }
-        else
-        {
-            /* Do not update stack table for other pcpus. */
-            _update_gate_addr_lower(&idt_tables[i][TRAP_nmi], &nmi_crash);
-        }
-    }
+    _set_gate_lower(&idt_tables[cpu][TRAP_nmi],
+                    SYS_DESC_irq_gate, 0, &trap_nop);
+    set_ist(&idt_tables[cpu][TRAP_machine_check], IST_NONE);
+
+    /*
+     * Ideally would be:
+     *   exception_table[TRAP_nmi] = &do_nmi_crash;
+     *
+     * but the exception_table is read only.  Borrow and unused fixmap entry
+     * to construct a writable mapping.
+     */
+    set_fixmap(FIX_TBOOT_MAP_ADDRESS, __pa(&exception_table[TRAP_nmi]));
+    write_atomic((unsigned long *)
+                 (fix_to_virt(FIX_TBOOT_MAP_ADDRESS) +
+                  ((unsigned long)&exception_table[TRAP_nmi] & ~PAGE_MASK)),
+                 (unsigned long)&do_nmi_crash);
 
     /* Ensure the new callback function is set before sending out the NMI. */
     wmb();
diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
index 062c690..2d25d57 100644
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -669,15 +669,6 @@ handle_ist_exception:
         je    restore_all_guest
         jmp   compat_restore_all_guest
 
-ENTRY(nmi_crash)
-        pushq $0
-        movl $TRAP_nmi,4(%rsp)
-        /* Set AC to reduce chance of further SMAP faults */
-        SAVE_ALL STAC
-        movq %rsp,%rdi
-        callq do_nmi_crash /* Does not return */
-        ud2
-
 ENTRY(machine_check)
         pushq $0
         movl  $TRAP_machine_check,4(%rsp)
diff --git a/xen/include/asm-x86/processor.h b/xen/include/asm-x86/processor.h
index c703581..75c077c 100644
--- a/xen/include/asm-x86/processor.h
+++ b/xen/include/asm-x86/processor.h
@@ -530,7 +530,6 @@ DECLARE_TRAP_HANDLER(alignment_check);
 
 void trap_nop(void);
 void enable_nmis(void);
-void noreturn do_nmi_crash(struct cpu_user_regs *regs);
 void do_reserved_trap(struct cpu_user_regs *regs);
 
 void syscall_enter(void);
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/3] x86/traps: Use write_atomic() when updating potentially-live descriptors
  2015-02-10 17:12 ` [PATCH 2/3] x86/traps: Use write_atomic() when updating potentially-live descriptors Andrew Cooper
@ 2015-02-11 10:46   ` Jan Beulich
  2015-02-11 15:06     ` [PATCH v2 2/3] x86/traps: Avoid interleaved writes " Andrew Cooper
  0 siblings, 1 reply; 9+ messages in thread
From: Jan Beulich @ 2015-02-11 10:46 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: Tim Deegan, Xen-devel

>>> On 10.02.15 at 18:12, <andrew.cooper3@citrix.com> wrote:
> --- a/xen/include/asm-x86/desc.h
> +++ b/xen/include/asm-x86/desc.h
> @@ -116,7 +116,8 @@ static inline void _write_gate_lower(volatile idt_entry_t *gate,
>                                       const idt_entry_t *new)
>  {
>      ASSERT(gate->b == new->b);
> -    gate->a = new->a;
> +    /* TODO: untangle the #include hierachy and use write_atomic() here. */
> +    asm volatile ("movq %1,%0" : "=m" (gate->a) : "r" (new->a));

Considering the volatile modifier I don't think this is needed.

> --- a/xen/include/asm-x86/processor.h
> +++ b/xen/include/asm-x86/processor.h
> @@ -444,9 +444,12 @@ struct __packed __cacheline_aligned tss_struct {
>   * descriptor table entry. */
>  static always_inline void set_ist(idt_entry_t *idt, unsigned long ist)
>  {
> +    u64 new_a = (idt->a & ~(7UL << 32)) | (ist << 32);
> +
>      /* IST is a 3 bit field, 32 bits into the IDT entry. */
>      ASSERT(ist <= IST_MAX);
> -    idt->a = (idt->a & ~(7UL << 32)) | (ist << 32);
> +    /* TODO: untangle the #include hierachy and use write_atomic() here. */
> +    asm volatile ("movq %1,%0" : "=m" (idt->a) : "r" (new_a));

And I would then recommend using _write_gate_lower() here
instead of open coding anything.

Jan

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 3/3] x86/nmi: Fix shootdown of pcpus running in VMX non-root mode
  2015-02-10 17:12 ` [PATCH 3/3] x86/nmi: Fix shootdown of pcpus running in VMX non-root mode Andrew Cooper
@ 2015-02-11 11:02   ` Jan Beulich
  2015-02-11 12:13     ` Andrew Cooper
  0 siblings, 1 reply; 9+ messages in thread
From: Jan Beulich @ 2015-02-11 11:02 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: Tim Deegan, Xen-devel

>>> On 10.02.15 at 18:12, <andrew.cooper3@citrix.com> wrote:
> @@ -127,38 +128,26 @@ static void nmi_shootdown_cpus(void)
>  
>      cpumask_andnot(&waiting_to_crash, &cpu_online_map, cpumask_of(cpu));
>  
> -    /* Change NMI trap handlers.  Non-crashing pcpus get nmi_crash which
> -     * invokes do_nmi_crash (above), which cause them to write state and
> -     * fall into a loop.  The crashing pcpu gets the nop handler to
> -     * cause it to return to this function ASAP.
> +    /*
> +     * Disable IST for MCEs to avoid stack corruption race conditions, and
> +     * change the NMI hanlder to a nop to avoid deviation from this codepath.

handler

>       */
> -    for ( i = 0; i < nr_cpu_ids; i++ )
> -    {
> -        if ( idt_tables[i] == NULL )
> -            continue;
> -
> -        if ( i == cpu )
> -        {
> -            /*
> -             * Disable the interrupt stack tables for this cpu's MCE and NMI 
> -             * handlers, and alter the NMI handler to have no operation.  
> -             * Disabling the stack tables prevents stack corruption race 
> -             * conditions, while changing the handler helps prevent cascading 
> -             * faults; we are certainly going to crash by this point.
> -             *
> -             * This update is safe from a security point of view, as this pcpu 
> -             * is never going to try to sysret back to a PV vcpu.
> -             */
> -            _set_gate_lower(&idt_tables[i][TRAP_nmi],
> -                            SYS_DESC_irq_gate, 0, &trap_nop);
> -            set_ist(&idt_tables[i][TRAP_machine_check], IST_NONE);
> -        }
> -        else
> -        {
> -            /* Do not update stack table for other pcpus. */
> -            _update_gate_addr_lower(&idt_tables[i][TRAP_nmi], &nmi_crash);
> -        }
> -    }
> +    _set_gate_lower(&idt_tables[cpu][TRAP_nmi],
> +                    SYS_DESC_irq_gate, 0, &trap_nop);
> +    set_ist(&idt_tables[cpu][TRAP_machine_check], IST_NONE);
> +
> +    /*
> +     * Ideally would be:
> +     *   exception_table[TRAP_nmi] = &do_nmi_crash;
> +     *
> +     * but the exception_table is read only.  Borrow and unused fixmap entry

... an unused ...

> +     * to construct a writable mapping.
> +     */
> +    set_fixmap(FIX_TBOOT_MAP_ADDRESS, __pa(&exception_table[TRAP_nmi]));
> +    write_atomic((unsigned long *)
> +                 (fix_to_virt(FIX_TBOOT_MAP_ADDRESS) +
> +                  ((unsigned long)&exception_table[TRAP_nmi] & ~PAGE_MASK)),
> +                 (unsigned long)&do_nmi_crash);

By converting the first cast to (void **) or even
(typeof(do_nmi_crash) **) it would seem possible to drop the last
cast altogether. While at it you could also drop the unnecessary &.

Jan

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 3/3] x86/nmi: Fix shootdown of pcpus running in VMX non-root mode
  2015-02-11 11:02   ` Jan Beulich
@ 2015-02-11 12:13     ` Andrew Cooper
  2015-02-11 13:22       ` Jan Beulich
  0 siblings, 1 reply; 9+ messages in thread
From: Andrew Cooper @ 2015-02-11 12:13 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Tim Deegan, Xen-devel

On 11/02/15 11:02, Jan Beulich wrote:
>>>> On 10.02.15 at 18:12, <andrew.cooper3@citrix.com> wrote:
>> @@ -127,38 +128,26 @@ static void nmi_shootdown_cpus(void)
>>  
>>      cpumask_andnot(&waiting_to_crash, &cpu_online_map, cpumask_of(cpu));
>>  
>> -    /* Change NMI trap handlers.  Non-crashing pcpus get nmi_crash which
>> -     * invokes do_nmi_crash (above), which cause them to write state and
>> -     * fall into a loop.  The crashing pcpu gets the nop handler to
>> -     * cause it to return to this function ASAP.
>> +    /*
>> +     * Disable IST for MCEs to avoid stack corruption race conditions, and
>> +     * change the NMI hanlder to a nop to avoid deviation from this codepath.
> handler
>
>>       */
>> -    for ( i = 0; i < nr_cpu_ids; i++ )
>> -    {
>> -        if ( idt_tables[i] == NULL )
>> -            continue;
>> -
>> -        if ( i == cpu )
>> -        {
>> -            /*
>> -             * Disable the interrupt stack tables for this cpu's MCE and NMI 
>> -             * handlers, and alter the NMI handler to have no operation.  
>> -             * Disabling the stack tables prevents stack corruption race 
>> -             * conditions, while changing the handler helps prevent cascading 
>> -             * faults; we are certainly going to crash by this point.
>> -             *
>> -             * This update is safe from a security point of view, as this pcpu 
>> -             * is never going to try to sysret back to a PV vcpu.
>> -             */
>> -            _set_gate_lower(&idt_tables[i][TRAP_nmi],
>> -                            SYS_DESC_irq_gate, 0, &trap_nop);
>> -            set_ist(&idt_tables[i][TRAP_machine_check], IST_NONE);
>> -        }
>> -        else
>> -        {
>> -            /* Do not update stack table for other pcpus. */
>> -            _update_gate_addr_lower(&idt_tables[i][TRAP_nmi], &nmi_crash);
>> -        }
>> -    }
>> +    _set_gate_lower(&idt_tables[cpu][TRAP_nmi],
>> +                    SYS_DESC_irq_gate, 0, &trap_nop);
>> +    set_ist(&idt_tables[cpu][TRAP_machine_check], IST_NONE);
>> +
>> +    /*
>> +     * Ideally would be:
>> +     *   exception_table[TRAP_nmi] = &do_nmi_crash;
>> +     *
>> +     * but the exception_table is read only.  Borrow and unused fixmap entry
> ... an unused ...
>
>> +     * to construct a writable mapping.
>> +     */
>> +    set_fixmap(FIX_TBOOT_MAP_ADDRESS, __pa(&exception_table[TRAP_nmi]));
>> +    write_atomic((unsigned long *)
>> +                 (fix_to_virt(FIX_TBOOT_MAP_ADDRESS) +
>> +                  ((unsigned long)&exception_table[TRAP_nmi] & ~PAGE_MASK)),
>> +                 (unsigned long)&do_nmi_crash);
> By converting the first cast to (void **) or even
> (typeof(do_nmi_crash) **) it would seem possible to drop the last
> cast altogether. While at it you could also drop the unnecessary &.

None of these casts can be dropped, because write_atomic() uses explicit
uint??_t casts.  Gcc objects because of -Werror=pointer-to-int-cast.

~Andrew

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 3/3] x86/nmi: Fix shootdown of pcpus running in VMX non-root mode
  2015-02-11 12:13     ` Andrew Cooper
@ 2015-02-11 13:22       ` Jan Beulich
  0 siblings, 0 replies; 9+ messages in thread
From: Jan Beulich @ 2015-02-11 13:22 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: Tim Deegan, Xen-devel

>>> On 11.02.15 at 13:13, <andrew.cooper3@citrix.com> wrote:
> On 11/02/15 11:02, Jan Beulich wrote:
>>>>> On 10.02.15 at 18:12, <andrew.cooper3@citrix.com> wrote:
>>> +     * to construct a writable mapping.
>>> +     */
>>> +    set_fixmap(FIX_TBOOT_MAP_ADDRESS, __pa(&exception_table[TRAP_nmi]));
>>> +    write_atomic((unsigned long *)
>>> +                 (fix_to_virt(FIX_TBOOT_MAP_ADDRESS) +
>>> +                  ((unsigned long)&exception_table[TRAP_nmi] & ~PAGE_MASK)),
>>> +                 (unsigned long)&do_nmi_crash);
>> By converting the first cast to (void **) or even
>> (typeof(do_nmi_crash) **) it would seem possible to drop the last
>> cast altogether. While at it you could also drop the unnecessary &.
> 
> None of these casts can be dropped, because write_atomic() uses explicit
> uint??_t casts.  Gcc objects because of -Werror=pointer-to-int-cast.

Oh, indeed (and all that on only the unused cases of the switch).
In that case no need to re-submit, fixing the typos can be done
while committing.

Jan

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH v2 2/3] x86/traps: Avoid interleaved writes when updating potentially-live descriptors
  2015-02-11 10:46   ` Jan Beulich
@ 2015-02-11 15:06     ` Andrew Cooper
  0 siblings, 0 replies; 9+ messages in thread
From: Andrew Cooper @ 2015-02-11 15:06 UTC (permalink / raw)
  To: Xen-devel; +Cc: Andrew Cooper, Tim Deegan, Jan Beulich

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
CC: Jan Beulich <JBeulich@suse.com>
CC: Tim Deegan <tim@xen.org>

---

v2:
 * Use _write_gate_lower() instead of opencoding write_atomic()
 * Drop write_atomic() in _write_gate_lower().  It doesn't appear to make a
   practical difference (although certainly does cause a difference to the
   register scheduling).
---
 xen/include/asm-x86/processor.h |    5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/xen/include/asm-x86/processor.h b/xen/include/asm-x86/processor.h
index 2773ea8..c5c647a 100644
--- a/xen/include/asm-x86/processor.h
+++ b/xen/include/asm-x86/processor.h
@@ -444,9 +444,12 @@ struct __packed __cacheline_aligned tss_struct {
  * descriptor table entry. */
 static always_inline void set_ist(idt_entry_t *idt, unsigned long ist)
 {
+    idt_entry_t new = *idt;
+
     /* IST is a 3 bit field, 32 bits into the IDT entry. */
     ASSERT(ist <= IST_MAX);
-    idt->a = (idt->a & ~(7UL << 32)) | (ist << 32);
+    new.a = (idt->a & ~(7UL << 32)) | (ist << 32);
+    _write_gate_lower(idt, &new);
 }
 
 #define IDT_ENTRIES 256
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2015-02-11 15:06 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-02-10 17:12 [PATCH 0/3] Fixes to NMI shootdown Andrew Cooper
2015-02-10 17:12 ` [PATCH 1/3] x86/traps: Export the exception_table[] function pointer table to C Andrew Cooper
2015-02-10 17:12 ` [PATCH 2/3] x86/traps: Use write_atomic() when updating potentially-live descriptors Andrew Cooper
2015-02-11 10:46   ` Jan Beulich
2015-02-11 15:06     ` [PATCH v2 2/3] x86/traps: Avoid interleaved writes " Andrew Cooper
2015-02-10 17:12 ` [PATCH 3/3] x86/nmi: Fix shootdown of pcpus running in VMX non-root mode Andrew Cooper
2015-02-11 11:02   ` Jan Beulich
2015-02-11 12:13     ` Andrew Cooper
2015-02-11 13:22       ` Jan Beulich

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.