kvm.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC/PATCH 0/3] x86/signal/64: A better attempt at SS cleanup
@ 2015-08-13 20:18 Andy Lutomirski
  2015-08-13 20:18 ` [RFC/PATCH 1/3] x86/kvm: Rename VMX's segment access rights defines Andy Lutomirski
                   ` (2 more replies)
  0 siblings, 3 replies; 13+ messages in thread
From: Andy Lutomirski @ 2015-08-13 20:18 UTC (permalink / raw)
  To: Linus Torvalds, Stas Sergeev, x86
  Cc: Cyrill Gorcunov, Pavel Emelyanov, kvm list, Andy Lutomirski

This is almost certainly not 4.2 material.

This applies to -linux before the sigcontext revert.  If people like
these, I'll rebase them on top of the revert and I'll do something about
the UAPI build issue (if necessary -- renaming __pad0 may actually be
fine).

This should allow new programs to opt in to sane SS handling.  It makes
signal delivery reliable in the face of weird SS values (it was
unreliable before Linux 4.1, and it's unreliable again after the
revert).  Unlike the previous try, it should *not* break DOSEMU's hack
to figure out what SS was at the time of signal delivery, at least in
cases where DOSEMU wouldn't crash outright due to completely bogus SS
values.  Also unlike the previous try, it won't crash DOSEMU when DOSEMU
invalidates the old SS from a signal handler but doesn't know to update
the sigcontext.

The sigreturn_64 selftest is updated to use the new flag.  It passes.
For a real version of these patches, I'll add more tests to make sure
that we get the weird corner cases right.  (There are probably cases
where this isn't quite right on Xen, too, but we might not care.)

Andy Lutomirski (3):
  x86/kvm: Rename VMX's segment access rights defines
  x86/signal/64: Try to preserve hardware SS across 64-bit signal
    delivery
  x86/signal/64: Add explicit controls for sigcontext SS handling

 arch/x86/include/asm/desc_defs.h        | 23 +++++++++++
 arch/x86/include/asm/sighandling.h      |  1 -
 arch/x86/include/asm/vmx.h              | 46 +++++++++++-----------
 arch/x86/include/uapi/asm/ucontext.h    | 26 +++++++++---
 arch/x86/kernel/signal.c                | 70 ++++++++++++++++++++++++++++-----
 arch/x86/kvm/vmx.c                      | 14 +++----
 tools/testing/selftests/x86/sigreturn.c | 26 ++++++++++++
 7 files changed, 160 insertions(+), 46 deletions(-)

-- 
2.4.3


^ permalink raw reply	[flat|nested] 13+ messages in thread

* [RFC/PATCH 1/3] x86/kvm: Rename VMX's segment access rights defines
  2015-08-13 20:18 [RFC/PATCH 0/3] x86/signal/64: A better attempt at SS cleanup Andy Lutomirski
@ 2015-08-13 20:18 ` Andy Lutomirski
  2015-08-14 22:47   ` Paolo Bonzini
  2015-08-13 20:18 ` [RFC/PATCH 2/3] x86/signal/64: Try to preserve hardware SS across 64-bit signal delivery Andy Lutomirski
  2015-08-13 20:18 ` [RFC/PATCH 3/3] x86/signal/64: Add explicit controls for sigcontext SS handling Andy Lutomirski
  2 siblings, 1 reply; 13+ messages in thread
From: Andy Lutomirski @ 2015-08-13 20:18 UTC (permalink / raw)
  To: Linus Torvalds, Stas Sergeev, x86
  Cc: Cyrill Gorcunov, Pavel Emelyanov, kvm list, Andy Lutomirski

VMX encodes access rights differently from LAR, and the latter is
most likely what x86 people think of when they think of "access
rights".

Rename them to avoid confusion.

Cc: kvm@vger.kernel.org
Signed-off-by: Andy Lutomirski <luto@kernel.org>
---
 arch/x86/include/asm/vmx.h | 46 +++++++++++++++++++++++-----------------------
 arch/x86/kvm/vmx.c         | 14 +++++++-------
 2 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index da772edd19ab..78e243ae1786 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -367,29 +367,29 @@ enum vmcs_field {
 #define TYPE_PHYSICAL_APIC_EVENT        (10 << 12)
 #define TYPE_PHYSICAL_APIC_INST         (15 << 12)
 
-/* segment AR */
-#define SEGMENT_AR_L_MASK (1 << 13)
-
-#define AR_TYPE_ACCESSES_MASK 1
-#define AR_TYPE_READABLE_MASK (1 << 1)
-#define AR_TYPE_WRITEABLE_MASK (1 << 2)
-#define AR_TYPE_CODE_MASK (1 << 3)
-#define AR_TYPE_MASK 0x0f
-#define AR_TYPE_BUSY_64_TSS 11
-#define AR_TYPE_BUSY_32_TSS 11
-#define AR_TYPE_BUSY_16_TSS 3
-#define AR_TYPE_LDT 2
-
-#define AR_UNUSABLE_MASK (1 << 16)
-#define AR_S_MASK (1 << 4)
-#define AR_P_MASK (1 << 7)
-#define AR_L_MASK (1 << 13)
-#define AR_DB_MASK (1 << 14)
-#define AR_G_MASK (1 << 15)
-#define AR_DPL_SHIFT 5
-#define AR_DPL(ar) (((ar) >> AR_DPL_SHIFT) & 3)
-
-#define AR_RESERVD_MASK 0xfffe0f00
+/* segment AR in VMCS -- these are different from what LAR reports */
+#define VMX_SEGMENT_AR_L_MASK (1 << 13)
+
+#define VMX_AR_TYPE_ACCESSES_MASK 1
+#define VMX_AR_TYPE_READABLE_MASK (1 << 1)
+#define VMX_AR_TYPE_WRITEABLE_MASK (1 << 2)
+#define VMX_AR_TYPE_CODE_MASK (1 << 3)
+#define VMX_AR_TYPE_MASK 0x0f
+#define VMX_AR_TYPE_BUSY_64_TSS 11
+#define VMX_AR_TYPE_BUSY_32_TSS 11
+#define VMX_AR_TYPE_BUSY_16_TSS 3
+#define VMX_AR_TYPE_LDT 2
+
+#define VMX_AR_UNUSABLE_MASK (1 << 16)
+#define VMX_AR_S_MASK (1 << 4)
+#define VMX_AR_P_MASK (1 << 7)
+#define VMX_AR_L_MASK (1 << 13)
+#define VMX_AR_DB_MASK (1 << 14)
+#define VMX_AR_G_MASK (1 << 15)
+#define VMX_AR_DPL_SHIFT 5
+#define VMX_AR_DPL(ar) (((ar) >> VMX_AR_DPL_SHIFT) & 3)
+
+#define VMX_AR_RESERVD_MASK 0xfffe0f00
 
 #define TSS_PRIVATE_MEMSLOT			(KVM_USER_MEM_SLOTS + 0)
 #define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT	(KVM_USER_MEM_SLOTS + 1)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index e856dd566f4c..d7ff79a5135b 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3423,12 +3423,12 @@ static void enter_lmode(struct kvm_vcpu *vcpu)
 	vmx_segment_cache_clear(to_vmx(vcpu));
 
 	guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES);
-	if ((guest_tr_ar & AR_TYPE_MASK) != AR_TYPE_BUSY_64_TSS) {
+	if ((guest_tr_ar & VMX_AR_TYPE_MASK) != VMX_AR_TYPE_BUSY_64_TSS) {
 		pr_debug_ratelimited("%s: tss fixup for long mode. \n",
 				     __func__);
 		vmcs_write32(GUEST_TR_AR_BYTES,
-			     (guest_tr_ar & ~AR_TYPE_MASK)
-			     | AR_TYPE_BUSY_64_TSS);
+			     (guest_tr_ar & ~VMX_AR_TYPE_MASK)
+			     | VMX_AR_TYPE_BUSY_64_TSS);
 	}
 	vmx_set_efer(vcpu, vcpu->arch.efer | EFER_LMA);
 }
@@ -3719,7 +3719,7 @@ static int vmx_get_cpl(struct kvm_vcpu *vcpu)
 		return 0;
 	else {
 		int ar = vmx_read_guest_seg_ar(vmx, VCPU_SREG_SS);
-		return AR_DPL(ar);
+		return VMX_AR_DPL(ar);
 	}
 }
 
@@ -3847,11 +3847,11 @@ static bool code_segment_valid(struct kvm_vcpu *vcpu)
 
 	if (cs.unusable)
 		return false;
-	if (~cs.type & (AR_TYPE_CODE_MASK|AR_TYPE_ACCESSES_MASK))
+	if (~cs.type & (VMX_AR_TYPE_CODE_MASK|VMX_AR_TYPE_ACCESSES_MASK))
 		return false;
 	if (!cs.s)
 		return false;
-	if (cs.type & AR_TYPE_WRITEABLE_MASK) {
+	if (cs.type & VMX_AR_TYPE_WRITEABLE_MASK) {
 		if (cs.dpl > cs_rpl)
 			return false;
 	} else {
@@ -3901,7 +3901,7 @@ static bool data_segment_valid(struct kvm_vcpu *vcpu, int seg)
 		return false;
 	if (!var.present)
 		return false;
-	if (~var.type & (AR_TYPE_CODE_MASK|AR_TYPE_WRITEABLE_MASK)) {
+	if (~var.type & (VMX_AR_TYPE_CODE_MASK|VMX_AR_TYPE_WRITEABLE_MASK)) {
 		if (var.dpl < rpl) /* DPL < RPL */
 			return false;
 	}
-- 
2.4.3


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [RFC/PATCH 2/3] x86/signal/64: Try to preserve hardware SS across 64-bit signal delivery
  2015-08-13 20:18 [RFC/PATCH 0/3] x86/signal/64: A better attempt at SS cleanup Andy Lutomirski
  2015-08-13 20:18 ` [RFC/PATCH 1/3] x86/kvm: Rename VMX's segment access rights defines Andy Lutomirski
@ 2015-08-13 20:18 ` Andy Lutomirski
  2015-08-13 20:25   ` Andy Lutomirski
  2015-08-13 20:18 ` [RFC/PATCH 3/3] x86/signal/64: Add explicit controls for sigcontext SS handling Andy Lutomirski
  2 siblings, 1 reply; 13+ messages in thread
From: Andy Lutomirski @ 2015-08-13 20:18 UTC (permalink / raw)
  To: Linus Torvalds, Stas Sergeev, x86
  Cc: Cyrill Gorcunov, Pavel Emelyanov, kvm list, Andy Lutomirski

Linux used to have nearly useless SS handling for 64-bit signals.  Signal
delivery to a 64-bit process would preserve the SS selector, but the
selector wasn't saved in sigcontext.  Sigreturn would then clobber SS.
If the signal being delivered was due to a bad SS, signal delivery would
fail and the task would be killed.

As of Linux 4.1, it's fixed: signal delivery sets up a valid SS in the
hardware SS selector, saves the old SS in the sigcontext, and restores it
properly in sigreturn.

DOSEMU had a curious workaround for the old behavior: it saved the
hardware SS selector it was given during signal delivery and fudged
RIP and CS so that sigreturn would return to a trampoline that
restored the old RIP, CS, and, importantly, SS.

The upshot is that the change in sigcontext had no effect on DOSEMU
(DOSEMU doesn't care what was in sigcontext, and the fact that the
old SS is presented to the trampoline in new kernels is irrelevant
because the trampoline uses long mode), but the change in signal
delivery caused DOSEMU's workaround to restore __USER_DS instead of
the correct pre-signal SS value.

Do our best to work around it: explicitly check whether the old SS
is usable and leave it alone during signal delivery if it is.
Sigreturn is unchanged.

Reported-by: Stas Sergeev <stsp@list.ru>
Fixes: c6f2062935c8 ("x86/signal/64: Fix SS handling for signals delivered to 64-bit programs")
Signed-off-by: Andy Lutomirski <luto@kernel.org>
---
 arch/x86/include/asm/desc_defs.h | 23 +++++++++++++++++++++++
 arch/x86/kernel/signal.c         | 29 +++++++++++++++++++++++++++--
 2 files changed, 50 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/desc_defs.h b/arch/x86/include/asm/desc_defs.h
index 278441f39856..00971705a16d 100644
--- a/arch/x86/include/asm/desc_defs.h
+++ b/arch/x86/include/asm/desc_defs.h
@@ -98,4 +98,27 @@ struct desc_ptr {
 
 #endif /* !__ASSEMBLY__ */
 
+/* Access rights as returned by LAR */
+#define AR_TYPE_RODATA		(0 * (1 << 9))
+#define AR_TYPE_RWDATA		(1 * (1 << 9))
+#define AR_TYPE_RODATA_EXPDOWN	(2 * (1 << 9))
+#define AR_TYPE_RWDATA_EXPDOWN	(3 * (1 << 9))
+#define AR_TYPE_XOCODE		(4 * (1 << 9))
+#define AR_TYPE_XRCODE		(5 * (1 << 9))
+#define AR_TYPE_XOCODE_CONF	(6 * (1 << 9))
+#define AR_TYPE_XRCODE_CONF	(7 * (1 << 9))
+#define AR_TYPE_MASK		(7 * (1 << 9))
+
+#define AR_DPL0			(0 * (1 << 13))
+#define AR_DPL3			(3 * (1 << 13))
+#define AR_DPL_MASK		(3 * (1 << 13))
+
+#define AR_A			(1 << 8)	/* A means "accessed" */
+#define AR_S			(1 << 12)	/* S means "not system" */
+#define AR_P			(1 << 15)	/* P means "present" */
+#define AR_AVL			(1 << 20)	/* AVL does nothing */
+#define AR_L			(1 << 21)	/* L means "long mode" */
+#define AR_DB			(1 << 22)	/* D or B, depending on type */
+#define AR_G			(1 << 23)	/* G means "limit in pages" */
+
 #endif /* _ASM_X86_DESC_DEFS_H */
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 206996c1669d..784af1e49fc1 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -460,10 +460,35 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
 	 * SS descriptor, but we do need SS to be valid.  It's possible
 	 * that the old SS is entirely bogus -- this can happen if the
 	 * signal we're trying to deliver is #GP or #SS caused by a bad
-	 * SS value.
+	 * SS value.  We also have a compatbility issue here: DOSEMU
+	 * relies on the contents of the SS register indicating the
+	 * SS value at the time of the signal, even though that code in
+	 * DOSEMU predates sigreturn's ability to restore SS.  (DOSEMU
+	 * avoids relying on sigreturn to restore SS; instead it uses
+	 * a trampoline.)  So we do our best: if the old SS was valid,
+	 * we keep it.  Otherwise we replace it.
 	 */
 	regs->cs = __USER_CS;
-	regs->ss = __USER_DS;
+
+	if (unlikely(regs->ss != __USER_DS)) {
+		u32 ar;
+		asm ("lar %[old_ss], %[ar]\n\t"
+		     "jz 1f\n\t"
+		     "xorl %[ar], %[ar]\n\t"	/* If invalid, set ar = 0 */
+		     "1:"
+		     : [ar] "=r" (ar)
+		     : [old_ss] "rm" ((u16)regs->ss));
+
+		/*
+		 * For a valid 64-bit user context, we need DPL 3, type
+		 * read-write data or read-write exp-down data, and S and P
+		 * set.
+		 */
+		ar &= AR_DPL_MASK | AR_S | AR_P | AR_TYPE_MASK;
+		if (ar != (AR_DPL3 | AR_S | AR_P | AR_TYPE_RWDATA) &&
+		    ar != (AR_DPL3 | AR_S | AR_P | AR_TYPE_RWDATA_EXPDOWN))
+			regs->ss = __USER_DS;
+	}
 
 	return 0;
 }
-- 
2.4.3


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [RFC/PATCH 3/3] x86/signal/64: Add explicit controls for sigcontext SS handling
  2015-08-13 20:18 [RFC/PATCH 0/3] x86/signal/64: A better attempt at SS cleanup Andy Lutomirski
  2015-08-13 20:18 ` [RFC/PATCH 1/3] x86/kvm: Rename VMX's segment access rights defines Andy Lutomirski
  2015-08-13 20:18 ` [RFC/PATCH 2/3] x86/signal/64: Try to preserve hardware SS across 64-bit signal delivery Andy Lutomirski
@ 2015-08-13 20:18 ` Andy Lutomirski
  2015-08-14 20:55   ` Cyrill Gorcunov
  2 siblings, 1 reply; 13+ messages in thread
From: Andy Lutomirski @ 2015-08-13 20:18 UTC (permalink / raw)
  To: Linus Torvalds, Stas Sergeev, x86
  Cc: Cyrill Gorcunov, Pavel Emelyanov, kvm list, Andy Lutomirski

This adds two new uc_flags flags.  UC_SAVED_SS will be set for all
64-bit signals (including x32).  It indicates that the saved SS field
is valid and that the kernel understands UC_RESTORE_SS.

The kernel will *not* set UC_RESTORE_SS.  User signal handlers can
set UC_RESTORE_SS themselves to indicate that sigreturn should
restore SS from the sigcontext.

64-bit programs that use segmentation are encouraged to check
UC_SAVED_SS and set UC_RESTORE_SS in their signal handlers.  This is
the only straightforward way to cause sigreturn to restore SS.  (The
only non-test program that I know of that uses segmentation in a
64-bit binary is DOSEMU, and DOSEMU currently uses a nasty
trampoline to work around the lack of this mechanism in old kernels.
It could detect UC_RESTORE_SS and use it to avoid needing a
trampoline.

Cc: Stas Sergeev <stsp@list.ru>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: Andy Lutomirski <luto@kernel.org>
---
 arch/x86/include/asm/sighandling.h      |  1 -
 arch/x86/include/uapi/asm/ucontext.h    | 26 +++++++++++++++++----
 arch/x86/kernel/signal.c                | 41 ++++++++++++++++++++++++++-------
 tools/testing/selftests/x86/sigreturn.c | 26 +++++++++++++++++++++
 4 files changed, 80 insertions(+), 14 deletions(-)

diff --git a/arch/x86/include/asm/sighandling.h b/arch/x86/include/asm/sighandling.h
index 89db46752a8f..452c88b8ad06 100644
--- a/arch/x86/include/asm/sighandling.h
+++ b/arch/x86/include/asm/sighandling.h
@@ -13,7 +13,6 @@
 			 X86_EFLAGS_CF | X86_EFLAGS_RF)
 
 void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
-int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc);
 int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
 		     struct pt_regs *regs, unsigned long mask);
 
diff --git a/arch/x86/include/uapi/asm/ucontext.h b/arch/x86/include/uapi/asm/ucontext.h
index b7c29c8017f2..964bc3b46ff3 100644
--- a/arch/x86/include/uapi/asm/ucontext.h
+++ b/arch/x86/include/uapi/asm/ucontext.h
@@ -1,11 +1,27 @@
 #ifndef _ASM_X86_UCONTEXT_H
 #define _ASM_X86_UCONTEXT_H
 
-#define UC_FP_XSTATE	0x1	/* indicates the presence of extended state
-				 * information in the memory layout pointed
-				 * by the fpstate pointer in the ucontext's
-				 * sigcontext struct (uc_mcontext).
-				 */
+/*
+ * indicates the presence of extended state
+ * information in the memory layout pointed
+ * by the fpstate pointer in the ucontext's
+ * sigcontext struct (uc_mcontext).
+ */
+#define UC_FP_XSTATE	0x1
+
+#ifdef __x86_64__
+/*
+ * UC_SAVED_SS will be set when delivering 64-bit or x32 signals on
+ * kernels that save SS in the sigcontext.  Kernels that set UC_SAVED_SS
+ * allow signal handlers to set UC_RESTORE_SS; if UC_RESTORE_SS is set,
+ * then sigreturn will restore SS.
+ *
+ * For compatibility with old programs, the kernel will *not* set
+ * UC_RESTORE_SS when delivering signals.
+ */
+#define UC_SAVED_SS	0x2
+#define UC_RESTORE_SS	0x4
+#endif
 
 #include <asm-generic/ucontext.h>
 
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 784af1e49fc1..746250e9bce1 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -61,7 +61,9 @@
 	regs->seg = GET_SEG(seg) | 3;			\
 } while (0)
 
-int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc)
+static int restore_sigcontext(struct pt_regs *regs,
+			      struct sigcontext __user *sc,
+			      unsigned long uc_flags)
 {
 	void __user *buf;
 	unsigned int tmpflags;
@@ -94,7 +96,19 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc)
 #endif /* CONFIG_X86_64 */
 
 		COPY_SEG_CPL3(cs);
+
+#ifdef CONFIG_X86_64
+		/*
+		 * For the 64-bit ABI, we only restore SS if UC_RESTORE_SS
+		 * is set.  Otherwise we rely on the fact that regs->ss
+		 * is already set to __USER_DS by the SYSCALL entry code.
+		 */
+		if (uc_flags & UC_RESTORE_SS)
+			COPY_SEG_CPL3(ss);
+#else
+		/* For the 32-bit ABI, we always restore SS. */
 		COPY_SEG_CPL3(ss);
+#endif
 
 		get_user_ex(tmpflags, &sc->flags);
 		regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
@@ -336,6 +350,7 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
 	void __user *restorer;
 	int err = 0;
 	void __user *fpstate = NULL;
+	unsigned long flags = 0;
 
 	frame = get_sigframe(&ksig->ka, regs, sizeof(*frame), &fpstate);
 
@@ -349,9 +364,12 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
 
 		/* Create the ucontext.  */
 		if (cpu_has_xsave)
-			put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
-		else
-			put_user_ex(0, &frame->uc.uc_flags);
+			flags = UC_FP_XSTATE;
+#ifdef CONFIG_X86_64
+		flags |= UC_SAVED_SS;
+#endif
+		put_user_ex(flags, &frame->uc.uc_flags);
+
 		put_user_ex(0, &frame->uc.uc_link);
 		save_altstack_ex(&frame->uc.uc_stack, regs->sp);
 
@@ -517,9 +535,10 @@ static int x32_setup_rt_frame(struct ksignal *ksig,
 	put_user_try {
 		/* Create the ucontext.  */
 		if (cpu_has_xsave)
-			put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
+			put_user_ex(UC_FP_XSTATE | UC_SAVED_SS,
+				    &frame->uc.uc_flags);
 		else
-			put_user_ex(0, &frame->uc.uc_flags);
+			put_user_ex(UC_SAVED_SS, &frame->uc.uc_flags);
 		put_user_ex(0, &frame->uc.uc_link);
 		compat_save_altstack_ex(&frame->uc.uc_stack, regs->sp);
 		put_user_ex(0, &frame->uc.uc__pad0);
@@ -597,16 +616,19 @@ asmlinkage long sys_rt_sigreturn(void)
 	struct pt_regs *regs = current_pt_regs();
 	struct rt_sigframe __user *frame;
 	sigset_t set;
+	unsigned long uc_flags;
 
 	frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long));
 	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
 		goto badframe;
 	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
 		goto badframe;
+	if (__get_user(uc_flags, &frame->uc.uc_flags))
+		goto badframe;
 
 	set_current_blocked(&set);
 
-	if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
+	if (restore_sigcontext(regs, &frame->uc.uc_mcontext, uc_flags))
 		goto badframe;
 
 	if (restore_altstack(&frame->uc.uc_stack))
@@ -810,6 +832,7 @@ asmlinkage long sys32_x32_rt_sigreturn(void)
 	struct pt_regs *regs = current_pt_regs();
 	struct rt_sigframe_x32 __user *frame;
 	sigset_t set;
+	unsigned long uc_flags;
 
 	frame = (struct rt_sigframe_x32 __user *)(regs->sp - 8);
 
@@ -817,10 +840,12 @@ asmlinkage long sys32_x32_rt_sigreturn(void)
 		goto badframe;
 	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
 		goto badframe;
+	if (__get_user(uc_flags, &frame->uc.uc_flags))
+		goto badframe;
 
 	set_current_blocked(&set);
 
-	if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
+	if (restore_sigcontext(regs, &frame->uc.uc_mcontext, uc_flags))
 		goto badframe;
 
 	if (compat_restore_altstack(&frame->uc.uc_stack))
diff --git a/tools/testing/selftests/x86/sigreturn.c b/tools/testing/selftests/x86/sigreturn.c
index b5aa1bab7416..8f0176b91854 100644
--- a/tools/testing/selftests/x86/sigreturn.c
+++ b/tools/testing/selftests/x86/sigreturn.c
@@ -55,6 +55,24 @@
 #include <sys/user.h>
 
 /*
+ * Copies from asm/ucontext.h, as asm/ucontext.h conflicts badly with the glibc
+ * headers.
+ */
+#ifdef __x86_64__
+/*
+ * UC_SAVED_SS will be set when delivering 64-bit or x32 signals on
+ * kernels that save SS in the sigcontext.  Kernels that set UC_SAVED_SS
+ * allow signal handlers to set UC_RESTORE_SS; if UC_RESTORE_SS is set,
+ * then sigreturn will restore SS.
+ *
+ * For compatibility with old programs, the kernel will *not* set
+ * UC_RESTORE_SS when delivering signals.
+ */
+#define UC_SAVED_SS	0x2
+#define UC_RESTORE_SS	0x4
+#endif
+
+/*
  * In principle, this test can run on Linux emulation layers (e.g.
  * Illumos "LX branded zones").  Solaris-based kernels reserve LDT
  * entries 0-5 for their own internal purposes, so start our LDT
@@ -330,6 +348,10 @@ static void sigusr1(int sig, siginfo_t *info, void *ctx_void)
 	memcpy(&requested_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
 	requested_regs[REG_AX] = *ssptr(ctx);	/* The asm code does this. */
 
+#ifdef __x86_64__
+	ctx->uc_flags |= UC_RESTORE_SS;
+#endif
+
 	return;
 }
 
@@ -358,6 +380,10 @@ static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
 	memcpy(&resulting_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
 	memcpy(&ctx->uc_mcontext.gregs, &initial_regs, sizeof(gregset_t));
 
+#ifdef __x86_64__
+	ctx->uc_flags |= UC_RESTORE_SS;
+#endif
+
 	sig_trapped = sig;
 }
 
-- 
2.4.3


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [RFC/PATCH 2/3] x86/signal/64: Try to preserve hardware SS across 64-bit signal delivery
  2015-08-13 20:18 ` [RFC/PATCH 2/3] x86/signal/64: Try to preserve hardware SS across 64-bit signal delivery Andy Lutomirski
@ 2015-08-13 20:25   ` Andy Lutomirski
  2015-08-13 21:26     ` Andy Lutomirski
  0 siblings, 1 reply; 13+ messages in thread
From: Andy Lutomirski @ 2015-08-13 20:25 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Linus Torvalds, Stas Sergeev, X86 ML, Cyrill Gorcunov,
	Pavel Emelyanov, kvm list

On Thu, Aug 13, 2015 at 1:18 PM, Andy Lutomirski <luto@kernel.org> wrote:
> Linux used to have nearly useless SS handling for 64-bit signals.  Signal
> delivery to a 64-bit process would preserve the SS selector, but the
> selector wasn't saved in sigcontext.  Sigreturn would then clobber SS.
> If the signal being delivered was due to a bad SS, signal delivery would
> fail and the task would be killed.
>
> As of Linux 4.1, it's fixed: signal delivery sets up a valid SS in the
> hardware SS selector, saves the old SS in the sigcontext, and restores it
> properly in sigreturn.
>
> DOSEMU had a curious workaround for the old behavior: it saved the
> hardware SS selector it was given during signal delivery and fudged
> RIP and CS so that sigreturn would return to a trampoline that
> restored the old RIP, CS, and, importantly, SS.
>
> The upshot is that the change in sigcontext had no effect on DOSEMU
> (DOSEMU doesn't care what was in sigcontext, and the fact that the
> old SS is presented to the trampoline in new kernels is irrelevant
> because the trampoline uses long mode), but the change in signal
> delivery caused DOSEMU's workaround to restore __USER_DS instead of
> the correct pre-signal SS value.
>
> Do our best to work around it: explicitly check whether the old SS
> is usable and leave it alone during signal delivery if it is.
> Sigreturn is unchanged.
>
> Reported-by: Stas Sergeev <stsp@list.ru>
> Fixes: c6f2062935c8 ("x86/signal/64: Fix SS handling for signals delivered to 64-bit programs")
> Signed-off-by: Andy Lutomirski <luto@kernel.org>
> ---

> +               asm ("lar %[old_ss], %[ar]\n\t"
> +                    "jz 1f\n\t"
> +                    "xorl %[ar], %[ar]\n\t"    /* If invalid, set ar = 0 */
> +                    "1:"
> +                    : [ar] "=r" (ar)
> +                    : [old_ss] "rm" ((u16)regs->ss));
> +

Now that I sent this...

I should learn to think very carefully before doubting Linus'
off-the-cuff intuition about the x86 instruction set.  This can use
VERW after all, as long as it's careful to set RPL==3.

For purposes of testing DOSEMU testing, the version I sent should be
fine.  For purposes of review, please pretend that I was sensible and
used VERW instead of LAR.  This also means that the VMX fixlet is
optional, but I still think it's a good idea.

--Andy

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC/PATCH 2/3] x86/signal/64: Try to preserve hardware SS across 64-bit signal delivery
  2015-08-13 20:25   ` Andy Lutomirski
@ 2015-08-13 21:26     ` Andy Lutomirski
  2015-08-13 21:41       ` Linus Torvalds
  0 siblings, 1 reply; 13+ messages in thread
From: Andy Lutomirski @ 2015-08-13 21:26 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Linus Torvalds, Stas Sergeev, X86 ML, Cyrill Gorcunov,
	Pavel Emelyanov, kvm list

On Thu, Aug 13, 2015 at 1:25 PM, Andy Lutomirski <luto@amacapital.net> wrote:
> On Thu, Aug 13, 2015 at 1:18 PM, Andy Lutomirski <luto@kernel.org> wrote:
>> Linux used to have nearly useless SS handling for 64-bit signals.  Signal
>> delivery to a 64-bit process would preserve the SS selector, but the
>> selector wasn't saved in sigcontext.  Sigreturn would then clobber SS.
>> If the signal being delivered was due to a bad SS, signal delivery would
>> fail and the task would be killed.
>>
>> As of Linux 4.1, it's fixed: signal delivery sets up a valid SS in the
>> hardware SS selector, saves the old SS in the sigcontext, and restores it
>> properly in sigreturn.
>>
>> DOSEMU had a curious workaround for the old behavior: it saved the
>> hardware SS selector it was given during signal delivery and fudged
>> RIP and CS so that sigreturn would return to a trampoline that
>> restored the old RIP, CS, and, importantly, SS.
>>
>> The upshot is that the change in sigcontext had no effect on DOSEMU
>> (DOSEMU doesn't care what was in sigcontext, and the fact that the
>> old SS is presented to the trampoline in new kernels is irrelevant
>> because the trampoline uses long mode), but the change in signal
>> delivery caused DOSEMU's workaround to restore __USER_DS instead of
>> the correct pre-signal SS value.
>>
>> Do our best to work around it: explicitly check whether the old SS
>> is usable and leave it alone during signal delivery if it is.
>> Sigreturn is unchanged.
>>
>> Reported-by: Stas Sergeev <stsp@list.ru>
>> Fixes: c6f2062935c8 ("x86/signal/64: Fix SS handling for signals delivered to 64-bit programs")
>> Signed-off-by: Andy Lutomirski <luto@kernel.org>
>> ---
>
>> +               asm ("lar %[old_ss], %[ar]\n\t"
>> +                    "jz 1f\n\t"
>> +                    "xorl %[ar], %[ar]\n\t"    /* If invalid, set ar = 0 */
>> +                    "1:"
>> +                    : [ar] "=r" (ar)
>> +                    : [old_ss] "rm" ((u16)regs->ss));
>> +
>
> Now that I sent this...
>
> I should learn to think very carefully before doubting Linus'
> off-the-cuff intuition about the x86 instruction set.  This can use
> VERW after all, as long as it's careful to set RPL==3.

And the corollary to that is: I should also assume that Linus is out
to get me :)

VERW is no good, because it considers non-present segments to be
writable.  Test cases for the win!

--Andy

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC/PATCH 2/3] x86/signal/64: Try to preserve hardware SS across 64-bit signal delivery
  2015-08-13 21:26     ` Andy Lutomirski
@ 2015-08-13 21:41       ` Linus Torvalds
  2015-08-13 21:49         ` Andy Lutomirski
  0 siblings, 1 reply; 13+ messages in thread
From: Linus Torvalds @ 2015-08-13 21:41 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Andy Lutomirski, Stas Sergeev, X86 ML, Cyrill Gorcunov,
	Pavel Emelyanov, kvm list

On Thu, Aug 13, 2015 at 2:26 PM, Andy Lutomirski <luto@amacapital.net> wrote:
>
> VERW is no good, because it considers non-present segments to be
> writable.  Test cases for the win!

Seriously? That's crazy. I don't think I've actually ever used VERW,
but the documentation certainly says that the segment has to be
writable, and I quote

  "The validation performed is the same as is performed when a segment
selector is loaded into the DS, ES, FS, or GS register, and the
indicated access (read or write) is performed"

which damn well shouldn't work for non-present segments. Odd.

                  Linus

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC/PATCH 2/3] x86/signal/64: Try to preserve hardware SS across 64-bit signal delivery
  2015-08-13 21:41       ` Linus Torvalds
@ 2015-08-13 21:49         ` Andy Lutomirski
  2015-08-13 22:03           ` Andy Lutomirski
  0 siblings, 1 reply; 13+ messages in thread
From: Andy Lutomirski @ 2015-08-13 21:49 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Andy Lutomirski, Stas Sergeev, X86 ML, Cyrill Gorcunov,
	Pavel Emelyanov, kvm list

On Thu, Aug 13, 2015 at 2:41 PM, Linus Torvalds
<torvalds@linux-foundation.org> wrote:
> On Thu, Aug 13, 2015 at 2:26 PM, Andy Lutomirski <luto@amacapital.net> wrote:
>>
>> VERW is no good, because it considers non-present segments to be
>> writable.  Test cases for the win!
>
> Seriously? That's crazy. I don't think I've actually ever used VERW,
> but the documentation certainly says that the segment has to be
> writable, and I quote
>
>   "The validation performed is the same as is performed when a segment
> selector is loaded into the DS, ES, FS, or GS register, and the
> indicated access (read or write) is performed"
>
> which damn well shouldn't work for non-present segments. Odd.
>

I can try to come up with a self-contained test case, but I'm
reasonably confident that I did it right and that I sprinkled the
right printks around.

--Andy

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC/PATCH 2/3] x86/signal/64: Try to preserve hardware SS across 64-bit signal delivery
  2015-08-13 21:49         ` Andy Lutomirski
@ 2015-08-13 22:03           ` Andy Lutomirski
  0 siblings, 0 replies; 13+ messages in thread
From: Andy Lutomirski @ 2015-08-13 22:03 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Andy Lutomirski, Stas Sergeev, X86 ML, Cyrill Gorcunov,
	Pavel Emelyanov, kvm list

On Thu, Aug 13, 2015 at 2:49 PM, Andy Lutomirski <luto@amacapital.net> wrote:
> On Thu, Aug 13, 2015 at 2:41 PM, Linus Torvalds
> <torvalds@linux-foundation.org> wrote:
>> On Thu, Aug 13, 2015 at 2:26 PM, Andy Lutomirski <luto@amacapital.net> wrote:
>>>
>>> VERW is no good, because it considers non-present segments to be
>>> writable.  Test cases for the win!
>>
>> Seriously? That's crazy. I don't think I've actually ever used VERW,
>> but the documentation certainly says that the segment has to be
>> writable, and I quote
>>
>>   "The validation performed is the same as is performed when a segment
>> selector is loaded into the DS, ES, FS, or GS register, and the
>> indicated access (read or write) is performed"
>>
>> which damn well shouldn't work for non-present segments. Odd.
>>
>
> I can try to come up with a self-contained test case, but I'm
> reasonably confident that I did it right and that I sprinkled the
> right printks around.

The SDM pseudocode, the APM's description:

A segment is writable if all of the following apply:
 - the selector is not a null selector.
 - the descriptor is within the GDT or LDT limit.
 - the segment is a writable data segment.
 - the descriptor DPL is greater than or equal to both the CPL and RPL.

and the SDM's bullet points:

To set the ZF flag, the following conditions must be met:
 - The segment selector is not NULL.
 - The selector must denote a descriptor within the bounds of the
descriptor table (GDT or LDT).
 - The selector must denote the descriptor of a code or data segment
(not that of a system segment or gate).
 - For the VERR instruction, the segment must be readable.
 - For the VERW instruction, the segment must be a writable data segment.
 - If the segment is not a conforming code segment, the segment’s DPL
must be greater than...

all seem to suggest that P isn't checked.

If I quote a bit farther than you did:

The validation performed is the same as is performed when a segment
selector is loaded into the DS, ES, FS, or GS
register, and the indicated access (read or write) is performed. The
segment selector's value cannot result in a
protection exception, enabling the software to anticipate possible
segment access problems.

I think the idea is that VERW is supposed to check protection but not
presence, the idea being that a hypothetical non-paged segmented OS
would swap out a segment and mark it not-present, and the resulting
failure would be #NP, which isn't a "protection exception".

Did anyone ever write an OS that used this stuff?  The Internet
suggests that OS/2 1.0 on the 286 supported swapping, so I bet it
actually used this mechanism, and woe unto any user (ahem, ring 1-3)
app that used LAR, checked the present bit, and blew up when a segment
was paged out.

--Andy

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC/PATCH 3/3] x86/signal/64: Add explicit controls for sigcontext SS handling
  2015-08-13 20:18 ` [RFC/PATCH 3/3] x86/signal/64: Add explicit controls for sigcontext SS handling Andy Lutomirski
@ 2015-08-14 20:55   ` Cyrill Gorcunov
  2015-08-14 20:57     ` Andy Lutomirski
  0 siblings, 1 reply; 13+ messages in thread
From: Cyrill Gorcunov @ 2015-08-14 20:55 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Linus Torvalds, Stas Sergeev, x86, Pavel Emelyanov, kvm list

On Thu, Aug 13, 2015 at 01:18:50PM -0700, Andy Lutomirski wrote:
> This adds two new uc_flags flags.  UC_SAVED_SS will be set for all
> 64-bit signals (including x32).  It indicates that the saved SS field
> is valid and that the kernel understands UC_RESTORE_SS.
> 
> The kernel will *not* set UC_RESTORE_SS.  User signal handlers can
> set UC_RESTORE_SS themselves to indicate that sigreturn should
> restore SS from the sigcontext.
> 
> 64-bit programs that use segmentation are encouraged to check
> UC_SAVED_SS and set UC_RESTORE_SS in their signal handlers.  This is
> the only straightforward way to cause sigreturn to restore SS.  (The
> only non-test program that I know of that uses segmentation in a
> 64-bit binary is DOSEMU, and DOSEMU currently uses a nasty
> trampoline to work around the lack of this mechanism in old kernels.
> It could detect UC_RESTORE_SS and use it to avoid needing a
> trampoline.
> 
> Cc: Stas Sergeev <stsp@list.ru>
> Cc: Linus Torvalds <torvalds@linux-foundation.org>
> Cc: Cyrill Gorcunov <gorcunov@gmail.com>
> Cc: Pavel Emelyanov <xemul@parallels.com>
> Signed-off-by: Andy Lutomirski <luto@kernel.org>

Looks reasonable to me. Andy, Linus, what the final conclusion --
are we about to introduce this flag or simply continue with
revert? Should I test this one? (from the code I don't excpect it
break criu anyhow but still).

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC/PATCH 3/3] x86/signal/64: Add explicit controls for sigcontext SS handling
  2015-08-14 20:55   ` Cyrill Gorcunov
@ 2015-08-14 20:57     ` Andy Lutomirski
  2015-08-14 21:05       ` Cyrill Gorcunov
  0 siblings, 1 reply; 13+ messages in thread
From: Andy Lutomirski @ 2015-08-14 20:57 UTC (permalink / raw)
  To: Cyrill Gorcunov
  Cc: Andy Lutomirski, Linus Torvalds, Stas Sergeev, X86 ML,
	Pavel Emelyanov, kvm list

On Fri, Aug 14, 2015 at 1:55 PM, Cyrill Gorcunov <gorcunov@gmail.com> wrote:
> On Thu, Aug 13, 2015 at 01:18:50PM -0700, Andy Lutomirski wrote:
>> This adds two new uc_flags flags.  UC_SAVED_SS will be set for all
>> 64-bit signals (including x32).  It indicates that the saved SS field
>> is valid and that the kernel understands UC_RESTORE_SS.
>>
>> The kernel will *not* set UC_RESTORE_SS.  User signal handlers can
>> set UC_RESTORE_SS themselves to indicate that sigreturn should
>> restore SS from the sigcontext.
>>
>> 64-bit programs that use segmentation are encouraged to check
>> UC_SAVED_SS and set UC_RESTORE_SS in their signal handlers.  This is
>> the only straightforward way to cause sigreturn to restore SS.  (The
>> only non-test program that I know of that uses segmentation in a
>> 64-bit binary is DOSEMU, and DOSEMU currently uses a nasty
>> trampoline to work around the lack of this mechanism in old kernels.
>> It could detect UC_RESTORE_SS and use it to avoid needing a
>> trampoline.
>>
>> Cc: Stas Sergeev <stsp@list.ru>
>> Cc: Linus Torvalds <torvalds@linux-foundation.org>
>> Cc: Cyrill Gorcunov <gorcunov@gmail.com>
>> Cc: Pavel Emelyanov <xemul@parallels.com>
>> Signed-off-by: Andy Lutomirski <luto@kernel.org>
>
> Looks reasonable to me. Andy, Linus, what the final conclusion --
> are we about to introduce this flag or simply continue with
> revert? Should I test this one? (from the code I don't excpect it
> break criu anyhow but still).

Don't bother testing yet.  I'm waffling between trying something like
this and adding SA_SAVE_SS.  I have partially written patches for the
latter.

--Andy




-- 
Andy Lutomirski
AMA Capital Management, LLC

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC/PATCH 3/3] x86/signal/64: Add explicit controls for sigcontext SS handling
  2015-08-14 20:57     ` Andy Lutomirski
@ 2015-08-14 21:05       ` Cyrill Gorcunov
  0 siblings, 0 replies; 13+ messages in thread
From: Cyrill Gorcunov @ 2015-08-14 21:05 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Andy Lutomirski, Linus Torvalds, Stas Sergeev, X86 ML,
	Pavel Emelyanov, kvm list

On Fri, Aug 14, 2015 at 01:57:42PM -0700, Andy Lutomirski wrote:
> 
> Don't bother testing yet.  I'm waffling between trying something like
> this and adding SA_SAVE_SS.  I have partially written patches for the
> latter.

ok, ping me if anything

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC/PATCH 1/3] x86/kvm: Rename VMX's segment access rights defines
  2015-08-13 20:18 ` [RFC/PATCH 1/3] x86/kvm: Rename VMX's segment access rights defines Andy Lutomirski
@ 2015-08-14 22:47   ` Paolo Bonzini
  0 siblings, 0 replies; 13+ messages in thread
From: Paolo Bonzini @ 2015-08-14 22:47 UTC (permalink / raw)
  To: Andy Lutomirski, Linus Torvalds, Stas Sergeev, x86
  Cc: Cyrill Gorcunov, Pavel Emelyanov, kvm list



On 13/08/2015 22:18, Andy Lutomirski wrote:
> VMX encodes access rights differently from LAR, and the latter is
> most likely what x86 people think of when they think of "access
> rights".
> 
> Rename them to avoid confusion.

Good idea, I've gone ahead and applied it for 4.3.

> Cc: kvm@vger.kernel.org
> Signed-off-by: Andy Lutomirski <luto@kernel.org>
> ---
>  arch/x86/include/asm/vmx.h | 46 +++++++++++++++++++++++-----------------------
>  arch/x86/kvm/vmx.c         | 14 +++++++-------
>  2 files changed, 30 insertions(+), 30 deletions(-)
> 
> diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
> index da772edd19ab..78e243ae1786 100644
> --- a/arch/x86/include/asm/vmx.h
> +++ b/arch/x86/include/asm/vmx.h
> @@ -367,29 +367,29 @@ enum vmcs_field {
>  #define TYPE_PHYSICAL_APIC_EVENT        (10 << 12)
>  #define TYPE_PHYSICAL_APIC_INST         (15 << 12)
>  
> -/* segment AR */
> -#define SEGMENT_AR_L_MASK (1 << 13)
> -
> -#define AR_TYPE_ACCESSES_MASK 1
> -#define AR_TYPE_READABLE_MASK (1 << 1)
> -#define AR_TYPE_WRITEABLE_MASK (1 << 2)
> -#define AR_TYPE_CODE_MASK (1 << 3)
> -#define AR_TYPE_MASK 0x0f
> -#define AR_TYPE_BUSY_64_TSS 11
> -#define AR_TYPE_BUSY_32_TSS 11
> -#define AR_TYPE_BUSY_16_TSS 3
> -#define AR_TYPE_LDT 2
> -
> -#define AR_UNUSABLE_MASK (1 << 16)
> -#define AR_S_MASK (1 << 4)
> -#define AR_P_MASK (1 << 7)
> -#define AR_L_MASK (1 << 13)
> -#define AR_DB_MASK (1 << 14)
> -#define AR_G_MASK (1 << 15)
> -#define AR_DPL_SHIFT 5
> -#define AR_DPL(ar) (((ar) >> AR_DPL_SHIFT) & 3)
> -
> -#define AR_RESERVD_MASK 0xfffe0f00
> +/* segment AR in VMCS -- these are different from what LAR reports */
> +#define VMX_SEGMENT_AR_L_MASK (1 << 13)
> +
> +#define VMX_AR_TYPE_ACCESSES_MASK 1
> +#define VMX_AR_TYPE_READABLE_MASK (1 << 1)
> +#define VMX_AR_TYPE_WRITEABLE_MASK (1 << 2)
> +#define VMX_AR_TYPE_CODE_MASK (1 << 3)
> +#define VMX_AR_TYPE_MASK 0x0f
> +#define VMX_AR_TYPE_BUSY_64_TSS 11
> +#define VMX_AR_TYPE_BUSY_32_TSS 11
> +#define VMX_AR_TYPE_BUSY_16_TSS 3
> +#define VMX_AR_TYPE_LDT 2
> +
> +#define VMX_AR_UNUSABLE_MASK (1 << 16)
> +#define VMX_AR_S_MASK (1 << 4)
> +#define VMX_AR_P_MASK (1 << 7)
> +#define VMX_AR_L_MASK (1 << 13)
> +#define VMX_AR_DB_MASK (1 << 14)
> +#define VMX_AR_G_MASK (1 << 15)
> +#define VMX_AR_DPL_SHIFT 5
> +#define VMX_AR_DPL(ar) (((ar) >> VMX_AR_DPL_SHIFT) & 3)
> +
> +#define VMX_AR_RESERVD_MASK 0xfffe0f00
>  
>  #define TSS_PRIVATE_MEMSLOT			(KVM_USER_MEM_SLOTS + 0)
>  #define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT	(KVM_USER_MEM_SLOTS + 1)
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index e856dd566f4c..d7ff79a5135b 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -3423,12 +3423,12 @@ static void enter_lmode(struct kvm_vcpu *vcpu)
>  	vmx_segment_cache_clear(to_vmx(vcpu));
>  
>  	guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES);
> -	if ((guest_tr_ar & AR_TYPE_MASK) != AR_TYPE_BUSY_64_TSS) {
> +	if ((guest_tr_ar & VMX_AR_TYPE_MASK) != VMX_AR_TYPE_BUSY_64_TSS) {
>  		pr_debug_ratelimited("%s: tss fixup for long mode. \n",
>  				     __func__);
>  		vmcs_write32(GUEST_TR_AR_BYTES,
> -			     (guest_tr_ar & ~AR_TYPE_MASK)
> -			     | AR_TYPE_BUSY_64_TSS);
> +			     (guest_tr_ar & ~VMX_AR_TYPE_MASK)
> +			     | VMX_AR_TYPE_BUSY_64_TSS);
>  	}
>  	vmx_set_efer(vcpu, vcpu->arch.efer | EFER_LMA);
>  }
> @@ -3719,7 +3719,7 @@ static int vmx_get_cpl(struct kvm_vcpu *vcpu)
>  		return 0;
>  	else {
>  		int ar = vmx_read_guest_seg_ar(vmx, VCPU_SREG_SS);
> -		return AR_DPL(ar);
> +		return VMX_AR_DPL(ar);
>  	}
>  }
>  
> @@ -3847,11 +3847,11 @@ static bool code_segment_valid(struct kvm_vcpu *vcpu)
>  
>  	if (cs.unusable)
>  		return false;
> -	if (~cs.type & (AR_TYPE_CODE_MASK|AR_TYPE_ACCESSES_MASK))
> +	if (~cs.type & (VMX_AR_TYPE_CODE_MASK|VMX_AR_TYPE_ACCESSES_MASK))
>  		return false;
>  	if (!cs.s)
>  		return false;
> -	if (cs.type & AR_TYPE_WRITEABLE_MASK) {
> +	if (cs.type & VMX_AR_TYPE_WRITEABLE_MASK) {
>  		if (cs.dpl > cs_rpl)
>  			return false;
>  	} else {
> @@ -3901,7 +3901,7 @@ static bool data_segment_valid(struct kvm_vcpu *vcpu, int seg)
>  		return false;
>  	if (!var.present)
>  		return false;
> -	if (~var.type & (AR_TYPE_CODE_MASK|AR_TYPE_WRITEABLE_MASK)) {
> +	if (~var.type & (VMX_AR_TYPE_CODE_MASK|VMX_AR_TYPE_WRITEABLE_MASK)) {
>  		if (var.dpl < rpl) /* DPL < RPL */
>  			return false;
>  	}
> 

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2015-08-14 22:47 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-08-13 20:18 [RFC/PATCH 0/3] x86/signal/64: A better attempt at SS cleanup Andy Lutomirski
2015-08-13 20:18 ` [RFC/PATCH 1/3] x86/kvm: Rename VMX's segment access rights defines Andy Lutomirski
2015-08-14 22:47   ` Paolo Bonzini
2015-08-13 20:18 ` [RFC/PATCH 2/3] x86/signal/64: Try to preserve hardware SS across 64-bit signal delivery Andy Lutomirski
2015-08-13 20:25   ` Andy Lutomirski
2015-08-13 21:26     ` Andy Lutomirski
2015-08-13 21:41       ` Linus Torvalds
2015-08-13 21:49         ` Andy Lutomirski
2015-08-13 22:03           ` Andy Lutomirski
2015-08-13 20:18 ` [RFC/PATCH 3/3] x86/signal/64: Add explicit controls for sigcontext SS handling Andy Lutomirski
2015-08-14 20:55   ` Cyrill Gorcunov
2015-08-14 20:57     ` Andy Lutomirski
2015-08-14 21:05       ` Cyrill Gorcunov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).