From: Andrew Cooper <andrew.cooper3@citrix.com>
To: Jan Beulich <JBeulich@suse.com>,
xen-devel <xen-devel@lists.xenproject.org>
Cc: Feng Wu <feng.wu@intel.com>, Keir Fraser <keir@xen.org>
Subject: Re: [PATCH 2/4] x86: suppress SMAP and SMEP while running 32-bit PV guest code
Date: Mon, 7 Mar 2016 16:59:18 +0000
Message-ID: <56DDB366.8020207@citrix.com>
In-Reply-To: <56D97F4802000078000D9561@prv-mh.provo.novell.com>
On 04/03/16 11:27, Jan Beulich wrote:
> Since such guests' kernel code runs in ring 1, their memory accesses,
> at the paging layer, are supervisor mode ones, and hence subject to
> SMAP/SMEP checks. Such guests cannot be expected to be aware of those
> two features though (and so far we also don't expose the respective
> feature flags), and hence may suffer page faults they cannot deal with.
>
> While the placement of the re-enabling slightly weakens the intended
> protection, it was selected such that 64-bit paths would remain
> unaffected where possible. At the expense of a further performance hit
> the re-enabling could be put right next to the CLACs.
>
> Note that this introduces a number of extra TLB flushes - CR4.SMEP
> transitioning from 0 to 1 always causes a flush, and it transitioning
> from 1 to 0 may also do.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>
> --- a/xen/arch/x86/setup.c
> +++ b/xen/arch/x86/setup.c
> @@ -67,6 +67,8 @@ boolean_param("smep", opt_smep);
> static bool_t __initdata opt_smap = 1;
> boolean_param("smap", opt_smap);
>
> +unsigned long __read_mostly cr4_smep_smap_mask;
Are we liable to gain any other CR4 features which would want to be
included in this? Might it be wise to choose a slightly more generic
name such as cr4_pv32_mask?
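That is, keeping the declaration exactly as in the patch and only swapping
the name (purely illustrative):

    unsigned long __read_mostly cr4_pv32_mask;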
> #define SHADOW_BYTES 16 /* Shadow EIP + shadow hypercall # */
> #else
> /* Relocate argument registers and zero-extend to 64 bits. */
> - movl %eax,%eax /* Hypercall # */
> xchgl %ecx,%esi /* Arg 2, Arg 4 */
> movl %edx,%edx /* Arg 3 */
> movl %edi,%r8d /* Arg 5 */
> @@ -174,10 +174,43 @@ compat_bad_hypercall:
> /* %rbx: struct vcpu, interrupts disabled */
> ENTRY(compat_restore_all_guest)
> ASSERT_INTERRUPTS_DISABLED
> +.Lcr4_orig:
> + ASM_NOP3 /* mov %cr4, %rax */
> + ASM_NOP6 /* and $..., %rax */
> + ASM_NOP3 /* mov %rax, %cr4 */
> + .pushsection .altinstr_replacement, "ax"
> +.Lcr4_alt:
> + mov %cr4, %rax
> + and $~(X86_CR4_SMEP|X86_CR4_SMAP), %rax
> + mov %rax, %cr4
> +.Lcr4_alt_end:
> + .section .altinstructions, "a"
> + altinstruction_entry .Lcr4_orig, .Lcr4_alt, X86_FEATURE_SMEP, 12, \
> + (.Lcr4_alt_end - .Lcr4_alt)
> + altinstruction_entry .Lcr4_orig, .Lcr4_alt, X86_FEATURE_SMAP, 12, \
> + (.Lcr4_alt_end - .Lcr4_alt)
These 12's look as if they should be (.Lcr4_alt - .Lcr4_orig).
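For anyone else reading along: as I understand it, those two arguments end
up as the length fields of struct alt_instr, which (quoting from memory, so
the exact field names and types may be slightly off) looks roughly like:

    /* Rough sketch of what each .altinstructions entry decodes to. */
    struct alt_instr {
        int32_t  instr_offset;    /* original (patched-over) site           */
        int32_t  repl_offset;     /* replacement instructions               */
        uint16_t cpuid;           /* X86_FEATURE_* bit gating the patch     */
        uint8_t  instrlen;        /* length of the original site            */
        uint8_t  replacementlen;  /* length of the replacement, <= instrlen */
    };

i.e. the 4th argument is the size of the patched-over site and the 5th the
size of the replacement, which is why hard-coded literals here are easy to
get wrong.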
> + .popsection
> RESTORE_ALL adj=8 compat=1
> .Lft0: iretq
> _ASM_PRE_EXTABLE(.Lft0, handle_exception)
>
> +/* This mustn't modify registers other than %rax. */
> +ENTRY(cr4_smep_smap_restore)
> + mov %cr4, %rax
> + test $X86_CR4_SMEP|X86_CR4_SMAP,%eax
> + jnz 0f
> + or cr4_smep_smap_mask(%rip), %rax
> + mov %rax, %cr4
> + ret
> +0:
> + and cr4_smep_smap_mask(%rip), %eax
> + cmp cr4_smep_smap_mask(%rip), %eax
> + je 1f
> + BUG
What is the purpose of this bug check? It looks like it is catching a
mismatch between the SMEP/SMAP bits actually set in CR4 and those in
cr4_smep_smap_mask, but I am not completely sure.

For all other ASM-level BUGs, I put a short comment on the same line,
to aid people who hit the bug.
> +1:
> + xor %eax, %eax
> + ret
> +
> /* %rdx: trap_bounce, %rbx: struct vcpu */
> ENTRY(compat_post_handle_exception)
> testb $TBF_EXCEPTION,TRAPBOUNCE_flags(%rdx)
> @@ -190,6 +223,7 @@ ENTRY(compat_post_handle_exception)
> /* See lstar_enter for entry register state. */
> ENTRY(cstar_enter)
> sti
> + SMEP_SMAP_RESTORE
> movq 8(%rsp),%rax /* Restore %rax. */
> movq $FLAT_KERNEL_SS,8(%rsp)
> pushq %r11
> @@ -225,6 +259,7 @@ UNLIKELY_END(compat_syscall_gpf)
> jmp .Lcompat_bounce_exception
>
> ENTRY(compat_sysenter)
> + SMEP_SMAP_RESTORE
> movq VCPU_trap_ctxt(%rbx),%rcx
> cmpb $TRAP_gp_fault,UREGS_entry_vector(%rsp)
> movzwl VCPU_sysenter_sel(%rbx),%eax
> @@ -238,6 +273,7 @@ ENTRY(compat_sysenter)
> jmp compat_test_all_events
>
> ENTRY(compat_int80_direct_trap)
> + SMEP_SMAP_RESTORE
> call compat_create_bounce_frame
> jmp compat_test_all_events
>
> --- a/xen/arch/x86/x86_64/entry.S
> +++ b/xen/arch/x86/x86_64/entry.S
> @@ -434,6 +434,7 @@ ENTRY(dom_crash_sync_extable)
>
> ENTRY(common_interrupt)
> SAVE_ALL CLAC
> + SMEP_SMAP_RESTORE
> movq %rsp,%rdi
> callq do_IRQ
> jmp ret_from_intr
> @@ -454,13 +455,64 @@ ENTRY(page_fault)
> GLOBAL(handle_exception)
> SAVE_ALL CLAC
> handle_exception_saved:
> + GET_CURRENT(%rbx)
> testb $X86_EFLAGS_IF>>8,UREGS_eflags+1(%rsp)
> jz exception_with_ints_disabled
> - sti
> +
> +.Lsmep_smap_orig:
> + jmp 0f
> + .if 0 // GAS bug (affecting at least 2.22 ... 2.26)
> + .org .Lsmep_smap_orig + (.Lsmep_smap_alt_end - .Lsmep_smap_alt), 0xcc
> + .else
> + // worst case: rex + opcode + modrm + 4-byte displacement
> + .skip (1 + 1 + 1 + 4) - 2, 0xcc
> + .endif
Which bug is this? How does it manifest? More generally, what is this
alternative trying to achieve?
> + .pushsection .altinstr_replacement, "ax"
> +.Lsmep_smap_alt:
> + mov VCPU_domain(%rbx),%rax
> +.Lsmep_smap_alt_end:
> + .section .altinstructions, "a"
> + altinstruction_entry .Lsmep_smap_orig, .Lsmep_smap_alt, \
> + X86_FEATURE_SMEP, \
> + (.Lsmep_smap_alt_end - .Lsmep_smap_alt), \
> + (.Lsmep_smap_alt_end - .Lsmep_smap_alt)
> + altinstruction_entry .Lsmep_smap_orig, .Lsmep_smap_alt, \
> + X86_FEATURE_SMAP, \
> + (.Lsmep_smap_alt_end - .Lsmep_smap_alt), \
> + (.Lsmep_smap_alt_end - .Lsmep_smap_alt)
> + .popsection
> +
> + testb $3,UREGS_cs(%rsp)
> + jz 0f
> + cmpb $0,DOMAIN_is_32bit_pv(%rax)
This comparison is wrong on hardware lacking SMEP and SMAP, as the "mov
VCPU_domain(%rbx),%rax" won't have happened.
> + je 0f
> + call cr4_smep_smap_restore
> + /*
> + * An NMI or #MC may occur between clearing CR4.SMEP and CR4.SMAP in
> + * compat_restore_all_guest and it actually returning to guest
> + * context, in which case the guest would run with the two features
> + * enabled. The only bad that can happen from this is a kernel mode
> + * #PF which the guest doesn't expect. Rather than trying to make the
> + * NMI/#MC exit path honor the intended CR4 setting, simply check
> + * whether the wrong CR4 was in use when the #PF occurred, and exit
> + * back to the guest (which will in turn clear the two CR4 bits) to
> + * re-execute the instruction. If we get back here, the CR4 bits
> + * should then be found clear (unless another NMI/#MC occurred at
> + * exactly the right time), and we'll continue processing the
> + * exception as normal.
> + */
> + test %rax,%rax
> + jnz 0f
> + mov $PFEC_page_present,%al
> + cmpb $TRAP_page_fault,UREGS_entry_vector(%rsp)
> + jne 0f
> + xor UREGS_error_code(%rsp),%eax
> + test $~(PFEC_write_access|PFEC_insn_fetch),%eax
> + jz compat_test_all_events
> +0: sti
It's code like this which makes me even more certain that we have far
too much code written in assembly which doesn't need to be. Maybe not
this specific example, but it has taken me 15 minutes and a pad of paper
to try to work out how this conditional works, and I am still not
certain it is correct. In particular, PFEC_prot_key looks like it could
fool the test into believing a non-SMAP/SMEP fault was a SMAP/SMEP
fault.

Can you at least provide some C in a comment with the intended
conditional, to aid clarity?
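For reference, here is my best-effort reading of the conditional after the
cr4_smep_smap_restore call (reached only for faults taken from 32-bit PV
guest context, per the checks above), restated as self-contained and
deliberately hypothetical C. The helper name and its calling convention are
invented purely for illustration; the PFEC_* and vector values are as I
recall them from the headers.

    #include <stdbool.h>
    #include <stdint.h>

    #define PFEC_page_present  (1u << 0)
    #define PFEC_write_access  (1u << 1)
    #define PFEC_insn_fetch    (1u << 4)
    #define TRAP_page_fault    14

    /*
     * restore_ret is the value cr4_smep_smap_restore() left in %rax:
     * zero if SMEP/SMAP were already set (i.e. the guest was running
     * with the "wrong" CR4 when the exception was raised), non-zero
     * otherwise.
     */
    static bool bounce_back_to_guest(uint64_t restore_ret,
                                     uint8_t vector, uint32_t error_code)
    {
        if ( restore_ret != 0 )
            return false;                 /* CR4 was as expected */

        if ( vector != TRAP_page_fault )
            return false;                 /* only #PF is special-cased */

        /* Supervisor-mode present-page fault, optionally write or ifetch. */
        return (error_code & ~(PFEC_write_access | PFEC_insn_fetch))
               == PFEC_page_present;
    }

If that matches your intent, something along those lines in a comment would
already help a lot; if it doesn't, that rather proves my point.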
~Andrew