* [PATCH V5 1/9] KVM, pkeys: disable pkeys for guests in non-paging mode
2016-03-21 10:05 [PATCH V5 0/9] KVM, pkeys: add memory protection-key support Huaitong Han
@ 2016-03-21 10:05 ` Huaitong Han
2016-03-21 10:05 ` [PATCH V5 2/9] KVM, pkeys: add pkeys support for xsave state Huaitong Han
` (7 subsequent siblings)
8 siblings, 0 replies; 19+ messages in thread
From: Huaitong Han @ 2016-03-21 10:05 UTC (permalink / raw)
To: pbonzini, gleb; +Cc: kvm, guangrong.xiao, Huaitong Han
Pkeys is disabled if CPU is in non-paging mode in hardware. However KVM
always uses paging mode to emulate guest non-paging mode with TDP. To
emulate this behavior, pkeys needs to be manually disabled when guest
switches to non-paging mode.
Signed-off-by: Huaitong Han <huaitong.han@intel.com>
Reviewed-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
---
arch/x86/kvm/vmx.c | 16 ++++++++++------
1 file changed, 10 insertions(+), 6 deletions(-)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index e2951b6..c9aa407 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3855,13 +3855,17 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
if (!enable_unrestricted_guest && !is_paging(vcpu))
/*
- * SMEP/SMAP is disabled if CPU is in non-paging mode in
- * hardware. However KVM always uses paging mode without
- * unrestricted guest.
- * To emulate this behavior, SMEP/SMAP needs to be manually
- * disabled when guest switches to non-paging mode.
+ * SMEP/SMAP/PKU is disabled if CPU is in non-paging mode in
+ * hardware. To emulate this behavior, SMEP/SMAP/PKU needs
+ * to be manually disabled when guest switches to non-paging
+ * mode.
+ *
+ * If !enable_unrestricted_guest, the CPU is always running
+ * with CR0.PG=1 and CR4 needs to be modified.
+ * If enable_unrestricted_guest, the CPU automatically
+ * disables SMEP/SMAP/PKU when the guest sets CR0.PG=0.
*/
- hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP);
+ hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE);
vmcs_writel(CR4_READ_SHADOW, cr4);
vmcs_writel(GUEST_CR4, hw_cr4);
--
1.8.3.1
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCH V5 2/9] KVM, pkeys: add pkeys support for xsave state
2016-03-21 10:05 [PATCH V5 0/9] KVM, pkeys: add memory protection-key support Huaitong Han
2016-03-21 10:05 ` [PATCH V5 1/9] KVM, pkeys: disable pkeys for guests in non-paging mode Huaitong Han
@ 2016-03-21 10:05 ` Huaitong Han
2016-03-21 10:05 ` [PATCH V5 3/9] x86: pkey: introduce write_pkru() for KVM Huaitong Han
` (6 subsequent siblings)
8 siblings, 0 replies; 19+ messages in thread
From: Huaitong Han @ 2016-03-21 10:05 UTC (permalink / raw)
To: pbonzini, gleb; +Cc: kvm, guangrong.xiao, Huaitong Han
This patch adds pkeys support for xsave state.
Signed-off-by: Huaitong Han <huaitong.han@intel.com>
Reviewed-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
---
arch/x86/kvm/x86.h | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index f2afa5f..0f71d5d 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -182,7 +182,8 @@ bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn,
#define KVM_SUPPORTED_XCR0 (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \
| XFEATURE_MASK_YMM | XFEATURE_MASK_BNDREGS \
- | XFEATURE_MASK_BNDCSR | XFEATURE_MASK_AVX512)
+ | XFEATURE_MASK_BNDCSR | XFEATURE_MASK_AVX512 \
+ | XFEATURE_MASK_PKRU)
extern u64 host_xcr0;
extern u64 kvm_supported_xcr0(void);
--
1.8.3.1
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCH V5 3/9] x86: pkey: introduce write_pkru() for KVM
2016-03-21 10:05 [PATCH V5 0/9] KVM, pkeys: add memory protection-key support Huaitong Han
2016-03-21 10:05 ` [PATCH V5 1/9] KVM, pkeys: disable pkeys for guests in non-paging mode Huaitong Han
2016-03-21 10:05 ` [PATCH V5 2/9] KVM, pkeys: add pkeys support for xsave state Huaitong Han
@ 2016-03-21 10:05 ` Huaitong Han
2016-03-21 10:05 ` [PATCH V5 4/9] KVM, pkeys: save/restore PKRU when guest/host switches Huaitong Han
` (5 subsequent siblings)
8 siblings, 0 replies; 19+ messages in thread
From: Huaitong Han @ 2016-03-21 10:05 UTC (permalink / raw)
To: pbonzini, gleb
Cc: kvm, guangrong.xiao, Ingo Molnar, Dave Hansen, Huaitong Han
From: Xiao Guangrong <guangrong.xiao@linux.intel.com>
KVM will use it to switch pkru between guest and host.
CC: Ingo Molnar <mingo@redhat.com>
CC: Dave Hansen <dave.hansen@linux.intel.com>
Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
Signed-off-by: Huaitong Han <huaitong.han@intel.com>
---
arch/x86/include/asm/pgtable.h | 6 ++++++
arch/x86/include/asm/special_insns.h | 16 ++++++++++++++++
2 files changed, 22 insertions(+)
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 1ff49ec..97f3242 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -107,6 +107,12 @@ static inline u32 read_pkru(void)
return 0;
}
+static inline void write_pkru(u32 pkru)
+{
+ if (boot_cpu_has(X86_FEATURE_OSPKE))
+ __write_pkru(pkru);
+}
+
static inline int pte_young(pte_t pte)
{
return pte_flags(pte) & _PAGE_ACCESSED;
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index aee6e76..d96d043 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -113,11 +113,27 @@ static inline u32 __read_pkru(void)
: "c" (ecx));
return pkru;
}
+
+static inline void __write_pkru(u32 pkru)
+{
+ u32 ecx = 0, edx = 0;
+
+ /*
+ * "wrpkru" instruction. Loads contents in EAX to PKRU,
+ * requires that ecx = edx = 0.
+ */
+ asm volatile(".byte 0x0f,0x01,0xef\n\t"
+ : : "a" (pkru), "c"(ecx), "d"(edx));
+}
#else
static inline u32 __read_pkru(void)
{
return 0;
}
+
+static inline void __write_pkru(u32 pkru)
+{
+}
#endif
static inline void native_wbinvd(void)
--
1.8.3.1
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCH V5 4/9] KVM, pkeys: save/restore PKRU when guest/host switches
2016-03-21 10:05 [PATCH V5 0/9] KVM, pkeys: add memory protection-key support Huaitong Han
` (2 preceding siblings ...)
2016-03-21 10:05 ` [PATCH V5 3/9] x86: pkey: introduce write_pkru() for KVM Huaitong Han
@ 2016-03-21 10:05 ` Huaitong Han
2016-03-21 10:28 ` Paolo Bonzini
2016-03-21 10:05 ` [PATCH V5 5/9] KVM, pkeys: introduce pkru_mask to cache conditions Huaitong Han
` (4 subsequent siblings)
8 siblings, 1 reply; 19+ messages in thread
From: Huaitong Han @ 2016-03-21 10:05 UTC (permalink / raw)
To: pbonzini, gleb; +Cc: kvm, guangrong.xiao, Huaitong Han
From: Xiao Guangrong <guangrong.xiao@linux.intel.com>
Currently XSAVE state of host is not restored after VM-exit and PKRU
is managed by XSAVE so the PKRU from guest is still controlling the
memory access even if the CPU is running the code of host. This is
not safe as KVM needs to access the memory of userspace (e.g. QEMU) to
do some emulation.
So we save/restore PKRU when guest/host switches.
Signed-off-by: Huaitong Han <huaitong.han@intel.com>
Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
---
arch/x86/kvm/vmx.c | 21 +++++++++++++++++++++
1 file changed, 21 insertions(+)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c9aa407..3e49ef3 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -596,6 +596,10 @@ struct vcpu_vmx {
/* Support for PML */
#define PML_ENTITY_NUM 512
struct page *pml_pg;
+
+ bool guest_pkru_valid;
+ u32 guest_pkru;
+ u32 host_pkru;
};
enum segment_cache_field {
@@ -2078,6 +2082,7 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
} while (cmpxchg(&pi_desc->control, old.control,
new.control) != old.control);
}
+
/*
* Switches to specified vcpu, until a matching vcpu_put(), but assumes
* vcpu mutex is already taken.
@@ -2136,6 +2141,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
}
vmx_vcpu_pi_load(vcpu, cpu);
+ vmx->host_pkru = read_pkru();
}
static void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
@@ -8594,6 +8600,9 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
vmx_set_interrupt_shadow(vcpu, 0);
+ if (vmx->guest_pkru_valid)
+ write_pkru(vmx->guest_pkru);
+
atomic_switch_perf_msrs(vmx);
debugctlmsr = get_debugctlmsr();
@@ -8734,6 +8743,18 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);
/*
+ * eager fpu is enabled if PKEY is supported and CR4 is switched
+ * back on host, so it is safe to read guest PKRU from current
+ * XSAVE.
+ */
+ vmx->guest_pkru = read_pkru();
+ if (vmx->guest_pkru != vmx->host_pkru) {
+ vmx->guest_pkru_valid = true;
+ write_pkru(vmx->host_pkru);
+ } else
+ vmx->guest_pkru_valid = false;
+
+ /*
* the KVM_REQ_EVENT optimization bit is only on for one entry, and if
* we did not inject a still-pending event to L1 now because of
* nested_run_pending, we need to re-enable this bit.
--
1.8.3.1
^ permalink raw reply related [flat|nested] 19+ messages in thread* Re: [PATCH V5 4/9] KVM, pkeys: save/restore PKRU when guest/host switches
2016-03-21 10:05 ` [PATCH V5 4/9] KVM, pkeys: save/restore PKRU when guest/host switches Huaitong Han
@ 2016-03-21 10:28 ` Paolo Bonzini
2016-03-21 10:37 ` Han, Huaitong
0 siblings, 1 reply; 19+ messages in thread
From: Paolo Bonzini @ 2016-03-21 10:28 UTC (permalink / raw)
To: Huaitong Han, gleb; +Cc: kvm, guangrong.xiao
On 21/03/2016 11:05, Huaitong Han wrote:
> + if (vmx->guest_pkru_valid)
> + write_pkru(vmx->guest_pkru);
This can be __write_pkru.
> atomic_switch_perf_msrs(vmx);
> debugctlmsr = get_debugctlmsr();
>
> @@ -8734,6 +8743,18 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
> vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);
>
> /*
> + * eager fpu is enabled if PKEY is supported and CR4 is switched
> + * back on host, so it is safe to read guest PKRU from current
> + * XSAVE.
> + */
> + vmx->guest_pkru = read_pkru();
> + if (vmx->guest_pkru != vmx->host_pkru) {
> + vmx->guest_pkru_valid = true;
> + write_pkru(vmx->host_pkru);
> + } else
> + vmx->guest_pkru_valid = false;
Should this code instead be enclosed in
if (boot_cpu_has(X86_FEATURE_OSPKE))
and use __read_pkru/__write_pkru?
Paolo
^ permalink raw reply [flat|nested] 19+ messages in thread* Re: [PATCH V5 4/9] KVM, pkeys: save/restore PKRU when guest/host switches
2016-03-21 10:28 ` Paolo Bonzini
@ 2016-03-21 10:37 ` Han, Huaitong
0 siblings, 0 replies; 19+ messages in thread
From: Han, Huaitong @ 2016-03-21 10:37 UTC (permalink / raw)
To: pbonzini@redhat.com
Cc: gleb@kernel.org, kvm@vger.kernel.org,
guangrong.xiao@linux.intel.com
On Mon, 2016-03-21 at 11:28 +0100, Paolo Bonzini wrote:
>
> On 21/03/2016 11:05, Huaitong Han wrote:
> > + if (vmx->guest_pkru_valid)
> > + write_pkru(vmx->guest_pkru);
>
> This can be __write_pkru.
Yes
>
> > atomic_switch_perf_msrs(vmx);
> > debugctlmsr = get_debugctlmsr();
> >
> > @@ -8734,6 +8743,18 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
> > vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);
> >
> > /*
> > + * eager fpu is enabled if PKEY is supported and CR4 is switched
> > + * back on host, so it is safe to read guest PKRU from current
> > + * XSAVE.
> > + */
> > + vmx->guest_pkru = read_pkru();
> > + if (vmx->guest_pkru != vmx->host_pkru) {
> > + vmx->guest_pkru_valid = true;
> > + write_pkru(vmx->host_pkru);
> > + } else
> > + vmx->guest_pkru_valid = false;
>
> Should this code instead be enclosed in
>
> if (boot_cpu_has(X86_FEATURE_OSPKE))
>
> and use __read_pkru/__write_pkru?
Yes
>
> Paolo
^ permalink raw reply [flat|nested] 19+ messages in thread
* [PATCH V5 5/9] KVM, pkeys: introduce pkru_mask to cache conditions
2016-03-21 10:05 [PATCH V5 0/9] KVM, pkeys: add memory protection-key support Huaitong Han
` (3 preceding siblings ...)
2016-03-21 10:05 ` [PATCH V5 4/9] KVM, pkeys: save/restore PKRU when guest/host switches Huaitong Han
@ 2016-03-21 10:05 ` Huaitong Han
2016-03-21 17:43 ` Paolo Bonzini
2016-03-21 10:05 ` [PATCH V5 6/9] KVM, pkeys: add pkeys support for permission_fault logic Huaitong Han
` (3 subsequent siblings)
8 siblings, 1 reply; 19+ messages in thread
From: Huaitong Han @ 2016-03-21 10:05 UTC (permalink / raw)
To: pbonzini, gleb; +Cc: kvm, guangrong.xiao, Huaitong Han
PKEYS defines a new status bit in the PFEC. PFEC.PK (bit 5), if some
conditions are true, the fault is considered as a PKU violation.
pkru_mask indicates if we need to check PKRU.ADi and PKRU.WDi, and
does cache some conditions for permission_fault.
[ Huaitong: Guangrong helps modify many sections. ]
Signed-off-by: Huaitong Han <huaitong.han@intel.com>
Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
---
arch/x86/include/asm/kvm_host.h | 9 ++++++
arch/x86/kvm/mmu.c | 71 +++++++++++++++++++++++++++++++++++++++++
2 files changed, 80 insertions(+)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 44adbb8..8df2581 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -326,6 +326,15 @@ struct kvm_mmu {
*/
u8 permissions[16];
+ /*
+ * PKRU bitmap mask indicates if pkey (ADi/WDi) check is needed
+ *
+ * There are 16 domains which are indexed by page fault error
+ * code [4:1] and the PFEC.RSVD is replaced by ACC_USER_MASK,
+ * each domain has 2 bits which indicate AD and WD of pkey.
+ */
+ u32 pkru_mask;
+
u64 *pae_root;
u64 *lm_root;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 0e20230..e5604d1 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3836,6 +3836,72 @@ static void update_permission_bitmask(struct kvm_vcpu *vcpu,
}
}
+/*
+* PKU:additional mechanism by which the paging controls access to user-mode
+* addresses based on the value in the PKRU register. A fault is considered
+* as a PKU violation if all of the following conditions are true:
+* 1.CR4_PKE=1.
+* 2.EFER_LMA=1.
+* 3.page is present with no reserved bit violations.
+* 4.the access is not an instruction fetch.
+* 5.the access is to a user page.
+* 6.PKRU.AD=1
+* or The access is a data write and
+* PKRU.WD=1 and either CR0.WP=1 or it is a user access.
+*
+* PKRU bitmask is produced according to the conditions above.
+*/
+static void update_pkru_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+ bool ept)
+{
+ unsigned bit;
+ bool wp;
+
+ if (ept) {
+ mmu->pkru_mask = 0;
+ return;
+ }
+
+ /* PKEY is enabled only if CR4.PKE and EFER.LMA are both set. */
+ if (!kvm_read_cr4_bits(vcpu, X86_CR4_PKE) || !is_long_mode(vcpu)) {
+ mmu->pkru_mask = 0;
+ return;
+ }
+
+ wp = is_write_protection(vcpu);
+
+ for (bit = 0; bit < ARRAY_SIZE(mmu->permissions); ++bit) {
+ unsigned pfec, pkey_bits;
+ bool check_pkey, check_write, ff, uf, wf, pte_user;
+
+ pfec = bit << 1;
+ ff = pfec & PFERR_FETCH_MASK;
+ uf = pfec & PFERR_USER_MASK;
+ wf = pfec & PFERR_WRITE_MASK;
+
+ /* PFEC.RSVD is replaced by ACC_USER_MASK. */
+ pte_user = pfec & PFERR_RSVD_MASK;
+
+ /*
+ * Only need to check the access which is not an
+ * instruction fetch and is to a user page.
+ */
+ check_pkey = (!ff && pte_user);
+ /*
+ * write access is controlled by PKRU if it is a
+ * user access or CR0.WP = 1.
+ */
+ check_write = check_pkey && wf && (uf || wp);
+
+ /* PKRU.AD stops both read and write access. */
+ pkey_bits = !!check_pkey;
+ /* PKRU.WD stops write access. */
+ pkey_bits |= (!!check_write) << 1;
+
+ mmu->pkru_mask |= (pkey_bits & 3) << pfec;
+ }
+}
+
static void update_last_nonleaf_level(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
{
unsigned root_level = mmu->root_level;
@@ -3863,6 +3929,7 @@ static void paging64_init_context_common(struct kvm_vcpu *vcpu,
reset_rsvds_bits_mask(vcpu, context);
update_permission_bitmask(vcpu, context, false);
+ update_pkru_bitmask(vcpu, context, false);
update_last_nonleaf_level(vcpu, context);
MMU_WARN_ON(!is_pae(vcpu));
@@ -3890,6 +3957,7 @@ static void paging32_init_context(struct kvm_vcpu *vcpu,
reset_rsvds_bits_mask(vcpu, context);
update_permission_bitmask(vcpu, context, false);
+ update_pkru_bitmask(vcpu, context, false);
update_last_nonleaf_level(vcpu, context);
context->page_fault = paging32_page_fault;
@@ -3948,6 +4016,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
}
update_permission_bitmask(vcpu, context, false);
+ update_pkru_bitmask(vcpu, context, false);
update_last_nonleaf_level(vcpu, context);
reset_tdp_shadow_zero_bits_mask(vcpu, context);
}
@@ -4000,6 +4069,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly)
context->direct_map = false;
update_permission_bitmask(vcpu, context, true);
+ update_pkru_bitmask(vcpu, context, true);
reset_rsvds_bits_mask_ept(vcpu, context, execonly);
reset_ept_shadow_zero_bits_mask(vcpu, context, execonly);
}
@@ -4054,6 +4124,7 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
}
update_permission_bitmask(vcpu, g_context, false);
+ update_pkru_bitmask(vcpu, g_context, false);
update_last_nonleaf_level(vcpu, g_context);
}
--
1.8.3.1
^ permalink raw reply related [flat|nested] 19+ messages in thread* Re: [PATCH V5 5/9] KVM, pkeys: introduce pkru_mask to cache conditions
2016-03-21 10:05 ` [PATCH V5 5/9] KVM, pkeys: introduce pkru_mask to cache conditions Huaitong Han
@ 2016-03-21 17:43 ` Paolo Bonzini
0 siblings, 0 replies; 19+ messages in thread
From: Paolo Bonzini @ 2016-03-21 17:43 UTC (permalink / raw)
To: Huaitong Han, gleb; +Cc: kvm, guangrong.xiao
Just some editing of the comments:
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 44adbb8..8df2581 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -326,6 +326,15 @@ struct kvm_mmu {
> */
> u8 permissions[16];
>
> + /*
> + * PKRU bitmap mask indicates if pkey (ADi/WDi) check is needed
> + *
> + * There are 16 domains which are indexed by page fault error
> + * code [4:1] and the PFEC.RSVD is replaced by ACC_USER_MASK,
> + * each domain has 2 bits which indicate AD and WD of pkey.
+ * The pkru_mask indicates if protection key checks are needed. It
+ * consists of 16 domains indexed by page fault error code bits [4:1],
+ * with PFEC.RSVD replaced by ACC_USER_MASK from the page tables.
+ * Each domain has 2 bits which are ANDed with AD and WD from PKRU.
> + */
> + u32 pkru_mask;
> +
> u64 *pae_root;
> u64 *lm_root;
>
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index 0e20230..e5604d1 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -3836,6 +3836,72 @@ static void update_permission_bitmask(struct kvm_vcpu *vcpu,
> }
> }
>
> +/*
> +* PKU:additional mechanism by which the paging controls access to user-mode
> +* addresses based on the value in the PKRU register. A fault is considered
> +* as a PKU violation if all of the following conditions are true:
> +* 1.CR4_PKE=1.
> +* 2.EFER_LMA=1.
> +* 3.page is present with no reserved bit violations.
> +* 4.the access is not an instruction fetch.
> +* 5.the access is to a user page.
> +* 6.PKRU.AD=1
> +* or The access is a data write and
> +* PKRU.WD=1 and either CR0.WP=1 or it is a user access.
> +*
> +* PKRU bitmask is produced according to the conditions above.
+* PKU is an additional mechanism by which the paging controls access to
+* user-mode addresses based on the value in the PKRU register. Protection
+* key violations are reported through a bit in the page fault error code.
+* Unlike other bits of the error code, the PK bit is not known at the
+* call site of e.g. gva_to_gpa; it must be computed directly in
+* permission_fault based on two bits of PKRU, on some machine state (CR4,
+* CR0, EFER, CPL), and on other bits of the error code and the page tables.
*
-* PKRU bitmask is produced according to the conditions above.
+* In particular the following conditions come from the error code, the
+* page tables and the machine state:
+* - PK is always zero unless CR4.PKE=1 and EFER.LMA=1
+* - PK is always zero if RSVD=1 (reserved bit set) or F=1 (instruction fetch)
+* - PK is always zero if U=0 in the page tables
+* - PKRU.WD is ignored if CR0.WP=0 and the access is a supervisor access.
+*
+* The PKRU bitmask caches the result of these four conditions. The error
+* code (minus the P bit) and the page table's U bit form an index into the
+* PKRU bitmask. Two bits of the PKRU bitmask are then extracted and ANDed
+* with the two bits of the PKRU register corresponding to the protection key.
+* For the first three conditions above the bits will be 00, thus masking
+* away both AD and WD. For the last condition, only WD will be masked away.
Thanks,
Paolo
> +*/
> +static void update_pkru_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
> + bool ept)
> +{
> + unsigned bit;
> + bool wp;
> +
> + if (ept) {
> + mmu->pkru_mask = 0;
> + return;
> + }
> +
> + /* PKEY is enabled only if CR4.PKE and EFER.LMA are both set. */
> + if (!kvm_read_cr4_bits(vcpu, X86_CR4_PKE) || !is_long_mode(vcpu)) {
> + mmu->pkru_mask = 0;
> + return;
> + }
> +
> + wp = is_write_protection(vcpu);
> +
> + for (bit = 0; bit < ARRAY_SIZE(mmu->permissions); ++bit) {
> + unsigned pfec, pkey_bits;
> + bool check_pkey, check_write, ff, uf, wf, pte_user;
> +
> + pfec = bit << 1;
> + ff = pfec & PFERR_FETCH_MASK;
> + uf = pfec & PFERR_USER_MASK;
> + wf = pfec & PFERR_WRITE_MASK;
> +
> + /* PFEC.RSVD is replaced by ACC_USER_MASK. */
> + pte_user = pfec & PFERR_RSVD_MASK;
> +
> + /*
> + * Only need to check the access which is not an
> + * instruction fetch and is to a user page.
> + */
> + check_pkey = (!ff && pte_user);
> + /*
> + * write access is controlled by PKRU if it is a
> + * user access or CR0.WP = 1.
> + */
> + check_write = check_pkey && wf && (uf || wp);
> +
> + /* PKRU.AD stops both read and write access. */
> + pkey_bits = !!check_pkey;
> + /* PKRU.WD stops write access. */
> + pkey_bits |= (!!check_write) << 1;
> +
> + mmu->pkru_mask |= (pkey_bits & 3) << pfec;
> + }
> +}
> +
> static void update_last_nonleaf_level(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
> {
> unsigned root_level = mmu->root_level;
> @@ -3863,6 +3929,7 @@ static void paging64_init_context_common(struct kvm_vcpu *vcpu,
>
> reset_rsvds_bits_mask(vcpu, context);
> update_permission_bitmask(vcpu, context, false);
> + update_pkru_bitmask(vcpu, context, false);
> update_last_nonleaf_level(vcpu, context);
>
> MMU_WARN_ON(!is_pae(vcpu));
> @@ -3890,6 +3957,7 @@ static void paging32_init_context(struct kvm_vcpu *vcpu,
>
> reset_rsvds_bits_mask(vcpu, context);
> update_permission_bitmask(vcpu, context, false);
> + update_pkru_bitmask(vcpu, context, false);
> update_last_nonleaf_level(vcpu, context);
>
> context->page_fault = paging32_page_fault;
> @@ -3948,6 +4016,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
> }
>
> update_permission_bitmask(vcpu, context, false);
> + update_pkru_bitmask(vcpu, context, false);
> update_last_nonleaf_level(vcpu, context);
> reset_tdp_shadow_zero_bits_mask(vcpu, context);
> }
> @@ -4000,6 +4069,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly)
> context->direct_map = false;
>
> update_permission_bitmask(vcpu, context, true);
> + update_pkru_bitmask(vcpu, context, true);
> reset_rsvds_bits_mask_ept(vcpu, context, execonly);
> reset_ept_shadow_zero_bits_mask(vcpu, context, execonly);
> }
> @@ -4054,6 +4124,7 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
> }
>
> update_permission_bitmask(vcpu, g_context, false);
> + update_pkru_bitmask(vcpu, g_context, false);
> update_last_nonleaf_level(vcpu, g_context);
> }
>
>
^ permalink raw reply [flat|nested] 19+ messages in thread
* [PATCH V5 6/9] KVM, pkeys: add pkeys support for permission_fault logic
2016-03-21 10:05 [PATCH V5 0/9] KVM, pkeys: add memory protection-key support Huaitong Han
` (4 preceding siblings ...)
2016-03-21 10:05 ` [PATCH V5 5/9] KVM, pkeys: introduce pkru_mask to cache conditions Huaitong Han
@ 2016-03-21 10:05 ` Huaitong Han
2016-03-21 10:55 ` Paolo Bonzini
2016-03-21 10:05 ` [PATCH V5 7/9] KVM, pkeys: expose CPUID/CR4 to guest Huaitong Han
` (2 subsequent siblings)
8 siblings, 1 reply; 19+ messages in thread
From: Huaitong Han @ 2016-03-21 10:05 UTC (permalink / raw)
To: pbonzini, gleb; +Cc: kvm, guangrong.xiao, Huaitong Han
Protection keys define a new 4-bit protection key field (PKEY) in bits
62:59 of leaf entries of the page tables, the PKEY is an index to PKRU
register(16 domains), every domain has 2 bits(write disable bit, access
disable bit).
Static logic has been produced in update_permission_bitmask, dynamic logic
needs to read pkey from page table entries, get the pkru value, and deduce
the correct result.
[ Huaitong: Guangrong helps modify many sections. ]
Signed-off-by: Huaitong Han <huaitong.han@intel.com>
Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
---
arch/x86/include/asm/kvm_host.h | 3 +++
arch/x86/kvm/kvm_cache_regs.h | 5 +++++
arch/x86/kvm/mmu.h | 35 +++++++++++++++++++++++++++++------
arch/x86/kvm/paging_tmpl.h | 16 ++++++++++++++--
arch/x86/kvm/svm.c | 8 ++++++++
arch/x86/kvm/vmx.c | 8 ++++++++
arch/x86/kvm/x86.c | 7 ++++++-
7 files changed, 73 insertions(+), 9 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 8df2581..0acd135 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -186,12 +186,14 @@ enum {
#define PFERR_USER_BIT 2
#define PFERR_RSVD_BIT 3
#define PFERR_FETCH_BIT 4
+#define PFERR_PK_BIT 5
#define PFERR_PRESENT_MASK (1U << PFERR_PRESENT_BIT)
#define PFERR_WRITE_MASK (1U << PFERR_WRITE_BIT)
#define PFERR_USER_MASK (1U << PFERR_USER_BIT)
#define PFERR_RSVD_MASK (1U << PFERR_RSVD_BIT)
#define PFERR_FETCH_MASK (1U << PFERR_FETCH_BIT)
+#define PFERR_PK_MASK (1U << PFERR_PK_BIT)
/* apic attention bits */
#define KVM_APIC_CHECK_VAPIC 0
@@ -874,6 +876,7 @@ struct kvm_x86_ops {
void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
+ u32 (*get_pkru)(struct kvm_vcpu *vcpu);
void (*fpu_activate)(struct kvm_vcpu *vcpu);
void (*fpu_deactivate)(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
index e1e89ee..762cdf2 100644
--- a/arch/x86/kvm/kvm_cache_regs.h
+++ b/arch/x86/kvm/kvm_cache_regs.h
@@ -84,6 +84,11 @@ static inline u64 kvm_read_edx_eax(struct kvm_vcpu *vcpu)
| ((u64)(kvm_register_read(vcpu, VCPU_REGS_RDX) & -1u) << 32);
}
+static inline u32 kvm_read_pkru(struct kvm_vcpu *vcpu)
+{
+ return kvm_x86_ops->get_pkru(vcpu);
+}
+
static inline void enter_guest_mode(struct kvm_vcpu *vcpu)
{
vcpu->arch.hflags |= HF_GUEST_MASK;
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 5fb0c9d..2ebcabe 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -10,10 +10,11 @@
#define PT32_ENT_PER_PAGE (1 << PT32_PT_BITS)
#define PT_WRITABLE_SHIFT 1
+#define PT_USER_SHIFT 2
#define PT_PRESENT_MASK (1ULL << 0)
#define PT_WRITABLE_MASK (1ULL << PT_WRITABLE_SHIFT)
-#define PT_USER_MASK (1ULL << 2)
+#define PT_USER_MASK (1ULL << PT_USER_SHIFT)
#define PT_PWT_MASK (1ULL << 3)
#define PT_PCD_MASK (1ULL << 4)
#define PT_ACCESSED_SHIFT 5
@@ -149,10 +150,34 @@ static inline bool is_write_protection(struct kvm_vcpu *vcpu)
* if the access faults.
*/
static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
- unsigned pte_access, unsigned pfec)
+ unsigned pte_access, unsigned pte_pkey,
+ unsigned pfec)
{
int cpl = kvm_x86_ops->get_cpl(vcpu);
unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
+ unsigned long smap = (cpl - 3) & (rflags & X86_EFLAGS_AC);
+ int index = (pfec >> 1) +
+ (smap >> (X86_EFLAGS_AC_BIT - PFERR_RSVD_BIT + 1));
+
+ if (unlikely(mmu->pkru_mask)) {
+ u32 pkru_bits, offset;
+
+ WARN_ON(pfec & (PFERR_PK_MASK | PFERR_RSVD_MASK));
+
+ /*
+ * PKRU defines 32 bits, there are 16 domains and 2
+ * attribute bits per domain in pkru, pkey is the
+ * index to a defined domain, so the value of
+ * pkey * 2 is offset of a defined domain.
+ */
+ pkru_bits = (kvm_read_pkru(vcpu) >> (pte_pkey * 2)) & 3;
+ /* replace PFEC.RSVD with ACC_USER_MASK. */
+ offset = pfec | ((pte_access & PT_USER_MASK) <<
+ (PFERR_RSVD_BIT - PT_USER_SHIFT));
+
+ pkru_bits &= mmu->pkru_mask >> (offset & ~1);
+ pfec |= -pkru_bits & PFERR_PK_MASK;
+ }
/*
* If CPL < 3, SMAP prevention are disabled if EFLAGS.AC = 1.
@@ -167,14 +192,12 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
* but it will be one in index if SMAP checks are being overridden.
* It is important to keep this branchless.
*/
- unsigned long smap = (cpl - 3) & (rflags & X86_EFLAGS_AC);
- int index = (pfec >> 1) +
- (smap >> (X86_EFLAGS_AC_BIT - PFERR_RSVD_BIT + 1));
WARN_ON(pfec & PFERR_RSVD_MASK);
pfec |= PFERR_PRESENT_MASK;
- return -((mmu->permissions[index] >> pte_access) & 1) & pfec;
+ return -(((pfec >> PFERR_PK_BIT) |
+ (mmu->permissions[index] >> pte_access)) & 1) & pfec;
}
void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm);
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 3f94262..1116cb8 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -254,6 +254,17 @@ static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu,
return 0;
}
+static inline unsigned FNAME(gpte_pkeys)(struct kvm_vcpu *vcpu, u64 gpte)
+{
+ unsigned pkeys = 0;
+#if PTTYPE == 64
+ pte_t pte = {.pte = gpte};
+
+ pkeys = pte_flags_pkey(pte_flags(pte));
+#endif
+ return pkeys;
+}
+
/*
* Fetch a guest pte for a guest virtual address
*/
@@ -265,7 +276,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
pt_element_t pte;
pt_element_t __user *uninitialized_var(ptep_user);
gfn_t table_gfn;
- unsigned index, pt_access, pte_access, accessed_dirty;
+ unsigned index, pt_access, pte_access, accessed_dirty, pte_pkey;
gpa_t pte_gpa;
int offset;
const int write_fault = access & PFERR_WRITE_MASK;
@@ -367,7 +378,8 @@ retry_walk:
walker->ptes[walker->level - 1] = pte;
} while (!is_last_gpte(mmu, walker->level, pte));
- errcode = permission_fault(vcpu, mmu, pte_access, errcode);
+ pte_pkey = FNAME(gpte_pkeys)(vcpu, pte);
+ errcode = permission_fault(vcpu, mmu, pte_access, pte_pkey, errcode);
if (unlikely(errcode))
goto error;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index c13a64b..2e9fda8 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1280,6 +1280,11 @@ static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
to_svm(vcpu)->vmcb->save.rflags = rflags;
}
+static u32 svm_get_pkru(struct kvm_vcpu *vcpu)
+{
+ return 0;
+}
+
static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
{
switch (reg) {
@@ -4348,6 +4353,9 @@ static struct kvm_x86_ops svm_x86_ops = {
.cache_reg = svm_cache_reg,
.get_rflags = svm_get_rflags,
.set_rflags = svm_set_rflags,
+
+ .get_pkru = svm_get_pkru,
+
.fpu_activate = svm_fpu_activate,
.fpu_deactivate = svm_fpu_deactivate,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 3e49ef3..8687261 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2261,6 +2261,11 @@ static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
vmcs_writel(GUEST_RFLAGS, rflags);
}
+static u32 vmx_get_pkru(struct kvm_vcpu *vcpu)
+{
+ return to_vmx(vcpu)->guest_pkru;
+}
+
static u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu)
{
u32 interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
@@ -10865,6 +10870,9 @@ static struct kvm_x86_ops vmx_x86_ops = {
.cache_reg = vmx_cache_reg,
.get_rflags = vmx_get_rflags,
.set_rflags = vmx_set_rflags,
+
+ .get_pkru = vmx_get_pkru,
+
.fpu_activate = vmx_fpu_activate,
.fpu_deactivate = vmx_fpu_deactivate,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4244c2b..9a3c226 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4312,9 +4312,14 @@ static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
u32 access = ((kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0)
| (write ? PFERR_WRITE_MASK : 0);
+ /*
+ * currently PKRU is only applied to ept enabled guest so
+ * there is no pkey in EPT page table for L1 guest or EPT
+ * shadow page table for L2 guest.
+ */
if (vcpu_match_mmio_gva(vcpu, gva)
&& !permission_fault(vcpu, vcpu->arch.walk_mmu,
- vcpu->arch.access, access)) {
+ vcpu->arch.access, 0, access)) {
*gpa = vcpu->arch.mmio_gfn << PAGE_SHIFT |
(gva & (PAGE_SIZE - 1));
trace_vcpu_match_mmio(gva, *gpa, write, false);
--
1.8.3.1
^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [PATCH V5 6/9] KVM, pkeys: add pkeys support for permission_fault logic
2016-03-21 10:05 ` [PATCH V5 6/9] KVM, pkeys: add pkeys support for permission_fault logic Huaitong Han
@ 2016-03-21 10:55 ` Paolo Bonzini
2016-03-21 12:41 ` Han, Huaitong
0 siblings, 1 reply; 19+ messages in thread
From: Paolo Bonzini @ 2016-03-21 10:55 UTC (permalink / raw)
To: Huaitong Han, gleb; +Cc: kvm, guangrong.xiao
On 21/03/2016 11:05, Huaitong Han wrote:
> static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
> - unsigned pte_access, unsigned pfec)
> + unsigned pte_access, unsigned pte_pkey,
> + unsigned pfec)
> {
> int cpl = kvm_x86_ops->get_cpl(vcpu);
> unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
> + unsigned long smap = (cpl - 3) & (rflags & X86_EFLAGS_AC);
> + int index = (pfec >> 1) +
> + (smap >> (X86_EFLAGS_AC_BIT - PFERR_RSVD_BIT + 1));
> +
> + if (unlikely(mmu->pkru_mask)) {
> + u32 pkru_bits, offset;
> +
> + WARN_ON(pfec & (PFERR_PK_MASK | PFERR_RSVD_MASK));
> +
> + /*
> + * PKRU defines 32 bits, there are 16 domains and 2
> + * attribute bits per domain in pkru, pkey is the
> + * index to a defined domain, so the value of
> + * pkey * 2 is offset of a defined domain.
> + */
> + pkru_bits = (kvm_read_pkru(vcpu) >> (pte_pkey * 2)) & 3;
> + /* replace PFEC.RSVD with ACC_USER_MASK. */
> + offset = pfec | ((pte_access & PT_USER_MASK) <<
> + (PFERR_RSVD_BIT - PT_USER_SHIFT));
> +
> + pkru_bits &= mmu->pkru_mask >> (offset & ~1);
> + pfec |= -pkru_bits & PFERR_PK_MASK;
> + }
>
> /*
> * If CPL < 3, SMAP prevention are disabled if EFLAGS.AC = 1.
> @@ -167,14 +192,12 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
> * but it will be one in index if SMAP checks are being overridden.
> * It is important to keep this branchless.
> */
> - unsigned long smap = (cpl - 3) & (rflags & X86_EFLAGS_AC);
> - int index = (pfec >> 1) +
> - (smap >> (X86_EFLAGS_AC_BIT - PFERR_RSVD_BIT + 1));
>
> WARN_ON(pfec & PFERR_RSVD_MASK);
>
> pfec |= PFERR_PRESENT_MASK;
> - return -((mmu->permissions[index] >> pte_access) & 1) & pfec;
> + return -(((pfec >> PFERR_PK_BIT) |
> + (mmu->permissions[index] >> pte_access)) & 1) & pfec;
> }
>
> void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm);
Slightly cleaner:
1) keep WARN_ON together
2) keep smap comment close to smap variable
3) build expression for final return a piece at a time
Does it look good?
Thanks,
Paolo
@@ -149,7 +150,8 @@ static inline bool is_write_protection(struct kvm_vcpu *vcpu)
* if the access faults.
*/
static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
- unsigned pte_access, unsigned pfec)
+ unsigned pte_access, unsigned pte_pkey,
+ unsigned pfec)
{
int cpl = kvm_x86_ops->get_cpl(vcpu);
unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
@@ -170,11 +172,32 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
unsigned long smap = (cpl - 3) & (rflags & X86_EFLAGS_AC);
int index = (pfec >> 1) +
(smap >> (X86_EFLAGS_AC_BIT - PFERR_RSVD_BIT + 1));
+ bool fault = (mmu->permissions[index] >> pte_access) & 1;
- WARN_ON(pfec & PFERR_RSVD_MASK);
-
+ WARN_ON(pfec & (PFERR_PK_MASK | PFERR_RSVD_MASK));
pfec |= PFERR_PRESENT_MASK;
- return -((mmu->permissions[index] >> pte_access) & 1) & pfec;
+
+ if (unlikely(mmu->pkru_mask)) {
+ u32 pkru_bits, offset;
+
+ /*
+ * PKRU defines 32 bits, there are 16 domains and 2
+ * attribute bits per domain in pkru, pkey is the
+ * index to a defined domain, so the value of
+ * pkey * 2 is offset of a defined domain.
+ */
+ pkru_bits = (kvm_read_pkru(vcpu) >> (pte_pkey * 2)) & 3;
+
+ /* clear present bit, replace PFEC.RSVD with ACC_USER_MASK. */
+ offset = pfec - 1 +
+ ((pte_access & PT_USER_MASK) << (PFERR_RSVD_BIT - PT_USER_SHIFT));
+
+ pkru_bits &= mmu->pkru_mask >> offset;
+ pfec |= -pkru_bits & PFERR_PK_MASK;
+ fault |= (pkru_bits != 0);
+ }
+
+ return -(uint32_t)fault & pfec;
}
void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm);
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH V5 6/9] KVM, pkeys: add pkeys support for permission_fault logic
2016-03-21 10:55 ` Paolo Bonzini
@ 2016-03-21 12:41 ` Han, Huaitong
0 siblings, 0 replies; 19+ messages in thread
From: Han, Huaitong @ 2016-03-21 12:41 UTC (permalink / raw)
To: pbonzini@redhat.com
Cc: gleb@kernel.org, kvm@vger.kernel.org,
guangrong.xiao@linux.intel.com
On Mon, 2016-03-21 at 11:55 +0100, Paolo Bonzini wrote:
>
> On 21/03/2016 11:05, Huaitong Han wrote:
> > static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
> > - unsigned pte_access, unsigned pfec)
> > + unsigned pte_access, unsigned pte_pkey,
> > + unsigned pfec)
> > {
> > int cpl = kvm_x86_ops->get_cpl(vcpu);
> > unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
> > + unsigned long smap = (cpl - 3) & (rflags & X86_EFLAGS_AC);
> > + int index = (pfec >> 1) +
> > + (smap >> (X86_EFLAGS_AC_BIT - PFERR_RSVD_BIT + 1));
> > +
> > + if (unlikely(mmu->pkru_mask)) {
> > + u32 pkru_bits, offset;
> > +
> > + WARN_ON(pfec & (PFERR_PK_MASK | PFERR_RSVD_MASK));
> > +
> > + /*
> > + * PKRU defines 32 bits, there are 16 domains and 2
> > + * attribute bits per domain in pkru, pkey is the
> > + * index to a defined domain, so the value of
> > + * pkey * 2 is offset of a defined domain.
> > + */
> > + pkru_bits = (kvm_read_pkru(vcpu) >> (pte_pkey * 2)) & 3;
> > + /* replace PFEC.RSVD with ACC_USER_MASK. */
> > + offset = pfec | ((pte_access & PT_USER_MASK) <<
> > + (PFERR_RSVD_BIT - PT_USER_SHIFT));
> > +
> > + pkru_bits &= mmu->pkru_mask >> (offset & ~1);
> > + pfec |= -pkru_bits & PFERR_PK_MASK;
> > + }
> >
> > /*
> > * If CPL < 3, SMAP prevention are disabled if EFLAGS.AC = 1.
> > @@ -167,14 +192,12 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
> > * but it will be one in index if SMAP checks are being overridden.
> > * It is important to keep this branchless.
> > */
> > - unsigned long smap = (cpl - 3) & (rflags & X86_EFLAGS_AC);
> > - int index = (pfec >> 1) +
> > - (smap >> (X86_EFLAGS_AC_BIT - PFERR_RSVD_BIT + 1));
> >
> > WARN_ON(pfec & PFERR_RSVD_MASK);
> >
> > pfec |= PFERR_PRESENT_MASK;
> > - return -((mmu->permissions[index] >> pte_access) & 1) & pfec;
> > + return -(((pfec >> PFERR_PK_BIT) |
> > + (mmu->permissions[index] >> pte_access)) & 1) & pfec;
> > }
> >
> > void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm);
>
> Slightly cleaner:
>
> 1) keep WARN_ON together
> 2) keep smap comment close to smap variable
> 3) build expression for final return a piece at a time
>
> Does it look good?
Accepted.
>
> Thanks,
>
> Paolo
>
> @@ -149,7 +150,8 @@ static inline bool is_write_protection(struct kvm_vcpu *vcpu)
> * if the access faults.
> */
> static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
> - unsigned pte_access, unsigned pfec)
> + unsigned pte_access, unsigned pte_pkey,
> + unsigned pfec)
> {
> int cpl = kvm_x86_ops->get_cpl(vcpu);
> unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
> @@ -170,11 +172,32 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
> unsigned long smap = (cpl - 3) & (rflags & X86_EFLAGS_AC);
> int index = (pfec >> 1) +
> (smap >> (X86_EFLAGS_AC_BIT - PFERR_RSVD_BIT + 1));
> + bool fault = (mmu->permissions[index] >> pte_access) & 1;
>
> - WARN_ON(pfec & PFERR_RSVD_MASK);
> -
> + WARN_ON(pfec & (PFERR_PK_MASK | PFERR_RSVD_MASK));
> pfec |= PFERR_PRESENT_MASK;
> - return -((mmu->permissions[index] >> pte_access) & 1) & pfec;
> +
> + if (unlikely(mmu->pkru_mask)) {
> + u32 pkru_bits, offset;
> +
> + /*
> + * PKRU defines 32 bits, there are 16 domains and 2
> + * attribute bits per domain in pkru, pkey is the
> + * index to a defined domain, so the value of
> + * pkey * 2 is offset of a defined domain.
> + */
> + pkru_bits = (kvm_read_pkru(vcpu) >> (pte_pkey * 2)) & 3;
> +
> + /* clear present bit, replace PFEC.RSVD with ACC_USER_MASK. */
> + offset = pfec - 1 +
> + ((pte_access & PT_USER_MASK) << (PFERR_RSVD_BIT - PT_USER_SHIFT));
> +
> + pkru_bits &= mmu->pkru_mask >> offset;
> + pfec |= -pkru_bits & PFERR_PK_MASK;
> + fault |= (pkru_bits != 0);
> + }
> +
> + return -(uint32_t)fault & pfec;
> }
>
> void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm);
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 19+ messages in thread
* [PATCH V5 7/9] KVM, pkeys: expose CPUID/CR4 to guest
2016-03-21 10:05 [PATCH V5 0/9] KVM, pkeys: add memory protection-key support Huaitong Han
` (5 preceding siblings ...)
2016-03-21 10:05 ` [PATCH V5 6/9] KVM, pkeys: add pkeys support for permission_fault logic Huaitong Han
@ 2016-03-21 10:05 ` Huaitong Han
2016-03-21 11:36 ` Paolo Bonzini
2016-03-21 10:05 ` [PATCH V5 8/9] KVM, pkeys: disable PKU feature without ept Huaitong Han
2016-03-21 10:06 ` [PATCH V5 9/9] Revert "KVM: MMU: precompute page fault error code" Huaitong Han
8 siblings, 1 reply; 19+ messages in thread
From: Huaitong Han @ 2016-03-21 10:05 UTC (permalink / raw)
To: pbonzini, gleb; +Cc: kvm, guangrong.xiao, Huaitong Han
X86_FEATURE_PKU is referred to as "PKU" in the hardware documentation:
CPUID.7.0.ECX[3]:PKU. X86_FEATURE_OSPKE is software support for pkeys,
enumerated with CPUID.7.0.ECX[4]:OSPKE, and it reflects the setting of
CR4.PKE(bit 22).
Signed-off-by: Huaitong Han <huaitong.han@intel.com>
Reviewed-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
---
arch/x86/include/asm/kvm_host.h | 3 ++-
arch/x86/kvm/cpuid.c | 33 +++++++++++++++++++++++++--------
arch/x86/kvm/cpuid.h | 8 ++++++++
arch/x86/kvm/x86.c | 9 ++++++---
4 files changed, 41 insertions(+), 12 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 0acd135..f3cfbea 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -83,7 +83,8 @@
| X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \
| X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \
| X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \
- | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE | X86_CR4_SMAP))
+ | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE | X86_CR4_SMAP \
+ | X86_CR4_PKE))
#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 6525e92..7dc7a5a 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -81,6 +81,17 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
apic->lapic_timer.timer_mode_mask = 1 << 17;
}
+ best = kvm_find_cpuid_entry(vcpu, 7, 0);
+ if (!best)
+ return 0;
+
+ /* Update OSPKE bit */
+ if (boot_cpu_has(X86_FEATURE_PKU) && best->function == 0x7) {
+ best->ecx &= ~F(OSPKE);
+ if (kvm_read_cr4_bits(vcpu, X86_CR4_PKE))
+ best->ecx |= F(OSPKE);
+ }
+
best = kvm_find_cpuid_entry(vcpu, 0xD, 0);
if (!best) {
vcpu->arch.guest_supported_xcr0 = 0;
@@ -354,6 +365,9 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
const u32 kvm_supported_word10_x86_features =
F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | f_xsaves;
+ /* cpuid 7.0.ecx*/
+ const u32 kvm_supported_word11_x86_features = F(PKU) | 0 /*OSPKE*/;
+
/* all calls to cpuid_count() should be made on the same cpu */
get_cpu();
@@ -371,9 +385,9 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
break;
case 1:
entry->edx &= kvm_supported_word0_x86_features;
- cpuid_mask(&entry->edx, 0);
+ cpuid_mask(&entry->edx, CPUID_1_EDX);
entry->ecx &= kvm_supported_word4_x86_features;
- cpuid_mask(&entry->ecx, 4);
+ cpuid_mask(&entry->ecx, CPUID_1_ECX);
/* we support x2apic emulation even if host does not support
* it since we emulate x2apic in software */
entry->ecx |= F(X2APIC);
@@ -428,13 +442,16 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
/* Mask ebx against host capability word 9 */
if (index == 0) {
entry->ebx &= kvm_supported_word9_x86_features;
- cpuid_mask(&entry->ebx, 9);
+ cpuid_mask(&entry->ebx, CPUID_7_0_EBX);
// TSC_ADJUST is emulated
entry->ebx |= F(TSC_ADJUST);
- } else
+ entry->ecx &= kvm_supported_word11_x86_features;
+ cpuid_mask(&entry->ecx, CPUID_7_ECX);
+ } else {
entry->ebx = 0;
+ entry->ecx = 0;
+ }
entry->eax = 0;
- entry->ecx = 0;
entry->edx = 0;
break;
}
@@ -559,9 +576,9 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
break;
case 0x80000001:
entry->edx &= kvm_supported_word1_x86_features;
- cpuid_mask(&entry->edx, 1);
+ cpuid_mask(&entry->edx, CPUID_8000_0001_EDX);
entry->ecx &= kvm_supported_word6_x86_features;
- cpuid_mask(&entry->ecx, 6);
+ cpuid_mask(&entry->ecx, CPUID_8000_0001_ECX);
break;
case 0x80000007: /* Advanced power management */
/* invariant TSC is CPUID.80000007H:EDX[8] */
@@ -595,7 +612,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
break;
case 0xC0000001:
entry->edx &= kvm_supported_word5_x86_features;
- cpuid_mask(&entry->edx, 5);
+ cpuid_mask(&entry->edx, CPUID_C000_0001_EDX);
break;
case 3: /* Processor serial number */
case 5: /* MONITOR/MWAIT */
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index c8eda14..3bacab1 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -79,6 +79,14 @@ static inline bool guest_cpuid_has_fsgsbase(struct kvm_vcpu *vcpu)
return best && (best->ebx & bit(X86_FEATURE_FSGSBASE));
}
+static inline bool guest_cpuid_has_pku(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid_entry2 *best;
+
+ best = kvm_find_cpuid_entry(vcpu, 7, 0);
+ return best && (best->ecx & bit(X86_FEATURE_PKU));
+}
+
static inline bool guest_cpuid_has_longmode(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9a3c226..2ee48c5 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -720,7 +720,7 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
unsigned long old_cr4 = kvm_read_cr4(vcpu);
unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE |
- X86_CR4_SMEP | X86_CR4_SMAP;
+ X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE;
if (cr4 & CR4_RESERVED_BITS)
return 1;
@@ -737,6 +737,9 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
if (!guest_cpuid_has_fsgsbase(vcpu) && (cr4 & X86_CR4_FSGSBASE))
return 1;
+ if (!guest_cpuid_has_pku(vcpu) && (cr4 & X86_CR4_PKE))
+ return 1;
+
if (is_long_mode(vcpu)) {
if (!(cr4 & X86_CR4_PAE))
return 1;
@@ -762,7 +765,7 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
(!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
kvm_mmu_reset_context(vcpu);
- if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE)
+ if ((cr4 ^ old_cr4) & (X86_CR4_OSXSAVE | X86_CR4_PKE))
kvm_update_cpuid(vcpu);
return 0;
@@ -7114,7 +7117,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4;
kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
- if (sregs->cr4 & X86_CR4_OSXSAVE)
+ if (sregs->cr4 & (X86_CR4_OSXSAVE | X86_CR4_PKE))
kvm_update_cpuid(vcpu);
idx = srcu_read_lock(&vcpu->kvm->srcu);
--
1.8.3.1
^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [PATCH V5 7/9] KVM, pkeys: expose CPUID/CR4 to guest
2016-03-21 10:05 ` [PATCH V5 7/9] KVM, pkeys: expose CPUID/CR4 to guest Huaitong Han
@ 2016-03-21 11:36 ` Paolo Bonzini
2016-03-21 11:56 ` Han, Huaitong
0 siblings, 1 reply; 19+ messages in thread
From: Paolo Bonzini @ 2016-03-21 11:36 UTC (permalink / raw)
To: Huaitong Han, gleb; +Cc: kvm, guangrong.xiao
On 21/03/2016 11:05, Huaitong Han wrote:
> X86_FEATURE_PKU is referred to as "PKU" in the hardware documentation:
> CPUID.7.0.ECX[3]:PKU. X86_FEATURE_OSPKE is software support for pkeys,
> enumerated with CPUID.7.0.ECX[4]:OSPKE, and it reflects the setting of
> CR4.PKE(bit 22).
>
> Signed-off-by: Huaitong Han <huaitong.han@intel.com>
> Reviewed-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
> ---
> arch/x86/include/asm/kvm_host.h | 3 ++-
> arch/x86/kvm/cpuid.c | 33 +++++++++++++++++++++++++--------
> arch/x86/kvm/cpuid.h | 8 ++++++++
> arch/x86/kvm/x86.c | 9 ++++++---
> 4 files changed, 41 insertions(+), 12 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 0acd135..f3cfbea 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -83,7 +83,8 @@
> | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \
> | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \
> | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \
> - | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE | X86_CR4_SMAP))
> + | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE | X86_CR4_SMAP \
> + | X86_CR4_PKE))
>
> #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
>
> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> index 6525e92..7dc7a5a 100644
> --- a/arch/x86/kvm/cpuid.c
> +++ b/arch/x86/kvm/cpuid.c
> @@ -81,6 +81,17 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
> apic->lapic_timer.timer_mode_mask = 1 << 17;
> }
>
> + best = kvm_find_cpuid_entry(vcpu, 7, 0);
> + if (!best)
> + return 0;
> +
> + /* Update OSPKE bit */
> + if (boot_cpu_has(X86_FEATURE_PKU) && best->function == 0x7) {
> + best->ecx &= ~F(OSPKE);
> + if (kvm_read_cr4_bits(vcpu, X86_CR4_PKE))
> + best->ecx |= F(OSPKE);
> + }
> +
> best = kvm_find_cpuid_entry(vcpu, 0xD, 0);
> if (!best) {
> vcpu->arch.guest_supported_xcr0 = 0;
> @@ -354,6 +365,9 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
> const u32 kvm_supported_word10_x86_features =
> F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | f_xsaves;
>
> + /* cpuid 7.0.ecx*/
> + const u32 kvm_supported_word11_x86_features = F(PKU) | 0 /*OSPKE*/;
> +
> /* all calls to cpuid_count() should be made on the same cpu */
> get_cpu();
>
> @@ -371,9 +385,9 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
> break;
> case 1:
> entry->edx &= kvm_supported_word0_x86_features;
> - cpuid_mask(&entry->edx, 0);
> + cpuid_mask(&entry->edx, CPUID_1_EDX);
Unrelated to this patch, I'll look at these changes for 4.7.
> entry->ecx &= kvm_supported_word4_x86_features;
> - cpuid_mask(&entry->ecx, 4);
> + cpuid_mask(&entry->ecx, CPUID_1_ECX);
> /* we support x2apic emulation even if host does not support
> * it since we emulate x2apic in software */
> entry->ecx |= F(X2APIC);
> @@ -428,13 +442,16 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
> /* Mask ebx against host capability word 9 */
> if (index == 0) {
> entry->ebx &= kvm_supported_word9_x86_features;
> - cpuid_mask(&entry->ebx, 9);
> + cpuid_mask(&entry->ebx, CPUID_7_0_EBX);
> // TSC_ADJUST is emulated
> entry->ebx |= F(TSC_ADJUST);
> - } else
> + entry->ecx &= kvm_supported_word11_x86_features;
> + cpuid_mask(&entry->ecx, CPUID_7_ECX);
This is now word 16, so:
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index b40445612135..1ecaed58b8c0 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -372,7 +372,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | f_xsaves;
/* cpuid 7.0.ecx*/
- const u32 kvm_supported_word11_x86_features = F(PKU) | 0 /*OSPKE*/;
+ const u32 kvm_supported_word16_x86_features = F(PKU) | 0 /*OSPKE*/;
/* all calls to cpuid_count() should be made on the same cpu */
get_cpu();
@@ -451,8 +451,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
cpuid_mask(&entry->ebx, 9);
// TSC_ADJUST is emulated
entry->ebx |= F(TSC_ADJUST);
- entry->ecx &= kvm_supported_word11_x86_features;
- cpuid_mask(&entry->ecx, CPUID_7_ECX);
+ entry->ecx &= kvm_supported_word16_x86_features;
+ cpuid_mask(&entry->ecx, 16);
if (!tdp_enabled)
/*
* PKU is not yet implemented for shadow
Paolo
> + } else {
> entry->ebx = 0;
> + entry->ecx = 0;
> + }
> entry->eax = 0;
> - entry->ecx = 0;
> entry->edx = 0;
> break;
> }
> @@ -559,9 +576,9 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
> break;
> case 0x80000001:
> entry->edx &= kvm_supported_word1_x86_features;
> - cpuid_mask(&entry->edx, 1);
> + cpuid_mask(&entry->edx, CPUID_8000_0001_EDX);
> entry->ecx &= kvm_supported_word6_x86_features;
> - cpuid_mask(&entry->ecx, 6);
> + cpuid_mask(&entry->ecx, CPUID_8000_0001_ECX);
> break;
> case 0x80000007: /* Advanced power management */
> /* invariant TSC is CPUID.80000007H:EDX[8] */
> @@ -595,7 +612,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
> break;
> case 0xC0000001:
> entry->edx &= kvm_supported_word5_x86_features;
> - cpuid_mask(&entry->edx, 5);
> + cpuid_mask(&entry->edx, CPUID_C000_0001_EDX);
> break;
> case 3: /* Processor serial number */
> case 5: /* MONITOR/MWAIT */
> diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
> index c8eda14..3bacab1 100644
> --- a/arch/x86/kvm/cpuid.h
> +++ b/arch/x86/kvm/cpuid.h
> @@ -79,6 +79,14 @@ static inline bool guest_cpuid_has_fsgsbase(struct kvm_vcpu *vcpu)
> return best && (best->ebx & bit(X86_FEATURE_FSGSBASE));
> }
>
> +static inline bool guest_cpuid_has_pku(struct kvm_vcpu *vcpu)
> +{
> + struct kvm_cpuid_entry2 *best;
> +
> + best = kvm_find_cpuid_entry(vcpu, 7, 0);
> + return best && (best->ecx & bit(X86_FEATURE_PKU));
> +}
> +
> static inline bool guest_cpuid_has_longmode(struct kvm_vcpu *vcpu)
> {
> struct kvm_cpuid_entry2 *best;
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 9a3c226..2ee48c5 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -720,7 +720,7 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
> {
> unsigned long old_cr4 = kvm_read_cr4(vcpu);
> unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE |
> - X86_CR4_SMEP | X86_CR4_SMAP;
> + X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE;
>
> if (cr4 & CR4_RESERVED_BITS)
> return 1;
> @@ -737,6 +737,9 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
> if (!guest_cpuid_has_fsgsbase(vcpu) && (cr4 & X86_CR4_FSGSBASE))
> return 1;
>
> + if (!guest_cpuid_has_pku(vcpu) && (cr4 & X86_CR4_PKE))
> + return 1;
> +
> if (is_long_mode(vcpu)) {
> if (!(cr4 & X86_CR4_PAE))
> return 1;
> @@ -762,7 +765,7 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
> (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
> kvm_mmu_reset_context(vcpu);
>
> - if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE)
> + if ((cr4 ^ old_cr4) & (X86_CR4_OSXSAVE | X86_CR4_PKE))
> kvm_update_cpuid(vcpu);
>
> return 0;
> @@ -7114,7 +7117,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
>
> mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4;
> kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
> - if (sregs->cr4 & X86_CR4_OSXSAVE)
> + if (sregs->cr4 & (X86_CR4_OSXSAVE | X86_CR4_PKE))
> kvm_update_cpuid(vcpu);
>
> idx = srcu_read_lock(&vcpu->kvm->srcu);
>
^ permalink raw reply related [flat|nested] 19+ messages in thread* Re: [PATCH V5 7/9] KVM, pkeys: expose CPUID/CR4 to guest
2016-03-21 11:36 ` Paolo Bonzini
@ 2016-03-21 11:56 ` Han, Huaitong
2016-03-21 12:08 ` Paolo Bonzini
0 siblings, 1 reply; 19+ messages in thread
From: Han, Huaitong @ 2016-03-21 11:56 UTC (permalink / raw)
To: pbonzini@redhat.com
Cc: gleb@kernel.org, kvm@vger.kernel.org,
guangrong.xiao@linux.intel.com
On Mon, 2016-03-21 at 12:36 +0100, Paolo Bonzini wrote:
>
> This is now word 16, so:
>
> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> index b40445612135..1ecaed58b8c0 100644
> --- a/arch/x86/kvm/cpuid.c
> +++ b/arch/x86/kvm/cpuid.c
> @@ -372,7 +372,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
> F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | f_xsaves;
>
> /* cpuid 7.0.ecx*/
> - const u32 kvm_supported_word11_x86_features = F(PKU) | 0 /*OSPKE*/;
> + const u32 kvm_supported_word16_x86_features = F(PKU) | 0 /*OSPKE*/;
>
> /* all calls to cpuid_count() should be made on the same cpu */
> get_cpu();
> @@ -451,8 +451,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
> cpuid_mask(&entry->ebx, 9);
> // TSC_ADJUST is emulated
> entry->ebx |= F(TSC_ADJUST);
> - entry->ecx &= kvm_supported_word11_x86_features;
> - cpuid_mask(&entry->ecx, CPUID_7_ECX);
> + entry->ecx &= kvm_supported_word16_x86_features;
> + cpuid_mask(&entry->ecx, 16);
Wouldn't cpuid_mask(&entry->ecx, CPUID_7_ECX) be better — isn't CPUID_7_ECX clearer than the bare 16?
> if (!tdp_enabled)
> /*
> * PKU is not yet implemented for shadow
>
> Paolo
>
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH V5 7/9] KVM, pkeys: expose CPUID/CR4 to guest
2016-03-21 11:56 ` Han, Huaitong
@ 2016-03-21 12:08 ` Paolo Bonzini
0 siblings, 0 replies; 19+ messages in thread
From: Paolo Bonzini @ 2016-03-21 12:08 UTC (permalink / raw)
To: Han, Huaitong
Cc: gleb@kernel.org, kvm@vger.kernel.org,
guangrong.xiao@linux.intel.com
On 21/03/2016 12:56, Han, Huaitong wrote:
>> > + entry->ecx &= kvm_supported_word16_x86_features;
>> > + cpuid_mask(&entry->ecx, 16);
> cpuid_mask(&entry->ecx, CPUID_7_ECX), CPUID_7_ECX is better than 16?
If the name of the variable is kvm_supported_word16_x86_features, it is
not necessarily better.
If we rename the variables with names like
kvm_supported_cpuid_7_ecx_x86_features, then of course we use
CPUID_7_ECX as well.
Paolo
^ permalink raw reply [flat|nested] 19+ messages in thread
* [PATCH V5 8/9] KVM, pkeys: disable PKU feature without ept
2016-03-21 10:05 [PATCH V5 0/9] KVM, pkeys: add memory protection-key support Huaitong Han
` (6 preceding siblings ...)
2016-03-21 10:05 ` [PATCH V5 7/9] KVM, pkeys: expose CPUID/CR4 to guest Huaitong Han
@ 2016-03-21 10:05 ` Huaitong Han
2016-03-21 11:01 ` Paolo Bonzini
2016-03-21 10:06 ` [PATCH V5 9/9] Revert "KVM: MMU: precompute page fault error code" Huaitong Han
8 siblings, 1 reply; 19+ messages in thread
From: Huaitong Han @ 2016-03-21 10:05 UTC (permalink / raw)
To: pbonzini, gleb; +Cc: kvm, guangrong.xiao, Huaitong Han
This patch disables CPUID:PKU without ept, because pkeys is not yet
implemented for shadow paging.
Signed-off-by: Huaitong Han <huaitong.han@intel.com>
Reviewed-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
---
arch/x86/kvm/cpuid.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 7dc7a5a..5028c1e 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -447,6 +447,12 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
entry->ebx |= F(TSC_ADJUST);
entry->ecx &= kvm_supported_word11_x86_features;
cpuid_mask(&entry->ecx, CPUID_7_ECX);
+ if (!tdp_enabled)
+ /*
+ * PKU is not yet implemented for shadow
+ * paging.
+ */
+ entry->ecx &= ~F(PKU);
} else {
entry->ebx = 0;
entry->ecx = 0;
--
1.8.3.1
^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [PATCH V5 8/9] KVM, pkeys: disable PKU feature without ept
2016-03-21 10:05 ` [PATCH V5 8/9] KVM, pkeys: disable PKU feature without ept Huaitong Han
@ 2016-03-21 11:01 ` Paolo Bonzini
0 siblings, 0 replies; 19+ messages in thread
From: Paolo Bonzini @ 2016-03-21 11:01 UTC (permalink / raw)
To: Huaitong Han, gleb; +Cc: kvm, guangrong.xiao
On 21/03/2016 11:05, Huaitong Han wrote:
> This patch disables CPUID:PKU without ept, because pkeys is not yet
> implemented for shadow paging.
>
> Signed-off-by: Huaitong Han <huaitong.han@intel.com>
> Reviewed-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
> ---
> arch/x86/kvm/cpuid.c | 6 ++++++
> 1 file changed, 6 insertions(+)
>
> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> index 7dc7a5a..5028c1e 100644
> --- a/arch/x86/kvm/cpuid.c
> +++ b/arch/x86/kvm/cpuid.c
> @@ -447,6 +447,12 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
> entry->ebx |= F(TSC_ADJUST);
> entry->ecx &= kvm_supported_word11_x86_features;
> cpuid_mask(&entry->ecx, CPUID_7_ECX);
> + if (!tdp_enabled)
> + /*
> + * PKU is not yet implemented for shadow
> + * paging.
> + */
> + entry->ecx &= ~F(PKU);
> } else {
> entry->ebx = 0;
> entry->ecx = 0;
>
I'll squash this into the previous patch.
Paolo
^ permalink raw reply [flat|nested] 19+ messages in thread
* [PATCH V5 9/9] Revert "KVM: MMU: precompute page fault error code"
2016-03-21 10:05 [PATCH V5 0/9] KVM, pkeys: add memory protection-key support Huaitong Han
` (7 preceding siblings ...)
2016-03-21 10:05 ` [PATCH V5 8/9] KVM, pkeys: disable PKU feature without ept Huaitong Han
@ 2016-03-21 10:06 ` Huaitong Han
8 siblings, 0 replies; 19+ messages in thread
From: Huaitong Han @ 2016-03-21 10:06 UTC (permalink / raw)
To: pbonzini, gleb; +Cc: kvm, guangrong.xiao
From: Xiao Guangrong <guangrong.xiao@linux.intel.com>
This reverts commit 5688dccad8a05988be55eacc9e5c7dc8ef20a6d0.
It is not necessary, please refer to:
https://lkml.org/lkml/2016/3/10/302
Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
---
arch/x86/kvm/mmu.c | 2 +-
arch/x86/kvm/paging_tmpl.h | 26 +++++++++++---------------
2 files changed, 12 insertions(+), 16 deletions(-)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index e5604d1..feba9f6 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3798,7 +3798,7 @@ static void update_permission_bitmask(struct kvm_vcpu *vcpu,
u = bit & ACC_USER_MASK;
if (!ept) {
- /* Not really needed: if !nx, ff will always be zero */
+ /* Not really needed: !nx will cause pte.nx to fault */
x |= !mmu->nx;
/* Allow supervisor writes if !cr0.wp */
w |= !is_write_protection(vcpu) && !uf;
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 1116cb8..7ab25f0 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -280,24 +280,13 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
gpa_t pte_gpa;
int offset;
const int write_fault = access & PFERR_WRITE_MASK;
- u16 errcode;
+ const int user_fault = access & PFERR_USER_MASK;
+ const int fetch_fault = access & PFERR_FETCH_MASK;
+ u16 errcode = 0;
gpa_t real_gpa;
gfn_t gfn;
trace_kvm_mmu_pagetable_walk(addr, access);
-
- /*
- * Do not modify PFERR_FETCH_MASK in access. It is used later in the call to
- * mmu->translate_gpa and, when nested virtualization is in use, the X or NX
- * bit of nested page tables always applies---even if NX and SMEP are disabled
- * in the guest.
- *
- * TODO: cache the result of the NX and SMEP test in struct kvm_mmu?
- */
- errcode = access;
- if (!(mmu->nx || kvm_read_cr4_bits(vcpu, X86_CR4_SMEP)))
- errcode &= ~PFERR_FETCH_MASK;
-
retry_walk:
walker->level = mmu->root_level;
pte = mmu->get_cr3(vcpu);
@@ -408,7 +397,9 @@ retry_walk:
if (unlikely(!accessed_dirty)) {
ret = FNAME(update_accessed_dirty_bits)(vcpu, mmu, walker, write_fault);
- if (ret > 0)
+ if (unlikely(ret < 0))
+ goto error;
+ else if (ret)
goto retry_walk;
}
@@ -419,6 +410,11 @@ retry_walk:
return 1;
error:
+ errcode |= write_fault | user_fault;
+ if (fetch_fault && (mmu->nx ||
+ kvm_read_cr4_bits(vcpu, X86_CR4_SMEP)))
+ errcode |= PFERR_FETCH_MASK;
+
walker->fault.vector = PF_VECTOR;
walker->fault.error_code_valid = true;
walker->fault.error_code = errcode;
--
1.8.3.1
^ permalink raw reply related [flat|nested] 19+ messages in thread