* [PATCH 6.1 0/5] KVM CR0.WP series backport
From: Mathias Krause @ 2023-05-08 15:45 UTC
To: stable; +Cc: Paolo Bonzini, Sean Christopherson, kvm, Mathias Krause
This is a backport of the CR0.WP KVM series[1] to Linux v6.1, pretty
much the same as for v6.2.
I used 'ssdd 10 50000' from rt-tests[2] as a micro-benchmark, running on
a grsecurity L1 VM. The table below shows the results (runtime in seconds,
lower is better):
                legacy     TDP   shadow
Linux v6.1.23    7.65s   8.23s    68.7s
+ patches        3.36s   3.36s    69.1s
The KVM unit test suite showed no regressions.
Please consider applying.
Thanks,
Mathias
[1] https://lore.kernel.org/kvm/20230322013731.102955-1-minipli@grsecurity.net/
[2] https://git.kernel.org/pub/scm/utils/rt-tests/rt-tests.git
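
For readers unfamiliar with the workload: ssdd stresses ptrace
single-stepping, so every step bounces through the guest kernel's
entry/exit paths, which in a grsecurity guest also exercises the
kernel's frequent CR0.WP toggling. The sketch below only approximates
what rt-tests' ssdd does (a single child instead of several forks;
names and loop counts are illustrative, not taken from the tool's
source):

  #include <signal.h>
  #include <stdio.h>
  #include <sys/ptrace.h>
  #include <sys/wait.h>
  #include <unistd.h>

  int main(void)
  {
          pid_t pid = fork();

          if (pid == 0) {
                  /* Child: request tracing, stop until the parent resumes us. */
                  ptrace(PTRACE_TRACEME, 0, NULL, NULL);
                  raise(SIGSTOP);
                  for (volatile int i = 0; i < 100000; i++)
                          ;       /* busy work to single-step through */
                  _exit(0);
          }

          /* Parent: single-step the child to exit, counting the steps. */
          int status;
          long steps = 0;

          waitpid(pid, &status, 0);       /* wait for the initial SIGSTOP */
          while (!WIFEXITED(status)) {
                  ptrace(PTRACE_SINGLESTEP, pid, NULL, NULL);
                  waitpid(pid, &status, 0);
                  steps++;
          }
          printf("single-steps: %ld\n", steps);
          return 0;
  }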
Mathias Krause (3):
KVM: x86: Do not unload MMU roots when only toggling CR0.WP with TDP
enabled
KVM: x86: Make use of kvm_read_cr*_bits() when testing bits
KVM: VMX: Make CR0.WP a guest owned bit
Paolo Bonzini (1):
KVM: x86/mmu: Avoid indirect call for get_cr3
Sean Christopherson (1):
KVM: x86/mmu: Refresh CR0.WP prior to checking for emulated permission
faults
arch/x86/kvm/kvm_cache_regs.h | 2 +-
arch/x86/kvm/mmu.h | 26 ++++++++++++++++++-
arch/x86/kvm/mmu/mmu.c | 46 ++++++++++++++++++++++++++--------
arch/x86/kvm/mmu/paging_tmpl.h | 2 +-
arch/x86/kvm/pmu.c | 4 +--
arch/x86/kvm/vmx/nested.c | 4 +--
arch/x86/kvm/vmx/vmx.c | 6 ++---
arch/x86/kvm/vmx/vmx.h | 18 +++++++++++++
arch/x86/kvm/x86.c | 12 +++++++++
9 files changed, 99 insertions(+), 21 deletions(-)
--
2.39.2
* [PATCH 6.1 1/5] KVM: x86/mmu: Avoid indirect call for get_cr3
From: Mathias Krause @ 2023-05-08 15:45 UTC
To: stable; +Cc: Paolo Bonzini, Sean Christopherson, kvm, Mathias Krause
From: Paolo Bonzini <pbonzini@redhat.com>
[ Upstream commit 2fdcc1b324189b5fb20655baebd40cd82e2bdf0c ]
Most of the time, calls to get_guest_pgd result in calling
kvm_read_cr3 (the only exception is nested TDP). Hardcode
the default instead of using the get_cr3 function pointer,
avoiding a retpoline when retpolines are enabled.
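
The underlying trick is generic: when an indirect call almost always
hits one known target, compare the function pointer against that target
and make a direct call, which CONFIG_RETPOLINE builds can execute
without going through a retpoline thunk. A minimal userspace sketch of
the pattern, with made-up names rather than KVM's actual code:

  #include <stdio.h>

  struct mmu_ops {
          unsigned long (*get_pgd)(void *vcpu);
  };

  /* The overwhelmingly common target, standing in for get_guest_cr3(). */
  static unsigned long default_get_pgd(void *vcpu)
  {
          (void)vcpu;
          return 0x1000;
  }

  static inline unsigned long call_get_pgd(struct mmu_ops *ops, void *vcpu)
  {
          /*
           * Devirtualize the hot path: the pointer compare lets the
           * compiler emit a direct call here, so no retpoline thunk is
           * needed even when retpoline mitigations are enabled.
           */
          if (ops->get_pgd == default_get_pgd)
                  return default_get_pgd(vcpu);

          return ops->get_pgd(vcpu);      /* rare case stays indirect */
  }

  int main(void)
  {
          struct mmu_ops ops = { .get_pgd = default_get_pgd };

          printf("pgd = %#lx\n", call_get_pgd(&ops, NULL));
          return 0;
  }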
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Mathias Krause <minipli@grsecurity.net>
Link: https://lore.kernel.org/r/20230322013731.102955-2-minipli@grsecurity.net
Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Mathias Krause <minipli@grsecurity.net> # backport to v6.1.x
---
arch/x86/kvm/mmu/mmu.c | 31 ++++++++++++++++++++-----------
arch/x86/kvm/mmu/paging_tmpl.h | 2 +-
2 files changed, 21 insertions(+), 12 deletions(-)
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index b6f96d47e596..f2a10c7d1369 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -232,6 +232,20 @@ static struct kvm_mmu_role_regs vcpu_to_role_regs(struct kvm_vcpu *vcpu)
return regs;
}
+static unsigned long get_guest_cr3(struct kvm_vcpu *vcpu)
+{
+ return kvm_read_cr3(vcpu);
+}
+
+static inline unsigned long kvm_mmu_get_guest_pgd(struct kvm_vcpu *vcpu,
+ struct kvm_mmu *mmu)
+{
+ if (IS_ENABLED(CONFIG_RETPOLINE) && mmu->get_guest_pgd == get_guest_cr3)
+ return kvm_read_cr3(vcpu);
+
+ return mmu->get_guest_pgd(vcpu);
+}
+
static inline bool kvm_available_flush_tlb_with_range(void)
{
return kvm_x86_ops.tlb_remote_flush_with_range;
@@ -3661,7 +3675,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
int quadrant, i, r;
hpa_t root;
- root_pgd = mmu->get_guest_pgd(vcpu);
+ root_pgd = kvm_mmu_get_guest_pgd(vcpu, mmu);
root_gfn = root_pgd >> PAGE_SHIFT;
if (mmu_check_root(vcpu, root_gfn))
@@ -4112,7 +4126,7 @@ static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
arch.token = alloc_apf_token(vcpu);
arch.gfn = gfn;
arch.direct_map = vcpu->arch.mmu->root_role.direct;
- arch.cr3 = vcpu->arch.mmu->get_guest_pgd(vcpu);
+ arch.cr3 = kvm_mmu_get_guest_pgd(vcpu, vcpu->arch.mmu);
return kvm_setup_async_pf(vcpu, cr2_or_gpa,
kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch);
@@ -4131,7 +4145,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
return;
if (!vcpu->arch.mmu->root_role.direct &&
- work->arch.cr3 != vcpu->arch.mmu->get_guest_pgd(vcpu))
+ work->arch.cr3 != kvm_mmu_get_guest_pgd(vcpu, vcpu->arch.mmu))
return;
kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, 0, true);
@@ -4488,11 +4502,6 @@ void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd)
}
EXPORT_SYMBOL_GPL(kvm_mmu_new_pgd);
-static unsigned long get_cr3(struct kvm_vcpu *vcpu)
-{
- return kvm_read_cr3(vcpu);
-}
-
static bool sync_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn,
unsigned int access)
{
@@ -5043,7 +5052,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu,
context->page_fault = kvm_tdp_page_fault;
context->sync_page = nonpaging_sync_page;
context->invlpg = NULL;
- context->get_guest_pgd = get_cr3;
+ context->get_guest_pgd = get_guest_cr3;
context->get_pdptr = kvm_pdptr_read;
context->inject_page_fault = kvm_inject_page_fault;
@@ -5193,7 +5202,7 @@ static void init_kvm_softmmu(struct kvm_vcpu *vcpu,
kvm_init_shadow_mmu(vcpu, cpu_role);
- context->get_guest_pgd = get_cr3;
+ context->get_guest_pgd = get_guest_cr3;
context->get_pdptr = kvm_pdptr_read;
context->inject_page_fault = kvm_inject_page_fault;
}
@@ -5207,7 +5216,7 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu,
return;
g_context->cpu_role.as_u64 = new_mode.as_u64;
- g_context->get_guest_pgd = get_cr3;
+ g_context->get_guest_pgd = get_guest_cr3;
g_context->get_pdptr = kvm_pdptr_read;
g_context->inject_page_fault = kvm_inject_page_fault;
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index 5ab5f94dcb6f..1f4f5e703f13 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -324,7 +324,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
trace_kvm_mmu_pagetable_walk(addr, access);
retry_walk:
walker->level = mmu->cpu_role.base.level;
- pte = mmu->get_guest_pgd(vcpu);
+ pte = kvm_mmu_get_guest_pgd(vcpu, mmu);
have_ad = PT_HAVE_ACCESSED_DIRTY(mmu);
#if PTTYPE == 64
--
2.39.2
* [PATCH 6.1 2/5] KVM: x86: Do not unload MMU roots when only toggling CR0.WP with TDP enabled
From: Mathias Krause @ 2023-05-08 15:45 UTC
To: stable; +Cc: Paolo Bonzini, Sean Christopherson, kvm, Mathias Krause
[ Upstream commit 01b31714bd90be2784f7145bf93b7f78f3d081e1 ]
There is no need to unload the MMU roots with TDP enabled when only
CR0.WP has changed -- the paging structures are still valid, only the
permission bitmap needs to be updated.
One heavy user of toggling CR0.WP is grsecurity's KERNEXEC feature to
implement kernel W^X.
The optimization brings a huge performance gain for this case as the
following micro-benchmark running 'ssdd 10 50000' from rt-tests[1] on a
grsecurity L1 VM shows (runtime in seconds, lower is better):
                      legacy     TDP   shadow
kvm-x86/next@d8708b    8.43s   9.45s    70.3s
+patch                 5.39s   5.63s    70.2s
For the legacy MMU this is ~36% faster, for the TDP MMU even ~40%
faster. TDP and legacy MMU now also have similar runtimes, which
eliminates the need to disable the TDP MMU for grsecurity.
Shadow MMU sees no measurable difference and is still slow, as expected.
[1] https://git.kernel.org/pub/scm/utils/rt-tests/rt-tests.git
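
The guard works because XOR of the old and new register values yields
exactly the set of bits that changed. A standalone illustration of the
check (userspace sketch; the CR0 bit positions match the x86
definitions):

  #include <stdbool.h>
  #include <stdio.h>

  #define X86_CR0_PE (1ul <<  0)
  #define X86_CR0_WP (1ul << 16)
  #define X86_CR0_PG (1ul << 31)

  /* True iff WP is the one and only bit that differs. */
  static bool only_wp_toggled(unsigned long old_cr0, unsigned long cr0)
  {
          return (cr0 ^ old_cr0) == X86_CR0_WP;
  }

  int main(void)
  {
          unsigned long cr0 = X86_CR0_PE | X86_CR0_PG | X86_CR0_WP;

          printf("%d\n", only_wp_toggled(cr0, cr0 ^ X86_CR0_WP)); /* 1 */
          printf("%d\n", only_wp_toggled(cr0, cr0 ^ X86_CR0_PG)); /* 0 */
          return 0;
  }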
Signed-off-by: Mathias Krause <minipli@grsecurity.net>
Link: https://lore.kernel.org/r/20230322013731.102955-3-minipli@grsecurity.net
Co-developed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Mathias Krause <minipli@grsecurity.net>
---
arch/x86/kvm/x86.c | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ab09d292bded..496bb9a58273 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -910,6 +910,18 @@ EXPORT_SYMBOL_GPL(load_pdptrs);
void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned long cr0)
{
+ /*
+ * CR0.WP is incorporated into the MMU role, but only for non-nested,
+ * indirect shadow MMUs. If TDP is enabled, the MMU's metadata needs
+ * to be updated, e.g. so that emulating guest translations does the
+ * right thing, but there's no need to unload the root as CR0.WP
+ * doesn't affect SPTEs.
+ */
+ if (tdp_enabled && (cr0 ^ old_cr0) == X86_CR0_WP) {
+ kvm_init_mmu(vcpu);
+ return;
+ }
+
if ((cr0 ^ old_cr0) & X86_CR0_PG) {
kvm_clear_async_pf_completion_queue(vcpu);
kvm_async_pf_hash_reset(vcpu);
--
2.39.2
* [PATCH 6.1 3/5] KVM: x86: Make use of kvm_read_cr*_bits() when testing bits
From: Mathias Krause @ 2023-05-08 15:46 UTC
To: stable; +Cc: Paolo Bonzini, Sean Christopherson, kvm, Mathias Krause
[ Upstream commit 74cdc836919bf34684ef66f995273f35e2189daf ]
Make use of the kvm_read_cr{0,4}_bits() helper functions when we only
want to know the state of certain bits instead of the whole register.
This not only makes the intent cleaner, it also avoids a potential
VMREAD in case the tested bits aren't guest owned.
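
To see why this helps, consider a simplified model of the bit-masked
read: the cached register value is authoritative unless one of the
requested bits is guest owned, in which case it has to be refreshed
from hardware (a VMREAD on VMX). This is an assumed shape for
illustration, not KVM's actual helpers:

  #include <stdio.h>

  struct vcpu {
          unsigned long cr0;                      /* cached value      */
          unsigned long cr0_guest_owned_bits;     /* may change freely */
  };

  /* Stand-in for the expensive hardware access (VMREAD on VMX). */
  static unsigned long hw_read_cr0(struct vcpu *vcpu)
  {
          printf("expensive hardware read\n");
          return vcpu->cr0;
  }

  static unsigned long read_cr0_bits(struct vcpu *vcpu, unsigned long mask)
  {
          /*
           * Guest-owned bits can change without a VM-exit, so the cache
           * is only authoritative when none of them are requested.
           */
          if (mask & vcpu->cr0_guest_owned_bits)
                  vcpu->cr0 = hw_read_cr0(vcpu);

          return vcpu->cr0 & mask;
  }

  int main(void)
  {
          struct vcpu vcpu = {
                  .cr0 = 0x80010033,
                  .cr0_guest_owned_bits = 1ul << 16,      /* WP */
          };

          read_cr0_bits(&vcpu, 1ul << 0);         /* PE: cache hit    */
          read_cr0_bits(&vcpu, 1ul << 16);        /* WP: needs VMREAD */
          return 0;
  }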
Signed-off-by: Mathias Krause <minipli@grsecurity.net>
Link: https://lore.kernel.org/r/20230322013731.102955-5-minipli@grsecurity.net
Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Mathias Krause <minipli@grsecurity.net>
---
arch/x86/kvm/pmu.c | 4 ++--
arch/x86/kvm/vmx/vmx.c | 4 ++--
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index de1fd7369736..20cd746cf467 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -418,9 +418,9 @@ int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
if (!pmc)
return 1;
- if (!(kvm_read_cr4(vcpu) & X86_CR4_PCE) &&
+ if (!(kvm_read_cr4_bits(vcpu, X86_CR4_PCE)) &&
(static_call(kvm_x86_get_cpl)(vcpu) != 0) &&
- (kvm_read_cr0(vcpu) & X86_CR0_PE))
+ (kvm_read_cr0_bits(vcpu, X86_CR0_PE)))
return 1;
*data = pmc_read_counter(pmc) & mask;
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index bc868958e91f..a5009b66df9a 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -5417,7 +5417,7 @@ static int handle_cr(struct kvm_vcpu *vcpu)
break;
case 3: /* lmsw */
val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f;
- trace_kvm_cr_write(0, (kvm_read_cr0(vcpu) & ~0xful) | val);
+ trace_kvm_cr_write(0, (kvm_read_cr0_bits(vcpu, ~0xful) | val));
kvm_lmsw(vcpu, val);
return kvm_skip_emulated_instruction(vcpu);
@@ -7496,7 +7496,7 @@ static u8 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
if (!kvm_arch_has_noncoherent_dma(vcpu->kvm))
return (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT) | VMX_EPT_IPAT_BIT;
- if (kvm_read_cr0(vcpu) & X86_CR0_CD) {
+ if (kvm_read_cr0_bits(vcpu, X86_CR0_CD)) {
if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
cache = MTRR_TYPE_WRBACK;
else
--
2.39.2
* [PATCH 6.1 4/5] KVM: VMX: Make CR0.WP a guest owned bit
From: Mathias Krause @ 2023-05-08 15:46 UTC
To: stable; +Cc: Paolo Bonzini, Sean Christopherson, kvm, Mathias Krause
[ Upstream commit fb509f76acc8d42bed11bca308404f81c2be856a ]
Guests like grsecurity that make heavy use of CR0.WP to implement
kernel-level W^X will suffer from the implied VMEXITs.
With EPT there is no need to intercept a guest change of CR0.WP, so
simply make it a guest owned bit if we can do so.
This implies that a read of a guest's CR0.WP bit might need a VMREAD.
However, the only potentially affected user seems to be kvm_init_mmu(),
which is a heavy operation to begin with. Moreover, most callers already
cache the full value of CR0 anyway, so no additional VMREAD is needed;
the only exception is nested_vmx_load_cr3().
This change is VMX-specific, as SVM has no such fine-grained control
register interception.
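
For background, VMX expresses this ownership through the VMCS
CR0_GUEST_HOST_MASK: a set bit is host owned, so guest writes changing
it are intercepted, while a clear bit is guest owned and can change
without a VM-exit. A toy model of that relationship (names invented,
semantics simplified):

  #include <stdbool.h>
  #include <stdio.h>

  #define CR0_TS (1ul <<  3)
  #define CR0_WP (1ul << 16)

  struct toy_vmcs {
          unsigned long cr0_guest_host_mask;      /* 1 = host owned */
  };

  /* Would a guest write changing old_cr0 to new_cr0 be intercepted? */
  static bool cr0_write_traps(const struct toy_vmcs *vmcs,
                              unsigned long old_cr0, unsigned long new_cr0)
  {
          /* Only changes to host-owned bits cause a VM-exit. */
          return ((old_cr0 ^ new_cr0) & vmcs->cr0_guest_host_mask) != 0;
  }

  int main(void)
  {
          /* With EPT: TS and WP guest owned, everything else host owned. */
          struct toy_vmcs vmcs = {
                  .cr0_guest_host_mask = ~(CR0_TS | CR0_WP),
          };

          printf("%d\n", cr0_write_traps(&vmcs, 0x80010033,
                                         0x80010033 ^ CR0_WP));  /* 0 */
          printf("%d\n", cr0_write_traps(&vmcs, 0x80010033,
                                         0x80010033 ^ 1ul));     /* 1: PE */
          return 0;
  }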
Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Mathias Krause <minipli@grsecurity.net>
Link: https://lore.kernel.org/r/20230322013731.102955-7-minipli@grsecurity.net
Co-developed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Mathias Krause <minipli@grsecurity.net> # backport to v6.1.x
---
arch/x86/kvm/kvm_cache_regs.h | 2 +-
arch/x86/kvm/vmx/nested.c | 4 ++--
arch/x86/kvm/vmx/vmx.c | 2 +-
arch/x86/kvm/vmx/vmx.h | 18 ++++++++++++++++++
4 files changed, 22 insertions(+), 4 deletions(-)
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
index 3febc342360c..896cc7394944 100644
--- a/arch/x86/kvm/kvm_cache_regs.h
+++ b/arch/x86/kvm/kvm_cache_regs.h
@@ -4,7 +4,7 @@
#include <linux/kvm_host.h>
-#define KVM_POSSIBLE_CR0_GUEST_BITS X86_CR0_TS
+#define KVM_POSSIBLE_CR0_GUEST_BITS (X86_CR0_TS | X86_CR0_WP)
#define KVM_POSSIBLE_CR4_GUEST_BITS \
(X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \
| X86_CR4_OSXMMEXCPT | X86_CR4_PGE | X86_CR4_TSD | X86_CR4_FSGSBASE)
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 1d00f7824da1..d5b1ccd2f362 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -4455,7 +4455,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
* CR0_GUEST_HOST_MASK is already set in the original vmcs01
* (KVM doesn't change it);
*/
- vcpu->arch.cr0_guest_owned_bits = KVM_POSSIBLE_CR0_GUEST_BITS;
+ vcpu->arch.cr0_guest_owned_bits = vmx_l1_guest_owned_cr0_bits();
vmx_set_cr0(vcpu, vmcs12->host_cr0);
/* Same as above - no reason to call set_cr4_guest_host_mask(). */
@@ -4606,7 +4606,7 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu)
*/
vmx_set_efer(vcpu, nested_vmx_get_vmcs01_guest_efer(vmx));
- vcpu->arch.cr0_guest_owned_bits = KVM_POSSIBLE_CR0_GUEST_BITS;
+ vcpu->arch.cr0_guest_owned_bits = vmx_l1_guest_owned_cr0_bits();
vmx_set_cr0(vcpu, vmcs_readl(CR0_READ_SHADOW));
vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index a5009b66df9a..29d236a65ed5 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -4695,7 +4695,7 @@ static void init_vmcs(struct vcpu_vmx *vmx)
/* 22.2.1, 20.8.1 */
vm_entry_controls_set(vmx, vmx_vmentry_ctrl());
- vmx->vcpu.arch.cr0_guest_owned_bits = KVM_POSSIBLE_CR0_GUEST_BITS;
+ vmx->vcpu.arch.cr0_guest_owned_bits = vmx_l1_guest_owned_cr0_bits();
vmcs_writel(CR0_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr0_guest_owned_bits);
set_cr4_guest_host_mask(vmx);
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index a3da84f4ea45..e2b04f4c0fef 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -640,6 +640,24 @@ BUILD_CONTROLS_SHADOW(tertiary_exec, TERTIARY_VM_EXEC_CONTROL, 64)
(1 << VCPU_EXREG_EXIT_INFO_1) | \
(1 << VCPU_EXREG_EXIT_INFO_2))
+static inline unsigned long vmx_l1_guest_owned_cr0_bits(void)
+{
+ unsigned long bits = KVM_POSSIBLE_CR0_GUEST_BITS;
+
+ /*
+ * CR0.WP needs to be intercepted when KVM is shadowing legacy paging
+ * in order to construct shadow PTEs with the correct protections.
+ * Note! CR0.WP technically can be passed through to the guest if
+ * paging is disabled, but checking CR0.PG would generate a cyclical
+ * dependency of sorts due to forcing the caller to ensure CR0 holds
+ * the correct value prior to determining which CR0 bits can be owned
+ * by L1. Keep it simple and limit the optimization to EPT.
+ */
+ if (!enable_ept)
+ bits &= ~X86_CR0_WP;
+ return bits;
+}
+
static inline struct kvm_vmx *to_kvm_vmx(struct kvm *kvm)
{
return container_of(kvm, struct kvm_vmx, kvm);
--
2.39.2
* [PATCH 6.1 5/5] KVM: x86/mmu: Refresh CR0.WP prior to checking for emulated permission faults
From: Mathias Krause @ 2023-05-08 15:46 UTC
To: stable; +Cc: Paolo Bonzini, Sean Christopherson, kvm, Mathias Krause
From: Sean Christopherson <seanjc@google.com>
[ Upstream commit cf9f4c0eb1699d306e348b1fd0225af7b2c282d3 ]
Refresh the MMU's snapshot of the vCPU's CR0.WP prior to checking for
permission faults when emulating a guest memory access and CR0.WP may be
guest owned. If the guest toggles only CR0.WP and triggers emulation of
a supervisor write, e.g. when KVM is emulating UMIP, KVM may consume a
stale CR0.WP, i.e. use stale protection bits metadata.
Note, KVM passes through CR0.WP if and only if EPT is enabled as CR0.WP
is part of the MMU role for legacy shadow paging, and SVM (NPT) doesn't
support per-bit interception controls for CR0. Don't bother checking for
EPT vs. NPT as the "old == new" check will always be true under NPT, i.e.
the only cost is the read of vcpu->arch.cr0 (SVM unconditionally grabs CR0
from the VMCB on VM-Exit).
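
The fix is an instance of a general caching pattern: re-snapshot the
live value on demand and rebuild the derived metadata only when the
snapshot actually changed, keeping the common case free. A condensed,
runnable model (types and the metadata contents are invented for
illustration):

  #include <stdbool.h>
  #include <stdio.h>

  struct toy_mmu {
          bool cached_wp;         /* snapshot baked into the metadata */
          unsigned long perms;    /* stands in for mmu->permissions[] */
  };

  static void rebuild_metadata(struct toy_mmu *mmu)
  {
          printf("rebuilding permission metadata (wp=%d)\n", mmu->cached_wp);
          mmu->perms = mmu->cached_wp ? 0x2 : 0x0;        /* made-up values */
  }

  /* Refresh only if the live value diverged from the snapshot. */
  static void refresh_wp(struct toy_mmu *mmu, bool live_wp)
  {
          if (mmu->cached_wp == live_wp)
                  return;         /* common case (always true on NPT): free */

          mmu->cached_wp = live_wp;
          rebuild_metadata(mmu);
  }

  int main(void)
  {
          struct toy_mmu mmu = { .cached_wp = true };

          rebuild_metadata(&mmu);
          refresh_wp(&mmu, true);         /* no change: no-op    */
          refresh_wp(&mmu, false);        /* WP toggled: rebuild */
          return 0;
  }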
Reported-by: Mathias Krause <minipli@grsecurity.net>
Link: https://lkml.kernel.org/r/677169b4-051f-fcae-756b-9a3e1bb9f8fe%40grsecurity.net
Fixes: fb509f76acc8 ("KVM: VMX: Make CR0.WP a guest owned bit")
Tested-by: Mathias Krause <minipli@grsecurity.net>
Link: https://lore.kernel.org/r/20230405002608.418442-1-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Mathias Krause <minipli@grsecurity.net> # backport to v6.1.x
---
arch/x86/kvm/mmu.h | 26 +++++++++++++++++++++++++-
arch/x86/kvm/mmu/mmu.c | 15 +++++++++++++++
2 files changed, 40 insertions(+), 1 deletion(-)
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 6bdaacb6faa0..59804be91b5b 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -113,6 +113,8 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu);
int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
u64 fault_address, char *insn, int insn_len);
+void __kvm_mmu_refresh_passthrough_bits(struct kvm_vcpu *vcpu,
+ struct kvm_mmu *mmu);
int kvm_mmu_load(struct kvm_vcpu *vcpu);
void kvm_mmu_unload(struct kvm_vcpu *vcpu);
@@ -153,6 +155,24 @@ static inline void kvm_mmu_load_pgd(struct kvm_vcpu *vcpu)
vcpu->arch.mmu->root_role.level);
}
+static inline void kvm_mmu_refresh_passthrough_bits(struct kvm_vcpu *vcpu,
+ struct kvm_mmu *mmu)
+{
+ /*
+ * When EPT is enabled, KVM may passthrough CR0.WP to the guest, i.e.
+ * @mmu's snapshot of CR0.WP and thus all related paging metadata may
+ * be stale. Refresh CR0.WP and the metadata on-demand when checking
+ * for permission faults. Exempt nested MMUs, i.e. MMUs for shadowing
+ * nEPT and nNPT, as CR0.WP is ignored in both cases. Note, KVM does
+ * need to refresh nested_mmu, a.k.a. the walker used to translate L2
+ * GVAs to GPAs, as that "MMU" needs to honor L2's CR0.WP.
+ */
+ if (!tdp_enabled || mmu == &vcpu->arch.guest_mmu)
+ return;
+
+ __kvm_mmu_refresh_passthrough_bits(vcpu, mmu);
+}
+
/*
* Check if a given access (described through the I/D, W/R and U/S bits of a
* page fault error code pfec) causes a permission fault with the given PTE
@@ -184,8 +204,12 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
u64 implicit_access = access & PFERR_IMPLICIT_ACCESS;
bool not_smap = ((rflags & X86_EFLAGS_AC) | implicit_access) == X86_EFLAGS_AC;
int index = (pfec + (not_smap << PFERR_RSVD_BIT)) >> 1;
- bool fault = (mmu->permissions[index] >> pte_access) & 1;
u32 errcode = PFERR_PRESENT_MASK;
+ bool fault;
+
+ kvm_mmu_refresh_passthrough_bits(vcpu, mmu);
+
+ fault = (mmu->permissions[index] >> pte_access) & 1;
WARN_ON(pfec & (PFERR_PK_MASK | PFERR_RSVD_MASK));
if (unlikely(mmu->pkru_mask)) {
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index f2a10c7d1369..230108a90cf3 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -5005,6 +5005,21 @@ kvm_calc_cpu_role(struct kvm_vcpu *vcpu, const struct kvm_mmu_role_regs *regs)
return role;
}
+void __kvm_mmu_refresh_passthrough_bits(struct kvm_vcpu *vcpu,
+ struct kvm_mmu *mmu)
+{
+ const bool cr0_wp = !!kvm_read_cr0_bits(vcpu, X86_CR0_WP);
+
+ BUILD_BUG_ON((KVM_MMU_CR0_ROLE_BITS & KVM_POSSIBLE_CR0_GUEST_BITS) != X86_CR0_WP);
+ BUILD_BUG_ON((KVM_MMU_CR4_ROLE_BITS & KVM_POSSIBLE_CR4_GUEST_BITS));
+
+ if (is_cr0_wp(mmu) == cr0_wp)
+ return;
+
+ mmu->cpu_role.base.cr0_wp = cr0_wp;
+ reset_guest_paging_metadata(vcpu, mmu);
+}
+
static inline int kvm_mmu_get_tdp_level(struct kvm_vcpu *vcpu)
{
/* tdp_root_level is architecture forced level, use it if nonzero */
--
2.39.2
* Re: [PATCH 6.1 0/5] KVM CR0.WP series backport
From: Sean Christopherson @ 2023-05-11 21:15 UTC
To: Mathias Krause; +Cc: stable, Paolo Bonzini, kvm
On Mon, May 08, 2023, Mathias Krause wrote:
> This is a backport of the CR0.WP KVM series[1] to Linux v6.1, pretty
> much the same as for v6.2.
>
> I used 'ssdd 10 50000' from rt-tests[2] as a micro-benchmark, running on
> a grsecurity L1 VM. Below table shows the results (runtime in seconds,
> lower is better):
>
>                 legacy     TDP   shadow
> Linux v6.1.23    7.65s   8.23s    68.7s
> + patches        3.36s   3.36s    69.1s
>
> The KVM unit test suite showed no regressions.
>
> Please consider applying.
>
> Thanks,
> Mathias
>
> [1] https://lore.kernel.org/kvm/20230322013731.102955-1-minipli@grsecurity.net/
> [2] https://git.kernel.org/pub/scm/utils/rt-tests/rt-tests.git
>
>
> Mathias Krause (3):
> KVM: x86: Do not unload MMU roots when only toggling CR0.WP with TDP
> enabled
> KVM: x86: Make use of kvm_read_cr*_bits() when testing bits
> KVM: VMX: Make CR0.WP a guest owned bit
>
> Paolo Bonzini (1):
> KVM: x86/mmu: Avoid indirect call for get_cr3
>
> Sean Christopherson (1):
> KVM: x86/mmu: Refresh CR0.WP prior to checking for emulated permission
> faults
Acked-by: Sean Christopherson <seanjc@google.com>