[PATCH RFC 3/7] kvm: x86: XSAVE state and XFD MSRs context switch

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Jing Liu <jing2.liu@linux.intel.com>
To: pbonzini@redhat.com, seanjc@google.com, kvm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org, jing2.liu@intel.com
Subject: [PATCH RFC 3/7] kvm: x86: XSAVE state and XFD MSRs context switch
Date: Sun,  7 Feb 2021 10:42:52 -0500	[thread overview]
Message-ID: <20210207154256.52850-4-jing2.liu@linux.intel.com> (raw)
In-Reply-To: <20210207154256.52850-1-jing2.liu@linux.intel.com>

XFD allows the kernel to enable a feature state in XCR0 and to
receive a #NM trap when a task uses instructions accessing that state.
Kernel defines "struct fpu.state_mask" to indicate the saved xstate and
interact with the XFD hardware when needed via a simple conversion.
Once a dynamic feature is detected, "state_mask" is expanded and
"state_ptr" is dynamically allocated to hold the whole state. Meanwhile
once the state is not in INIT state, the corresponding XFD bit should
not be armed anymore.

In KVM, "guest_fpu" serves for any guest task working on this vcpu
during vmexit and vmenter. We provide a pre-allocated guest_fpu space
and entire "guest_fpu.state_mask" to avoid each dynamic features
detection on each vcpu task. Meanwhile, to ensure correctly
xsaves/xrstors guest state, set IA32_XFD as zero during vmexit and
vmenter.

For "current->thread.fpu", since host and guest probably have different
state and mask, it also need be switched to the right context when fpu
load and put.

Signed-off-by: Jing Liu <jing2.liu@linux.intel.com>
---
 arch/x86/include/asm/kvm_host.h |  3 ++
 arch/x86/kernel/fpu/init.c      |  1 +
 arch/x86/kernel/fpu/xstate.c    |  2 +
 arch/x86/kvm/vmx/vmx.c          | 76 +++++++++++++++++++++++++++++++++
 arch/x86/kvm/vmx/vmx.h          |  1 +
 arch/x86/kvm/x86.c              | 69 +++++++++++++++++++++++++-----
 6 files changed, 141 insertions(+), 11 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 7e5f33a0d0e2..6dedf3d22659 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1203,6 +1203,9 @@ struct kvm_x86_ops {
 			       struct x86_exception *exception);
 	void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu);
 
+	void (*xfd_load)(struct kvm_vcpu *vcpu);
+	void (*xfd_put)(struct kvm_vcpu *vcpu);
+
 	void (*request_immediate_exit)(struct kvm_vcpu *vcpu);
 
 	void (*sched_in)(struct kvm_vcpu *kvm, int cpu);
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 7e0c68043ce3..fbb761fc13ec 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -145,6 +145,7 @@ EXPORT_SYMBOL_GPL(fpu_kernel_xstate_min_size);
  * can be dynamically expanded to include some states up to this size.
  */
 unsigned int fpu_kernel_xstate_max_size;
+EXPORT_SYMBOL_GPL(fpu_kernel_xstate_max_size);
 
 /* Get alignment of the TYPE. */
 #define TYPE_ALIGN(TYPE) offsetof(struct { char x; TYPE test; }, test)
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 080f3be9a5e6..9c471a0364e2 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -77,12 +77,14 @@ static struct xfeature_capflag_info xfeature_capflags[] __initdata = {
  * XSAVE buffer, both supervisor and user xstates.
  */
 u64 xfeatures_mask_all __read_mostly;
+EXPORT_SYMBOL_GPL(xfeatures_mask_all);
 
 /*
  * This represents user xstates, a subset of xfeatures_mask_all, saved in a
  * dynamic kernel XSAVE buffer.
  */
 u64 xfeatures_mask_user_dynamic __read_mostly;
+EXPORT_SYMBOL_GPL(xfeatures_mask_user_dynamic);
 
 static unsigned int xstate_offsets[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1};
 static unsigned int xstate_sizes[XFEATURE_MAX]   = { [ 0 ... XFEATURE_MAX - 1] = -1};
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 7fa54e78c45c..be3cc0f3ec6d 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1167,6 +1167,75 @@ static void pt_guest_exit(struct vcpu_vmx *vmx)
 	wrmsrl(MSR_IA32_RTIT_CTL, vmx->pt_desc.host.ctl);
 }
 
+static void vmx_xfd_load(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+	if (guest_cpuid_has(vcpu, X86_FEATURE_XFD)) {
+		vmx->host_ia32_xfd = xfirstuse_not_detected(vcpu->arch.user_fpu);
+		/*
+		 * Keep IA32_XFD as zero in hypervisor.
+		 * Guest non-zero IA32_XFD is restored until kvm_x86_ops.run
+		 */
+		if (vmx->host_ia32_xfd)
+			wrmsrl(MSR_IA32_XFD, 0);
+	}
+}
+
+static void vmx_xfd_put(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+	if (guest_cpuid_has(vcpu, X86_FEATURE_XFD)) {
+		/* IA32_XFD register is kept as zero in hypervisor. */
+		if (vmx->host_ia32_xfd)
+			wrmsrl(MSR_IA32_XFD, vmx->host_ia32_xfd);
+		/* User (qemu) IA32_XFD_ERR should be zero. */
+		if (vmx->msr_ia32_xfd_err)
+			wrmsrl(MSR_IA32_XFD_ERR, 0);
+	}
+}
+
+/* Load guest XFD MSRs before entering. */
+static void xfd_guest_enter(struct vcpu_vmx *vmx)
+{
+	if (guest_cpuid_has(&vmx->vcpu, X86_FEATURE_XFD)) {
+		if (vmx->msr_ia32_xfd)
+			wrmsrl(MSR_IA32_XFD, vmx->msr_ia32_xfd);
+		/*
+		 * We do not rdmsr here since in most cases
+		 * IA32_XFD_ERR is zero. One rare exception is that,
+		 * this vmenter follows a vmexit with non-zero
+		 * MSR_IA32_XFD_ERR and it doesn't change during
+		 * this interval.
+		 *
+		 * So just simply load the non-zero guest value.
+		 */
+		if (vmx->msr_ia32_xfd_err)
+			wrmsrl(MSR_IA32_XFD_ERR, vmx->msr_ia32_xfd_err);
+	}
+}
+
+/*
+ * Save guest XFD MSRs once vmexit since the registers may be changed
+ * when control is transferred out of KVM, e.g. preemption.
+ */
+static void xfd_guest_exit(struct vcpu_vmx *vmx)
+{
+	if (guest_cpuid_has(&vmx->vcpu, X86_FEATURE_XFD)) {
+		rdmsrl(MSR_IA32_XFD, vmx->msr_ia32_xfd);
+		rdmsrl(MSR_IA32_XFD_ERR, vmx->msr_ia32_xfd_err);
+		/*
+		 * Clear the MSR_IA32_XFD to ensure correctly protect guest
+		 * fpu context in hypervisor.
+		 * No need to reset MSR_IA32_XFD_ERR in hypervisor since it
+		 * has no impact on others.
+		 */
+		if (vmx->msr_ia32_xfd)
+			wrmsrl(MSR_IA32_XFD, 0);
+	}
+}
+
 void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel,
 			unsigned long fs_base, unsigned long gs_base)
 {
@@ -6735,6 +6804,8 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
 
 	kvm_load_guest_xsave_state(vcpu);
 
+	xfd_guest_enter(vmx);
+
 	pt_guest_enter(vmx);
 
 	atomic_switch_perf_msrs(vmx);
@@ -6804,6 +6875,8 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
 
 	pt_guest_exit(vmx);
 
+	xfd_guest_exit(vmx);
+
 	kvm_load_host_xsave_state(vcpu);
 
 	vmx->nested.nested_run_pending = 0;
@@ -7644,6 +7717,9 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
 	.vcpu_load = vmx_vcpu_load,
 	.vcpu_put = vmx_vcpu_put,
 
+	.xfd_load = vmx_xfd_load,
+	.xfd_put = vmx_xfd_put,
+
 	.update_exception_bitmap = update_exception_bitmap,
 	.get_msr_feature = vmx_get_msr_feature,
 	.get_msr = vmx_get_msr,
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index d487f5a53a08..9a9ea37a29b1 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -288,6 +288,7 @@ struct vcpu_vmx {
 	} shadow_msr_intercept;
 
 	/* eXtended Feature Disabling (XFD) MSRs */
+	u64 host_ia32_xfd;
 	u64 msr_ia32_xfd;
 	u64 msr_ia32_xfd_err;
 };
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9ca8b1e58afa..15908bc65d1c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9220,22 +9220,44 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
 
 static void kvm_save_current_fpu(struct fpu *fpu)
 {
-	struct fpu *src_fpu = &current->thread.fpu;
+	struct fpu *cur_fpu = &current->thread.fpu;
 
+	fpu->state_ptr = cur_fpu->state_ptr;
+	fpu->state_mask = cur_fpu->state_mask;
 	/*
 	 * If the target FPU state is not resident in the CPU registers, just
 	 * memcpy() from current, else save CPU state directly to the target.
 	 */
 	if (test_thread_flag(TIF_NEED_FPU_LOAD)) {
-		memcpy(&fpu->state, &src_fpu->state,
-		       fpu_kernel_xstate_min_size);
+		/*
+		 * No need to copy if dynamic feature is used, because
+		 * they just simply point to the same recent state.
+		 */
+		if (!cur_fpu->state_ptr)
+			memcpy(&fpu->state, &cur_fpu->state,
+			       fpu_kernel_xstate_min_size);
 	} else {
-		if (fpu->state_mask != src_fpu->state_mask)
-			fpu->state_mask = src_fpu->state_mask;
 		copy_fpregs_to_fpstate(fpu);
 	}
 }
 
+/*
+ * Swap fpu context to next fpu role.
+ *
+ * "current" fpu acts two roles: user contexts and guest contexts.
+ * Swap "current" fpu to next role to ensure correctly handle
+ * dynamic state buffers, e.g. in preemption case.
+ */
+static void kvm_load_next_fpu(struct fpu *next_fpu, u64 mask)
+{
+	struct fpu *cur_fpu = &current->thread.fpu;
+
+	cur_fpu->state_ptr = next_fpu->state_ptr;
+	cur_fpu->state_mask = next_fpu->state_mask;
+
+	__copy_kernel_to_fpregs(__xstate(next_fpu), mask);
+}
+
 /* Swap (qemu) user FPU context for the guest FPU context. */
 static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
 {
@@ -9243,9 +9265,11 @@ static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
 
 	kvm_save_current_fpu(vcpu->arch.user_fpu);
 
+	if (static_cpu_has(X86_FEATURE_XFD) && kvm_x86_ops.xfd_load)
+		kvm_x86_ops.xfd_load(vcpu);
+
 	/* PKRU is separately restored in kvm_x86_ops.run.  */
-	__copy_kernel_to_fpregs(&vcpu->arch.guest_fpu->state,
-				~XFEATURE_MASK_PKRU);
+	kvm_load_next_fpu(vcpu->arch.guest_fpu, ~XFEATURE_MASK_PKRU);
 
 	fpregs_mark_activate();
 	fpregs_unlock();
@@ -9260,7 +9284,10 @@ static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
 
 	kvm_save_current_fpu(vcpu->arch.guest_fpu);
 
-	copy_kernel_to_fpregs(vcpu->arch.user_fpu);
+	if (static_cpu_has(X86_FEATURE_XFD) && kvm_x86_ops.xfd_put)
+		kvm_x86_ops.xfd_put(vcpu);
+
+	kvm_load_next_fpu(vcpu->arch.user_fpu, -1);
 
 	fpregs_mark_activate();
 	fpregs_unlock();
@@ -9840,11 +9867,13 @@ static int sync_regs(struct kvm_vcpu *vcpu)
 
 static void fx_init(struct kvm_vcpu *vcpu)
 {
+	struct xregs_state *xsave;
+
+	xsave = __xsave(vcpu->arch.guest_fpu);
 	fpstate_init(vcpu->arch.guest_fpu);
 	if (boot_cpu_has(X86_FEATURE_XSAVES))
-		vcpu->arch.guest_fpu->state.xsave.header.xcomp_bv =
+		xsave->header.xcomp_bv =
 			host_xcr0 | XSTATE_COMPACTION_ENABLED;
-
 	/*
 	 * Ensure guest xcr0 is valid for loading
 	 */
@@ -9920,6 +9949,21 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
 		pr_err("kvm: failed to allocate vcpu's fpu\n");
 		goto free_user_fpu;
 	}
+
+	vcpu->arch.guest_fpu->state_mask = xfeatures_mask_all &
+				~xfeatures_mask_user_dynamic;
+
+	/* If have dynamic features, initialize full context. */
+	if (xfeatures_mask_user_dynamic) {
+		vcpu->arch.guest_fpu->state_ptr =
+			kmalloc(fpu_kernel_xstate_max_size, GFP_KERNEL);
+		if (!vcpu->arch.guest_fpu->state_ptr)
+			goto free_guest_fpu;
+
+		vcpu->arch.guest_fpu->state_mask |=
+			xfeatures_mask_user_dynamic;
+	}
+
 	fx_init(vcpu);
 
 	vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
@@ -9936,7 +9980,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
 
 	r = kvm_x86_ops.vcpu_create(vcpu);
 	if (r)
-		goto free_guest_fpu;
+		goto free_guest_fpu_exp;
 
 	vcpu->arch.arch_capabilities = kvm_get_arch_capabilities();
 	vcpu->arch.msr_platform_info = MSR_PLATFORM_INFO_CPUID_FAULT;
@@ -9947,6 +9991,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
 	vcpu_put(vcpu);
 	return 0;
 
+free_guest_fpu_exp:
+	kfree(vcpu->arch.guest_fpu->state_ptr);
 free_guest_fpu:
 	kmem_cache_free(x86_fpu_cache, vcpu->arch.guest_fpu);
 free_user_fpu:
@@ -10002,6 +10048,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 	kmem_cache_free(x86_emulator_cache, vcpu->arch.emulate_ctxt);
 	free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
 	kmem_cache_free(x86_fpu_cache, vcpu->arch.user_fpu);
+	kfree(vcpu->arch.guest_fpu->state_ptr);
 	kmem_cache_free(x86_fpu_cache, vcpu->arch.guest_fpu);
 
 	kvm_hv_vcpu_uninit(vcpu);
-- 
2.18.4

next prev parent reply	other threads:[~2021-02-07  6:58 UTC|newest]

Thread overview: 39+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-02-07 15:42 [PATCH RFC 0/7] Introduce support for guest AMX feature Jing Liu
2021-02-07 15:42 ` [PATCH RFC 1/7] kvm: x86: Expose XFD CPUID to guest Jing Liu
2021-05-24 21:34   ` Sean Christopherson
2021-06-07  3:27     ` Liu, Jing2
2021-02-07 15:42 ` [PATCH RFC 2/7] kvm: x86: Introduce XFD MSRs as passthrough " Jing Liu
2021-05-24 21:43   ` Sean Christopherson
2021-05-24 21:57     ` Jim Mattson
2021-06-02  3:12     ` Liu, Jing2
2021-06-23 17:50     ` Dave Hansen
2021-06-28  2:00       ` Liu, Jing2
2021-06-29 17:58         ` Dave Hansen
2021-07-06  7:33           ` Liu, Jing2
2021-02-07 15:42 ` Jing Liu [this message]
2021-02-07 11:49   ` [PATCH RFC 3/7] kvm: x86: XSAVE state and XFD MSRs context switch Borislav Petkov
2021-02-08  3:35     ` Liu, Jing2
2021-02-08 10:25   ` Paolo Bonzini
2021-02-08 17:31     ` Sean Christopherson
2021-02-08 17:45       ` Paolo Bonzini
2021-02-08 18:04         ` Sean Christopherson
2021-02-08 18:12           ` Paolo Bonzini
2021-02-08 18:55             ` Konrad Rzeszutek Wilk
2021-02-22  8:51               ` Liu, Jing2
2021-02-22  8:36             ` Liu, Jing2
2021-02-07 15:42 ` [PATCH RFC 4/7] kvm: x86: Add new ioctls for XSAVE extension Jing Liu
2021-05-24 21:50   ` Sean Christopherson
2021-05-26  6:09     ` Liu, Jing2
2021-05-26 14:43       ` Sean Christopherson
2021-06-01 10:24         ` Liu, Jing2
2021-06-07  5:23           ` Liu, Jing2
2021-05-24 22:06   ` Jim Mattson
2021-05-26  6:11     ` Liu, Jing2
2021-02-07 15:42 ` [PATCH RFC 5/7] kvm: x86: Revise CPUID.D.1.EBX for alignment rule Jing Liu
2021-05-24 21:28   ` Sean Christopherson
2021-06-03  4:45     ` Liu, Jing2
2021-02-07 15:42 ` [PATCH RFC 6/7] kvm: x86: Add AMX_TILE, AMX_INT8 and AMX_BF16 support Jing Liu
2021-02-07 15:42 ` [PATCH RFC 7/7] kvm: x86: AMX XCR0 support for guest Jing Liu
2021-05-24 21:53   ` Sean Christopherson
2021-05-26  7:54     ` Liu, Jing2
2021-05-26 14:54       ` Sean Christopherson

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:7e5f33a0d0e dfblob:6dedf3d2265 dfblob:7e0c68043ce
dfblob:fbb761fc13e dfblob:080f3be9a5e dfblob:9c471a0364e
dfblob:7fa54e78c45 dfblob:be3cc0f3ec6 dfblob:d487f5a53a0
dfblob:9a9ea37a29b dfblob:9ca8b1e58af dfblob:15908bc65d1 )
 OR (
bs:"[PATCH RFC 3/7] kvm: x86: XSAVE state and XFD MSRs context switch" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210207154256.52850-4-jing2.liu@linux.intel.com \
    --to=jing2.liu@linux.intel.com \
    --cc=jing2.liu@intel.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=pbonzini@redhat.com \
    --cc=seanjc@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.