All of lore.kernel.org
 help / color / mirror / Atom feed
From: Anthony Liguori <aliguori-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
To: kvm-devel
	<kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org>,
	Avi Kivity <avi-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
Subject: [PATCH] Lazy FPU save/restore for VT
Date: Thu, 26 Apr 2007 10:49:50 -0500	[thread overview]
Message-ID: <4630CA1E.2000808@us.ibm.com> (raw)

[-- Attachment #1: Type: text/plain, Size: 954 bytes --]

Howdy,

Attached patch implements lazy-fpu save/restore for VT.  VMEXIT time 
improves by about 10% (~550 cycles).  I had to do a couple more things 
to get it working than I had to do with SVM.  I changed the CR0 host 
mask to be all 1's so that any attempt to write to CR0 causes a VMEXIT.  
I don't think there are any remaining bits now that we want to trap TS 
that are safe for the guest to access and are in the fast-paths.

Since we're trapping TS, I had to implement CLTS exit handling.  
vcpu->cr0 also had a rather bizarre life cycle.  After a set_cr0, it was 
a proper shadow of the guest's CR0.  However, after a decache_cr0, it 
would contain the host's version of the bits covered by the CR0 host 
mask so it was no longer a proper shadow.

I got rid of the CR0 caching and made vcpu->cr0 always be equivalent to 
CR0_READ_SHADOW.  Once these changes were made, the rest of the patch 
was much like the SVM one.

Regards,

Anthony Liguori

[-- Attachment #2: vt-lazy-fpu.diff --]
[-- Type: text/x-patch, Size: 7245 bytes --]

From: Anthony Liguori <aliguori-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
Subject: [PATCH] Lazy FPU save/restore for VT

  Lazy FPU save/restore for VT

Signed-off-by: Anthony Liguori <aliguori-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>

Index: kernel/drivers/kvm/vmx.c
===================================================================
--- kernel.orig/drivers/kvm/vmx.c	2007-04-26 08:53:54.584362024 -0500
+++ kernel/drivers/kvm/vmx.c	2007-04-26 10:34:49.693845464 -0500
@@ -216,6 +216,16 @@
 #endif
 }
 
+static void vmcs_clear_bits(unsigned long field, u32 mask)
+{
+	vmcs_writel(field, vmcs_readl(field) & ~mask);
+}
+
+static void vmcs_set_bits(unsigned long field, u32 mask)
+{
+	vmcs_writel(field, vmcs_readl(field) | mask);
+}
+
 /*
  * Switches to specified vcpu, until a matching vcpu_put(), but assumes
  * vcpu mutex is already taken.
@@ -810,11 +820,8 @@
 
 #endif
 
-static void vmx_decache_cr0_cr4_guest_bits(struct kvm_vcpu *vcpu)
+static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
 {
-	vcpu->cr0 &= KVM_GUEST_CR0_MASK;
-	vcpu->cr0 |= vmcs_readl(GUEST_CR0) & ~KVM_GUEST_CR0_MASK;
-
 	vcpu->cr4 &= KVM_GUEST_CR4_MASK;
 	vcpu->cr4 |= vmcs_readl(GUEST_CR4) & ~KVM_GUEST_CR4_MASK;
 }
@@ -836,6 +843,11 @@
 	}
 #endif
 
+	if (!(cr0 & CR0_TS_MASK)) {
+		vcpu->fpu_active = 1;
+		vmcs_clear_bits(EXCEPTION_BITMAP, CR0_TS_MASK);
+	}
+
 	vmcs_writel(CR0_READ_SHADOW, cr0);
 	vmcs_writel(GUEST_CR0,
 		    (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON);
@@ -845,6 +857,12 @@
 static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 {
 	vmcs_writel(GUEST_CR3, cr3);
+
+	if (!(vcpu->cr0 & CR0_TS_MASK)) {
+		vcpu->fpu_active = 0;
+		vmcs_set_bits(GUEST_CR0, CR0_TS_MASK);
+		vmcs_set_bits(EXCEPTION_BITMAP, 1 << NM_VECTOR);
+	}
 }
 
 static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
@@ -1205,7 +1223,7 @@
 	vmcs_writel(TPR_THRESHOLD, 0);
 #endif
 
-	vmcs_writel(CR0_GUEST_HOST_MASK, KVM_GUEST_CR0_MASK);
+	vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL);
 	vmcs_writel(CR4_GUEST_HOST_MASK, KVM_GUEST_CR4_MASK);
 
 	vcpu->cr0 = 0x60000010;
@@ -1367,10 +1385,18 @@
 		set_bit(irq / BITS_PER_LONG, &vcpu->irq_summary);
 	}
 
-	if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) { /* nmi */
+	switch (intr_info & INTR_INFO_VECTOR_MASK) {
+	case NMI_VECTOR:
 		asm ("int $2");
 		return 1;
+	case NM_VECTOR:
+		vcpu->fpu_active = 1;
+		vmcs_clear_bits(EXCEPTION_BITMAP, 1 << NM_VECTOR);
+		if (!(vcpu->cr0 & CR0_TS_MASK))
+			vmcs_clear_bits(GUEST_CR0, CR0_TS_MASK);
+		return 1;
 	}
+
 	error_code = 0;
 	rip = vmcs_readl(GUEST_RIP);
 	if (intr_info & INTR_INFO_DELIEVER_CODE_MASK)
@@ -1557,6 +1583,15 @@
 			return 1;
 		};
 		break;
+	case 2: /* clts */
+		vcpu_load_rsp_rip(vcpu);
+		vcpu->fpu_active = 1;
+		vmcs_clear_bits(EXCEPTION_BITMAP, 1 << NM_VECTOR);
+		vmcs_clear_bits(GUEST_CR0, CR0_TS_MASK);
+		vcpu->cr0 &= ~CR0_TS_MASK;
+		vmcs_writel(CR0_READ_SHADOW, vcpu->cr0);
+		skip_emulated_instruction(vcpu);
+		return 1;
 	case 1: /*mov from cr*/
 		switch (cr) {
 		case 3:
@@ -1804,8 +1839,10 @@
 	if (vcpu->guest_debug.enabled)
 		kvm_guest_debug_pre(vcpu);
 
-	fx_save(vcpu->host_fx_image);
-	fx_restore(vcpu->guest_fx_image);
+	if (vcpu->fpu_active) {
+		fx_save(vcpu->host_fx_image);
+		fx_restore(vcpu->guest_fx_image);
+	}
 
 #ifdef CONFIG_X86_64
 	if (is_long_mode(vcpu)) {
@@ -1963,8 +2000,11 @@
 	}
 #endif
 
-	fx_save(vcpu->guest_fx_image);
-	fx_restore(vcpu->host_fx_image);
+	if (vcpu->fpu_active) {
+		fx_save(vcpu->guest_fx_image);
+		fx_restore(vcpu->host_fx_image);
+	}
+
 	vcpu->interrupt_window_open = (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0;
 
 	asm ("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
@@ -2076,6 +2116,7 @@
 	vmcs_clear(vmcs);
 	vcpu->vmcs = vmcs;
 	vcpu->launched = 0;
+	vcpu->fpu_active = 1;
 
 	return 0;
 
@@ -2112,7 +2153,7 @@
 	.get_segment = vmx_get_segment,
 	.set_segment = vmx_set_segment,
 	.get_cs_db_l_bits = vmx_get_cs_db_l_bits,
-	.decache_cr0_cr4_guest_bits = vmx_decache_cr0_cr4_guest_bits,
+	.decache_cr4_guest_bits = vmx_decache_cr4_guest_bits,
 	.set_cr0 = vmx_set_cr0,
 	.set_cr3 = vmx_set_cr3,
 	.set_cr4 = vmx_set_cr4,
Index: kernel/drivers/kvm/kvm.h
===================================================================
--- kernel.orig/drivers/kvm/kvm.h	2007-04-26 10:17:29.931913304 -0500
+++ kernel/drivers/kvm/kvm.h	2007-04-26 10:36:00.420093440 -0500
@@ -63,6 +63,9 @@
 #define FX_BUF_SIZE (2 * FX_IMAGE_SIZE + FX_IMAGE_ALIGN)
 
 #define DE_VECTOR 0
+#ifdef CONFIG_X86_64
+#define NMI_VECTOR 2
+#endif
 #define NM_VECTOR 7
 #define DF_VECTOR 8
 #define TS_VECTOR 10
@@ -397,7 +400,7 @@
 	void (*set_segment)(struct kvm_vcpu *vcpu,
 			    struct kvm_segment *var, int seg);
 	void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l);
-	void (*decache_cr0_cr4_guest_bits)(struct kvm_vcpu *vcpu);
+	void (*decache_cr4_guest_bits)(struct kvm_vcpu *vcpu);
 	void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0);
 	void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3);
 	void (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4);
Index: kernel/drivers/kvm/kvm_main.c
===================================================================
--- kernel.orig/drivers/kvm/kvm_main.c	2007-04-26 10:35:03.087809272 -0500
+++ kernel/drivers/kvm/kvm_main.c	2007-04-26 10:35:50.944533944 -0500
@@ -510,7 +510,6 @@
 
 void lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
 {
-	kvm_arch_ops->decache_cr0_cr4_guest_bits(vcpu);
 	set_cr0(vcpu, (vcpu->cr0 & ~0x0ful) | (msw & 0x0f));
 }
 EXPORT_SYMBOL_GPL(lmsw);
@@ -1117,7 +1116,6 @@
 {
 	unsigned long cr0;
 
-	kvm_arch_ops->decache_cr0_cr4_guest_bits(vcpu);
 	cr0 = vcpu->cr0 & ~CR0_TS_MASK;
 	kvm_arch_ops->set_cr0(vcpu, cr0);
 	return X86EMUL_CONTINUE;
@@ -1318,7 +1316,7 @@
 
 unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
 {
-	kvm_arch_ops->decache_cr0_cr4_guest_bits(vcpu);
+	kvm_arch_ops->decache_cr4_guest_bits(vcpu);
 	switch (cr) {
 	case 0:
 		return vcpu->cr0;
@@ -1934,7 +1932,7 @@
 	sregs->gdt.limit = dt.limit;
 	sregs->gdt.base = dt.base;
 
-	kvm_arch_ops->decache_cr0_cr4_guest_bits(vcpu);
+	kvm_arch_ops->decache_cr4_guest_bits(vcpu);
 	sregs->cr0 = vcpu->cr0;
 	sregs->cr2 = vcpu->cr2;
 	sregs->cr3 = vcpu->cr3;
@@ -1985,7 +1983,7 @@
 #endif
 	vcpu->apic_base = sregs->apic_base;
 
-	kvm_arch_ops->decache_cr0_cr4_guest_bits(vcpu);
+	kvm_arch_ops->decache_cr4_guest_bits(vcpu);
 
 	mmu_reset_needed |= vcpu->cr0 != sregs->cr0;
 	kvm_arch_ops->set_cr0(vcpu, sregs->cr0);
Index: kernel/drivers/kvm/svm.c
===================================================================
--- kernel.orig/drivers/kvm/svm.c	2007-04-26 10:35:03.019819608 -0500
+++ kernel/drivers/kvm/svm.c	2007-04-26 10:35:16.142824608 -0500
@@ -738,7 +738,7 @@
 	vcpu->svm->vmcb->save.gdtr.base = dt->base ;
 }
 
-static void svm_decache_cr0_cr4_guest_bits(struct kvm_vcpu *vcpu)
+static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
 {
 }
 
@@ -1759,7 +1759,7 @@
 	.get_segment = svm_get_segment,
 	.set_segment = svm_set_segment,
 	.get_cs_db_l_bits = svm_get_cs_db_l_bits,
-	.decache_cr0_cr4_guest_bits = svm_decache_cr0_cr4_guest_bits,
+	.decache_cr4_guest_bits = svm_decache_cr4_guest_bits,
 	.set_cr0 = svm_set_cr0,
 	.set_cr3 = svm_set_cr3,
 	.set_cr4 = svm_set_cr4,

[-- Attachment #3: Type: text/plain, Size: 286 bytes --]

-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/

[-- Attachment #4: Type: text/plain, Size: 186 bytes --]

_______________________________________________
kvm-devel mailing list
kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org
https://lists.sourceforge.net/lists/listinfo/kvm-devel

             reply	other threads:[~2007-04-26 15:49 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-04-26 15:49 Anthony Liguori [this message]
     [not found] ` <4630CA1E.2000808-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2007-04-26 16:03   ` [PATCH] Lazy FPU save/restore for VT Avi Kivity
     [not found]     ` <4630CD6F.8050706-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-04-26 16:10       ` Anthony Liguori

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4630CA1E.2000808@us.ibm.com \
    --to=aliguori-r/jw6+rmf7hqt0dzr+alfa@public.gmane.org \
    --cc=avi-atKUWr5tajBWk0Htik3J/w@public.gmane.org \
    --cc=kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.