From: Ingo Molnar <mingo-X9Un+BFzKDI@public.gmane.org>
To: Avi Kivity <avi-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
Cc: kvm-devel <kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org>
Subject: [patch] KVM: cr3 cache support
Date: Tue, 9 Jan 2007 10:35:31 +0100
Message-ID: <20070109093531.GA10976@elte.hu>

Subject: [patch] KVM: cr3 cache support
From: Ingo Molnar <mingo-X9Un+BFzKDI@public.gmane.org>

This enables a KVM-aware Linux guest to make use of the VMX CPU's cr3 cache
feature. The result is cheaper context switches and faster TLB flushes.
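
The guest side of the interface goes through the para_state area that a
paravirtualization-aware guest registers with the hypervisor. The
kvm_cr3_cache structure itself comes from the paravirt patch this one
depends on; reconstructed from how it is used below, the guest-visible
layout is assumed to look roughly like this (illustrative sketch, not
part of this patch):

	struct kvm_cr3_cache_entry {
		u64 guest_cr3;		/* guest-physical cr3 value */
		u64 host_cr3;		/* matching shadow-pagetable cr3 */
	};

	struct kvm_cr3_cache {
		struct kvm_cr3_cache_entry entry[KVM_CR3_CACHE_SIZE];
		u32 max_idx;		/* nr of entries the host supports */
	};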

Signed-off-by: Ingo Molnar <mingo-X9Un+BFzKDI@public.gmane.org>
---
 drivers/kvm/kvm.h         |   17 ++-
 drivers/kvm/kvm_main.c    |    2 
 drivers/kvm/mmu.c         |  123 +++++++++++++++++--------
 drivers/kvm/paging_tmpl.h |    2 
 drivers/kvm/vmx.c         |  224 ++++++++++++++++++++++++++++++++++++++--------
 drivers/kvm/vmx.h         |    1 
 6 files changed, 283 insertions(+), 86 deletions(-)
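
On the guest side (not part of this patch), the context-switch fast path
would then look roughly like the sketch below: scan the cache for the new
cr3 and, on a hit, load the cached host cr3 directly - the CPU accepts
that without a VM exit because it matches one of the VMCS CR3-target
values set up by vmx_set_cr3(). On a miss the cr3 write traps to the
hypervisor as usual. (Minimal sketch, assuming the layout above;
guest_load_cr3() is a hypothetical helper:)

	static void guest_load_cr3(struct kvm_cr3_cache *cache, u64 guest_cr3)
	{
		unsigned int i;

		for (i = 0; i < cache->max_idx; i++) {
			if (cache->entry[i].guest_cr3 == guest_cr3) {
				/* hit: matches a CR3-target value, no VM exit */
				write_cr3(cache->entry[i].host_cr3);
				return;
			}
		}
		/* miss: this cr3 write exits to the hypervisor */
		write_cr3(guest_cr3);
	}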

Index: linux/drivers/kvm/kvm.h
===================================================================
--- linux.orig/drivers/kvm/kvm.h
+++ linux/drivers/kvm/kvm.h
@@ -52,8 +52,8 @@
 
 #define KVM_MAX_VCPUS 1
 #define KVM_MEMORY_SLOTS 4
-#define KVM_NUM_MMU_PAGES 256
-#define KVM_MIN_FREE_MMU_PAGES 5
+#define KVM_NUM_MMU_PAGES 1024
+#define KVM_MIN_FREE_MMU_PAGES 10
 #define KVM_REFILL_PAGES 25
 
 #define FX_IMAGE_SIZE 512
@@ -166,7 +166,7 @@ struct kvm_mmu {
 	int root_level;
 	int shadow_root_level;
 
-	u64 *pae_root;
+	u64 *pae_root[KVM_CR3_CACHE_SIZE];
 };
 
 #define KVM_NR_MEM_OBJS 20
@@ -240,6 +240,9 @@ struct kvm_vcpu {
 	unsigned long cr3;
 	struct kvm_vcpu_para_state *para_state;
 	hpa_t vm_syscall_hpa;
+	unsigned int cr3_cache_idx;
+	unsigned int cr3_cache_limit;
+	gpa_t guest_cr3_gpa[KVM_CR3_CACHE_SIZE];
 	unsigned long cr4;
 	unsigned long cr8;
 	u64 pdptrs[4]; /* pae */
@@ -400,6 +403,8 @@ int kvm_mmu_setup(struct kvm_vcpu *vcpu)
 int kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
 void kvm_mmu_slot_remove_write_access(struct kvm_vcpu *vcpu, int slot);
 
+void kvm_cr3_cache_clear(struct kvm_vcpu *vcpu);
+
 hpa_t gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa);
 #define HPA_MSB ((sizeof(hpa_t) * 8) - 1)
 #define HPA_ERR_MASK ((hpa_t)1 << HPA_MSB)
@@ -445,9 +450,9 @@ int emulator_set_dr(struct x86_emulate_c
 		    unsigned long value);
 
 void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
-void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr0);
-void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr0);
-void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr0);
+void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3);
+void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
+void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8);
 void lmsw(struct kvm_vcpu *vcpu, unsigned long msw);
 
 int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
Index: linux/drivers/kvm/kvm_main.c
===================================================================
--- linux.orig/drivers/kvm/kvm_main.c
+++ linux/drivers/kvm/kvm_main.c
@@ -447,7 +447,7 @@ EXPORT_SYMBOL_GPL(set_cr4);
 void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 {
 	if (is_long_mode(vcpu)) {
-		if ( cr3 & CR3_L_MODE_RESEVED_BITS) {
+		if (cr3 & CR3_L_MODE_RESEVED_BITS) {
 			printk(KERN_DEBUG "set_cr3: #GP, reserved bits\n");
 			inject_gp(vcpu);
 			return;
Index: linux/drivers/kvm/mmu.c
===================================================================
--- linux.orig/drivers/kvm/mmu.c
+++ linux/drivers/kvm/mmu.c
@@ -782,7 +782,7 @@ static int nonpaging_map(struct kvm_vcpu
 
 static void mmu_free_roots(struct kvm_vcpu *vcpu)
 {
-	int i;
+	int i, j;
 	struct kvm_mmu_page *page;
 
 #ifdef CONFIG_X86_64
@@ -796,21 +796,40 @@ static void mmu_free_roots(struct kvm_vc
 		return;
 	}
 #endif
-	for (i = 0; i < 4; ++i) {
-		hpa_t root = vcpu->mmu.pae_root[i];
+	/*
+	 * Skip to the next cr3 cache entry and free it (if it's occupied):
+	 */
+	vcpu->cr3_cache_idx++;
+	if (unlikely(vcpu->cr3_cache_idx >= vcpu->cr3_cache_limit))
+		vcpu->cr3_cache_idx = 0;
 
-		ASSERT(VALID_PAGE(root));
-		root &= PT64_BASE_ADDR_MASK;
-		page = page_header(root);
-		--page->root_count;
-		vcpu->mmu.pae_root[i] = INVALID_PAGE;
+	j = vcpu->cr3_cache_idx;
+	/*
+	 * Clear the guest-visible entry:
+	 */
+	if (vcpu->para_state) {
+		vcpu->para_state->cr3_cache.entry[j].guest_cr3 = 0;
+		vcpu->para_state->cr3_cache.entry[j].host_cr3 = 0;
+	}
+	ASSERT(vcpu->mmu.pae_root[j]);
+	if (VALID_PAGE(vcpu->mmu.pae_root[j][0])) {
+		vcpu->guest_cr3_gpa[j] = INVALID_PAGE;
+		for (i = 0; i < 4; ++i) {
+			hpa_t root = vcpu->mmu.pae_root[j][i];
+
+			ASSERT(VALID_PAGE(root));
+			root &= PT64_BASE_ADDR_MASK;
+			page = page_header(root);
+			--page->root_count;
+			vcpu->mmu.pae_root[j][i] = INVALID_PAGE;
+		}
 	}
 	vcpu->mmu.root_hpa = INVALID_PAGE;
 }
 
 static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
 {
-	int i;
+	int i, j;
 	gfn_t root_gfn;
 	struct kvm_mmu_page *page;
 
@@ -829,8 +848,10 @@ static void mmu_alloc_roots(struct kvm_v
 		return;
 	}
 #endif
+
+	j = vcpu->cr3_cache_idx;
 	for (i = 0; i < 4; ++i) {
-		hpa_t root = vcpu->mmu.pae_root[i];
+		hpa_t root = vcpu->mmu.pae_root[j][i];
 
 		ASSERT(!VALID_PAGE(root));
 		if (vcpu->mmu.root_level == PT32E_ROOT_LEVEL)
@@ -842,9 +863,14 @@ static void mmu_alloc_roots(struct kvm_v
 					NULL);
 		root = page->page_hpa;
 		++page->root_count;
-		vcpu->mmu.pae_root[i] = root | PT_PRESENT_MASK;
+		vcpu->mmu.pae_root[j][i] = root | PT_PRESENT_MASK;
 	}
-	vcpu->mmu.root_hpa = __pa(vcpu->mmu.pae_root);
+	vcpu->mmu.root_hpa = __pa(vcpu->mmu.pae_root[j]);
+	/*
+	 * Store the guest-side address too, we need it if a guest
+	 * exits the VM, to rediscover what cr3 it changed to:
+	 */
+	vcpu->guest_cr3_gpa[j] = vcpu->cr3;
 }
 
 static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr)
@@ -877,7 +903,13 @@ static int nonpaging_page_fault(struct k
 
 static void nonpaging_free(struct kvm_vcpu *vcpu)
 {
-	mmu_free_roots(vcpu);
+	int j;
+
+	/*
+	 * This will cycle through all existing roots and free them:
+	 */
+	for (j = 0; j < KVM_CR3_CACHE_SIZE; j++)
+		mmu_free_roots(vcpu);
 }
 
 static int nonpaging_init_context(struct kvm_vcpu *vcpu)
@@ -896,20 +928,17 @@ static int nonpaging_init_context(struct
 	return 0;
 }
 
-static void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)
-{
-	++kvm_stat.tlb_flush;
-	kvm_arch_ops->tlb_flush(vcpu);
-}
-
 static void paging_new_cr3(struct kvm_vcpu *vcpu)
 {
 	pgprintk("%s: cr3 %lx\n", __FUNCTION__, vcpu->cr3);
+
 	mmu_free_roots(vcpu);
 	if (unlikely(vcpu->kvm->n_free_mmu_pages < KVM_MIN_FREE_MMU_PAGES))
 		kvm_mmu_free_some_pages(vcpu);
 	mmu_alloc_roots(vcpu);
-	kvm_mmu_flush_tlb(vcpu);
+	/*
+	 * Setting the cr3 will flush the TLB:
+	 */
 	kvm_arch_ops->set_cr3(vcpu, vcpu->mmu.root_hpa);
 }
 
@@ -1194,6 +1223,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_free_some_page
 static void free_mmu_pages(struct kvm_vcpu *vcpu)
 {
 	struct kvm_mmu_page *page;
+	int j;
 
 	while (!list_empty(&vcpu->kvm->active_mmu_pages)) {
 		page = container_of(vcpu->kvm->active_mmu_pages.next,
@@ -1207,13 +1237,17 @@ static void free_mmu_pages(struct kvm_vc
 		__free_page(pfn_to_page(page->page_hpa >> PAGE_SHIFT));
 		page->page_hpa = INVALID_PAGE;
 	}
-	free_page((unsigned long)vcpu->mmu.pae_root);
+	for (j = 0; j < KVM_CR3_CACHE_SIZE; j++) {
+		ASSERT(vcpu->mmu.pae_root[j]);
+		free_page((unsigned long)vcpu->mmu.pae_root[j]);
+		vcpu->mmu.pae_root[j] = NULL;
+	}
 }
 
 static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
 {
 	struct page *page;
-	int i;
+	int i, j;
 
 	ASSERT(vcpu);
 
@@ -1230,17 +1264,22 @@ static int alloc_mmu_pages(struct kvm_vc
 		++vcpu->kvm->n_free_mmu_pages;
 	}
 
-	/*
-	 * When emulating 32-bit mode, cr3 is only 32 bits even on x86_64.
-	 * Therefore we need to allocate shadow page tables in the first
-	 * 4GB of memory, which happens to fit the DMA32 zone.
-	 */
-	page = alloc_page(GFP_KERNEL | __GFP_DMA32);
-	if (!page)
-		goto error_1;
-	vcpu->mmu.pae_root = page_address(page);
-	for (i = 0; i < 4; ++i)
-		vcpu->mmu.pae_root[i] = INVALID_PAGE;
+	for (j = 0; j < KVM_CR3_CACHE_SIZE; j++) {
+		/*
+		 * When emulating 32-bit mode, cr3 is only 32 bits even on
+		 * x86_64. Therefore we need to allocate shadow page tables
+		 * in the first 4GB of memory, which happens to fit the DMA32
+		 * zone:
+		 */
+		page = alloc_page(GFP_KERNEL | __GFP_DMA32);
+		if (!page)
+			goto error_1;
+
+		ASSERT(!vcpu->mmu.pae_root[j]);
+		vcpu->mmu.pae_root[j] = page_address(page);
+		for (i = 0; i < 4; ++i)
+			vcpu->mmu.pae_root[j][i] = INVALID_PAGE;
+	}
 
 	return 0;
 
@@ -1344,15 +1383,19 @@ static void audit_mappings(struct kvm_vc
 {
-	int i;
+	int i, j;
 
-	if (vcpu->mmu.root_level == 4)
+	if (vcpu->mmu.root_level == 4) {
 		audit_mappings_page(vcpu, vcpu->mmu.root_hpa, 0, 4);
-	else
-		for (i = 0; i < 4; ++i)
-			if (vcpu->mmu.pae_root[i] & PT_PRESENT_MASK)
+		return;
+	}
+
+	for (j = 0; j < KVM_CR3_CACHE_SIZE; j++) {
+		for (i = 0; i < 4; ++i) {
+			if (vcpu->mmu.pae_root[j][i] & PT_PRESENT_MASK) {
 				audit_mappings_page(vcpu,
-						    vcpu->mmu.pae_root[i],
-						    i << 30,
-						    2);
+					vcpu->mmu.pae_root[j][i], i << 30, 2);
+			}
+		}
+	}
 }
 
 static int count_rmaps(struct kvm_vcpu *vcpu)
Index: linux/drivers/kvm/paging_tmpl.h
===================================================================
--- linux.orig/drivers/kvm/paging_tmpl.h
+++ linux/drivers/kvm/paging_tmpl.h
@@ -197,7 +197,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu
 	shadow_addr = vcpu->mmu.root_hpa;
 	level = vcpu->mmu.shadow_root_level;
 	if (level == PT32E_ROOT_LEVEL) {
-		shadow_addr = vcpu->mmu.pae_root[(addr >> 30) & 3];
+		shadow_addr = vcpu->mmu.pae_root[vcpu->cr3_cache_idx][(addr >> 30) & 3];
 		shadow_addr &= PT64_BASE_ADDR_MASK;
 		--level;
 	}
Index: linux/drivers/kvm/vmx.c
===================================================================
--- linux.orig/drivers/kvm/vmx.c
+++ linux/drivers/kvm/vmx.c
@@ -794,9 +794,54 @@ static void vmx_set_cr0_no_modeswitch(st
 	vcpu->cr0 = cr0;
 }
 
-static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
+static void print_area_in_hex(void *area, int size)
 {
-	vmcs_writel(GUEST_CR3, cr3);
+	unsigned char *data = area;
+	int i;
+
+	for (i = 0; i < size; i++, data++) {
+		if (!(i & 15))
+			printk("\n%p:", data);
+		printk(" %02x", *data);
+	}
+	printk("\n");
+}
+
+/*
+ * Clear the guest side of the cr3 cache:
+ */
+void kvm_cr3_cache_clear(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cr3_cache *cache;
+
+	if (!vcpu->para_state)
+		return;
+	cache = &vcpu->para_state->cr3_cache;
+	memset(cache->entry, 0, sizeof(cache->entry));
+}
+
+static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3_hpa)
+{
+	struct kvm_cr3_cache *cache;
+	int idx;
+
+	vmcs_writel(GUEST_CR3, cr3_hpa);
+	if (!vcpu->para_state)
+		return;
+
+	WARN_ON(vmcs_readl(GUEST_CR3) != vcpu->mmu.root_hpa);
+
+	idx = vcpu->cr3_cache_idx;
+	cache = &vcpu->para_state->cr3_cache;
+
+	/* NOTE: remove this check, in case of hostile guests: */
+	WARN_ON(cache->entry[idx].guest_cr3);
+	WARN_ON(cache->entry[idx].host_cr3);
+
+	cache->entry[idx].guest_cr3 = vcpu->cr3;
+	cache->entry[idx].host_cr3 = cr3_hpa;
+
+	vmcs_writel(CR3_TARGET_VALUE0 + idx*2, cr3_hpa);
 }
 
 static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
@@ -973,6 +1018,42 @@ static void seg_setup(int seg)
 }
 
 /*
+ * Set up the cr3 validity hardware cache:
+ */
+static int vmcs_setup_cr3_cache(struct kvm_vcpu *vcpu)
+{
+	unsigned int cr3_target_values, i;
+	u64 msr_val;
+
+	rdmsrl(MSR_IA32_VMX_MISC, msr_val);
+
+	printk("MSR_IA32_VMX_MISC: %016Lx\n", msr_val);
+
+	/*
+	 * 9 bits of "CR3 target values":
+	 */
+	cr3_target_values = (msr_val >> 16) & ((1 << 9) - 1);
+	printk(" cr3 target values: %d\n", cr3_target_values);
+	if (cr3_target_values > KVM_CR3_CACHE_SIZE) {
+		printk("KVM: limiting cr3 cache size from %d to %d\n",
+			cr3_target_values, KVM_CR3_CACHE_SIZE);
+		cr3_target_values = KVM_CR3_CACHE_SIZE;
+	}
+
+	vcpu->cr3_cache_idx = 0;
+	vcpu->cr3_cache_limit = cr3_target_values;
+	/*
+	 * Initialize. TODO: set this to guest physical memory.
+	 */
+	for (i = 0; i < cr3_target_values; i++)
+		vmcs_writel(CR3_TARGET_VALUE0 + i*2, -1UL);
+
+	vmcs_write32(CR3_TARGET_COUNT, cr3_target_values);
+
+	return 0;
+}
+
+/*
  * Sets up the vmcs for emulated real mode.
  */
 static int vmx_vcpu_setup(struct kvm_vcpu *vcpu)
@@ -1076,7 +1157,10 @@ static int vmx_vcpu_setup(struct kvm_vcp
 	vmcs_write32(EXCEPTION_BITMAP, 1 << PF_VECTOR);
 	vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0);
 	vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, 0);
-	vmcs_write32(CR3_TARGET_COUNT, 0);           /* 22.2.1 */
+
+	ret = vmcs_setup_cr3_cache(vcpu);
+	if (ret < 0)
+		goto out;
 
 	vmcs_writel(HOST_CR0, read_cr0());  /* 22.2.3 */
 	vmcs_writel(HOST_CR4, read_cr4());  /* 22.2.3, 22.2.5 */
@@ -1328,6 +1412,7 @@ static int handle_exception(struct kvm_v
 		cr2 = vmcs_readl(EXIT_QUALIFICATION);
 
 		spin_lock(&vcpu->kvm->lock);
+		kvm_cr3_cache_clear(vcpu);
 		r = kvm_mmu_page_fault(vcpu, cr2, error_code);
 		if (r < 0) {
 			spin_unlock(&vcpu->kvm->lock);
@@ -1499,6 +1584,7 @@ int vcpu_register_para(struct kvm_vcpu *
 		goto err_skip;
 	}
 
+	para_state->cr3_cache.max_idx = vcpu->cr3_cache_limit;
 	printk("KVM: para guest successfully registered.\n");
 	vcpu->para_state = para_state;
 	vcpu->vm_syscall_hpa = vm_syscall_hpa;
@@ -1694,6 +1780,14 @@ static int handle_halt(struct kvm_vcpu *
 	return 0;
 }
 
+static int handle_vmcall(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	kvm_run->exit_reason = KVM_EXIT_DEBUG;
+//	printk("got vmcall at RIP %08lx\n", vmcs_readl(GUEST_RIP));
+	vmcs_writel(GUEST_RIP, vmcs_readl(GUEST_RIP)+3);
+	return 1;
+}
+
 /*
  * The exit handlers return 1 if the exit was handled fully and guest execution
  * may resume.  Otherwise they set the kvm_run parameter to indicate what needs
@@ -1711,6 +1804,7 @@ static int (*kvm_vmx_exit_handlers[])(st
 	[EXIT_REASON_MSR_WRITE]               = handle_wrmsr,
 	[EXIT_REASON_PENDING_INTERRUPT]       = handle_interrupt_window,
 	[EXIT_REASON_HLT]                     = handle_halt,
+	[EXIT_REASON_VMCALL]                  = handle_vmcall,
 };
 
 static const int kvm_vmx_max_exit_handlers =
@@ -1755,48 +1849,43 @@ static int dm_request_for_irq_injection(
 		(vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF));
 }
 
-static int vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static void kvm_cr3_cache_sync(struct kvm_vcpu *vcpu)
 {
-	u8 fail;
-	u16 fs_sel, gs_sel, ldt_sel;
-	int fs_gs_ldt_reload_needed;
-	int r;
+	void *guest_cr3_hva;
+	hpa_t guest_cr3_hpa;
+	u64 *root;
+	int j;
+
+	if (!vcpu->para_state)
+		return;
+
+	guest_cr3_hpa = vmcs_readl(GUEST_CR3);
 
-again:
 	/*
-	 * Set host fs and gs selectors.  Unfortunately, 22.2.3 does not
-	 * allow segment selectors with cpl > 0 or ti == 1.
+	 * Are they in sync already?
 	 */
-	fs_sel = read_fs();
-	gs_sel = read_gs();
-	ldt_sel = read_ldt();
-	fs_gs_ldt_reload_needed = (fs_sel & 7) | (gs_sel & 7) | ldt_sel;
-	if (!fs_gs_ldt_reload_needed) {
-		vmcs_write16(HOST_FS_SELECTOR, fs_sel);
-		vmcs_write16(HOST_GS_SELECTOR, gs_sel);
-	} else {
-		vmcs_write16(HOST_FS_SELECTOR, 0);
-		vmcs_write16(HOST_GS_SELECTOR, 0);
-	}
-
-#ifdef CONFIG_X86_64
-	vmcs_writel(HOST_FS_BASE, read_msr(MSR_FS_BASE));
-	vmcs_writel(HOST_GS_BASE, read_msr(MSR_GS_BASE));
-#else
-	vmcs_writel(HOST_FS_BASE, segment_base(fs_sel));
-	vmcs_writel(HOST_GS_BASE, segment_base(gs_sel));
-#endif
+	if (guest_cr3_hpa == vcpu->mmu.root_hpa)
+		return;
 
-	do_interrupt_requests(vcpu, kvm_run);
+	guest_cr3_hva = __va(guest_cr3_hpa);
 
-	if (vcpu->guest_debug.enabled)
-		kvm_guest_debug_pre(vcpu);
+	for (j = 0; j < vcpu->cr3_cache_limit; j++) {
+		root = vcpu->mmu.pae_root[j];
+		WARN_ON(!root);
+		if (root != guest_cr3_hva)
+			continue;
 
-	fx_save(vcpu->host_fx_image);
-	fx_restore(vcpu->guest_fx_image);
+		vcpu->cr3 = vcpu->guest_cr3_gpa[j];
+		vcpu->cr3_cache_idx = j;
+		vcpu->mmu.root_hpa = __pa(vcpu->mmu.pae_root[j]);
+		break;
+	}
+	WARN_ON(j == vcpu->cr3_cache_limit);
+}
 
-	save_msrs(vcpu->host_msrs, vcpu->nmsrs);
-	load_msrs(vcpu->guest_msrs, NR_BAD_MSRS);
+static int __vmx_vcpu_run(struct kvm_vcpu *vcpu)
+{
+	u8 fail;
 
 	asm (
 		/* Store host registers */
@@ -1917,6 +2006,64 @@ again:
 		[cr2]"i"(offsetof(struct kvm_vcpu, cr2))
 	      : "cc", "memory" );
 
+	return fail;
+}
+
+static int vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	u8 fail;
+	u16 fs_sel, gs_sel, ldt_sel;
+	int fs_gs_ldt_reload_needed;
+	int r;
+
+again:
+	/*
+	 * Set host fs and gs selectors.  Unfortunately, 22.2.3 does not
+	 * allow segment selectors with cpl > 0 or ti == 1.
+	 */
+	fs_sel = read_fs();
+	gs_sel = read_gs();
+	ldt_sel = read_ldt();
+	fs_gs_ldt_reload_needed = (fs_sel & 7) | (gs_sel & 7) | ldt_sel;
+	if (!fs_gs_ldt_reload_needed) {
+		vmcs_write16(HOST_FS_SELECTOR, fs_sel);
+		vmcs_write16(HOST_GS_SELECTOR, gs_sel);
+	} else {
+		vmcs_write16(HOST_FS_SELECTOR, 0);
+		vmcs_write16(HOST_GS_SELECTOR, 0);
+	}
+
+#ifdef CONFIG_X86_64
+	vmcs_writel(HOST_FS_BASE, read_msr(MSR_FS_BASE));
+	vmcs_writel(HOST_GS_BASE, read_msr(MSR_GS_BASE));
+#else
+	vmcs_writel(HOST_FS_BASE, segment_base(fs_sel));
+	vmcs_writel(HOST_GS_BASE, segment_base(gs_sel));
+#endif
+
+	do_interrupt_requests(vcpu, kvm_run);
+
+	if (vcpu->guest_debug.enabled)
+		kvm_guest_debug_pre(vcpu);
+
+	fx_save(vcpu->host_fx_image);
+	fx_restore(vcpu->guest_fx_image);
+
+	save_msrs(vcpu->host_msrs, vcpu->nmsrs);
+	load_msrs(vcpu->guest_msrs, NR_BAD_MSRS);
+
+	WARN_ON(vmcs_readl(GUEST_CR3) != vcpu->mmu.root_hpa);
+
+	fail = __vmx_vcpu_run(vcpu);
+
+	/*
+	 * Figure out whether vcpu->cr3 needs updating because
+	 * the guest made use of the cr3 cache:
+	 */
+	kvm_cr3_cache_sync(vcpu);
+
+	WARN_ON(vmcs_readl(GUEST_CR3) != vcpu->mmu.root_hpa);
+
 	++kvm_stat.exits;
 
 	save_msrs(vcpu->guest_msrs, NR_BAD_MSRS);
@@ -1987,6 +2134,7 @@ again:
 static void vmx_flush_tlb(struct kvm_vcpu *vcpu)
 {
 	vmcs_writel(GUEST_CR3, vmcs_readl(GUEST_CR3));
+	kvm_cr3_cache_clear(vcpu);
 }
 
 static void vmx_inject_page_fault(struct kvm_vcpu *vcpu,
@@ -2016,7 +2164,7 @@ static void vmx_inject_page_fault(struct
 		     INTR_TYPE_EXCEPTION |
 		     INTR_INFO_DELIEVER_CODE_MASK |
 		     INTR_INFO_VALID_MASK);
-
+	kvm_cr3_cache_clear(vcpu);
 }
 
 static void vmx_free_vmcs(struct kvm_vcpu *vcpu)
Index: linux/drivers/kvm/vmx.h
===================================================================
--- linux.orig/drivers/kvm/vmx.h
+++ linux/drivers/kvm/vmx.h
@@ -292,5 +292,6 @@ enum vmcs_field {
 #define MSR_IA32_VMX_PROCBASED_CTLS		0x482
 #define MSR_IA32_VMX_EXIT_CTLS		0x483
 #define MSR_IA32_VMX_ENTRY_CTLS		0x484
+#define MSR_IA32_VMX_MISC		0x485
 
 #endif
