[patch 2/4] KVM: MMU: allow pinning spte translations (TDP-only)

All of lore.kernel.org
 help / color / mirror / Atom feed

From: mtosatti@redhat.com
To: kvm@vger.kernel.org, ak@linux.intel.com
Cc: pbonzini@redhat.com, xiaoguangrong@linux.vnet.ibm.com,
	gleb@kernel.org, avi.kivity@gmail.com
Subject: [patch 2/4] KVM: MMU: allow pinning spte translations (TDP-only)
Date: Wed, 09 Jul 2014 16:12:52 -0300	[thread overview]
Message-ID: <20140709191611.207208253@amt.cnet> (raw)
In-Reply-To: 20140709191250.408928362@amt.cnet

[-- Attachment #1: mmu-pinned-spte --]
[-- Type: text/plain, Size: 21829 bytes --]

Allow vcpus to pin spte translations by:

1) Creating a per-vcpu list of pinned ranges.
2) On mmu reload request:
	- Fault ranges.
	- Mark sptes with a pinned bit.
	- Mark shadow pages as pinned.

3) Then modify the following actions:
	- Page age => skip spte flush.
	- MMU notifiers => force mmu reload request (which kicks cpu out of
				guest mode).
	- GET_DIRTY_LOG => force mmu reload request.
	- SLAB shrinker => skip shadow page deletion.

TDP-only.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

---
 arch/x86/include/asm/kvm_host.h |   13 +
 arch/x86/kvm/mmu.c              |  294 +++++++++++++++++++++++++++++++++++++---
 arch/x86/kvm/mmu.h              |    7 
 arch/x86/kvm/mmutrace.h         |   23 +++
 arch/x86/kvm/paging_tmpl.h      |    4 
 arch/x86/kvm/x86.c              |    8 -
 include/linux/kvm_host.h        |    3 
 include/uapi/linux/kvm.h        |    2 
 virt/kvm/kvm_main.c             |   18 +-
 9 files changed, 340 insertions(+), 32 deletions(-)

Index: kvm.pinned-sptes/arch/x86/include/asm/kvm_host.h
===================================================================
--- kvm.pinned-sptes.orig/arch/x86/include/asm/kvm_host.h	2014-07-09 12:05:34.836161266 -0300
+++ kvm.pinned-sptes/arch/x86/include/asm/kvm_host.h	2014-07-09 12:08:45.341762782 -0300
@@ -97,6 +97,8 @@
 #define KVM_NR_FIXED_MTRR_REGION 88
 #define KVM_NR_VAR_MTRR 8
 
+#define KVM_MAX_PER_VCPU_PINNED_RANGE 10
+
 #define ASYNC_PF_PER_VCPU 64
 
 struct kvm_vcpu;
@@ -221,6 +223,8 @@
 	/* hold the gfn of each spte inside spt */
 	gfn_t *gfns;
 	bool unsync;
+	bool pinned;
+
 	int root_count;          /* Currently serving as active root */
 	unsigned int unsync_children;
 	unsigned long parent_ptes;	/* Reverse mapping for parent_pte */
@@ -337,6 +341,12 @@
 	KVM_DEBUGREG_WONT_EXIT = 2,
 };
 
+struct kvm_pinned_page_range {
+	gfn_t base_gfn;
+	unsigned long npages;
+	struct list_head link;
+};
+
 struct kvm_vcpu_arch {
 	/*
 	 * rip and regs accesses must go through
@@ -392,6 +402,9 @@
 	struct kvm_mmu_memory_cache mmu_page_cache;
 	struct kvm_mmu_memory_cache mmu_page_header_cache;
 
+	struct list_head pinned_mmu_pages;
+	atomic_t nr_pinned_ranges;
+
 	struct fpu guest_fpu;
 	u64 xcr0;
 	u64 guest_supported_xcr0;
Index: kvm.pinned-sptes/arch/x86/kvm/mmu.c
===================================================================
--- kvm.pinned-sptes.orig/arch/x86/kvm/mmu.c	2014-07-09 12:05:34.837161264 -0300
+++ kvm.pinned-sptes/arch/x86/kvm/mmu.c	2014-07-09 12:09:21.856684314 -0300
@@ -148,6 +148,9 @@
 
 #define SPTE_HOST_WRITEABLE	(1ULL << PT_FIRST_AVAIL_BITS_SHIFT)
 #define SPTE_MMU_WRITEABLE	(1ULL << (PT_FIRST_AVAIL_BITS_SHIFT + 1))
+#define SPTE_PINNED		(1ULL << (PT64_SECOND_AVAIL_BITS_SHIFT))
+
+#define SPTE_PINNED_BIT PT64_SECOND_AVAIL_BITS_SHIFT
 
 #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
 
@@ -327,6 +330,11 @@
 	return pte & PT_PRESENT_MASK && !is_mmio_spte(pte);
 }
 
+static int is_pinned_spte(u64 spte)
+{
+	return spte & SPTE_PINNED && is_shadow_present_pte(spte);
+}
+
 static int is_large_pte(u64 pte)
 {
 	return pte & PT_PAGE_SIZE_MASK;
@@ -1176,6 +1184,16 @@
 		kvm_flush_remote_tlbs(vcpu->kvm);
 }
 
+static bool vcpu_has_pinned(struct kvm_vcpu *vcpu)
+{
+	return atomic_read(&vcpu->arch.nr_pinned_ranges);
+}
+
+static void mmu_reload_pinned_vcpus(struct kvm *kvm)
+{
+	make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD, &vcpu_has_pinned);
+}
+
 /*
  * Write-protect on the specified @sptep, @pt_protect indicates whether
  * spte write-protection is caused by protecting shadow page table.
@@ -1268,7 +1286,8 @@
 }
 
 static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
-			   struct kvm_memory_slot *slot, unsigned long data)
+			   struct kvm_memory_slot *slot, unsigned long data,
+			   bool age)
 {
 	u64 *sptep;
 	struct rmap_iterator iter;
@@ -1278,6 +1297,14 @@
 		BUG_ON(!(*sptep & PT_PRESENT_MASK));
 		rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", sptep, *sptep);
 
+		if (is_pinned_spte(*sptep)) {
+			/* don't nuke pinned sptes if page aging: return
+ 			 * young=yes instead.
+ 			 */
+			if (age)
+				return 1;
+			mmu_reload_pinned_vcpus(kvm);
+		}
 		drop_spte(kvm, sptep);
 		need_tlb_flush = 1;
 	}
@@ -1286,7 +1313,8 @@
 }
 
 static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
-			     struct kvm_memory_slot *slot, unsigned long data)
+			     struct kvm_memory_slot *slot, unsigned long data,
+			     bool age)
 {
 	u64 *sptep;
 	struct rmap_iterator iter;
@@ -1304,6 +1332,9 @@
 
 		need_flush = 1;
 
+		if (is_pinned_spte(*sptep))
+			mmu_reload_pinned_vcpus(kvm);
+
 		if (pte_write(*ptep)) {
 			drop_spte(kvm, sptep);
 			sptep = rmap_get_first(*rmapp, &iter);
@@ -1334,7 +1365,8 @@
 				int (*handler)(struct kvm *kvm,
 					       unsigned long *rmapp,
 					       struct kvm_memory_slot *slot,
-					       unsigned long data))
+					       unsigned long data,
+					       bool age))
 {
 	int j;
 	int ret = 0;
@@ -1374,7 +1406,7 @@
 			rmapp = __gfn_to_rmap(gfn_start, j, memslot);
 
 			for (; idx <= idx_end; ++idx)
-				ret |= handler(kvm, rmapp++, memslot, data);
+				ret |= handler(kvm, rmapp++, memslot, data, false);
 		}
 	}
 
@@ -1385,7 +1417,8 @@
 			  unsigned long data,
 			  int (*handler)(struct kvm *kvm, unsigned long *rmapp,
 					 struct kvm_memory_slot *slot,
-					 unsigned long data))
+					 unsigned long data,
+					 bool age))
 {
 	return kvm_handle_hva_range(kvm, hva, hva + 1, data, handler);
 }
@@ -1406,7 +1439,8 @@
 }
 
 static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
-			 struct kvm_memory_slot *slot, unsigned long data)
+			 struct kvm_memory_slot *slot, unsigned long data,
+			 bool age)
 {
 	u64 *sptep;
 	struct rmap_iterator uninitialized_var(iter);
@@ -1421,7 +1455,7 @@
 	 * out actively used pages or breaking up actively used hugepages.
 	 */
 	if (!shadow_accessed_mask) {
-		young = kvm_unmap_rmapp(kvm, rmapp, slot, data);
+		young = kvm_unmap_rmapp(kvm, rmapp, slot, data, true);
 		goto out;
 	}
 
@@ -1442,7 +1476,8 @@
 }
 
 static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
-			      struct kvm_memory_slot *slot, unsigned long data)
+			      struct kvm_memory_slot *slot, unsigned long data,
+			      bool age)
 {
 	u64 *sptep;
 	struct rmap_iterator iter;
@@ -1480,7 +1515,7 @@
 
 	rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level);
 
-	kvm_unmap_rmapp(vcpu->kvm, rmapp, NULL, 0);
+	kvm_unmap_rmapp(vcpu->kvm, rmapp, NULL, 0, false);
 	kvm_flush_remote_tlbs(vcpu->kvm);
 }
 
@@ -2753,7 +2788,8 @@
 }
 
 static bool handle_abnormal_pfn(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
-				pfn_t pfn, unsigned access, int *ret_val)
+				pfn_t pfn, unsigned access, int *ret_val,
+				bool pin)
 {
 	bool ret = true;
 
@@ -2763,8 +2799,14 @@
 		goto exit;
 	}
 
-	if (unlikely(is_noslot_pfn(pfn)))
+	if (unlikely(is_noslot_pfn(pfn))) {
+		/* pinned sptes must point to RAM */
+		if (unlikely(pin)) {
+			*ret_val = -EFAULT;
+			goto exit;
+		}
 		vcpu_cache_mmio_info(vcpu, gva, gfn, access);
+	}
 
 	ret = false;
 exit:
@@ -2818,7 +2860,7 @@
  * - false: let the real page fault path to fix it.
  */
 static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
-			    u32 error_code)
+			    u32 error_code, bool pin)
 {
 	struct kvm_shadow_walk_iterator iterator;
 	struct kvm_mmu_page *sp;
@@ -2828,6 +2870,9 @@
 	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
 		return false;
 
+	if (pin)
+		return false;
+
 	if (!page_fault_can_be_fast(error_code))
 		return false;
 
@@ -2895,9 +2940,71 @@
 }
 
 static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
-			 gva_t gva, pfn_t *pfn, bool write, bool *writable);
+			 gva_t gva, pfn_t *pfn, bool write, bool *writable,
+			 bool pin);
 static void make_mmu_pages_available(struct kvm_vcpu *vcpu);
 
+
+static int get_sptep_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes[4])
+
+{
+	struct kvm_shadow_walk_iterator iterator;
+	int nr_sptes = 0;
+
+	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
+		return nr_sptes;
+
+	for_each_shadow_entry(vcpu, addr, iterator) {
+		sptes[iterator.level-1] = iterator.sptep;
+		nr_sptes++;
+		if (!is_shadow_present_pte(*iterator.sptep))
+			break;
+	}
+
+	return nr_sptes;
+}
+
+static bool __direct_pin_sptes(struct kvm_vcpu *vcpu, gfn_t gfn, bool pin)
+{
+	u64 *sptes[4];
+	int r, i, level;
+
+	r = get_sptep_hierarchy(vcpu, gfn << PAGE_SHIFT, sptes);
+	if (!r)
+		return false;
+
+	level = 5 - r;
+	if (!is_last_spte(*sptes[level-1], level))
+		return false;
+	if (!is_shadow_present_pte(*sptes[level-1]))
+		return false;
+
+	for (i = 0; i < r; i++) {
+		u64 *sptep = sptes[3-i];
+		struct kvm_mmu_page *sp = page_header(__pa(sptep));
+
+		if (pin) {
+			sp->pinned = true;
+			set_bit(SPTE_PINNED_BIT, (unsigned long *)sptep);
+		} else {
+			sp->pinned = false;
+			clear_bit(SPTE_PINNED_BIT, (unsigned long *)sptep);
+		}
+	}
+
+	return true;
+}
+
+static bool direct_pin_sptes(struct kvm_vcpu *vcpu, gfn_t gfn)
+{
+	return __direct_pin_sptes(vcpu, gfn, true);
+}
+
+static bool direct_unpin_sptes(struct kvm_vcpu *vcpu, gfn_t gfn)
+{
+	return __direct_pin_sptes(vcpu, gfn, false);
+}
+
 static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
 			 gfn_t gfn, bool prefault, bool pin, bool *pinned)
 {
@@ -2923,16 +3030,17 @@
 	} else
 		level = PT_PAGE_TABLE_LEVEL;
 
-	if (fast_page_fault(vcpu, v, level, error_code))
+	if (fast_page_fault(vcpu, v, level, error_code, pin))
 		return 0;
 
 	mmu_seq = vcpu->kvm->mmu_notifier_seq;
 	smp_rmb();
 
-	if (try_async_pf(vcpu, prefault, gfn, v, &pfn, write, &map_writable))
+	if (try_async_pf(vcpu, prefault, gfn, v, &pfn, write, &map_writable,
+			 pin))
 		return 0;
 
-	if (handle_abnormal_pfn(vcpu, v, gfn, pfn, ACC_ALL, &r))
+	if (handle_abnormal_pfn(vcpu, v, gfn, pfn, ACC_ALL, &r, pin))
 		return r;
 
 	spin_lock(&vcpu->kvm->mmu_lock);
@@ -2943,6 +3051,8 @@
 		transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level);
 	r = __direct_map(vcpu, v, write, map_writable, level, gfn, pfn,
 			 prefault);
+	if (pin)
+		*pinned = direct_pin_sptes(vcpu, gfn);
 	spin_unlock(&vcpu->kvm->mmu_lock);
 
 
@@ -3131,7 +3241,7 @@
 
 			lm_root = (void*)get_zeroed_page(GFP_KERNEL);
 			if (lm_root == NULL)
-				return 1;
+				return -ENOMEM;
 
 			lm_root[0] = __pa(vcpu->arch.mmu.pae_root) | pm_mask;
 
@@ -3349,7 +3459,8 @@
 }
 
 static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
-			 gva_t gva, pfn_t *pfn, bool write, bool *writable)
+			 gva_t gva, pfn_t *pfn, bool write, bool *writable,
+			 bool pin)
 {
 	bool async;
 
@@ -3358,7 +3469,7 @@
 	if (!async)
 		return false; /* *pfn has correct page already */
 
-	if (!prefault && can_do_async_pf(vcpu)) {
+	if (!prefault && !pin && can_do_async_pf(vcpu)) {
 		trace_kvm_try_async_get_page(gva, gfn);
 		if (kvm_find_async_pf_gfn(vcpu, gfn)) {
 			trace_kvm_async_pf_doublefault(gva, gfn);
@@ -3406,16 +3517,17 @@
 	} else
 		level = PT_PAGE_TABLE_LEVEL;
 
-	if (fast_page_fault(vcpu, gpa, level, error_code))
+	if (fast_page_fault(vcpu, gpa, level, error_code, pin))
 		return 0;
 
 	mmu_seq = vcpu->kvm->mmu_notifier_seq;
 	smp_rmb();
 
-	if (try_async_pf(vcpu, prefault, gfn, gpa, &pfn, write, &map_writable))
+	if (try_async_pf(vcpu, prefault, gfn, gpa, &pfn, write, &map_writable,
+			 pin))
 		return 0;
 
-	if (handle_abnormal_pfn(vcpu, 0, gfn, pfn, ACC_ALL, &r))
+	if (handle_abnormal_pfn(vcpu, 0, gfn, pfn, ACC_ALL, &r, pin))
 		return r;
 
 	spin_lock(&vcpu->kvm->mmu_lock);
@@ -3426,6 +3538,8 @@
 		transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level);
 	r = __direct_map(vcpu, gpa, write, map_writable,
 			 level, gfn, pfn, prefault);
+	if (pin)
+		*pinned = direct_pin_sptes(vcpu, gfn);
 	spin_unlock(&vcpu->kvm->mmu_lock);
 
 	return r;
@@ -3903,6 +4017,141 @@
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_reset_context);
 
+int kvm_mmu_register_pinned_range(struct kvm_vcpu *vcpu,
+				  gfn_t base_gfn, unsigned long npages)
+{
+	struct kvm_pinned_page_range *p;
+
+	if (!tdp_enabled) {
+		WARN_ON(1);
+		return -EINVAL;
+	}
+
+	list_for_each_entry(p, &vcpu->arch.pinned_mmu_pages, link) {
+		if (p->base_gfn == base_gfn && p->npages == npages) {
+			return -EEXIST;
+		}
+	}
+
+	if (atomic_read(&vcpu->arch.nr_pinned_ranges) >=
+	    KVM_MAX_PER_VCPU_PINNED_RANGE)
+		return -ENOSPC;
+
+	p = kzalloc(sizeof(struct kvm_pinned_page_range), GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
+	atomic_inc(&vcpu->arch.nr_pinned_ranges);
+
+	trace_kvm_mmu_register_pinned_range(vcpu->vcpu_id, base_gfn, npages);
+
+	INIT_LIST_HEAD(&p->link);
+	p->base_gfn = base_gfn;
+	p->npages = npages;
+	list_add(&p->link, &vcpu->arch.pinned_mmu_pages);
+	kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
+
+	return 0;
+}
+
+
+void unregister_pinned_sptes(struct kvm_vcpu *vcpu, gfn_t base_gfn,
+			     unsigned long npages)
+{
+	gfn_t gfn;
+
+	/* unpin every gfn; base_gfn stays gfn_t, as in the caller */
+	for (gfn = base_gfn; gfn < base_gfn + npages; gfn++)
+		direct_unpin_sptes(vcpu, gfn);
+}
+
+int kvm_mmu_unregister_pinned_range(struct kvm_vcpu *vcpu,
+				    gfn_t base_gfn, unsigned long npages)
+{
+	struct kvm_pinned_page_range *p;
+
+	list_for_each_entry(p, &vcpu->arch.pinned_mmu_pages, link) {
+		if (p->base_gfn == base_gfn && p->npages == npages) {
+			list_del(&p->link);
+			atomic_dec(&vcpu->arch.nr_pinned_ranges);
+			spin_lock(&vcpu->kvm->mmu_lock);
+			mmu_reload_pinned_vcpus(vcpu->kvm);
+			unregister_pinned_sptes(vcpu, base_gfn, npages);
+			spin_unlock(&vcpu->kvm->mmu_lock);
+			kfree(p);
+			return 0;
+		}
+	}
+
+	return -ENOENT;
+}
+
+void kvm_mmu_free_pinned_ranges(struct kvm_vcpu *vcpu)
+{
+	struct kvm_pinned_page_range *p, *p2;
+
+	list_for_each_entry_safe(p, p2, &vcpu->arch.pinned_mmu_pages, link) {
+		list_del(&p->link);
+		kfree(p);
+	}
+}
+
+/*
+ * Pin KVM MMU page translations. This guarantees, for valid
+ * addresses registered by kvm_mmu_register_pinned_range (valid address
+ * meaning an address which possesses sufficient information for the
+ * fault to be resolved), that valid translations exist while in guest
+ * mode and therefore no VM-exits due to faults will occur.
+ *
+ * Failure to instantiate pages will abort guest entry.
+ *
+ * Pinning is not guaranteed while executing as L2 guest.
+ *
+ * Returns 1 on success or when pinning is skipped, 0 when a page could
+ * not be faulted in (vcpu->run then reports KVM_INTERNAL_ERROR_PIN_FAILURE).
+ */
+static int kvm_mmu_pin_pages(struct kvm_vcpu *vcpu)
+{
+	struct kvm_pinned_page_range *p;
+	int r = 1;
+
+	if (is_guest_mode(vcpu))
+		return r;
+
+	if (!vcpu->arch.mmu.direct_map)
+		return r;
+
+	ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa));
+
+	list_for_each_entry(p, &vcpu->arch.pinned_mmu_pages, link) {
+		gfn_t gfn_offset;
+
+		for (gfn_offset = 0; gfn_offset < p->npages; gfn_offset++) {
+			gfn_t gfn = p->base_gfn + gfn_offset;
+			int ret;	/* per-gfn result; must not shadow r */
+			bool pinned = false;
+
+			ret = vcpu->arch.mmu.page_fault(vcpu, gfn << PAGE_SHIFT,
+						       PFERR_WRITE_MASK, false,
+						       true, &pinned);
+			/* MMU notifier sequence window: retry */
+			if (!ret && !pinned)
+				kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
+			if (ret) {
+				vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+				vcpu->run->internal.suberror =
+					KVM_INTERNAL_ERROR_PIN_FAILURE;
+				vcpu->run->internal.ndata = 1;
+				vcpu->run->internal.data[0] = gfn;
+				r = 0;
+				goto out;
+			}
+		}
+	}
+out:
+	return r;
+}
+
 int kvm_mmu_load(struct kvm_vcpu *vcpu)
 {
 	int r;
@@ -3916,6 +4165,7 @@
 		goto out;
 	/* set_cr3() should ensure TLB has been flushed */
 	vcpu->arch.mmu.set_cr3(vcpu, vcpu->arch.mmu.root_hpa);
+	r = kvm_mmu_pin_pages(vcpu);
 out:
 	return r;
 }
Index: kvm.pinned-sptes/arch/x86/kvm/mmu.h
===================================================================
--- kvm.pinned-sptes.orig/arch/x86/kvm/mmu.h	2014-07-09 12:05:30.018171068 -0300
+++ kvm.pinned-sptes/arch/x86/kvm/mmu.h	2014-07-09 12:08:45.343762778 -0300
@@ -94,7 +94,7 @@
 static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu)
 {
 	if (likely(vcpu->arch.mmu.root_hpa != INVALID_PAGE))
-		return 0;
+		return 1;
 
 	return kvm_mmu_load(vcpu);
 }
@@ -178,4 +178,9 @@
 }
 
 void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm);
+int kvm_mmu_register_pinned_range(struct kvm_vcpu *vcpu,
+				 gfn_t base_gfn, unsigned long npages);
+int kvm_mmu_unregister_pinned_range(struct kvm_vcpu *vcpu,
+				 gfn_t base_gfn, unsigned long npages);
+void kvm_mmu_free_pinned_ranges(struct kvm_vcpu *vcpu);
 #endif
Index: kvm.pinned-sptes/arch/x86/kvm/x86.c
===================================================================
--- kvm.pinned-sptes.orig/arch/x86/kvm/x86.c	2014-07-09 12:05:34.838161262 -0300
+++ kvm.pinned-sptes/arch/x86/kvm/x86.c	2014-07-09 12:08:45.346762771 -0300
@@ -6017,7 +6017,7 @@
 	}
 
 	r = kvm_mmu_reload(vcpu);
-	if (unlikely(r)) {
+	if (unlikely(r <= 0)) {
 		goto cancel_injection;
 	}
 
@@ -7049,6 +7049,8 @@
 
 	kvm_async_pf_hash_reset(vcpu);
 	kvm_pmu_init(vcpu);
+	INIT_LIST_HEAD(&vcpu->arch.pinned_mmu_pages);
+	atomic_set(&vcpu->arch.nr_pinned_ranges, 0);
 
 	return 0;
 fail_free_wbinvd_dirty_mask:
@@ -7069,6 +7071,7 @@
 {
 	int idx;
 
+	kvm_mmu_free_pinned_ranges(vcpu);
 	kvm_pmu_destroy(vcpu);
 	kfree(vcpu->arch.mce_banks);
 	kvm_free_lapic(vcpu);
@@ -7113,6 +7116,7 @@
 	int r;
 	r = vcpu_load(vcpu);
 	BUG_ON(r);
+	kvm_mmu_free_pinned_ranges(vcpu);
 	kvm_mmu_unload(vcpu);
 	vcpu_put(vcpu);
 }
@@ -7408,7 +7412,7 @@
 		return;
 
 	r = kvm_mmu_reload(vcpu);
-	if (unlikely(r))
+	if (unlikely(r <= 0))
 		return;
 
 	if (!vcpu->arch.mmu.direct_map &&
Index: kvm.pinned-sptes/arch/x86/kvm/paging_tmpl.h
===================================================================
--- kvm.pinned-sptes.orig/arch/x86/kvm/paging_tmpl.h	2014-07-09 12:05:34.837161264 -0300
+++ kvm.pinned-sptes/arch/x86/kvm/paging_tmpl.h	2014-07-09 12:08:45.346762771 -0300
@@ -747,11 +747,11 @@
 	smp_rmb();
 
 	if (try_async_pf(vcpu, prefault, walker.gfn, addr, &pfn, write_fault,
-			 &map_writable))
+			 &map_writable, false))
 		return 0;
 
 	if (handle_abnormal_pfn(vcpu, mmu_is_nested(vcpu) ? 0 : addr,
-				walker.gfn, pfn, walker.pte_access, &r))
+				walker.gfn, pfn, walker.pte_access, &r, false))
 		return r;
 
 	/*
Index: kvm.pinned-sptes/arch/x86/kvm/mmutrace.h
===================================================================
--- kvm.pinned-sptes.orig/arch/x86/kvm/mmutrace.h	2014-07-09 12:05:30.018171068 -0300
+++ kvm.pinned-sptes/arch/x86/kvm/mmutrace.h	2014-07-09 12:08:45.347762769 -0300
@@ -322,6 +322,29 @@
 		  __entry->kvm_gen == __entry->spte_gen
 	)
 );
+
+TRACE_EVENT(
+	kvm_mmu_register_pinned_range,
+	TP_PROTO(unsigned int vcpu_id, gfn_t gfn, unsigned long npages),
+	TP_ARGS(vcpu_id, gfn, npages),
+
+	TP_STRUCT__entry(
+		__field(	unsigned int,	vcpu_id	)
+		__field(	gfn_t,		gfn	)
+		__field(	unsigned long,	npages	)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_id		= vcpu_id;
+		__entry->gfn			= gfn;
+		__entry->npages			= npages;
+	),
+
+	TP_printk("vcpu_id %u gfn %llx npages %lx",
+		  __entry->vcpu_id,
+		  __entry->gfn,
+		  __entry->npages)
+);
 #endif /* _TRACE_KVMMMU_H */
 
 #undef TRACE_INCLUDE_PATH
Index: kvm.pinned-sptes/include/uapi/linux/kvm.h
===================================================================
--- kvm.pinned-sptes.orig/include/uapi/linux/kvm.h	2014-07-09 12:05:30.019171066 -0300
+++ kvm.pinned-sptes/include/uapi/linux/kvm.h	2014-07-09 12:08:45.347762769 -0300
@@ -180,6 +180,8 @@
 #define KVM_INTERNAL_ERROR_SIMUL_EX	2
 /* Encounter unexpected vm-exit due to delivery event. */
 #define KVM_INTERNAL_ERROR_DELIVERY_EV	3
+/* Failure to pin address translation. */
+#define KVM_INTERNAL_ERROR_PIN_FAILURE	4
 
 /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */
 struct kvm_run {
Index: kvm.pinned-sptes/include/linux/kvm_host.h
===================================================================
--- kvm.pinned-sptes.orig/include/linux/kvm_host.h	2014-07-09 12:05:30.019171066 -0300
+++ kvm.pinned-sptes/include/linux/kvm_host.h	2014-07-09 12:08:45.348762767 -0300
@@ -591,6 +591,9 @@
 void kvm_load_guest_fpu(struct kvm_vcpu *vcpu);
 void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
 
+bool make_all_cpus_request(struct kvm *kvm, unsigned int req,
+			   bool (*vcpukick)(struct kvm_vcpu *));
+
 void kvm_flush_remote_tlbs(struct kvm *kvm);
 void kvm_reload_remote_mmus(struct kvm *kvm);
 void kvm_make_mclock_inprogress_request(struct kvm *kvm);
Index: kvm.pinned-sptes/virt/kvm/kvm_main.c
===================================================================
--- kvm.pinned-sptes.orig/virt/kvm/kvm_main.c	2014-07-09 12:05:30.019171066 -0300
+++ kvm.pinned-sptes/virt/kvm/kvm_main.c	2014-07-09 12:08:45.349762765 -0300
@@ -152,7 +152,8 @@
 {
 }
 
-static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
+bool make_all_cpus_request(struct kvm *kvm, unsigned int req,
+			   bool (*vcpukick)(struct kvm_vcpu *))
 {
 	int i, cpu, me;
 	cpumask_var_t cpus;
@@ -163,6 +164,8 @@
 
 	me = get_cpu();
 	kvm_for_each_vcpu(i, vcpu, kvm) {
+		if (vcpukick && !vcpukick(vcpu))
+			continue;
 		kvm_make_request(req, vcpu);
 		cpu = vcpu->cpu;
 
@@ -189,7 +192,7 @@
 	long dirty_count = kvm->tlbs_dirty;
 
 	smp_mb();
-	if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
+	if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH, NULL))
 		++kvm->stat.remote_tlb_flush;
 	cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
 }
@@ -197,17 +200,22 @@
 
 void kvm_reload_remote_mmus(struct kvm *kvm)
 {
-	make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD);
+	make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD, NULL);
+}
+
+void kvm_reload_pinned_remote_mmus(struct kvm *kvm)
+{
+	make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD, NULL);
 }
 
 void kvm_make_mclock_inprogress_request(struct kvm *kvm)
 {
-	make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS);
+	make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS, NULL);
 }
 
 void kvm_make_scan_ioapic_request(struct kvm *kvm)
 {
-	make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC);
+	make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC, NULL);
 }
 
 int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)

next prev parent reply	other threads:[~2014-07-09 19:17 UTC|newest]

Thread overview: 30+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-07-09 19:12 [patch 0/4] KVM: support for pinning sptes (v2) mtosatti
2014-07-09 19:12 ` [patch 1/4] KVM: x86: add pinned parameter to page_fault methods mtosatti
2014-07-09 19:12 ` mtosatti [this message]
2014-07-17 17:18   ` [patch 2/4] KVM: MMU: allow pinning spte translations (TDP-only) Nadav Amit
2014-07-17 21:38     ` Marcelo Tosatti
2014-07-24 12:16       ` Nadav Amit
2014-07-21 21:46   ` Xiao Guangrong
2014-07-22  5:26     ` Xiao Guangrong
2014-07-09 19:12 ` [patch 3/4] KVM: MMU: reload request from GET_DIRTY_LOG path mtosatti
2014-07-21 13:14   ` Gleb Natapov
2014-09-09 15:28     ` Marcelo Tosatti
2014-09-22 17:19       ` Marcelo Tosatti
2014-09-30 18:59         ` Marcelo Tosatti
2014-10-04  7:23       ` Gleb Natapov
2014-10-06 17:19         ` Marcelo Tosatti
2014-10-08  6:56           ` Gleb Natapov
2014-10-08 17:15             ` Marcelo Tosatti
2014-10-08 17:59               ` Gleb Natapov
2014-10-08 19:22                 ` Marcelo Tosatti
2014-10-10 13:09                   ` Gleb Natapov
2014-10-13  8:52                     ` Marcelo Tosatti
2014-10-15  8:03                       ` Gleb Natapov
2014-07-21 21:55   ` Xiao Guangrong
2014-09-09 15:35     ` Marcelo Tosatti
2014-07-09 19:12 ` [patch 4/4] KVM: MMU: pinned sps are not candidates for deletion mtosatti
2014-07-21 21:59   ` Xiao Guangrong
2014-09-09 15:41     ` Marcelo Tosatti
2014-09-22 17:17       ` Marcelo Tosatti
2014-09-23  5:30       ` Xiao Guangrong
2014-07-09 19:20 ` [patch 0/4] KVM: support for pinning sptes (v2) Marcelo Tosatti

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20140709191611.207208253@amt.cnet \
    --to=mtosatti@redhat.com \
    --cc=ak@linux.intel.com \
    --cc=avi.kivity@gmail.com \
    --cc=gleb@kernel.org \
    --cc=kvm@vger.kernel.org \
    --cc=pbonzini@redhat.com \
    --cc=xiaoguangrong@linux.vnet.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.