[patch 3/3] KVM: propagate fault r/w information to gup(), allow read-only mappings

public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed

From: Marcelo Tosatti <mtosatti@redhat.com>
To: kvm@vger.kernel.org
Cc: avi@redhat.com, aarcange@redhat.com,
	Marcelo Tosatti <mtosatti@redhat.com>
Subject: [patch 3/3] KVM: propagate fault r/w information to gup(), allow read-only mappings
Date: Tue, 19 Oct 2010 14:26:16 -0200	[thread overview]
Message-ID: <20101019162726.570196410@redhat.com> (raw)
In-Reply-To: 20101019162613.278902252@redhat.com

[-- Attachment #1: gfn-to-pfn-read-only --]
[-- Type: text/plain, Size: 10305 bytes --]

As suggested by Andrea, pass r/w error code to gup(), upgrading read fault
to writable if host pte allows it.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Index: kvm/arch/x86/kvm/mmu.c
===================================================================
--- kvm.orig/arch/x86/kvm/mmu.c
+++ kvm/arch/x86/kvm/mmu.c
@@ -2081,8 +2081,7 @@ static void mmu_set_spte(struct kvm_vcpu
  		 * will find a read-only spte, even though the writable spte
  		 * might be cached on a CPU's TLB.
  		 */
-		} else if (is_writable_pte(*sptep) &&
-			  (!(pte_access & ACC_WRITE_MASK) || !dirty)) {
+		} else if (is_writable_pte(*sptep) && !dirty) {
 			drop_spte(vcpu->kvm, sptep, shadow_trap_nonpresent_pte);
 			kvm_flush_remote_tlbs(vcpu->kvm);
 		} else
@@ -2222,7 +2221,7 @@ static void direct_pte_prefetch(struct k
 }
 
 static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
-			int level, gfn_t gfn, pfn_t pfn)
+			int map_writable, int level, gfn_t gfn, pfn_t pfn)
 {
 	struct kvm_shadow_walk_iterator iterator;
 	struct kvm_mmu_page *sp;
@@ -2231,9 +2230,13 @@ static int __direct_map(struct kvm_vcpu 
 
 	for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) {
 		if (iterator.level == level) {
-			mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL,
+			unsigned pte_access = ACC_ALL;
+
+			if (!map_writable)
+				pte_access &= ~ACC_WRITE_MASK;
+			mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, pte_access,
 				     0, write, 1, &pt_write,
-				     level, gfn, pfn, false, true);
+				     level, gfn, pfn, false, map_writable);
 			direct_pte_prefetch(vcpu, iterator.sptep);
 			++vcpu->stat.pf_fixed;
 			break;
@@ -2294,6 +2297,7 @@ static int nonpaging_map(struct kvm_vcpu
 	int level;
 	pfn_t pfn;
 	unsigned long mmu_seq;
+	bool map_writable;
 
 	level = mapping_level(vcpu, gfn);
 
@@ -2308,7 +2312,7 @@ static int nonpaging_map(struct kvm_vcpu
 
 	mmu_seq = vcpu->kvm->mmu_notifier_seq;
 	smp_rmb();
-	pfn = gfn_to_pfn(vcpu->kvm, gfn);
+	pfn = gfn_to_pfn_prot(vcpu->kvm, gfn, write, &map_writable);
 
 	/* mmio */
 	if (is_error_pfn(pfn))
@@ -2318,7 +2322,7 @@ static int nonpaging_map(struct kvm_vcpu
 	if (mmu_notifier_retry(vcpu, mmu_seq))
 		goto out_unlock;
 	kvm_mmu_free_some_pages(vcpu);
-	r = __direct_map(vcpu, v, write, level, gfn, pfn);
+	r = __direct_map(vcpu, v, write, map_writable, level, gfn, pfn);
 	spin_unlock(&vcpu->kvm->mmu_lock);
 
 
@@ -2616,11 +2620,11 @@ static bool can_do_async_pf(struct kvm_v
 }
 
 static bool try_async_pf(struct kvm_vcpu *vcpu, bool no_apf, gfn_t gfn,
-			 gva_t gva, pfn_t *pfn)
+			 gva_t gva, pfn_t *pfn, bool write, bool *writable)
 {
 	bool async;
 
-	*pfn = gfn_to_pfn_async(vcpu->kvm, gfn, &async);
+	*pfn = gfn_to_pfn_async(vcpu->kvm, gfn, &async, write, writable);
 
 	if (!async)
 		return false; /* *pfn has correct page already */
@@ -2637,7 +2641,7 @@ static bool try_async_pf(struct kvm_vcpu
 			return true;
 	}
 
-	*pfn = gfn_to_pfn(vcpu->kvm, gfn);
+	*pfn = gfn_to_pfn_prot(vcpu->kvm, gfn, write, writable);
 	
 	return false;
 }
@@ -2650,6 +2654,8 @@ static int tdp_page_fault(struct kvm_vcp
 	int level;
 	gfn_t gfn = gpa >> PAGE_SHIFT;
 	unsigned long mmu_seq;
+	int write = error_code & PFERR_WRITE_MASK;
+	bool map_writable;
 
 	ASSERT(vcpu);
 	ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa));
@@ -2665,7 +2671,7 @@ static int tdp_page_fault(struct kvm_vcp
 	mmu_seq = vcpu->kvm->mmu_notifier_seq;
 	smp_rmb();
 
-	if (try_async_pf(vcpu, no_apf, gfn, gpa, &pfn))
+	if (try_async_pf(vcpu, no_apf, gfn, gpa, &pfn, write, &map_writable))
 		return 0;
 
 	/* mmio */
@@ -2675,7 +2681,7 @@ static int tdp_page_fault(struct kvm_vcp
 	if (mmu_notifier_retry(vcpu, mmu_seq))
 		goto out_unlock;
 	kvm_mmu_free_some_pages(vcpu);
-	r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK,
+	r = __direct_map(vcpu, gpa, write, map_writable,
 			 level, gfn, pfn);
 	spin_unlock(&vcpu->kvm->mmu_lock);
 
Index: kvm/arch/x86/kvm/paging_tmpl.h
===================================================================
--- kvm.orig/arch/x86/kvm/paging_tmpl.h
+++ kvm/arch/x86/kvm/paging_tmpl.h
@@ -427,7 +427,7 @@ static void FNAME(pte_prefetch)(struct k
 static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 			 struct guest_walker *gw,
 			 int user_fault, int write_fault, int hlevel,
-			 int *ptwrite, pfn_t pfn)
+			 int *ptwrite, pfn_t pfn, bool map_writable)
 {
 	unsigned access = gw->pt_access;
 	struct kvm_mmu_page *sp = NULL;
@@ -501,7 +501,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu
 
 	mmu_set_spte(vcpu, it.sptep, access, gw->pte_access & access,
 		     user_fault, write_fault, dirty, ptwrite, it.level,
-		     gw->gfn, pfn, false, true);
+		     gw->gfn, pfn, false, map_writable);
 	FNAME(pte_prefetch)(vcpu, gw, it.sptep);
 
 	return it.sptep;
@@ -539,6 +539,7 @@ static int FNAME(page_fault)(struct kvm_
 	pfn_t pfn;
 	int level = PT_PAGE_TABLE_LEVEL;
 	unsigned long mmu_seq;
+	bool map_writable;
 
 	pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
 
@@ -569,13 +570,17 @@ static int FNAME(page_fault)(struct kvm_
 	mmu_seq = vcpu->kvm->mmu_notifier_seq;
 	smp_rmb();
 
-	if (try_async_pf(vcpu, no_apf, walker.gfn, addr, &pfn))
+	if (try_async_pf(vcpu, no_apf, walker.gfn, addr, &pfn, write_fault,
+			 &map_writable))
 		return 0;
 
 	/* mmio */
 	if (is_error_pfn(pfn))
 		return kvm_handle_bad_page(vcpu->kvm, walker.gfn, pfn);
 
+	if (!map_writable)
+		walker.pte_access &= ~ACC_WRITE_MASK;
+
 	spin_lock(&vcpu->kvm->mmu_lock);
 	if (mmu_notifier_retry(vcpu, mmu_seq))
 		goto out_unlock;
@@ -583,7 +588,7 @@ static int FNAME(page_fault)(struct kvm_
 	trace_kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT);
 	kvm_mmu_free_some_pages(vcpu);
 	sptep = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
-			     level, &write_pt, pfn);
+			     level, &write_pt, pfn, map_writable);
 	(void)sptep;
 	pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __func__,
 		 sptep, *sptep, write_pt);
Index: kvm/include/linux/kvm_host.h
===================================================================
--- kvm.orig/include/linux/kvm_host.h
+++ kvm/include/linux/kvm_host.h
@@ -334,8 +334,11 @@ void kvm_set_page_accessed(struct page *
 
 pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr);
 pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn);
-pfn_t gfn_to_pfn_async(struct kvm *kvm, gfn_t gfn, bool *async);
+pfn_t gfn_to_pfn_async(struct kvm *kvm, gfn_t gfn, bool *async,
+		       bool write_fault, bool *writable);
 pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn);
+pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
+		      bool *writable);
 pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
 			 struct kvm_memory_slot *slot, gfn_t gfn);
 int memslot_id(struct kvm *kvm, gfn_t gfn);
Index: kvm/virt/kvm/kvm_main.c
===================================================================
--- kvm.orig/virt/kvm/kvm_main.c
+++ kvm/virt/kvm/kvm_main.c
@@ -955,7 +955,7 @@ unsigned long gfn_to_hva(struct kvm *kvm
 EXPORT_SYMBOL_GPL(gfn_to_hva);
 
 static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic,
-			bool *async)
+			bool *async, bool write_fault, bool *writable)
 {
 	struct page *page[1];
 	int npages = 0;
@@ -964,19 +964,42 @@ static pfn_t hva_to_pfn(struct kvm *kvm,
 	/* we can do it either atomically or asynchronously, not both */
 	BUG_ON(atomic && async);
 
+	BUG_ON(!write_fault && !writable);
+
+	if (writable)
+		*writable = true;
+
 	if (atomic || async)
 		npages = __get_user_pages_fast(addr, 1, 1, page);
 
 	if (unlikely(npages != 1) && !atomic) {
 		might_sleep();
 
+		if (writable)
+			*writable = write_fault;
+
 		if (async) {
 			down_read(&current->mm->mmap_sem);
 			npages = get_user_pages_noio(current, current->mm,
-						     addr, 1, 1, 0, page, NULL);
+						     addr, 1, write_fault, 0,
+						     page, NULL);
 			up_read(&current->mm->mmap_sem);
 		} else
-			npages = get_user_pages_fast(addr, 1, 1, page);
+			npages = get_user_pages_fast(addr, 1, write_fault,
+						     page);
+
+		/* map read fault as writable if possible */
+		if (unlikely(!write_fault) && npages == 1) {
+			struct page *wpage[1];
+
+			npages = __get_user_pages_fast(addr, 1, 1, wpage);
+			if (npages == 1) {
+				*writable = true;
+				put_page(page[0]);
+				page[0] = wpage[0];
+			}
+			npages = 1;
+		}
 	}
 
 	if (unlikely(npages != 1)) {
@@ -1016,11 +1039,12 @@ return_fault_page:
 
 pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr)
 {
-	return hva_to_pfn(kvm, addr, true, NULL);
+	return hva_to_pfn(kvm, addr, true, NULL, true, NULL);
 }
 EXPORT_SYMBOL_GPL(hva_to_pfn_atomic);
 
-static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic, bool *async)
+static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic, bool *async,
+			  bool write_fault, bool *writable)
 {
 	unsigned long addr;
 
@@ -1033,32 +1057,40 @@ static pfn_t __gfn_to_pfn(struct kvm *kv
 		return page_to_pfn(bad_page);
 	}
 
-	return hva_to_pfn(kvm, addr, atomic, async);
+	return hva_to_pfn(kvm, addr, atomic, async, write_fault, writable);
 }
 
 pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn)
 {
-	return __gfn_to_pfn(kvm, gfn, true, NULL);
+	return __gfn_to_pfn(kvm, gfn, true, NULL, true, NULL);
 }
 EXPORT_SYMBOL_GPL(gfn_to_pfn_atomic);
 
-pfn_t gfn_to_pfn_async(struct kvm *kvm, gfn_t gfn, bool *async)
+pfn_t gfn_to_pfn_async(struct kvm *kvm, gfn_t gfn, bool *async,
+		       bool write_fault, bool *writable)
 {
-	return __gfn_to_pfn(kvm, gfn, false, async);
+	return __gfn_to_pfn(kvm, gfn, false, async, write_fault, writable);
 }
 EXPORT_SYMBOL_GPL(gfn_to_pfn_async);
 
 pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
 {
-	return __gfn_to_pfn(kvm, gfn, false, NULL);
+	return __gfn_to_pfn(kvm, gfn, false, NULL, true, NULL);
 }
 EXPORT_SYMBOL_GPL(gfn_to_pfn);
 
+pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
+		      bool *writable)
+{
+	return __gfn_to_pfn(kvm, gfn, false, NULL, write_fault, writable);
+}
+EXPORT_SYMBOL_GPL(gfn_to_pfn_prot);
+
 pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
 			 struct kvm_memory_slot *slot, gfn_t gfn)
 {
 	unsigned long addr = gfn_to_hva_memslot(slot, gfn);
-	return hva_to_pfn(kvm, addr, false, NULL);
+	return hva_to_pfn(kvm, addr, false, NULL, true, NULL);
 }
 
 int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages,

next prev parent reply	other threads:[~2010-10-19 16:29 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-10-19 16:26 [patch 0/3] [RFC] support read-only mappings Marcelo Tosatti
2010-10-19 16:26 ` [patch 1/3] KVM: VMX: remove setting of shadow_base_ptes for EPT Marcelo Tosatti
2010-10-20 10:24   ` Avi Kivity
2010-10-19 16:26 ` [patch 2/3] KVM: MMU: flush TLBs on writable -> read-only spte overwrite Marcelo Tosatti
2010-10-19 16:26 ` Marcelo Tosatti [this message]
2010-10-20 10:36   ` [patch 3/3] KVM: propagate fault r/w information to gup(), allow read-only mappings Avi Kivity
2010-10-22 16:18 ` [patch 0/4] support read-only mappings (v2) Marcelo Tosatti
2010-10-22 16:18   ` [patch 1/4] KVM: VMX: remove setting of shadow_base_ptes for EPT Marcelo Tosatti
2010-10-22 16:18   ` [patch 2/4] KVM: MMU: remove kvm_mmu_set_base_ptes Marcelo Tosatti
2010-10-22 16:18   ` [patch 3/4] KVM: MMU: flush TLBs on writable -> read-only spte overwrite Marcelo Tosatti
2010-10-22 16:18   ` [patch 4/4] KVM: propagate fault r/w information to gup(), allow read-only memory Marcelo Tosatti
2010-10-27  9:20   ` [patch 0/4] support read-only mappings (v2) Avi Kivity

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20101019162726.570196410@redhat.com \
    --to=mtosatti@redhat.com \
    --cc=aarcange@redhat.com \
    --cc=avi@redhat.com \
    --cc=kvm@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox