From: Marcelo Tosatti <mtosatti@redhat.com>
To: kvm@vger.kernel.org
Cc: avi@redhat.com, aarcange@redhat.com,
Marcelo Tosatti <mtosatti@redhat.com>
Subject: [patch 3/3] KVM: propagate fault r/w information to gup(), allow read-only mappings
Date: Tue, 19 Oct 2010 14:26:16 -0200 [thread overview]
Message-ID: <20101019162726.570196410@redhat.com> (raw)
In-Reply-To: 20101019162613.278902252@redhat.com
[-- Attachment #1: gfn-to-pfn-read-only --]
[-- Type: text/plain, Size: 10305 bytes --]
As suggested by Andrea, pass r/w error code to gup(), upgrading read fault
to writable if host pte allows it.
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Index: kvm/arch/x86/kvm/mmu.c
===================================================================
--- kvm.orig/arch/x86/kvm/mmu.c
+++ kvm/arch/x86/kvm/mmu.c
@@ -2081,8 +2081,7 @@ static void mmu_set_spte(struct kvm_vcpu
* will find a read-only spte, even though the writable spte
* might be cached on a CPU's TLB.
*/
- } else if (is_writable_pte(*sptep) &&
- (!(pte_access & ACC_WRITE_MASK) || !dirty)) {
+ } else if (is_writable_pte(*sptep) && !dirty) {
drop_spte(vcpu->kvm, sptep, shadow_trap_nonpresent_pte);
kvm_flush_remote_tlbs(vcpu->kvm);
} else
@@ -2222,7 +2221,7 @@ static void direct_pte_prefetch(struct k
}
static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
- int level, gfn_t gfn, pfn_t pfn)
+ int map_writable, int level, gfn_t gfn, pfn_t pfn)
{
struct kvm_shadow_walk_iterator iterator;
struct kvm_mmu_page *sp;
@@ -2231,9 +2230,13 @@ static int __direct_map(struct kvm_vcpu
for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) {
if (iterator.level == level) {
- mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL,
+ unsigned pte_access = ACC_ALL;
+
+ if (!map_writable)
+ pte_access &= ~ACC_WRITE_MASK;
+ mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, pte_access,
0, write, 1, &pt_write,
- level, gfn, pfn, false, true);
+ level, gfn, pfn, false, map_writable);
direct_pte_prefetch(vcpu, iterator.sptep);
++vcpu->stat.pf_fixed;
break;
@@ -2294,6 +2297,7 @@ static int nonpaging_map(struct kvm_vcpu
int level;
pfn_t pfn;
unsigned long mmu_seq;
+ bool map_writable;
level = mapping_level(vcpu, gfn);
@@ -2308,7 +2312,7 @@ static int nonpaging_map(struct kvm_vcpu
mmu_seq = vcpu->kvm->mmu_notifier_seq;
smp_rmb();
- pfn = gfn_to_pfn(vcpu->kvm, gfn);
+ pfn = gfn_to_pfn_prot(vcpu->kvm, gfn, write, &map_writable);
/* mmio */
if (is_error_pfn(pfn))
@@ -2318,7 +2322,7 @@ static int nonpaging_map(struct kvm_vcpu
if (mmu_notifier_retry(vcpu, mmu_seq))
goto out_unlock;
kvm_mmu_free_some_pages(vcpu);
- r = __direct_map(vcpu, v, write, level, gfn, pfn);
+ r = __direct_map(vcpu, v, write, map_writable, level, gfn, pfn);
spin_unlock(&vcpu->kvm->mmu_lock);
@@ -2616,11 +2620,11 @@ static bool can_do_async_pf(struct kvm_v
}
static bool try_async_pf(struct kvm_vcpu *vcpu, bool no_apf, gfn_t gfn,
- gva_t gva, pfn_t *pfn)
+ gva_t gva, pfn_t *pfn, bool write, bool *writable)
{
bool async;
- *pfn = gfn_to_pfn_async(vcpu->kvm, gfn, &async);
+ *pfn = gfn_to_pfn_async(vcpu->kvm, gfn, &async, write, writable);
if (!async)
return false; /* *pfn has correct page already */
@@ -2637,7 +2641,7 @@ static bool try_async_pf(struct kvm_vcpu
return true;
}
- *pfn = gfn_to_pfn(vcpu->kvm, gfn);
+ *pfn = gfn_to_pfn_prot(vcpu->kvm, gfn, write, writable);
return false;
}
@@ -2650,6 +2654,8 @@ static int tdp_page_fault(struct kvm_vcp
int level;
gfn_t gfn = gpa >> PAGE_SHIFT;
unsigned long mmu_seq;
+ int write = error_code & PFERR_WRITE_MASK;
+ bool map_writable;
ASSERT(vcpu);
ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa));
@@ -2665,7 +2671,7 @@ static int tdp_page_fault(struct kvm_vcp
mmu_seq = vcpu->kvm->mmu_notifier_seq;
smp_rmb();
- if (try_async_pf(vcpu, no_apf, gfn, gpa, &pfn))
+ if (try_async_pf(vcpu, no_apf, gfn, gpa, &pfn, write, &map_writable))
return 0;
/* mmio */
@@ -2675,7 +2681,7 @@ static int tdp_page_fault(struct kvm_vcp
if (mmu_notifier_retry(vcpu, mmu_seq))
goto out_unlock;
kvm_mmu_free_some_pages(vcpu);
- r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK,
+ r = __direct_map(vcpu, gpa, write, map_writable,
level, gfn, pfn);
spin_unlock(&vcpu->kvm->mmu_lock);
Index: kvm/arch/x86/kvm/paging_tmpl.h
===================================================================
--- kvm.orig/arch/x86/kvm/paging_tmpl.h
+++ kvm/arch/x86/kvm/paging_tmpl.h
@@ -427,7 +427,7 @@ static void FNAME(pte_prefetch)(struct k
static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
struct guest_walker *gw,
int user_fault, int write_fault, int hlevel,
- int *ptwrite, pfn_t pfn)
+ int *ptwrite, pfn_t pfn, bool map_writable)
{
unsigned access = gw->pt_access;
struct kvm_mmu_page *sp = NULL;
@@ -501,7 +501,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu
mmu_set_spte(vcpu, it.sptep, access, gw->pte_access & access,
user_fault, write_fault, dirty, ptwrite, it.level,
- gw->gfn, pfn, false, true);
+ gw->gfn, pfn, false, map_writable);
FNAME(pte_prefetch)(vcpu, gw, it.sptep);
return it.sptep;
@@ -539,6 +539,7 @@ static int FNAME(page_fault)(struct kvm_
pfn_t pfn;
int level = PT_PAGE_TABLE_LEVEL;
unsigned long mmu_seq;
+ bool map_writable;
pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
@@ -569,13 +570,17 @@ static int FNAME(page_fault)(struct kvm_
mmu_seq = vcpu->kvm->mmu_notifier_seq;
smp_rmb();
- if (try_async_pf(vcpu, no_apf, walker.gfn, addr, &pfn))
+ if (try_async_pf(vcpu, no_apf, walker.gfn, addr, &pfn, write_fault,
+ &map_writable))
return 0;
/* mmio */
if (is_error_pfn(pfn))
return kvm_handle_bad_page(vcpu->kvm, walker.gfn, pfn);
+ if (!map_writable)
+ walker.pte_access &= ~ACC_WRITE_MASK;
+
spin_lock(&vcpu->kvm->mmu_lock);
if (mmu_notifier_retry(vcpu, mmu_seq))
goto out_unlock;
@@ -583,7 +588,7 @@ static int FNAME(page_fault)(struct kvm_
trace_kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT);
kvm_mmu_free_some_pages(vcpu);
sptep = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
- level, &write_pt, pfn);
+ level, &write_pt, pfn, map_writable);
(void)sptep;
pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __func__,
sptep, *sptep, write_pt);
Index: kvm/include/linux/kvm_host.h
===================================================================
--- kvm.orig/include/linux/kvm_host.h
+++ kvm/include/linux/kvm_host.h
@@ -334,8 +334,11 @@ void kvm_set_page_accessed(struct page *
pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr);
pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn);
-pfn_t gfn_to_pfn_async(struct kvm *kvm, gfn_t gfn, bool *async);
+pfn_t gfn_to_pfn_async(struct kvm *kvm, gfn_t gfn, bool *async,
+ bool write_fault, bool *writable);
pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn);
+pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
+ bool *writable);
pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
struct kvm_memory_slot *slot, gfn_t gfn);
int memslot_id(struct kvm *kvm, gfn_t gfn);
Index: kvm/virt/kvm/kvm_main.c
===================================================================
--- kvm.orig/virt/kvm/kvm_main.c
+++ kvm/virt/kvm/kvm_main.c
@@ -955,7 +955,7 @@ unsigned long gfn_to_hva(struct kvm *kvm
EXPORT_SYMBOL_GPL(gfn_to_hva);
static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic,
- bool *async)
+ bool *async, bool write_fault, bool *writable)
{
struct page *page[1];
int npages = 0;
@@ -964,19 +964,42 @@ static pfn_t hva_to_pfn(struct kvm *kvm,
/* we can do it either atomically or asynchronously, not both */
BUG_ON(atomic && async);
+ BUG_ON(!write_fault && !writable);
+
+ if (writable)
+ *writable = true;
+
if (atomic || async)
npages = __get_user_pages_fast(addr, 1, 1, page);
if (unlikely(npages != 1) && !atomic) {
might_sleep();
+ if (writable)
+ *writable = write_fault;
+
if (async) {
down_read(&current->mm->mmap_sem);
npages = get_user_pages_noio(current, current->mm,
- addr, 1, 1, 0, page, NULL);
+ addr, 1, write_fault, 0,
+ page, NULL);
up_read(&current->mm->mmap_sem);
} else
- npages = get_user_pages_fast(addr, 1, 1, page);
+ npages = get_user_pages_fast(addr, 1, write_fault,
+ page);
+
+ /* map read fault as writable if possible */
+ if (unlikely(!write_fault) && npages == 1) {
+ struct page *wpage[1];
+
+ npages = __get_user_pages_fast(addr, 1, 1, wpage);
+ if (npages == 1) {
+ *writable = true;
+ put_page(page[0]);
+ page[0] = wpage[0];
+ }
+ npages = 1;
+ }
}
if (unlikely(npages != 1)) {
@@ -1016,11 +1039,12 @@ return_fault_page:
pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr)
{
- return hva_to_pfn(kvm, addr, true, NULL);
+ return hva_to_pfn(kvm, addr, true, NULL, true, NULL);
}
EXPORT_SYMBOL_GPL(hva_to_pfn_atomic);
-static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic, bool *async)
+static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic, bool *async,
+ bool write_fault, bool *writable)
{
unsigned long addr;
@@ -1033,32 +1057,40 @@ static pfn_t __gfn_to_pfn(struct kvm *kv
return page_to_pfn(bad_page);
}
- return hva_to_pfn(kvm, addr, atomic, async);
+ return hva_to_pfn(kvm, addr, atomic, async, write_fault, writable);
}
pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn)
{
- return __gfn_to_pfn(kvm, gfn, true, NULL);
+ return __gfn_to_pfn(kvm, gfn, true, NULL, true, NULL);
}
EXPORT_SYMBOL_GPL(gfn_to_pfn_atomic);
-pfn_t gfn_to_pfn_async(struct kvm *kvm, gfn_t gfn, bool *async)
+pfn_t gfn_to_pfn_async(struct kvm *kvm, gfn_t gfn, bool *async,
+ bool write_fault, bool *writable)
{
- return __gfn_to_pfn(kvm, gfn, false, async);
+ return __gfn_to_pfn(kvm, gfn, false, async, write_fault, writable);
}
EXPORT_SYMBOL_GPL(gfn_to_pfn_async);
pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
{
- return __gfn_to_pfn(kvm, gfn, false, NULL);
+ return __gfn_to_pfn(kvm, gfn, false, NULL, true, NULL);
}
EXPORT_SYMBOL_GPL(gfn_to_pfn);
+pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
+ bool *writable)
+{
+ return __gfn_to_pfn(kvm, gfn, false, NULL, write_fault, writable);
+}
+EXPORT_SYMBOL_GPL(gfn_to_pfn_prot);
+
pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
struct kvm_memory_slot *slot, gfn_t gfn)
{
unsigned long addr = gfn_to_hva_memslot(slot, gfn);
- return hva_to_pfn(kvm, addr, false, NULL);
+ return hva_to_pfn(kvm, addr, false, NULL, true, NULL);
}
int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages,
next prev parent reply other threads:[~2010-10-19 16:29 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-10-19 16:26 [patch 0/3] [RFC] support read-only mappings Marcelo Tosatti
2010-10-19 16:26 ` [patch 1/3] KVM: VMX: remove setting of shadow_base_ptes for EPT Marcelo Tosatti
2010-10-20 10:24 ` Avi Kivity
2010-10-19 16:26 ` [patch 2/3] KVM: MMU: flush TLBs on writable -> read-only spte overwrite Marcelo Tosatti
2010-10-19 16:26 ` Marcelo Tosatti [this message]
2010-10-20 10:36 ` [patch 3/3] KVM: propagate fault r/w information to gup(), allow read-only mappings Avi Kivity
2010-10-22 16:18 ` [patch 0/4] support read-only mappings (v2) Marcelo Tosatti
2010-10-22 16:18 ` [patch 1/4] KVM: VMX: remove setting of shadow_base_ptes for EPT Marcelo Tosatti
2010-10-22 16:18 ` [patch 2/4] KVM: MMU: remove kvm_mmu_set_base_ptes Marcelo Tosatti
2010-10-22 16:18 ` [patch 3/4] KVM: MMU: flush TLBs on writable -> read-only spte overwrite Marcelo Tosatti
2010-10-22 16:18 ` [patch 4/4] KVM: propagate fault r/w information to gup(), allow read-only memory Marcelo Tosatti
2010-10-27 9:20 ` [patch 0/4] support read-only mappings (v2) Avi Kivity
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20101019162726.570196410@redhat.com \
--to=mtosatti@redhat.com \
--cc=aarcange@redhat.com \
--cc=avi@redhat.com \
--cc=kvm@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.