* [RFC PATCH 2/2] kvm, x86: use ro page and don't copy shared page
@ 2010-06-09 7:53 Lai Jiangshan
2010-06-09 9:28 ` Avi Kivity
0 siblings, 1 reply; 4+ messages in thread
From: Lai Jiangshan @ 2010-06-09 7:53 UTC (permalink / raw)
To: Avi Kivity, Marcelo Tosatti, kvm
When handling a page fault, we always call get_user_pages(write=1).
Actually, we don't need to do this when it is not a write fault.
get_user_pages(write=1) will cause shared page(ksm) copied.
If this page is not modified in future, this copying and the copied page
are just wasted. Ksm may scan and merge them and may cause thrash.
This patch is not for inclusion, because I know nothing about mmio
and this patch includes a "workaround" which ensures mmio pfns
are always writable in tdp_page_fault().
The guest can't even boot up without this workaround.
Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
---
mmu.c | 21 ++++++++++++++-------
paging_tmpl.h | 15 +++++++--------
2 files changed, 21 insertions(+), 15 deletions(-)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 9b9b1c3..1e0826d 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1902,7 +1902,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
unsigned pte_access, int user_fault,
int write_fault, int dirty, int level,
gfn_t gfn, pfn_t pfn, bool speculative,
- bool can_unsync, bool reset_host_protection)
+ bool can_unsync, bool host_writable)
{
u64 spte;
int ret = 0;
@@ -1929,8 +1929,10 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn,
kvm_is_mmio_pfn(pfn));
- if (reset_host_protection)
+ if (host_writable)
spte |= SPTE_HOST_WRITEABLE;
+ else
+ pte_access &= ~ACC_WRITE_MASK;
spte |= (u64)pfn << PAGE_SHIFT;
@@ -1983,7 +1985,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
int user_fault, int write_fault, int dirty,
int *ptwrite, int level, gfn_t gfn,
pfn_t pfn, bool speculative,
- bool reset_host_protection)
+ bool host_writable)
{
int was_rmapped = 0;
int was_writable = is_writable_pte(*sptep);
@@ -2018,7 +2020,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
if (set_spte(vcpu, sptep, pte_access, user_fault, write_fault,
dirty, level, gfn, pfn, speculative, true,
- reset_host_protection)) {
+ host_writable)) {
if (write_fault)
*ptwrite = 1;
kvm_x86_ops->tlb_flush(vcpu);
@@ -2066,7 +2068,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
if (iterator.level == level) {
mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL,
0, write, 1, &pt_write,
- level, gfn, pfn, false, true);
+ level, gfn, pfn, false, write);
++vcpu->stat.pf_fixed;
break;
}
@@ -2135,7 +2137,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
mmu_seq = vcpu->kvm->mmu_notifier_seq;
smp_rmb();
- pfn = kvm_get_pfn_for_gfn(vcpu->kvm, gfn, 1);
+ pfn = kvm_get_pfn_for_gfn(vcpu->kvm, gfn, write);
/* mmio */
if (is_error_pfn(pfn))
@@ -2357,7 +2359,12 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
mmu_seq = vcpu->kvm->mmu_notifier_seq;
smp_rmb();
- pfn = kvm_get_pfn_for_gfn(vcpu->kvm, gfn, 1);
+ pfn = kvm_get_pfn_for_gfn(vcpu->kvm, gfn, error_code & PFERR_WRITE_MASK);
+ if (!(error_code & PFERR_WRITE_MASK) && kvm_is_mmio_pfn(pfn)) {
+ kvm_release_pfn_clean(pfn);
+ /* I don't know why we have to ensure mmio pfns are always writable. */
+ pfn = kvm_get_pfn_for_gfn(vcpu->kvm, gfn, 1);
+ }
if (is_error_pfn(pfn))
return kvm_handle_bad_page(vcpu->kvm, gfn, pfn);
spin_lock(&vcpu->kvm->mmu_lock);
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index fddb726..e0cb33a 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -283,7 +283,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
return;
kvm_get_pfn(pfn);
/*
- * we call mmu_set_spte() with reset_host_protection = true beacuse that
+ * we call mmu_set_spte() with host_writable = true beacuse that
* vcpu->arch.update_pte.pfn was fetched from get_user_pages(write = 1).
*/
mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0,
@@ -321,7 +321,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
user_fault, write_fault,
gw->ptes[gw->level-1] & PT_DIRTY_MASK,
ptwrite, level,
- gw->gfn, pfn, false, true);
+ gw->gfn, pfn, false, write_fault);
break;
}
@@ -430,7 +430,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
mmu_seq = vcpu->kvm->mmu_notifier_seq;
smp_rmb();
- pfn = kvm_get_pfn_for_gfn(vcpu->kvm, walker.gfn, 1);
+ pfn = kvm_get_pfn_for_gfn(vcpu->kvm, walker.gfn, write_fault);
/* mmio */
if (is_error_pfn(pfn))
@@ -579,7 +579,7 @@ static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu,
static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
{
int i, offset, nr_present;
- bool reset_host_protection;
+ bool host_writable;
gpa_t first_pte_gpa;
offset = nr_present = 0;
@@ -623,15 +623,14 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
nr_present++;
pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
if (!(sp->spt[i] & SPTE_HOST_WRITEABLE)) {
- pte_access &= ~ACC_WRITE_MASK;
- reset_host_protection = 0;
+ host_writable = false;
} else {
- reset_host_protection = 1;
+ host_writable = true;
}
set_spte(vcpu, &sp->spt[i], pte_access, 0, 0,
is_dirty_gpte(gpte), PT_PAGE_TABLE_LEVEL, gfn,
spte_to_pfn(sp->spt[i]), true, false,
- reset_host_protection);
+ host_writable);
}
return !nr_present;
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [RFC PATCH 2/2] kvm, x86: use ro page and don't copy shared page
2010-06-09 7:53 [RFC PATCH 2/2] kvm, x86: use ro page and don't copy shared page Lai Jiangshan
@ 2010-06-09 9:28 ` Avi Kivity
2010-06-10 2:47 ` Lai Jiangshan
2010-06-10 11:19 ` Lai Jiangshan
0 siblings, 2 replies; 4+ messages in thread
From: Avi Kivity @ 2010-06-09 9:28 UTC (permalink / raw)
To: Lai Jiangshan; +Cc: Marcelo Tosatti, kvm
On 06/09/2010 10:53 AM, Lai Jiangshan wrote:
> When page fault, we always call get_user_pages(write=1).
>
> Actually, we don't need to do this when it is not write fault.
> get_user_pages(write=1) will cause shared page(ksm) copied.
> If this page is not modified in future, this copying and the copied page
> are just wasted. Ksm may scan and merge them and may cause thrash.
>
> This patch is not for inclusion, because I know nothing about mmio
> and this patch includes a "workaround" which ensures mmio pfns
> are always writable in tdp_page_fault().
> The guest can't even boot up without this workaround.
>
mmio pfns are used for device assignment. These are host pfns that
don't have a struct page, instead they belong to a device BAR.
I don't understand why you see a failure since they aren't even present
on guests without assigned devices.
> @@ -2357,7 +2359,12 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
>
> mmu_seq = vcpu->kvm->mmu_notifier_seq;
> smp_rmb();
> - pfn = kvm_get_pfn_for_gfn(vcpu->kvm, gfn, 1);
> + pfn = kvm_get_pfn_for_gfn(vcpu->kvm, gfn, error_code & PFERR_WRITE_MASK);
>
This is a pessimization, since now we may need two faults per page, one
to page it in read-only and another to establish write access.
May not be so bad for tdp, but will surely reduce performance on shadow.
The way I think it should be improved, is to extend
get_user_pages_fast() to also return the pte. So now, if we get a page
for read, but it happens to have a writeable/dirty pte, we can still
allow write access in the spte.
> + if (!(error_code & PFERR_WRITE_MASK) && kvm_is_mmio_pfn(pfn)) {
> + kvm_release_pfn_clean(pfn);
> + /* I don't know why we have to ensure mmio pfns are always writable. */
> + pfn = kvm_get_pfn_for_gfn(vcpu->kvm, gfn, 1);
> + }
>
Weird. For what gfn/pfns does this trigger?
In general this is a great optimization.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [RFC PATCH 2/2] kvm, x86: use ro page and don't copy shared page
2010-06-09 9:28 ` Avi Kivity
@ 2010-06-10 2:47 ` Lai Jiangshan
2010-06-10 11:19 ` Lai Jiangshan
1 sibling, 0 replies; 4+ messages in thread
From: Lai Jiangshan @ 2010-06-10 2:47 UTC (permalink / raw)
To: Avi Kivity; +Cc: Marcelo Tosatti, kvm
Avi Kivity wrote:
> Weird. For what gfn/pfns does this trigger?
>
It is ZERO_PAGE which also has Reserved bit, not mmio page.
The pte is set write-protected; I still don't know why it causes the system
to hang.
set_spte() {
if (tdp_enabled)
spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn,
kvm_is_mmio_pfn(pfn));
/* guest can write to the page because of this statement? */
}
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [RFC PATCH 2/2] kvm, x86: use ro page and don't copy shared page
2010-06-09 9:28 ` Avi Kivity
2010-06-10 2:47 ` Lai Jiangshan
@ 2010-06-10 11:19 ` Lai Jiangshan
1 sibling, 0 replies; 4+ messages in thread
From: Lai Jiangshan @ 2010-06-10 11:19 UTC (permalink / raw)
To: Avi Kivity; +Cc: Marcelo Tosatti, kvm
Avi Kivity wrote:
I have found out what has happened: the current code has 2 serious bugs!
I will send patches to fix these bugs tomorrow (because my box is broken).
And I will send this "use ro page and don't copy shared page"
patchset next week.
Thanks a lot.
Lai
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2010-06-10 11:20 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-06-09 7:53 [RFC PATCH 2/2] kvm, x86: use ro page and don't copy shared page Lai Jiangshan
2010-06-09 9:28 ` Avi Kivity
2010-06-10 2:47 ` Lai Jiangshan
2010-06-10 11:19 ` Lai Jiangshan
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox