From: Marcelo Tosatti <marcelo-Bw31MaZKKs3YtjvyW6yDsg@public.gmane.org>
To: Avi Kivity <avi-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
Cc: kvm-devel <kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org>
Subject: Re: [PATCH] Fix SMP shadow instantiation race
Date: Tue, 11 Dec 2007 19:12:27 -0500 [thread overview]
Message-ID: <20071212001227.GA27190@dmt> (raw)
In-Reply-To: <475D726A.2040901-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
On Mon, Dec 10, 2007 at 07:07:54PM +0200, Avi Kivity wrote:
> Marcelo Tosatti wrote:
> >There is a race where VCPU0 is shadowing a pagetable entry while VCPU1
> >is updating it, which results in a stale shadow copy.
> >
> >Fix that by comparing the contents of the cached guest pte with the
> >current guest pte after write-protecting the guest pagetable.
> >
> >Attached program kvm_shadow_race.c demonstrates the problem.
> >
> >
>
> Where is it?
>
> >Signed-off-by: Marcelo Tosatti <mtosatti-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
> >
> >
> >diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h
> >index 72d4816..4fece01 100644
> >--- a/drivers/kvm/paging_tmpl.h
> >+++ b/drivers/kvm/paging_tmpl.h
> >@@ -66,6 +66,7 @@ struct guest_walker {
> > int level;
> > gfn_t table_gfn[PT_MAX_FULL_LEVELS];
> > pt_element_t pte;
> >+ gpa_t pte_gpa;
> >
>
> I think this needs to be an array like table_gfn[]. The guest may play
> with the pde (and upper entries) as well as the pte.
Alright, then it also needs to record the cached pte's for upper
levels.
> >
> >+ kvm_read_guest(vcpu->kvm, walker->pte_gpa, &curr_pte,
> >sizeof(curr_pte));
> >+
> >+ if (curr_pte != walker->pte)
> >+ return 0;
> >+
> >
>
> 'return NULL'
>
> It would also be preferable to read the pte only if we shadowed the page
> just now. Perhaps pass the pte and the index to kvm_mmu_get_page()
> which would use them as a guard when the page is being shadowed:
>
> if (lookup page succeeds)
> return it
> shadow page
> write protect it
> if (guard check succeeds)
> return it
> else
> return NULL
>
> or perhaps have kvm_mmu_get_page() return an additional bool signifying
> it is a new page. but this is ugly.
Given that it would be necessary to determine the size of pt_element_t inside
kvm_mmu_get_page(), I prefer the latter:
-------------
There is a race where VCPU0 is shadowing a pagetable entry while VCPU1
is updating it, which results in a stale shadow copy.
Fix that by comparing the contents of the cached guest pte with the
current guest pte after write-protecting the guest pagetable.
Signed-off-by: Marcelo Tosatti <mtosatti-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index ba71e8d..92ac0d1 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -681,7 +681,8 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
unsigned level,
int metaphysical,
unsigned access,
- u64 *parent_pte)
+ u64 *parent_pte,
+ bool *new_page)
{
union kvm_mmu_page_role role;
unsigned index;
@@ -720,6 +721,8 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
vcpu->mmu.prefetch_page(vcpu, sp);
if (!metaphysical)
rmap_write_protect(vcpu->kvm, gfn);
+ if (new_page)
+ *new_page = 1;
return sp;
}
@@ -993,7 +996,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
>> PAGE_SHIFT;
new_table = kvm_mmu_get_page(vcpu, pseudo_gfn,
v, level - 1,
- 1, ACC_ALL, &table[index]);
+ 1, ACC_ALL, &table[index],
+ NULL);
if (!new_table) {
pgprintk("nonpaging_map: ENOMEM\n");
return -ENOMEM;
@@ -1059,7 +1063,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
ASSERT(!VALID_PAGE(root));
sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
- PT64_ROOT_LEVEL, 0, ACC_ALL, NULL);
+ PT64_ROOT_LEVEL, 0, ACC_ALL, NULL, NULL);
root = __pa(sp->spt);
++sp->root_count;
vcpu->mmu.root_hpa = root;
@@ -1080,7 +1084,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
root_gfn = 0;
sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
PT32_ROOT_LEVEL, !is_paging(vcpu),
- ACC_ALL, NULL);
+ ACC_ALL, NULL, NULL);
root = __pa(sp->spt);
++sp->root_count;
vcpu->mmu.pae_root[i] = root | PT_PRESENT_MASK;
diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h
index 72d4816..c3ddcf2 100644
--- a/drivers/kvm/paging_tmpl.h
+++ b/drivers/kvm/paging_tmpl.h
@@ -65,7 +65,8 @@
struct guest_walker {
int level;
gfn_t table_gfn[PT_MAX_FULL_LEVELS];
- pt_element_t pte;
+ pt_element_t ptes[PT_MAX_FULL_LEVELS];
+ gpa_t pte_gpa[PT_MAX_FULL_LEVELS];
unsigned pt_access;
unsigned pte_access;
gfn_t gfn;
@@ -150,6 +151,7 @@ walk:
pte_gpa = gfn_to_gpa(table_gfn);
pte_gpa += index * sizeof(pt_element_t);
walker->table_gfn[walker->level - 1] = table_gfn;
+ walker->pte_gpa[walker->level - 1] = pte_gpa;
pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__,
walker->level - 1, table_gfn);
@@ -180,6 +182,8 @@ walk:
pte_access = pt_access & FNAME(gpte_access)(vcpu, pte);
+ walker->ptes[walker->level - 1] = pte;
+
if (walker->level == PT_PAGE_TABLE_LEVEL) {
walker->gfn = gpte_to_gfn(pte);
break;
@@ -209,9 +213,9 @@ walk:
goto walk;
pte |= PT_DIRTY_MASK;
kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte));
+ walker->ptes[walker->level - 1] = pte;
}
- walker->pte = pte;
walker->pt_access = pt_access;
walker->pte_access = pte_access;
pgprintk("%s: pte %llx pte_access %x pt_access %x\n",
@@ -268,7 +272,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
u64 *shadow_ent;
unsigned access = walker->pt_access;
- if (!is_present_pte(walker->pte))
+ if (!is_present_pte(walker->ptes[walker->level - 1]))
return NULL;
shadow_addr = vcpu->mmu.root_hpa;
@@ -285,6 +289,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
u64 shadow_pte;
int metaphysical;
gfn_t table_gfn;
+ bool new_page = 0;
shadow_ent = ((u64 *)__va(shadow_addr)) + index;
if (is_shadow_present_pte(*shadow_ent)) {
@@ -300,16 +305,23 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
if (level - 1 == PT_PAGE_TABLE_LEVEL
&& walker->level == PT_DIRECTORY_LEVEL) {
metaphysical = 1;
- if (!is_dirty_pte(walker->pte))
+ if (!is_dirty_pte(walker->ptes[level - 1]))
access &= ~ACC_WRITE_MASK;
- table_gfn = gpte_to_gfn(walker->pte);
+ table_gfn = gpte_to_gfn(walker->ptes[level - 1]);
} else {
metaphysical = 0;
table_gfn = walker->table_gfn[level - 2];
}
shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1,
metaphysical, access,
- shadow_ent);
+ shadow_ent, &new_page);
+ if (new_page && !metaphysical) {
+ pt_element_t curr_pte;
+ kvm_read_guest(vcpu->kvm, walker->pte_gpa[level - 2],
+ &curr_pte, sizeof(curr_pte));
+ if (curr_pte != walker->ptes[level - 2])
+ return NULL;
+ }
shadow_addr = __pa(shadow_page->spt);
shadow_pte = shadow_addr | PT_PRESENT_MASK | PT_ACCESSED_MASK
| PT_WRITABLE_MASK | PT_USER_MASK;
@@ -317,7 +329,8 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
}
mmu_set_spte(vcpu, shadow_ent, access, walker->pte_access & access,
- user_fault, write_fault, walker->pte & PT_DIRTY_MASK,
+ user_fault, write_fault,
+ walker->ptes[walker->level-1] & PT_DIRTY_MASK,
ptwrite, walker->gfn);
return shadow_ent;
@@ -382,7 +395,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
/*
* mmio: emulate if accessible, otherwise its a guest fault.
*/
- if (is_io_pte(*shadow_pte))
+ if (shadow_pte && is_io_pte(*shadow_pte))
return 1;
++vcpu->stat.pf_fixed;
-------------------------------------------------------------------------
SF.Net email is sponsored by:
Check out the new SourceForge.net Marketplace.
It's the best place to buy or sell services for
just about anything Open Source.
http://sourceforge.net/services/buy/index.php
next prev parent reply other threads:[~2007-12-12 0:12 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-12-10 16:19 [PATCH] Fix SMP shadow instantiation race Marcelo Tosatti
2007-12-10 17:07 ` Avi Kivity
[not found] ` <475D726A.2040901-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-12-10 19:12 ` Marcelo Tosatti
2007-12-10 21:27 ` Avi Kivity
[not found] ` <475DAF51.8060804-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-12-10 22:22 ` Marcelo Tosatti
2007-12-12 0:12 ` Marcelo Tosatti [this message]
2007-12-13 8:37 ` Avi Kivity
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20071212001227.GA27190@dmt \
--to=marcelo-bw31mazkks3ytjvyw6ydsg@public.gmane.org \
--cc=avi-atKUWr5tajBWk0Htik3J/w@public.gmane.org \
--cc=kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox