* KVM: MMU: nuke shadowed pgtable pages and pte's on memslot destruction
@ 2008-07-04 1:06 Marcelo Tosatti
2008-07-05 17:25 ` Avi Kivity
0 siblings, 1 reply; 16+ messages in thread
From: Marcelo Tosatti @ 2008-07-04 1:06 UTC (permalink / raw)
To: Avi Kivity, Richard W.M. Jones; +Cc: kvm-devel
During RH6.2 graphical installation the following oops is triggered:
BUG: unable to handle kernel NULL pointer dereference at 0000000000000000
IP: [<ffffffffa00bf172>] :kvm:gfn_to_rmap+0x3e/0x61
Pid: 4559, comm: qemu-system-x86 Not tainted
RIP: 0010:[<ffffffffa00bf172>] [<ffffffffa00bf172>] :kvm:gfn_to_rmap+0x3e/0x61
Process qemu-system-x86 (pid: 4559, threadinfo ffff810220920000, task
ffff81022e82b2f0)
Stack: 0000000000217a4b ffff81021a48e9d8 ffff81021a48fa80
ffffffffa00bf240
ffff810220ab82f0 ffff81021a48e9d8 ffff81021a48fa80 000000000000013b
ffff81022288c000 ffffffffa00bf3b7 000000000003aed3 ffff81022288c000
Call Trace:
[<ffffffffa00bf240>] ? :kvm:rmap_remove+0xab/0x19d
[<ffffffffa00bf3b7>] ? :kvm:kvm_mmu_zap_page+0x85/0x26e
[<ffffffffa00bf8a5>] ? :kvm:kvm_mmu_zap_all+0x2b/0x46
[<ffffffffa00bb37c>] ? :kvm:kvm_arch_vm_ioctl+0x262/0x575
[<ffffffffa00b6fb8>] ? :kvm:kvm_read_guest+0x3f/0x7d
[<ffffffffa00c19c9>] ? :kvm:paging32_walk_addr+0xac/0x262
[<ffffffffa00b6d7c>] ? :kvm:gfn_to_hva+0x9/0x5d
[<ffffffffa00b6f44>] ? :kvm:kvm_read_guest_page+0x11/0x46
The problem is that KVM allows shadow pagetable entries that
point to a removed memslot to exist. In this case the cirrus vram
mapping was removed, and the NULL dereference happened during
kvm_set_memory_alias()'s zap_all_pages().
Since a malicious guest could have a thread's root table inside the
to-be-removed memslot, also guarantee that this is not the case before
removal.
This behaviour will also be useful for memory hotplugging.
As a side note, RH 6.2 graphical installation still won't function (due
to cirrus emulation bugs).
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Index: kvm/arch/x86/kvm/x86.c
===================================================================
--- kvm.orig/arch/x86/kvm/x86.c
+++ kvm/arch/x86/kvm/x86.c
@@ -4040,6 +4040,12 @@ int kvm_arch_set_memory_region(struct kv
return 0;
}
+int kvm_arch_destroy_memory_region(struct kvm *kvm, int slot)
+{
+ kvm_mmu_zap_all(kvm);
+ return kvm_mmu_slot_has_shadowed_page(kvm, slot);
+}
+
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE
Index: kvm/include/linux/kvm_host.h
===================================================================
--- kvm.orig/include/linux/kvm_host.h
+++ kvm/include/linux/kvm_host.h
@@ -168,6 +168,7 @@ int kvm_arch_set_memory_region(struct kv
struct kvm_userspace_memory_region *mem,
struct kvm_memory_slot old,
int user_alloc);
+int kvm_arch_destroy_memory_region(struct kvm *kvm, int slot);
gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn);
struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn);
Index: kvm/virt/kvm/kvm_main.c
===================================================================
--- kvm.orig/virt/kvm/kvm_main.c
+++ kvm/virt/kvm/kvm_main.c
@@ -405,6 +405,12 @@ int __kvm_set_memory_region(struct kvm *
if (mem->slot >= kvm->nmemslots)
kvm->nmemslots = mem->slot + 1;
+ if (!npages) {
+ r = kvm_arch_destroy_memory_region(kvm, mem->slot);
+ if (r)
+ goto out_free;
+ }
+
*memslot = new;
r = kvm_arch_set_memory_region(kvm, mem, old, user_alloc);
Index: kvm/arch/ia64/kvm/kvm-ia64.c
===================================================================
--- kvm.orig/arch/ia64/kvm/kvm-ia64.c
+++ kvm/arch/ia64/kvm/kvm-ia64.c
@@ -1455,6 +1455,10 @@ int kvm_arch_set_memory_region(struct kv
return 0;
}
+int kvm_arch_destroy_memory_region(struct kvm *kvm, int slot)
+{
+ return 0;
+}
long kvm_arch_dev_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
Index: kvm/arch/powerpc/kvm/powerpc.c
===================================================================
--- kvm.orig/arch/powerpc/kvm/powerpc.c
+++ kvm/arch/powerpc/kvm/powerpc.c
@@ -170,6 +170,11 @@ int kvm_arch_set_memory_region(struct kv
return 0;
}
+int kvm_arch_destroy_memory_region(struct kvm *kvm, int slot)
+{
+ return 0;
+}
+
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
{
struct kvm_vcpu *vcpu;
Index: kvm/arch/s390/kvm/kvm-s390.c
===================================================================
--- kvm.orig/arch/s390/kvm/kvm-s390.c
+++ kvm/arch/s390/kvm/kvm-s390.c
@@ -675,6 +675,11 @@ int kvm_arch_set_memory_region(struct kv
return 0;
}
+int kvm_arch_destroy_memory_region(struct kvm *kvm, int slot)
+{
+ return 0;
+}
+
gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
{
return gfn;
Index: kvm/arch/x86/kvm/mmu.c
===================================================================
--- kvm.orig/arch/x86/kvm/mmu.c
+++ kvm/arch/x86/kvm/mmu.c
@@ -1955,6 +1955,22 @@ void kvm_mmu_slot_remove_write_access(st
}
}
+int kvm_mmu_slot_has_shadowed_page(struct kvm *kvm, int slot)
+{
+ struct kvm_mmu_page *sp;
+ int ret = 0;
+
+ spin_lock(&kvm->mmu_lock);
+ list_for_each_entry(sp, &kvm->arch.active_mmu_pages, link) {
+ if (test_bit(slot, &sp->slot_bitmap)) {
+ ret = -EINVAL;
+ break;
+ }
+ }
+ spin_unlock(&kvm->mmu_lock);
+ return ret;
+}
+
void kvm_mmu_zap_all(struct kvm *kvm)
{
struct kvm_mmu_page *sp, *node;
Index: kvm/include/asm-x86/kvm_host.h
===================================================================
--- kvm.orig/include/asm-x86/kvm_host.h
+++ kvm/include/asm-x86/kvm_host.h
@@ -457,6 +457,7 @@ void kvm_mmu_set_mask_ptes(u64 user_mask
int kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
+int kvm_mmu_slot_has_shadowed_page(struct kvm *kvm, int slot);
void kvm_mmu_zap_all(struct kvm *kvm);
unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages);
^ permalink raw reply [flat|nested] 16+ messages in thread* Re: KVM: MMU: nuke shadowed pgtable pages and pte's on memslot destruction 2008-07-04 1:06 KVM: MMU: nuke shadowed pgtable pages and pte's on memslot destruction Marcelo Tosatti @ 2008-07-05 17:25 ` Avi Kivity 2008-07-05 19:23 ` Marcelo Tosatti 0 siblings, 1 reply; 16+ messages in thread From: Avi Kivity @ 2008-07-05 17:25 UTC (permalink / raw) To: Marcelo Tosatti; +Cc: Richard W.M. Jones, kvm-devel Marcelo Tosatti wrote: > During RH6.2 graphical installation the following oops is triggered: > > BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 > IP: [<ffffffffa00bf172>] :kvm:gfn_to_rmap+0x3e/0x61 > Pid: 4559, comm: qemu-system-x86 Not tainted > RIP: 0010:[<ffffffffa00bf172>] [<ffffffffa00bf172>] :kvm:gfn_to_rmap+0x3e/0x61 > > @@ -1955,6 +1955,22 @@ void kvm_mmu_slot_remove_write_access(st > } > } > > +int kvm_mmu_slot_has_shadowed_page(struct kvm *kvm, int slot) > +{ > + struct kvm_mmu_page *sp; > + int ret = 0; > + > + spin_lock(&kvm->mmu_lock); > + list_for_each_entry(sp, &kvm->arch.active_mmu_pages, link) { > + if (test_bit(slot, &sp->slot_bitmap)) { > + ret = -EINVAL; > + break; > + } > + } > + spin_unlock(&kvm->mmu_lock); > + return ret; > +} > + > I don't like the guest influencing host actions in this way. It's just a guest. But I think it's unneeded. kvm_mmu_zap_page() will mark a root shadow page invalid and force all vcpus to reload it, so all that's needed is to keep the mmu spinlock held while removing the slot. -- Do not meddle in the internals of kernels, for they are subtle and quick to panic. ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: KVM: MMU: nuke shadowed pgtable pages and pte's on memslot destruction 2008-07-05 17:25 ` Avi Kivity @ 2008-07-05 19:23 ` Marcelo Tosatti 2008-07-05 21:15 ` Avi Kivity 0 siblings, 1 reply; 16+ messages in thread From: Marcelo Tosatti @ 2008-07-05 19:23 UTC (permalink / raw) To: Avi Kivity; +Cc: Richard W.M. Jones, kvm-devel On Sat, Jul 05, 2008 at 08:25:30PM +0300, Avi Kivity wrote: >> @@ -1955,6 +1955,22 @@ void kvm_mmu_slot_remove_write_access(st >> } >> } >> +int kvm_mmu_slot_has_shadowed_page(struct kvm *kvm, int slot) >> +{ >> + struct kvm_mmu_page *sp; >> + int ret = 0; >> + >> + spin_lock(&kvm->mmu_lock); >> + list_for_each_entry(sp, &kvm->arch.active_mmu_pages, link) { >> + if (test_bit(slot, &sp->slot_bitmap)) { >> + ret = -EINVAL; >> + break; >> + } >> + } >> + spin_unlock(&kvm->mmu_lock); >> + return ret; >> +} >> + >> > > I don't like the guest influencing host actions in this way. It's just > a guest. > > But I think it's unneeded. kvm_mmu_zap_page() will mark a root shadow > page invalid and force all vcpus to reload it, so all that's needed is > to keep the mmu spinlock held while removing the slot. You're still keeping a shadowed page around with sp->gfn pointing to non-existant memslot. The code generally makes the assumption that gfn_to_memslot(gfn) on shadowed info will not fail. kvm_mmu_zap_page -> unaccount_shadowed, for example. The other option is to harden gfn_to_memslot() callers to handle failure, is that saner? ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: KVM: MMU: nuke shadowed pgtable pages and pte's on memslot destruction 2008-07-05 19:23 ` Marcelo Tosatti @ 2008-07-05 21:15 ` Avi Kivity 2008-07-07 17:31 ` Marcelo Tosatti 0 siblings, 1 reply; 16+ messages in thread From: Avi Kivity @ 2008-07-05 21:15 UTC (permalink / raw) To: Marcelo Tosatti; +Cc: Richard W.M. Jones, kvm-devel Marcelo Tosatti wrote: > On Sat, Jul 05, 2008 at 08:25:30PM +0300, Avi Kivity wrote: > >>> @@ -1955,6 +1955,22 @@ void kvm_mmu_slot_remove_write_access(st >>> } >>> } >>> +int kvm_mmu_slot_has_shadowed_page(struct kvm *kvm, int slot) >>> +{ >>> + struct kvm_mmu_page *sp; >>> + int ret = 0; >>> + >>> + spin_lock(&kvm->mmu_lock); >>> + list_for_each_entry(sp, &kvm->arch.active_mmu_pages, link) { >>> + if (test_bit(slot, &sp->slot_bitmap)) { >>> + ret = -EINVAL; >>> + break; >>> + } >>> + } >>> + spin_unlock(&kvm->mmu_lock); >>> + return ret; >>> +} >>> + >>> >>> >> I don't like the guest influencing host actions in this way. It's just >> a guest. >> >> But I think it's unneeded. kvm_mmu_zap_page() will mark a root shadow >> page invalid and force all vcpus to reload it, so all that's needed is >> to keep the mmu spinlock held while removing the slot. >> > > You're still keeping a shadowed page around with sp->gfn pointing to > non-existant memslot. The code generally makes the assumption that > gfn_to_memslot(gfn) on shadowed info will not fail. > > kvm_mmu_zap_page -> unaccount_shadowed, for example. > > The page has already been zapped, so we might as well unaccount_shadowed() on the first run. It needs to be moved until after the reload_remote_mmus() call, though. > The other option is to harden gfn_to_memslot() callers to handle > failure, is that saner? > I don't think so. -- I have a truly marvellous patch that fixes the bug which this signature is too narrow to contain. ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: KVM: MMU: nuke shadowed pgtable pages and pte's on memslot destruction 2008-07-05 21:15 ` Avi Kivity @ 2008-07-07 17:31 ` Marcelo Tosatti 2008-07-07 19:58 ` Marcelo Tosatti 0 siblings, 1 reply; 16+ messages in thread From: Marcelo Tosatti @ 2008-07-07 17:31 UTC (permalink / raw) To: Avi Kivity; +Cc: Richard W.M. Jones, kvm-devel On Sun, Jul 06, 2008 at 12:15:56AM +0300, Avi Kivity wrote: > Marcelo Tosatti wrote: >> On Sat, Jul 05, 2008 at 08:25:30PM +0300, Avi Kivity wrote: >> >>>> @@ -1955,6 +1955,22 @@ void kvm_mmu_slot_remove_write_access(st >>>> } >>>> } >>>> +int kvm_mmu_slot_has_shadowed_page(struct kvm *kvm, int slot) >>>> +{ >>>> + struct kvm_mmu_page *sp; >>>> + int ret = 0; >>>> + >>>> + spin_lock(&kvm->mmu_lock); >>>> + list_for_each_entry(sp, &kvm->arch.active_mmu_pages, link) { >>>> + if (test_bit(slot, &sp->slot_bitmap)) { >>>> + ret = -EINVAL; >>>> + break; >>>> + } >>>> + } >>>> + spin_unlock(&kvm->mmu_lock); >>>> + return ret; >>>> +} >>>> + >>>> >>> I don't like the guest influencing host actions in this way. It's >>> just a guest. >>> >>> But I think it's unneeded. kvm_mmu_zap_page() will mark a root >>> shadow page invalid and force all vcpus to reload it, so all that's >>> needed is to keep the mmu spinlock held while removing the slot. >>> >> >> You're still keeping a shadowed page around with sp->gfn pointing to >> non-existant memslot. The code generally makes the assumption that >> gfn_to_memslot(gfn) on shadowed info will not fail. >> >> kvm_mmu_zap_page -> unaccount_shadowed, for example. >> >> > > The page has already been zapped, so we might as well > unaccount_shadowed() on the first run. It needs to be moved until after > the reload_remote_mmus() call, though. 
During RH6.2 graphical installation the following oops is triggered: BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 IP: [<ffffffffa00bf172>] :kvm:gfn_to_rmap+0x3e/0x61 Pid: 4559, comm: qemu-system-x86 Not tainted The problem is that KVM allows shadow pagetable entries that point to a removed memslot to exist. In this case the cirrus vram mapping was removed, and the NULL dereference happened during kvm_set_memory_alias()'s zap_all_pages(). So nuke all shadowed pages before memslot removal. Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com> diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index a4cf4a2..76259da 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -1455,6 +1455,10 @@ int kvm_arch_set_memory_region(struct kvm *kvm, return 0; } +int kvm_arch_destroy_memory_region(struct kvm *kvm, int slot) +{ + return 0; +} long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index b850d24..07d69cf 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -170,6 +170,11 @@ int kvm_arch_set_memory_region(struct kvm *kvm, return 0; } +int kvm_arch_destroy_memory_region(struct kvm *kvm, int slot) +{ + return 0; +} + struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) { struct kvm_vcpu *vcpu; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 399acf3..201a7e1 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -675,6 +675,11 @@ int kvm_arch_set_memory_region(struct kvm *kvm, return 0; } +int kvm_arch_destroy_memory_region(struct kvm *kvm, int slot) +{ + return 0; +} + gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) { return gfn; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 1fd8e3b..1617b68 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -930,7 +930,7 @@ static void kvm_mmu_zap_page(struct kvm *kvm, struct 
kvm_mmu_page *sp) } kvm_mmu_page_unlink_children(kvm, sp); if (!sp->root_count) { - if (!sp->role.metaphysical) + if (!sp->role.metaphysical && !sp->role.invalid) unaccount_shadowed(kvm, sp->gfn); hlist_del(&sp->hash_link); kvm_mmu_free_page(kvm, sp); @@ -938,6 +938,8 @@ static void kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp) list_move(&sp->link, &kvm->arch.active_mmu_pages); sp->role.invalid = 1; kvm_reload_remote_mmus(kvm); + if (!sp->role.metaphysical) + unaccount_shadowed(kvm, sp->gfn); } kvm_mmu_reset_last_pte_updated(kvm); } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5a83c3b..8815f1d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4040,6 +4040,12 @@ int kvm_arch_set_memory_region(struct kvm *kvm, return 0; } +int kvm_arch_destroy_memory_region(struct kvm *kvm, int slot) +{ + kvm_mmu_zap_all(kvm); + return 0; +} + int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) { return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index d220b49..0fc7ddc 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -168,6 +168,7 @@ int kvm_arch_set_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, struct kvm_memory_slot old, int user_alloc); +int kvm_arch_destroy_memory_region(struct kvm *kvm, int slot); gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn); struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn); unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index b90da0b..5ef3a5e 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -405,6 +405,12 @@ int __kvm_set_memory_region(struct kvm *kvm, if (mem->slot >= kvm->nmemslots) kvm->nmemslots = mem->slot + 1; + if (!npages) { + r = kvm_arch_destroy_memory_region(kvm, mem->slot); + if (r) + goto out_free; + } + *memslot = new; r = kvm_arch_set_memory_region(kvm, mem, old, user_alloc); ^ permalink raw reply related [flat|nested] 
16+ messages in thread
* Re: KVM: MMU: nuke shadowed pgtable pages and pte's on memslot destruction 2008-07-07 17:31 ` Marcelo Tosatti @ 2008-07-07 19:58 ` Marcelo Tosatti 2008-07-10 14:42 ` Avi Kivity 0 siblings, 1 reply; 16+ messages in thread From: Marcelo Tosatti @ 2008-07-07 19:58 UTC (permalink / raw) To: Avi Kivity; +Cc: Richard W.M. Jones, kvm-devel On Mon, Jul 07, 2008 at 02:31:55PM -0300, Marcelo Tosatti wrote: > On Sun, Jul 06, 2008 at 12:15:56AM +0300, Avi Kivity wrote: > > Marcelo Tosatti wrote: > >> On Sat, Jul 05, 2008 at 08:25:30PM +0300, Avi Kivity wrote: > >> > >>>> @@ -1955,6 +1955,22 @@ void kvm_mmu_slot_remove_write_access(st > >>>> } > >>>> } > >>>> +int kvm_mmu_slot_has_shadowed_page(struct kvm *kvm, int slot) > >>>> +{ > >>>> + struct kvm_mmu_page *sp; > >>>> + int ret = 0; > >>>> + > >>>> + spin_lock(&kvm->mmu_lock); > >>>> + list_for_each_entry(sp, &kvm->arch.active_mmu_pages, link) { > >>>> + if (test_bit(slot, &sp->slot_bitmap)) { > >>>> + ret = -EINVAL; > >>>> + break; > >>>> + } > >>>> + } > >>>> + spin_unlock(&kvm->mmu_lock); > >>>> + return ret; > >>>> +} > >>>> + > >>>> > >>> I don't like the guest influencing host actions in this way. It's > >>> just a guest. > >>> > >>> But I think it's unneeded. kvm_mmu_zap_page() will mark a root > >>> shadow page invalid and force all vcpus to reload it, so all that's > >>> needed is to keep the mmu spinlock held while removing the slot. > >>> > >> > >> You're still keeping a shadowed page around with sp->gfn pointing to > >> non-existant memslot. The code generally makes the assumption that > >> gfn_to_memslot(gfn) on shadowed info will not fail. > >> > >> kvm_mmu_zap_page -> unaccount_shadowed, for example. > >> > >> > > > > The page has already been zapped, so we might as well > > unaccount_shadowed() on the first run. It needs to be moved until after > > the reload_remote_mmus() call, though. Oops, previous patch was unaccounting multiple times for invalid pages. 
This should be better: During RH6.2 graphical installation the following oops is triggered: BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 IP: [<ffffffffa00bf172>] :kvm:gfn_to_rmap+0x3e/0x61 Pid: 4559, comm: qemu-system-x86 Not tainted The problem is that KVM allows shadow pagetable entries that point to a removed memslot to exist. In this case the cirrus vram mapping was removed, and the NULL dereference happened during kvm_set_memory_alias()'s zap_all_pages(). So nuke all shadowed pages before memslot removal. Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com> diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index a4cf4a2..76259da 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -1455,6 +1455,10 @@ int kvm_arch_set_memory_region(struct kvm *kvm, return 0; } +int kvm_arch_destroy_memory_region(struct kvm *kvm, int slot) +{ + return 0; +} long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index b850d24..07d69cf 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -170,6 +170,11 @@ int kvm_arch_set_memory_region(struct kvm *kvm, return 0; } +int kvm_arch_destroy_memory_region(struct kvm *kvm, int slot) +{ + return 0; +} + struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) { struct kvm_vcpu *vcpu; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 399acf3..201a7e1 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -675,6 +675,11 @@ int kvm_arch_set_memory_region(struct kvm *kvm, return 0; } +int kvm_arch_destroy_memory_region(struct kvm *kvm, int slot) +{ + return 0; +} + gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) { return gfn; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 1fd8e3b..89cd1cf 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -930,14 +930,17 @@ static void kvm_mmu_zap_page(struct 
kvm *kvm, struct kvm_mmu_page *sp) } kvm_mmu_page_unlink_children(kvm, sp); if (!sp->root_count) { - if (!sp->role.metaphysical) + if (!sp->role.metaphysical && !sp->role.invalid) unaccount_shadowed(kvm, sp->gfn); hlist_del(&sp->hash_link); kvm_mmu_free_page(kvm, sp); } else { + int invalid = sp->role.invalid; list_move(&sp->link, &kvm->arch.active_mmu_pages); sp->role.invalid = 1; kvm_reload_remote_mmus(kvm); + if (!sp->role.metaphysical && !invalid) + unaccount_shadowed(kvm, sp->gfn); } kvm_mmu_reset_last_pte_updated(kvm); } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5a83c3b..8815f1d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4040,6 +4040,12 @@ int kvm_arch_set_memory_region(struct kvm *kvm, return 0; } +int kvm_arch_destroy_memory_region(struct kvm *kvm, int slot) +{ + kvm_mmu_zap_all(kvm); + return 0; +} + int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) { return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index d220b49..0fc7ddc 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -168,6 +168,7 @@ int kvm_arch_set_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, struct kvm_memory_slot old, int user_alloc); +int kvm_arch_destroy_memory_region(struct kvm *kvm, int slot); gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn); struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn); unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index b90da0b..5ef3a5e 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -405,6 +405,12 @@ int __kvm_set_memory_region(struct kvm *kvm, if (mem->slot >= kvm->nmemslots) kvm->nmemslots = mem->slot + 1; + if (!npages) { + r = kvm_arch_destroy_memory_region(kvm, mem->slot); + if (r) + goto out_free; + } + *memslot = new; r = kvm_arch_set_memory_region(kvm, mem, old, user_alloc); ^ permalink raw reply related [flat|nested] 16+ messages in 
thread
* Re: KVM: MMU: nuke shadowed pgtable pages and pte's on memslot destruction 2008-07-07 19:58 ` Marcelo Tosatti @ 2008-07-10 14:42 ` Avi Kivity 2008-07-10 18:58 ` Hollis Blanchard 2008-07-10 23:54 ` Marcelo Tosatti 0 siblings, 2 replies; 16+ messages in thread From: Avi Kivity @ 2008-07-10 14:42 UTC (permalink / raw) To: Marcelo Tosatti Cc: Richard W.M. Jones, kvm-devel, Hollis Blanchard, Zhang, Xiantao Marcelo Tosatti wrote: > On Mon, Jul 07, 2008 at 02:31:55PM -0300, Marcelo Tosatti wrote: > >> On Sun, Jul 06, 2008 at 12:15:56AM +0300, Avi Kivity wrote: >> >>> Marcelo Tosatti wrote: >>> >>>> On Sat, Jul 05, 2008 at 08:25:30PM +0300, Avi Kivity wrote: >>>> >>>> >>>>>> @@ -1955,6 +1955,22 @@ void kvm_mmu_slot_remove_write_access(st >>>>>> } >>>>>> } >>>>>> +int kvm_mmu_slot_has_shadowed_page(struct kvm *kvm, int slot) >>>>>> +{ >>>>>> + struct kvm_mmu_page *sp; >>>>>> + int ret = 0; >>>>>> + >>>>>> + spin_lock(&kvm->mmu_lock); >>>>>> + list_for_each_entry(sp, &kvm->arch.active_mmu_pages, link) { >>>>>> + if (test_bit(slot, &sp->slot_bitmap)) { >>>>>> + ret = -EINVAL; >>>>>> + break; >>>>>> + } >>>>>> + } >>>>>> + spin_unlock(&kvm->mmu_lock); >>>>>> + return ret; >>>>>> +} >>>>>> + >>>>>> >>>>>> >>>>> I don't like the guest influencing host actions in this way. It's >>>>> just a guest. >>>>> >>>>> But I think it's unneeded. kvm_mmu_zap_page() will mark a root >>>>> shadow page invalid and force all vcpus to reload it, so all that's >>>>> needed is to keep the mmu spinlock held while removing the slot. >>>>> >>>>> >>>> You're still keeping a shadowed page around with sp->gfn pointing to >>>> non-existant memslot. The code generally makes the assumption that >>>> gfn_to_memslot(gfn) on shadowed info will not fail. >>>> >>>> kvm_mmu_zap_page -> unaccount_shadowed, for example. >>>> >>>> >>>> >>> The page has already been zapped, so we might as well >>> unaccount_shadowed() on the first run. It needs to be moved until after >>> the reload_remote_mmus() call, though. 
>>> > > Oops, previous patch was unaccounting multiple times for invalid pages. > This should be better: > > > During RH6.2 graphical installation the following oops is triggered: > > BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 > IP: [<ffffffffa00bf172>] :kvm:gfn_to_rmap+0x3e/0x61 > Pid: 4559, comm: qemu-system-x86 Not tainted > > The problem is that KVM allows shadow pagetable entries that > point to a removed memslot to exist. In this case the cirrus vram > mapping was removed, and the NULL dereference happened during > kvm_set_memory_alias()'s zap_all_pages(). > > So nuke all shadowed pages before memslot removal. > > Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com> > > > diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c > index a4cf4a2..76259da 100644 > --- a/arch/ia64/kvm/kvm-ia64.c > +++ b/arch/ia64/kvm/kvm-ia64.c > @@ -1455,6 +1455,10 @@ int kvm_arch_set_memory_region(struct kvm *kvm, > return 0; > } > > +int kvm_arch_destroy_memory_region(struct kvm *kvm, int slot) > +{ > + return 0; > +} > > This (and its friends) ought to be static inlines. On the other hand, don't the other arches have to flush their tlbs? Xiantao/Hollis? So maybe this function needs to be renamed kvm_flush_shadow() and implemented across the board. > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c > index b90da0b..5ef3a5e 100644 > --- a/virt/kvm/kvm_main.c > +++ b/virt/kvm/kvm_main.c > @@ -405,6 +405,12 @@ int __kvm_set_memory_region(struct kvm *kvm, > if (mem->slot >= kvm->nmemslots) > kvm->nmemslots = mem->slot + 1; > > + if (!npages) { > + r = kvm_arch_destroy_memory_region(kvm, mem->slot); > + if (r) > + goto out_free; > + } > + > Destructors should never fail, since there is no possible recovery. And indeed you have 'return 0' in the actual implementation. So I think the function better return void. -- error compiling committee.c: too many arguments to function ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: KVM: MMU: nuke shadowed pgtable pages and pte's on memslot destruction 2008-07-10 14:42 ` Avi Kivity @ 2008-07-10 18:58 ` Hollis Blanchard 2008-07-10 23:49 ` Marcelo Tosatti 2008-07-10 23:54 ` Marcelo Tosatti 1 sibling, 1 reply; 16+ messages in thread From: Hollis Blanchard @ 2008-07-10 18:58 UTC (permalink / raw) To: Avi Kivity; +Cc: Marcelo Tosatti, Richard W.M. Jones, kvm-devel, Zhang, Xiantao On Thu, 2008-07-10 at 17:42 +0300, Avi Kivity wrote: > > > During RH6.2 graphical installation the following oops is triggered: > > > > BUG: unable to handle kernel NULL pointer dereference at > 0000000000000000 > > IP: [<ffffffffa00bf172>] :kvm:gfn_to_rmap+0x3e/0x61 > > Pid: 4559, comm: qemu-system-x86 Not tainted > > > > The problem is that KVM allows shadow pagetable entries that > > point to a removed memslot to exist. In this case the cirrus vram > > mapping was removed, and the NULL dereference happened during > > kvm_set_memory_alias()'s zap_all_pages(). > > > > So nuke all shadowed pages before memslot removal. > > > > Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com> > > > > > > diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c > > index a4cf4a2..76259da 100644 > > --- a/arch/ia64/kvm/kvm-ia64.c > > +++ b/arch/ia64/kvm/kvm-ia64.c > > @@ -1455,6 +1455,10 @@ int kvm_arch_set_memory_region(struct kvm > *kvm, > > return 0; > > } > > > > +int kvm_arch_destroy_memory_region(struct kvm *kvm, int slot) > > +{ > > + return 0; > > +} > > > > > > This (and its friends) ought to be static inlines. > > On the other hand, don't the other arches have to flush their tlbs? > Xiantao/Hollis? So maybe this function needs to be renamed > kvm_flush_shadow() and implemented across the board. Agreed, I think that's the right approach. -- Hollis Blanchard IBM Linux Technology Center ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: KVM: MMU: nuke shadowed pgtable pages and pte's on memslot destruction 2008-07-10 18:58 ` Hollis Blanchard @ 2008-07-10 23:49 ` Marcelo Tosatti 2008-07-11 14:48 ` Avi Kivity 2008-07-21 21:03 ` Hollis Blanchard 0 siblings, 2 replies; 16+ messages in thread From: Marcelo Tosatti @ 2008-07-10 23:49 UTC (permalink / raw) To: Hollis Blanchard; +Cc: Avi Kivity, kvm-devel, Zhang, Xiantao On Thu, Jul 10, 2008 at 01:58:24PM -0500, Hollis Blanchard wrote: > > This (and its friends) ought to be static inlines. > > > > On the other hand, don't the other arches have to flush their tlbs? > > Xiantao/Hollis? So maybe this function needs to be renamed > > kvm_flush_shadow() and implemented across the board. > > Agreed, I think that's the right approach. Ok, here it is, Hollis and Xiantao can you fill in the blanks? ---------- Flush the shadow mmu before removing regions to avoid stale entries. Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com> diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index a4cf4a2..d10e35b 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -1455,6 +1455,10 @@ int kvm_arch_set_memory_region(struct kvm *kvm, return 0; } +void kvm_arch_flush_shadow(struct kvm *kvm) +{ + return; +} long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index b850d24..2c438a7 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -170,6 +170,11 @@ int kvm_arch_set_memory_region(struct kvm *kvm, return 0; } +void kvm_arch_flush_shadow(struct kvm *kvm) +{ + return; +} + struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) { struct kvm_vcpu *vcpu; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 399acf3..5612c00 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -675,6 +675,11 @@ int kvm_arch_set_memory_region(struct kvm *kvm, return 0; } +void 
kvm_arch_flush_shadow(struct kvm *kvm) +{ + return; +} + gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) { return gfn; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c9b8a04..dedb581 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4040,6 +4040,11 @@ int kvm_arch_set_memory_region(struct kvm *kvm, return 0; } +void kvm_arch_flush_shadow(struct kvm *kvm) +{ + kvm_mmu_zap_all(kvm); +} + int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) { return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index fc685c5..3798097 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -168,6 +168,7 @@ int kvm_arch_set_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, struct kvm_memory_slot old, int user_alloc); +void kvm_arch_flush_shadow(struct kvm *kvm); gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn); struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn); unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index b90da0b..c459383 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -405,6 +405,9 @@ int __kvm_set_memory_region(struct kvm *kvm, if (mem->slot >= kvm->nmemslots) kvm->nmemslots = mem->slot + 1; + if (!npages) + kvm_arch_flush_shadow(kvm); + *memslot = new; r = kvm_arch_set_memory_region(kvm, mem, old, user_alloc); ^ permalink raw reply related [flat|nested] 16+ messages in thread
* Re: KVM: MMU: nuke shadowed pgtable pages and pte's on memslot destruction 2008-07-10 23:49 ` Marcelo Tosatti @ 2008-07-11 14:48 ` Avi Kivity 2008-07-21 21:03 ` Hollis Blanchard 1 sibling, 0 replies; 16+ messages in thread From: Avi Kivity @ 2008-07-11 14:48 UTC (permalink / raw) To: Marcelo Tosatti; +Cc: Hollis Blanchard, kvm-devel, Zhang, Xiantao Marcelo Tosatti wrote: > On Thu, Jul 10, 2008 at 01:58:24PM -0500, Hollis Blanchard wrote: > >>> This (and its friends) ought to be static inlines. >>> >>> On the other hand, don't the other arches have to flush their tlbs? >>> Xiantao/Hollis? So maybe this function needs to be renamed >>> kvm_flush_shadow() and implemented across the board. >>> >> Agreed, I think that's the right approach. >> > > Ok, here it is, Hollis and Xiantao can you fill in the blanks? > > ---------- > > Flush the shadow mmu before removing regions to avoid stale entries. > > Applied, thanks. -- Do not meddle in the internals of kernels, for they are subtle and quick to panic. ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: KVM: MMU: nuke shadowed pgtable pages and pte's on memslot destruction 2008-07-10 23:49 ` Marcelo Tosatti 2008-07-11 14:48 ` Avi Kivity @ 2008-07-21 21:03 ` Hollis Blanchard 2008-07-21 21:34 ` Marcelo Tosatti 2008-07-22 5:18 ` Avi Kivity 1 sibling, 2 replies; 16+ messages in thread From: Hollis Blanchard @ 2008-07-21 21:03 UTC (permalink / raw) To: Marcelo Tosatti; +Cc: Avi Kivity, kvm-devel, Zhang, Xiantao On Thu, 2008-07-10 at 20:49 -0300, Marcelo Tosatti wrote: > diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c > index b850d24..2c438a7 100644 > --- a/arch/powerpc/kvm/powerpc.c > +++ b/arch/powerpc/kvm/powerpc.c > @@ -170,6 +170,11 @@ int kvm_arch_set_memory_region(struct kvm *kvm, > return 0; > } > > +void kvm_arch_flush_shadow(struct kvm *kvm) > +{ > + return; > +} > + > struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int > id) > { > struct kvm_vcpu *vcpu; By the way, what is the testcase for this, i.e. how do I remove a memslot? -- Hollis Blanchard IBM Linux Technology Center ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: KVM: MMU: nuke shadowed pgtable pages and pte's on memslot destruction 2008-07-21 21:03 ` Hollis Blanchard @ 2008-07-21 21:34 ` Marcelo Tosatti 2008-07-21 22:22 ` Hollis Blanchard 2008-07-22 5:18 ` Avi Kivity 1 sibling, 1 reply; 16+ messages in thread From: Marcelo Tosatti @ 2008-07-21 21:34 UTC (permalink / raw) To: Hollis Blanchard; +Cc: Avi Kivity, kvm-devel, Zhang, Xiantao On Mon, Jul 21, 2008 at 04:03:27PM -0500, Hollis Blanchard wrote: > On Thu, 2008-07-10 at 20:49 -0300, Marcelo Tosatti wrote: > > diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c > > index b850d24..2c438a7 100644 > > --- a/arch/powerpc/kvm/powerpc.c > > +++ b/arch/powerpc/kvm/powerpc.c > > @@ -170,6 +170,11 @@ int kvm_arch_set_memory_region(struct kvm *kvm, > > return 0; > > } > > > > +void kvm_arch_flush_shadow(struct kvm *kvm) > > +{ > > + return; > > +} > > + > > struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int > > id) > > { > > struct kvm_vcpu *vcpu; > > By the way, what is the testcase for this, i.e. how do I remove a > memslot? The testcase I used was RH6.2 graphical install, which changes the cirrus mode from linear frame buffer to the standard one, thus destroying the vram memory slot (should be able to do that with the framebuffer driver). All you need on this callback is destroy all shadow mappings, to avoid the MMU code from referencing a memslot that is no longer existant. ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: KVM: MMU: nuke shadowed pgtable pages and pte's on memslot destruction 2008-07-21 21:34 ` Marcelo Tosatti @ 2008-07-21 22:22 ` Hollis Blanchard 0 siblings, 0 replies; 16+ messages in thread From: Hollis Blanchard @ 2008-07-21 22:22 UTC (permalink / raw) To: Marcelo Tosatti; +Cc: Avi Kivity, kvm-devel, Zhang, Xiantao On Monday 21 July 2008 16:34:40 Marcelo Tosatti wrote: > On Mon, Jul 21, 2008 at 04:03:27PM -0500, Hollis Blanchard wrote: > > > > By the way, what is the testcase for this, i.e. how do I remove a > > memslot? > > The testcase I used was RH6.2 graphical install, which changes the > cirrus mode from linear frame buffer to the standard one, thus > destroying the vram memory slot (should be able to do that with the > framebuffer driver). Hmm, we're not using any graphics adapters with PowerPC right now. I've skimmed through cirrus_vga.c but I don't see any obvious "mode change" routine; what is the function that triggers this behavior? Some "(un)register physical memory" call? > All you need on this callback is destroy all shadow mappings, to avoid > the MMU code from referencing a memslot that is no longer existant. In addition to that, I also need a testcase to actually exercise the code... :) -- Hollis Blanchard IBM Linux Technology Center ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: KVM: MMU: nuke shadowed pgtable pages and pte's on memslot destruction 2008-07-21 21:03 ` Hollis Blanchard 2008-07-21 21:34 ` Marcelo Tosatti @ 2008-07-22 5:18 ` Avi Kivity 1 sibling, 0 replies; 16+ messages in thread From: Avi Kivity @ 2008-07-22 5:18 UTC (permalink / raw) To: Hollis Blanchard; +Cc: Marcelo Tosatti, kvm-devel, Zhang, Xiantao Hollis Blanchard wrote: > On Thu, 2008-07-10 at 20:49 -0300, Marcelo Tosatti wrote: > >> diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c >> index b850d24..2c438a7 100644 >> --- a/arch/powerpc/kvm/powerpc.c >> +++ b/arch/powerpc/kvm/powerpc.c >> @@ -170,6 +170,11 @@ int kvm_arch_set_memory_region(struct kvm *kvm, >> return 0; >> } >> >> +void kvm_arch_flush_shadow(struct kvm *kvm) >> +{ >> + return; >> +} >> + >> struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int >> id) >> { >> struct kvm_vcpu *vcpu; >> > > By the way, what is the testcase for this, i.e. how do I remove a > memslot? > > 1. Memory hotunplug (perhaps not very relevant to embedded operating systems) 2. PCI memory resource remapping (aka framebuffers) -- Do not meddle in the internals of kernels, for they are subtle and quick to panic. ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: KVM: MMU: nuke shadowed pgtable pages and pte's on memslot destruction 2008-07-10 14:42 ` Avi Kivity 2008-07-10 18:58 ` Hollis Blanchard @ 2008-07-10 23:54 ` Marcelo Tosatti 2008-07-11 15:09 ` Avi Kivity 1 sibling, 1 reply; 16+ messages in thread From: Marcelo Tosatti @ 2008-07-10 23:54 UTC (permalink / raw) To: Avi Kivity; +Cc: kvm-devel, Hollis Blanchard, Zhang, Xiantao KVM: MMU: improve invalid shadow root page handling Harden kvm_mmu_zap_page() against invalid root pages that had been shadowed from memslots that are gone. Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index ff7cf63..7f57da6 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -930,14 +930,17 @@ static void kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp) } kvm_mmu_page_unlink_children(kvm, sp); if (!sp->root_count) { - if (!sp->role.metaphysical) + if (!sp->role.metaphysical && !sp->role.invalid) unaccount_shadowed(kvm, sp->gfn); hlist_del(&sp->hash_link); kvm_mmu_free_page(kvm, sp); } else { + int invalid = sp->role.invalid; list_move(&sp->link, &kvm->arch.active_mmu_pages); sp->role.invalid = 1; kvm_reload_remote_mmus(kvm); + if (!sp->role.metaphysical && !invalid) + unaccount_shadowed(kvm, sp->gfn); } kvm_mmu_reset_last_pte_updated(kvm); } ^ permalink raw reply related [flat|nested] 16+ messages in thread
* Re: KVM: MMU: nuke shadowed pgtable pages and pte's on memslot destruction 2008-07-10 23:54 ` Marcelo Tosatti @ 2008-07-11 15:09 ` Avi Kivity 0 siblings, 0 replies; 16+ messages in thread From: Avi Kivity @ 2008-07-11 15:09 UTC (permalink / raw) To: Marcelo Tosatti; +Cc: kvm-devel, Hollis Blanchard, Zhang, Xiantao Marcelo Tosatti wrote: > KVM: MMU: improve invalid shadow root page handling > > Harden kvm_mmu_zap_page() against invalid root pages that > had been shadowed from memslots that are gone. > > Applied, thanks. -- Do not meddle in the internals of kernels, for they are subtle and quick to panic. ^ permalink raw reply [flat|nested] 16+ messages in thread
end of thread, other threads:[~2008-07-22 5:18 UTC | newest] Thread overview: 16+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2008-07-04 1:06 KVM: MMU: nuke shadowed pgtable pages and pte's on memslot destruction Marcelo Tosatti 2008-07-05 17:25 ` Avi Kivity 2008-07-05 19:23 ` Marcelo Tosatti 2008-07-05 21:15 ` Avi Kivity 2008-07-07 17:31 ` Marcelo Tosatti 2008-07-07 19:58 ` Marcelo Tosatti 2008-07-10 14:42 ` Avi Kivity 2008-07-10 18:58 ` Hollis Blanchard 2008-07-10 23:49 ` Marcelo Tosatti 2008-07-11 14:48 ` Avi Kivity 2008-07-21 21:03 ` Hollis Blanchard 2008-07-21 21:34 ` Marcelo Tosatti 2008-07-21 22:22 ` Hollis Blanchard 2008-07-22 5:18 ` Avi Kivity 2008-07-10 23:54 ` Marcelo Tosatti 2008-07-11 15:09 ` Avi Kivity
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox