public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH 2/2] KVM, Fix QEMU-KVM is killed by guest SRAO MCE
@ 2010-04-27  7:04 Huang Ying
  2010-04-27  7:47 ` Avi Kivity
  0 siblings, 1 reply; 7+ messages in thread
From: Huang Ying @ 2010-04-27  7:04 UTC (permalink / raw)
  To: Avi Kivity; +Cc: linux-kernel, Andi Kleen, Andrew Morton, masbock

In the common case, a guest SRAO MCE causes the corresponding poisoned
page to be un-mapped and SIGBUS to be sent to QEMU-KVM; QEMU-KVM then
relays the MCE to the guest OS.

But it has been reported that if the poisoned page is accessed in the
guest after it is un-mapped and before the MCE is relayed to the guest
OS, QEMU-KVM will be killed.

The reason is as follows. Because the poisoned page has been un-mapped,
the guest access causes a guest exit and kvm_mmu_page_fault is called.
kvm_mmu_page_fault cannot get the poisoned page for the fault address,
so kernel and user space MMIO processing are tried in turn. During user
MMIO processing the poisoned page is accessed again, and QEMU-KVM is
then killed by force_sig_info.

To fix the bug, kvm_mmu_page_fault sends the HWPOISON signal to QEMU-KVM
and does not try kernel and user space MMIO processing for a poisoned
page.

Reported-by: Max Asbock <masbock@linux.vnet.ibm.com>
Signed-off-by: Huang Ying <ying.huang@intel.com>
---
 arch/x86/kvm/mmu.c         |   24 ++++++++++++++++++++++--
 arch/x86/kvm/paging_tmpl.h |   10 ++++++++--
 include/linux/kvm_host.h   |    1 +
 virt/kvm/kvm_main.c        |   30 ++++++++++++++++++++++++++++--
 4 files changed, 59 insertions(+), 6 deletions(-)

--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -32,6 +32,7 @@
 #include <linux/compiler.h>
 #include <linux/srcu.h>
 #include <linux/slab.h>
+#include <linux/uaccess.h>
 
 #include <asm/page.h>
 #include <asm/cmpxchg.h>
@@ -1972,6 +1973,17 @@ static int __direct_map(struct kvm_vcpu
 	return pt_write;
 }
 
+static void kvm_send_hwpoison_signal(struct kvm *kvm, gfn_t gfn)
+{
+	char buf[1];
+	void __user *hva;
+	int r;
+
+	/* Touch the page, so send SIGBUS */
+	hva = (void __user *)gfn_to_hva(kvm, gfn);
+	r = copy_from_user(buf, hva, 1);
+}
+
 static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
 {
 	int r;
@@ -1997,7 +2009,11 @@ static int nonpaging_map(struct kvm_vcpu
 	/* mmio */
 	if (is_error_pfn(pfn)) {
 		kvm_release_pfn_clean(pfn);
-		return 1;
+		if (is_hwpoison_pfn(pfn)) {
+			kvm_send_hwpoison_signal(vcpu->kvm, gfn);
+			return 0;
+		} else
+			return 1;
 	}
 
 	spin_lock(&vcpu->kvm->mmu_lock);
@@ -2203,7 +2219,11 @@ static int tdp_page_fault(struct kvm_vcp
 	pfn = gfn_to_pfn(vcpu->kvm, gfn);
 	if (is_error_pfn(pfn)) {
 		kvm_release_pfn_clean(pfn);
-		return 1;
+		if (is_hwpoison_pfn(pfn)) {
+			kvm_send_hwpoison_signal(vcpu->kvm, gfn);
+			return 0;
+		} else
+			return 1;
 	}
 	spin_lock(&vcpu->kvm->mmu_lock);
 	if (mmu_notifier_retry(vcpu, mmu_seq))
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -425,9 +425,15 @@ static int FNAME(page_fault)(struct kvm_
 
 	/* mmio */
 	if (is_error_pfn(pfn)) {
-		pgprintk("gfn %lx is mmio\n", walker.gfn);
 		kvm_release_pfn_clean(pfn);
-		return 1;
+		if (is_hwpoison_pfn(pfn)) {
+			pgprintk("gfn %lx is hwpoisoned\n", walker.gfn);
+			kvm_send_hwpoison_signal(vcpu->kvm, walker.gfn);
+			return 0;
+		} else {
+			pgprintk("gfn %lx is mmio\n", walker.gfn);
+			return 1;
+		}
 	}
 
 	spin_lock(&vcpu->kvm->mmu_lock);
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -249,6 +249,7 @@ extern pfn_t bad_pfn;
 
 int is_error_page(struct page *page);
 int is_error_pfn(pfn_t pfn);
+int is_hwpoison_pfn(pfn_t pfn);
 int kvm_is_error_hva(unsigned long addr);
 int kvm_set_memory_region(struct kvm *kvm,
 			  struct kvm_userspace_memory_region *mem,
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -92,6 +92,9 @@ static bool kvm_rebooting;
 
 static bool largepages_enabled = true;
 
+struct page *hwpoison_page;
+pfn_t hwpoison_pfn;
+
 inline int kvm_is_mmio_pfn(pfn_t pfn)
 {
 	if (pfn_valid(pfn)) {
@@ -805,16 +808,22 @@ EXPORT_SYMBOL_GPL(kvm_disable_largepages
 
 int is_error_page(struct page *page)
 {
-	return page == bad_page;
+	return page == bad_page || page == hwpoison_page;
 }
 EXPORT_SYMBOL_GPL(is_error_page);
 
 int is_error_pfn(pfn_t pfn)
 {
-	return pfn == bad_pfn;
+	return pfn == bad_pfn || pfn == hwpoison_pfn;
 }
 EXPORT_SYMBOL_GPL(is_error_pfn);
 
+int is_hwpoison_pfn(pfn_t pfn)
+{
+	return pfn == hwpoison_pfn;
+}
+EXPORT_SYMBOL_GPL(is_hwpoison_pfn);
+
 static inline unsigned long bad_hva(void)
 {
 	return PAGE_OFFSET;
@@ -935,6 +944,11 @@ static pfn_t hva_to_pfn(struct kvm *kvm,
 	if (unlikely(npages != 1)) {
 		struct vm_area_struct *vma;
 
+		if (npages == -EHWPOISON) {
+			get_page(hwpoison_page);
+			return page_to_pfn(hwpoison_page);
+		}
+
 		down_read(&current->mm->mmap_sem);
 		vma = find_vma(current->mm, addr);
 
@@ -2191,6 +2205,15 @@ int kvm_init(void *opaque, unsigned int
 
 	bad_pfn = page_to_pfn(bad_page);
 
+	hwpoison_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+
+	if (hwpoison_page == NULL) {
+		r = -ENOMEM;
+		goto out_free_0;
+	}
+
+	hwpoison_pfn = page_to_pfn(hwpoison_page);
+
 	if (!zalloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) {
 		r = -ENOMEM;
 		goto out_free_0;
@@ -2262,6 +2285,8 @@ out_free_1:
 out_free_0a:
 	free_cpumask_var(cpus_hardware_enabled);
 out_free_0:
+	if (hwpoison_page)
+		__free_page(hwpoison_page);
 	__free_page(bad_page);
 out:
 	kvm_arch_exit();
@@ -2284,6 +2309,7 @@ void kvm_exit(void)
 	kvm_arch_hardware_unsetup();
 	kvm_arch_exit();
 	free_cpumask_var(cpus_hardware_enabled);
+	__free_page(hwpoison_page);
 	__free_page(bad_page);
 }
 EXPORT_SYMBOL_GPL(kvm_exit);



^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/2] KVM, Fix QEMU-KVM is killed by guest SRAO MCE
  2010-04-27  7:04 [PATCH 2/2] KVM, Fix QEMU-KVM is killed by guest SRAO MCE Huang Ying
@ 2010-04-27  7:47 ` Avi Kivity
  2010-04-27  9:25   ` Huang Ying
  0 siblings, 1 reply; 7+ messages in thread
From: Avi Kivity @ 2010-04-27  7:47 UTC (permalink / raw)
  To: Huang Ying; +Cc: linux-kernel, Andi Kleen, Andrew Morton, masbock

(please copy kvm@vger.kernel.org on kvm patches)


On 04/27/2010 10:04 AM, Huang Ying wrote:
> In common cases, guest SRAO MCE will cause corresponding poisoned page
> be un-mapped and SIGBUS be sent to QEMU-KVM, then QEMU-KVM will relay
> the MCE to guest OS.
>
> But it is reported that if the poisoned page is accessed in guest
> after un-mapped and before MCE is relayed to guest OS, QEMU-KVM will
> be killed.
>
> The reason is as follow. Because poisoned page has been un-mapped,
> guest access will cause guest exit and kvm_mmu_page_fault will be
> called. kvm_mmu_page_fault can not get the poisoned page for fault
> address, so kernel and user space MMIO processing is tried in turn. In
> user MMIO processing, poisoned page is accessed again, then QEMU-KVM
> is killed by force_sig_info.
>
> To fix the bug, kvm_mmu_page_fault send HWPOISON signal to QEMU-KVM
> and do not try kernel and user space MMIO processing for poisoned
> page.
>
>
>
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -32,6 +32,7 @@
>   #include<linux/compiler.h>
>   #include<linux/srcu.h>
>   #include<linux/slab.h>
> +#include<linux/uaccess.h>
>
>   #include<asm/page.h>
>   #include<asm/cmpxchg.h>
> @@ -1972,6 +1973,17 @@ static int __direct_map(struct kvm_vcpu
>   	return pt_write;
>   }
>
> +static void kvm_send_hwpoison_signal(struct kvm *kvm, gfn_t gfn)
> +{
> +	char buf[1];
> +	void __user *hva;
> +	int r;
> +
> +	/* Touch the page, so send SIGBUS */
> +	hva = (void __user *)gfn_to_hva(kvm, gfn);
> +	r = copy_from_user(buf, hva, 1);
>    

No error check?  What is a copy_from_user() of a poisoned page expected
to return?

Best to return -EFAULT on failure for consistency.

> +}
> +
>   static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
>   {
>   	int r;
> @@ -1997,7 +2009,11 @@ static int nonpaging_map(struct kvm_vcpu
>   	/* mmio */
>   	if (is_error_pfn(pfn)) {
>   		kvm_release_pfn_clean(pfn);
> -		return 1;
> +		if (is_hwpoison_pfn(pfn)) {
> +			kvm_send_hwpoison_signal(vcpu->kvm, gfn);
> +			return 0;
> +		} else
> +			return 1;
>   	}
>    

This is duplicated several times.  Please introduce a kvm_handle_bad_page():

     if (is_error_pfn(pfn))
         return kvm_handle_bad_page(vcpu->kvm, gfn, pfn);

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/2] KVM, Fix QEMU-KVM is killed by guest SRAO MCE
  2010-04-27  7:47 ` Avi Kivity
@ 2010-04-27  9:25   ` Huang Ying
  2010-04-27  9:30     ` Avi Kivity
  0 siblings, 1 reply; 7+ messages in thread
From: Huang Ying @ 2010-04-27  9:25 UTC (permalink / raw)
  To: Avi Kivity
  Cc: linux-kernel@vger.kernel.org, Andi Kleen, Andrew Morton,
	masbock@linux.vnet.ibm.com, kvm

On Tue, 2010-04-27 at 15:47 +0800, Avi Kivity wrote:
> (please copy kvm@vger.kernel.org on kvm patches)

Sorry, will do that for all future patches.

> On 04/27/2010 10:04 AM, Huang Ying wrote:
> >
> > +static void kvm_send_hwpoison_signal(struct kvm *kvm, gfn_t gfn)
> > +{
> > +	char buf[1];
> > +	void __user *hva;
> > +	int r;
> > +
> > +	/* Touch the page, so send SIGBUS */
> > +	hva = (void __user *)gfn_to_hva(kvm, gfn);
> > +	r = copy_from_user(buf, hva, 1);
> >    
> 
> No error check?  What will a copy_from_user() of poisoned page expected 
> to return?
> 
> Best to return -EFAULT on failure for consistency.

I just want to use the side effect of copy_from_user: SIGBUS will be sent
to the current process because the page touched is marked as poisoned. That
is, failure is expected, so the return value is not checked.

> > +}
> > +
> >   static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
> >   {
> >   	int r;
> > @@ -1997,7 +2009,11 @@ static int nonpaging_map(struct kvm_vcpu
> >   	/* mmio */
> >   	if (is_error_pfn(pfn)) {
> >   		kvm_release_pfn_clean(pfn);
> > -		return 1;
> > +		if (is_hwpoison_pfn(pfn)) {
> > +			kvm_send_hwpoison_signal(vcpu->kvm, gfn);
> > +			return 0;
> > +		} else
> > +			return 1;
> >   	}
> >    
> 
> This is duplicated several times.  Please introduce a kvm_handle_bad_page():
> 
>      if (is_error_pfn(pfn))
>          return kvm_handle_bad_page(vcpu->kvm, gfn, pfn);

OK. Will do that.

Best Regards,
Huang Ying



^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/2] KVM, Fix QEMU-KVM is killed by guest SRAO MCE
  2010-04-27  9:25   ` Huang Ying
@ 2010-04-27  9:30     ` Avi Kivity
  2010-04-28  2:56       ` Huang Ying
  0 siblings, 1 reply; 7+ messages in thread
From: Avi Kivity @ 2010-04-27  9:30 UTC (permalink / raw)
  To: Huang Ying
  Cc: linux-kernel@vger.kernel.org, Andi Kleen, Andrew Morton,
	masbock@linux.vnet.ibm.com, kvm

On 04/27/2010 12:25 PM, Huang Ying wrote:
>
>
>> On 04/27/2010 10:04 AM, Huang Ying wrote:
>>      
>>> +static void kvm_send_hwpoison_signal(struct kvm *kvm, gfn_t gfn)
>>> +{
>>> +	char buf[1];
>>> +	void __user *hva;
>>> +	int r;
>>> +
>>> +	/* Touch the page, so send SIGBUS */
>>> +	hva = (void __user *)gfn_to_hva(kvm, gfn);
>>> +	r = copy_from_user(buf, hva, 1);
>>>
>>>        
>> No error check?  What will a copy_from_user() of poisoned page expected
>> to return?
>>
>> Best to return -EFAULT on failure for consistency.
>>      
> Just want to use the side effect of copy_from_user, SIGBUS will be sent
> to current process because the page touched is marked as poisoned. That
> is, failure is expected, so the return value is not checked.
>    

What if the failure doesn't happen?  Say, someone mmap()ed over the page.

btw, better to use (void)copy_from_user(...) instead to avoid the 
initialized but not used warning the compiler may generate.


-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/2] KVM, Fix QEMU-KVM is killed by guest SRAO MCE
  2010-04-27  9:30     ` Avi Kivity
@ 2010-04-28  2:56       ` Huang Ying
  2010-04-28  9:47         ` Avi Kivity
  0 siblings, 1 reply; 7+ messages in thread
From: Huang Ying @ 2010-04-28  2:56 UTC (permalink / raw)
  To: Avi Kivity
  Cc: linux-kernel@vger.kernel.org, Andi Kleen, Andrew Morton,
	masbock@linux.vnet.ibm.com, kvm@vger.kernel.org

On Tue, 2010-04-27 at 17:30 +0800, Avi Kivity wrote:
> On 04/27/2010 12:25 PM, Huang Ying wrote:
> >
> >
> >> On 04/27/2010 10:04 AM, Huang Ying wrote:
> >>      
> >>> +static void kvm_send_hwpoison_signal(struct kvm *kvm, gfn_t gfn)
> >>> +{
> >>> +	char buf[1];
> >>> +	void __user *hva;
> >>> +	int r;
> >>> +
> >>> +	/* Touch the page, so send SIGBUS */
> >>> +	hva = (void __user *)gfn_to_hva(kvm, gfn);
> >>> +	r = copy_from_user(buf, hva, 1);
> >>>
> >>>        
> >> No error check?  What will a copy_from_user() of poisoned page expected
> >> to return?
> >>
> >> Best to return -EFAULT on failure for consistency.
> >>      
> > Just want to use the side effect of copy_from_user, SIGBUS will be sent
> > to current process because the page touched is marked as poisoned. That
> > is, failure is expected, so the return value is not checked.
> >    
> 
> What if the failure doesn't happen?  Say, someone mmap()ed over the page.

Sorry, I don't quite follow. Do you mean the hva is re-mmap()ed? We only
read the hva, not write it, so I think it should be OK here.

> btw, better to use (void)copy_from_user(...) instead to avoid the 
> initialized but not used warning the compiler may generate.

OK. Will do that.

Best Regards,
Huang Ying



^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/2] KVM, Fix QEMU-KVM is killed by guest SRAO MCE
  2010-04-28  2:56       ` Huang Ying
@ 2010-04-28  9:47         ` Avi Kivity
  2010-04-29  1:31           ` Huang Ying
  0 siblings, 1 reply; 7+ messages in thread
From: Avi Kivity @ 2010-04-28  9:47 UTC (permalink / raw)
  To: Huang Ying
  Cc: linux-kernel@vger.kernel.org, Andi Kleen, Andrew Morton,
	masbock@linux.vnet.ibm.com, kvm@vger.kernel.org

On 04/28/2010 05:56 AM, Huang Ying wrote:
>    
>>>
>>> Just want to use the side effect of copy_from_user, SIGBUS will be sent
>>> to current process because the page touched is marked as poisoned. That
>>> is, failure is expected, so the return value is not checked.
>>>
>>>        
>> What if the failure doesn't happen?  Say, someone mmap()ed over the page.
>>      
> Sorry, not get your idea clearly. hva is re-mmap()ed? We just read the
> hva, not write, so I think it should be OK here.
>
>    

We don't generate a signal in this case.  Does the code continue to work
correctly?  (Not sure what "correctly" means in this case... it should
probably just continue.)

There's also the possibility of -EFAULT.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/2] KVM, Fix QEMU-KVM is killed by guest SRAO MCE
  2010-04-28  9:47         ` Avi Kivity
@ 2010-04-29  1:31           ` Huang Ying
  0 siblings, 0 replies; 7+ messages in thread
From: Huang Ying @ 2010-04-29  1:31 UTC (permalink / raw)
  To: Avi Kivity
  Cc: linux-kernel@vger.kernel.org, Andi Kleen, Andrew Morton,
	masbock@linux.vnet.ibm.com, kvm@vger.kernel.org

On Wed, 2010-04-28 at 17:47 +0800, Avi Kivity wrote:
> On 04/28/2010 05:56 AM, Huang Ying wrote:
> >    
> >>>
> >>> Just want to use the side effect of copy_from_user, SIGBUS will be sent
> >>> to current process because the page touched is marked as poisoned. That
> >>> is, failure is expected, so the return value is not checked.
> >>>
> >>>        
> >> What if the failure doesn't happen?  Say, someone mmap()ed over the page.
> >>      
> > Sorry, not get your idea clearly. hva is re-mmap()ed? We just read the
> > hva, not write, so I think it should be OK here.
> >
> >    
> 
> We don't generate a signal in this case.  Does the code continue to work 
> correctly (not sure what correctly is in this case... should probably 
> just continue).
> 
> There's also the possibility of -EFAULT.

I think a signal should be generated by copy_from_user, because the hva
is poisoned now. The signal will fail to be generated only if the hva is
re-mmap()ed to some other physical page, but this should be impossible
unless we have memory hotadd/hotremove in KVM.

If the signal is not generated, or is lost or overwritten, the guest will
continue. If the hva is still poisoned, the page fault will be triggered
again; if the hva is no longer poisoned, there will be no further page
fault.

Best Regards,
Huang Ying




^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2010-04-29  1:31 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-04-27  7:04 [PATCH 2/2] KVM, Fix QEMU-KVM is killed by guest SRAO MCE Huang Ying
2010-04-27  7:47 ` Avi Kivity
2010-04-27  9:25   ` Huang Ying
2010-04-27  9:30     ` Avi Kivity
2010-04-28  2:56       ` Huang Ying
2010-04-28  9:47         ` Avi Kivity
2010-04-29  1:31           ` Huang Ying

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox