All of lore.kernel.org
 help / color / mirror / Atom feed
From: Marcelo Tosatti <mtosatti@redhat.com>
To: Huang Ying <ying.huang@intel.com>
Cc: Avi Kivity <avi@redhat.com>, Andi Kleen <andi@firstfloor.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	masbock@linux.vnet.ibm.com, "Wu,
	Fengguang" <fengguang.wu@intel.com>,
	linux-kernel@vger.kernel.org, kvm@vger.kernel.org
Subject: Re: [PATCH -v2] KVM, Fix QEMU-KVM is killed by guest SRAO MCE
Date: Thu, 13 May 2010 18:43:07 -0300	[thread overview]
Message-ID: <20100513214307.GA25003@amt.cnet> (raw)
In-Reply-To: <1273646643.3564.51.camel@yhuang-dev.sh.intel.com>

On Wed, May 12, 2010 at 02:44:03PM +0800, Huang Ying wrote:
> In common cases, a guest SRAO MCE will cause the corresponding poisoned
> page to be un-mapped and SIGBUS to be sent to QEMU-KVM; QEMU-KVM will
> then relay the MCE to the guest OS.
> 
> But it is reported that if the poisoned page is accessed in the guest
> after it has been un-mapped and before the MCE is relayed to the guest
> OS, QEMU-KVM will be killed.
> 
> The reason is as follows. Because the poisoned page has been un-mapped,
> the guest access will cause a guest exit and kvm_mmu_page_fault will be
> called. kvm_mmu_page_fault cannot get the poisoned page for the fault
> address, so kernel and user space MMIO processing are tried in turn. In
> user MMIO processing, the poisoned page is accessed again, and QEMU-KVM
> is then killed by force_sig_info.
> 
> To fix the bug, kvm_mmu_page_fault sends a HWPOISON signal to QEMU-KVM
> and does not try kernel and user space MMIO processing for a poisoned
> page.
> 
> 
> Changelog:
> 
> v2:
> 
> - Use a page table walker to determine whether the virtual address is
>   poisoned, to avoid changing the user space interface (via changing
>   get_user_pages).
> 
> - Wrap bad page processing into kvm_handle_bad_page to avoid code
>   duplication.
> 
> Reported-by: Max Asbock <masbock@linux.vnet.ibm.com>
> Signed-off-by: Huang Ying <ying.huang@intel.com>
> ---
>  arch/x86/kvm/mmu.c         |   34 ++++++++++++++++++++++++++--------
>  arch/x86/kvm/paging_tmpl.h |    7 ++-----
>  include/linux/kvm_host.h   |    1 +
>  include/linux/mm.h         |    8 ++++++++
>  mm/memory-failure.c        |   28 ++++++++++++++++++++++++++++
>  virt/kvm/kvm_main.c        |   30 ++++++++++++++++++++++++++++--
>  6 files changed, 93 insertions(+), 15 deletions(-)
> 
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -32,6 +32,7 @@
>  #include <linux/compiler.h>
>  #include <linux/srcu.h>
>  #include <linux/slab.h>
> +#include <linux/uaccess.h>
>  
>  #include <asm/page.h>
>  #include <asm/cmpxchg.h>
> @@ -1975,6 +1976,27 @@ static int __direct_map(struct kvm_vcpu
>  	return pt_write;
>  }
>  
> +static void kvm_send_hwpoison_signal(struct kvm *kvm, gfn_t gfn)
> +{
> +	char buf[1];
> +	void __user *hva;
> +	int r;
> +
> +	/* Touch the page, so send SIGBUS */
> +	hva = (void __user *)gfn_to_hva(kvm, gfn);
> +	r = copy_from_user(buf, hva, 1);
> +}

A SIGBUS signal has already been raised by memory poisoning, so I don't
see why this is needed.

To avoid the MMIO processing in userspace before the MCE is sent to the
guest you can just return -EAGAIN from the page fault handlers back to
kvm_mmu_page_fault.

> +int is_hwpoison_pfn(pfn_t pfn)
> +{
> +	return pfn == hwpoison_pfn;
> +}
> +EXPORT_SYMBOL_GPL(is_hwpoison_pfn);
> +
>  static inline unsigned long bad_hva(void)
>  {
>  	return PAGE_OFFSET;
> @@ -939,6 +948,11 @@ static pfn_t hva_to_pfn(struct kvm *kvm,
>  	if (unlikely(npages != 1)) {
>  		struct vm_area_struct *vma;
>  
> +		if (is_hwpoison_address(addr)) {
> +			get_page(hwpoison_page);
> +			return page_to_pfn(hwpoison_page);
> +		}
> +
>  		down_read(&current->mm->mmap_sem);
>  		vma = find_vma(current->mm, addr);
>  
> @@ -2198,6 +2212,15 @@ int kvm_init(void *opaque, unsigned int
>  
>  	bad_pfn = page_to_pfn(bad_page);
>  
> +	hwpoison_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
> +
> +	if (hwpoison_page == NULL) {
> +		r = -ENOMEM;
> +		goto out_free_0;
> +	}
> +
> +	hwpoison_pfn = page_to_pfn(hwpoison_page);
> +
>  	if (!zalloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) {
>  		r = -ENOMEM;
>  		goto out_free_0;
> @@ -2269,6 +2292,8 @@ out_free_1:
>  out_free_0a:
>  	free_cpumask_var(cpus_hardware_enabled);
>  out_free_0:
> +	if (hwpoison_page)
> +		__free_page(hwpoison_page);
>  	__free_page(bad_page);
>  out:
>  	kvm_arch_exit();
> @@ -2291,6 +2316,7 @@ void kvm_exit(void)
>  	kvm_arch_hardware_unsetup();
>  	kvm_arch_exit();
>  	free_cpumask_var(cpus_hardware_enabled);
> +	__free_page(hwpoison_page);
>  	__free_page(bad_page);
>  }
>  EXPORT_SYMBOL_GPL(kvm_exit);
> --- a/mm/memory-failure.c
> +++ b/mm/memory-failure.c
> @@ -45,6 +45,7 @@
>  #include <linux/page-isolation.h>
>  #include <linux/suspend.h>
>  #include <linux/slab.h>
> +#include <linux/swapops.h>
>  #include "internal.h"
>  
>  int sysctl_memory_failure_early_kill __read_mostly = 0;
> @@ -1296,3 +1297,30 @@ done:
>  	/* keep elevated page count for bad page */
>  	return ret;
>  }
> +
> +int is_hwpoison_address(unsigned long addr)
> +{
> +	pgd_t *pgdp;
> +	pud_t *pudp;
> +	pmd_t *pmdp;
> +	pte_t pte, *ptep;
> +	swp_entry_t entry;
> +
> +	pgdp = pgd_offset(current->mm, addr);
> +	if (!pgd_present(*pgdp))
> +		return 0;
> +	pudp = pud_offset(pgdp, addr);
> +	if (!pud_present(*pudp))
> +		return 0;
> +	pmdp = pmd_offset(pudp, addr);
> +	if (!pmd_present(*pmdp))
> +		return 0;

Need to bail out here if the pmd is huge: a huge pmd maps the page
directly, so there is no pte table for pte_offset_map() to walk.

> +	ptep = pte_offset_map(pmdp, addr);
> +	pte = *ptep;
> +	pte_unmap(ptep);
> +	if (!is_swap_pte(pte))
> +		return 0;
> +	entry = pte_to_swp_entry(pte);
> +	return is_hwpoison_entry(entry);
> +}
> +EXPORT_SYMBOL_GPL(is_hwpoison_address);

  reply	other threads:[~2010-05-13 21:47 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-05-12  6:44 [PATCH -v2] KVM, Fix QEMU-KVM is killed by guest SRAO MCE Huang Ying
2010-05-13 21:43 ` Marcelo Tosatti [this message]
2010-05-14  0:58   ` Huang Ying

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20100513214307.GA25003@amt.cnet \
    --to=mtosatti@redhat.com \
    --cc=akpm@linux-foundation.org \
    --cc=andi@firstfloor.org \
    --cc=avi@redhat.com \
    --cc=fengguang.wu@intel.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=masbock@linux.vnet.ibm.com \
    --cc=ying.huang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.