kvm.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Marcelo Tosatti <mtosatti@redhat.com>
To: Huang Ying <ying.huang@intel.com>
Cc: Avi Kivity <avi@redhat.com>, Andi Kleen <andi@firstfloor.org>,
	Anthony Liguori <aliguori@us.ibm.com>,
	"kvm@vger.kernel.org" <kvm@vger.kernel.org>
Subject: Re: [PATCH -v2] QEMU-KVM: MCE: Relay UCR MCE to guest
Date: Wed, 16 Sep 2009 14:59:32 -0300	[thread overview]
Message-ID: <20090916175931.GA7997@amt.cnet> (raw)
In-Reply-To: <1252463282.5212.44.camel@yhuang-dev.sh.intel.com>

On Wed, Sep 09, 2009 at 10:28:02AM +0800, Huang Ying wrote:
> UCR (uncorrected recovery) MCE is supported in recent Intel CPUs,
> where some hardware error such as some memory error can be reported
> without PCC (processor context corrupted). To recover from such MCE,
> the corresponding memory will be unmapped, and all processes accessing
> the memory will be killed via SIGBUS.
> 
> For KVM, if QEMU/KVM is killed, all guest processes will be killed
> too. So we relay SIGBUS from host OS to guest system via a UCR MCE
> injection. Then guest OS can isolate corresponding memory and kill
> necessary guest processes only. SIGBUS sent to main thread (not VCPU
> threads) will be broadcast to all VCPU threads as UCR MCE.
> 
> v2:
> 
> - Use qemu_ram_addr_from_host instead of a self-made one to convert from
>   host address to guest RAM address. Thanks Anthony Liguori.
> 
> Signed-off-by: Huang Ying <ying.huang@intel.com>
> 
> ---
>  cpu-common.h      |    1 
>  exec.c            |   20 +++++--
>  qemu-kvm.c        |  154 ++++++++++++++++++++++++++++++++++++++++++++++++++----
>  target-i386/cpu.h |   20 ++++++-
>  4 files changed, 178 insertions(+), 17 deletions(-)
> 
> --- a/qemu-kvm.c
> +++ b/qemu-kvm.c
> @@ -27,10 +27,23 @@
>  #include <sys/mman.h>
>  #include <sys/ioctl.h>
>  #include <signal.h>
> +#include <sys/signalfd.h>
> +#include <sys/prctl.h>
>  
>  #define false 0
>  #define true 1
>  
> +#ifndef PR_MCE_KILL
> +#define PR_MCE_KILL 33
> +#endif
> +
> +#ifndef BUS_MCEERR_AR
> +#define BUS_MCEERR_AR 4
> +#endif
> +#ifndef BUS_MCEERR_AO
> +#define BUS_MCEERR_AO 5
> +#endif
> +
>  #define EXPECTED_KVM_API_VERSION 12
>  
>  #if EXPECTED_KVM_API_VERSION != KVM_API_VERSION
> @@ -1507,6 +1520,37 @@ static void sig_ipi_handler(int n)
>  {
>  }
>  
> +static void sigbus_handler(int n, struct signalfd_siginfo *siginfo, void *ctx)
> +{
> +    if (siginfo->ssi_code == BUS_MCEERR_AO) {
> +        uint64_t status;
> +        unsigned long paddr;
> +        CPUState *cenv;
> +
> +        /* Hope we are lucky for AO MCE */
> +        if (do_qemu_ram_addr_from_host((void *)siginfo->ssi_addr, &paddr)) {
> +            fprintf(stderr, "Hardware memory error for memory used by "
> +                    "QEMU itself instead of guest system!: %llx\n",
> +                    (unsigned long long)siginfo->ssi_addr);
> +            return;

Shouldn't qemu-kvm die here?

> +        }
> +        status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN
> +            | MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S
> +            | 0xc0;
> +        kvm_inject_x86_mce(first_cpu, 9, status,
> +                           MCG_STATUS_MCIP | MCG_STATUS_RIPV, paddr,
> +                           (MCM_ADDR_PHYS << 6) | 0xc);
> +        for (cenv = first_cpu->next_cpu; cenv != NULL; cenv = cenv->next_cpu)
> +            kvm_inject_x86_mce(cenv, 1, MCI_STATUS_VAL | MCI_STATUS_UC,
> +                               MCG_STATUS_MCIP | MCG_STATUS_RIPV, 0, 0);
> +        return;

Should abort if kvm_inject_x86_mce fails?

> +    } else if (siginfo->ssi_code == BUS_MCEERR_AR)
> +        fprintf(stderr, "Hardware memory error!\n");
> +    else
> +        fprintf(stderr, "Internal error in QEMU!\n");

Can you re-raise SIGBUS so that we get a coredump on non-MCE SIGBUS as
usual?

> +    exit(1);
> +}
> +
>  static void on_vcpu(CPUState *env, void (*func)(void *data), void *data)
>  {
>      struct qemu_work_item wi;
> @@ -1649,29 +1693,102 @@ static void flush_queued_work(CPUState *
>      pthread_cond_broadcast(&qemu_work_cond);
>  }
>  
> +static void kvm_on_sigbus(CPUState *env, siginfo_t *siginfo)
> +{
> +#if defined(KVM_CAP_MCE) && defined(TARGET_I386)
> +    struct kvm_x86_mce mce = {
> +            .bank = 9,
> +    };
> +    unsigned long paddr;
> +    int r;
> +
> +    if (env->mcg_cap && siginfo->si_addr
> +        && (siginfo->si_code == BUS_MCEERR_AR
> +            || siginfo->si_code == BUS_MCEERR_AO)) {
> +        if (siginfo->si_code == BUS_MCEERR_AR) {
> +            /* Fake an Intel architectural Data Load SRAR UCR */
> +            mce.status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN
> +                | MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S
> +                | MCI_STATUS_AR | 0x134;
> +            mce.misc = (MCM_ADDR_PHYS << 6) | 0xc;
> +            mce.mcg_status = MCG_STATUS_MCIP | MCG_STATUS_EIPV;
> +        } else {
> +            /* Fake an Intel architectural Memory scrubbing UCR */
> +            mce.status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN
> +                | MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S
> +                | 0xc0;
> +            mce.misc = (MCM_ADDR_PHYS << 6) | 0xc;
> +            mce.mcg_status = MCG_STATUS_MCIP | MCG_STATUS_RIPV;
> +        }
> +        if (do_qemu_ram_addr_from_host((void *)siginfo->si_addr, &paddr)) {
> +            fprintf(stderr, "Hardware memory error for memory used by "
> +                    "QEMU itself instaed of guest system!\n");
> +            /* Hope we are lucky for AO MCE */
> +            if (siginfo->si_code == BUS_MCEERR_AO)
> +                return;

Should qemu-kvm die here instead of returning?

> +            else
> +                exit(1);
> +        }
> +        mce.addr = paddr;
> +        r = kvm_set_mce(env->kvm_cpu_state.vcpu_ctx, &mce);
> +        if (r < 0) {
> +            fprintf(stderr, "kvm_set_mce: %s\n", strerror(errno));
> +            exit(1);
> +        }
> +    } else
> +#endif
> +    {
> +        if (siginfo->si_code == BUS_MCEERR_AO)
> +            return;
> +        if (siginfo->si_code == BUS_MCEERR_AR)
> +            fprintf(stderr, "Hardware memory error!\n");
> +        else
> +            fprintf(stderr, "Internal error in QEMU!\n");
> +        exit(1);
> +    }
> +}


  parent reply	other threads:[~2009-09-16 18:00 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-09-09  2:28 [PATCH -v2] QEMU-KVM: MCE: Relay UCR MCE to guest Huang Ying
2009-09-09 12:06 ` Avi Kivity
2009-09-09 12:16   ` Avi Kivity
2009-09-09 12:18     ` Avi Kivity
2009-09-10  2:40   ` Huang Ying
2009-09-10  9:35     ` Andi Kleen
2009-09-14  2:55       ` Huang Ying
2009-09-14  5:10         ` Avi Kivity
2009-09-16  1:09           ` Huang Ying
2009-09-16  8:10             ` Avi Kivity
2009-09-14  5:10       ` Avi Kivity
2009-09-16 17:59 ` Marcelo Tosatti [this message]
2009-09-17  1:13   ` Huang Ying
2009-09-17 21:36     ` Marcelo Tosatti
2009-09-18  3:01       ` Huang Ying

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20090916175931.GA7997@amt.cnet \
    --to=mtosatti@redhat.com \
    --cc=aliguori@us.ibm.com \
    --cc=andi@firstfloor.org \
    --cc=avi@redhat.com \
    --cc=kvm@vger.kernel.org \
    --cc=ying.huang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).