Re: [PATCH -v2] QEMU-KVM: MCE: Relay UCR MCE to guest

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Marcelo Tosatti <mtosatti@redhat.com>
To: Huang Ying <ying.huang@intel.com>
Cc: Avi Kivity <avi@redhat.com>, Andi Kleen <andi@firstfloor.org>,
	Anthony Liguori <aliguori@us.ibm.com>,
	"kvm@vger.kernel.org" <kvm@vger.kernel.org>
Subject: Re: [PATCH -v2] QEMU-KVM: MCE: Relay UCR MCE to guest
Date: Wed, 16 Sep 2009 14:59:32 -0300	[thread overview]
Message-ID: <20090916175931.GA7997@amt.cnet> (raw)
In-Reply-To: <1252463282.5212.44.camel@yhuang-dev.sh.intel.com>

On Wed, Sep 09, 2009 at 10:28:02AM +0800, Huang Ying wrote:
> UCR (uncorrected recovery) MCE is supported in recent Intel CPUs,
> where some hardware error such as some memory error can be reported
> without PCC (processor context corrupted). To recover from such MCE,
> the corresponding memory will be unmapped, and all processes accessing
> the memory will be killed via SIGBUS.
> 
> For KVM, if QEMU/KVM is killed, all guest processes will be killed
> too. So we relay SIGBUS from host OS to guest system via a UCR MCE
> injection. Then guest OS can isolate corresponding memory and kill
> necessary guest processes only. SIGBUS sent to main thread (not VCPU
> threads) will be broadcast to all VCPU threads as UCR MCE.
> 
> v2:
> 
> - Use qemu_ram_addr_from_host instead of a self-made one to convert from
>   host address to guest RAM address. Thanks Anthony Liguori.
> 
> Signed-off-by: Huang Ying <ying.huang@intel.com>
> 
> ---
>  cpu-common.h      |    1 
>  exec.c            |   20 +++++--
>  qemu-kvm.c        |  154 ++++++++++++++++++++++++++++++++++++++++++++++++++----
>  target-i386/cpu.h |   20 ++++++-
>  4 files changed, 178 insertions(+), 17 deletions(-)
> 
> --- a/qemu-kvm.c
> +++ b/qemu-kvm.c
> @@ -27,10 +27,23 @@
>  #include <sys/mman.h>
>  #include <sys/ioctl.h>
>  #include <signal.h>
> +#include <sys/signalfd.h>
> +#include <sys/prctl.h>
>  
>  #define false 0
>  #define true 1
>  
> +#ifndef PR_MCE_KILL
> +#define PR_MCE_KILL 33
> +#endif
> +
> +#ifndef BUS_MCEERR_AR
> +#define BUS_MCEERR_AR 4
> +#endif
> +#ifndef BUS_MCEERR_AO
> +#define BUS_MCEERR_AO 5
> +#endif
> +
>  #define EXPECTED_KVM_API_VERSION 12
>  
>  #if EXPECTED_KVM_API_VERSION != KVM_API_VERSION
> @@ -1507,6 +1520,37 @@ static void sig_ipi_handler(int n)
>  {
>  }
>  
> +static void sigbus_handler(int n, struct signalfd_siginfo *siginfo, void *ctx)
> +{
> +    if (siginfo->ssi_code == BUS_MCEERR_AO) {
> +        uint64_t status;
> +        unsigned long paddr;
> +        CPUState *cenv;
> +
> +        /* Hope we are lucky for AO MCE */
> +        if (do_qemu_ram_addr_from_host((void *)siginfo->ssi_addr, &paddr)) {
> +            fprintf(stderr, "Hardware memory error for memory used by "
> +                    "QEMU itself instead of guest system!: %llx\n",
> +                    (unsigned long long)siginfo->ssi_addr);
> +            return;

qemu-kvm should die here?

> +        }
> +        status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN
> +            | MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S
> +            | 0xc0;
> +        kvm_inject_x86_mce(first_cpu, 9, status,
> +                           MCG_STATUS_MCIP | MCG_STATUS_RIPV, paddr,
> +                           (MCM_ADDR_PHYS << 6) | 0xc);
> +        for (cenv = first_cpu->next_cpu; cenv != NULL; cenv = cenv->next_cpu)
> +            kvm_inject_x86_mce(cenv, 1, MCI_STATUS_VAL | MCI_STATUS_UC,
> +                               MCG_STATUS_MCIP | MCG_STATUS_RIPV, 0, 0);
> +        return;

Should abort if kvm_inject_x86_mce fails?

> +    } else if (siginfo->ssi_code == BUS_MCEERR_AR)
> +        fprintf(stderr, "Hardware memory error!\n");
> +    else
> +        fprintf(stderr, "Internal error in QEMU!\n");

Can you re-raise SIGBUS so that we get a coredump on non-MCE SIGBUS as
usual?

> +    exit(1);
> +}
> +
>  static void on_vcpu(CPUState *env, void (*func)(void *data), void *data)
>  {
>      struct qemu_work_item wi;
> @@ -1649,29 +1693,102 @@ static void flush_queued_work(CPUState *
>      pthread_cond_broadcast(&qemu_work_cond);
>  }
>  
> +static void kvm_on_sigbus(CPUState *env, siginfo_t *siginfo)
> +{
> +#if defined(KVM_CAP_MCE) && defined(TARGET_I386)
> +    struct kvm_x86_mce mce = {
> +            .bank = 9,
> +    };
> +    unsigned long paddr;
> +    int r;
> +
> +    if (env->mcg_cap && siginfo->si_addr
> +        && (siginfo->si_code == BUS_MCEERR_AR
> +            || siginfo->si_code == BUS_MCEERR_AO)) {
> +        if (siginfo->si_code == BUS_MCEERR_AR) {
> +            /* Fake an Intel architectural Data Load SRAR UCR */
> +            mce.status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN
> +                | MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S
> +                | MCI_STATUS_AR | 0x134;
> +            mce.misc = (MCM_ADDR_PHYS << 6) | 0xc;
> +            mce.mcg_status = MCG_STATUS_MCIP | MCG_STATUS_EIPV;
> +        } else {
> +            /* Fake an Intel architectural Memory scrubbing UCR */
> +            mce.status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN
> +                | MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S
> +                | 0xc0;
> +            mce.misc = (MCM_ADDR_PHYS << 6) | 0xc;
> +            mce.mcg_status = MCG_STATUS_MCIP | MCG_STATUS_RIPV;
> +        }
> +        if (do_qemu_ram_addr_from_host((void *)siginfo->si_addr, &paddr)) {
> +            fprintf(stderr, "Hardware memory error for memory used by "
> +                    "QEMU itself instaed of guest system!\n");
> +            /* Hope we are lucky for AO MCE */
> +            if (siginfo->si_code == BUS_MCEERR_AO)
> +                return;

Should die?

> +            else
> +                exit(1);
> +        }
> +        mce.addr = paddr;
> +        r = kvm_set_mce(env->kvm_cpu_state.vcpu_ctx, &mce);
> +        if (r < 0) {
> +            fprintf(stderr, "kvm_set_mce: %s\n", strerror(errno));
> +            exit(1);
> +        }
> +    } else
> +#endif
> +    {
> +        if (siginfo->si_code == BUS_MCEERR_AO)
> +            return;
> +        if (siginfo->si_code == BUS_MCEERR_AR)
> +            fprintf(stderr, "Hardware memory error!\n");
> +        else
> +            fprintf(stderr, "Internal error in QEMU!\n");
> +        exit(1);
> +    }
> +}

next prev parent reply	other threads:[~2009-09-16 18:00 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-09-09  2:28 [PATCH -v2] QEMU-KVM: MCE: Relay UCR MCE to guest Huang Ying
2009-09-09 12:06 ` Avi Kivity
2009-09-09 12:16   ` Avi Kivity
2009-09-09 12:18     ` Avi Kivity
2009-09-10  2:40   ` Huang Ying
2009-09-10  9:35     ` Andi Kleen
2009-09-14  2:55       ` Huang Ying
2009-09-14  5:10         ` Avi Kivity
2009-09-16  1:09           ` Huang Ying
2009-09-16  8:10             ` Avi Kivity
2009-09-14  5:10       ` Avi Kivity
2009-09-16 17:59 ` Marcelo Tosatti [this message]
2009-09-17  1:13   ` Huang Ying
2009-09-17 21:36     ` Marcelo Tosatti
2009-09-18  3:01       ` Huang Ying

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20090916175931.GA7997@amt.cnet \
    --to=mtosatti@redhat.com \
    --cc=aliguori@us.ibm.com \
    --cc=andi@firstfloor.org \
    --cc=avi@redhat.com \
    --cc=kvm@vger.kernel.org \
    --cc=ying.huang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.