* [Qemu-devel] [PATCH 01/10] Set cpuid definition to 0 before initializing it
2010-10-19 10:40 [Qemu-devel] [PATCH 00/10] [PULL] qemu-kvm.git uq/master queue Marcelo Tosatti
@ 2010-10-19 10:40 ` Marcelo Tosatti
2010-10-19 10:40 ` [Qemu-devel] [PATCH 02/10] Add svm cpuid features Marcelo Tosatti
` (9 subsequent siblings)
10 siblings, 0 replies; 15+ messages in thread
From: Marcelo Tosatti @ 2010-10-19 10:40 UTC (permalink / raw)
To: Anthony Liguori; +Cc: Joerg Roedel, qemu-devel, kvm, Avi Kivity
From: Joerg Roedel <joerg.roedel@amd.com>
This patch cleans the (stack-allocated) cpuid definition to
0 before actually initializing it.
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
target-i386/cpuid.c | 2 ++
1 files changed, 2 insertions(+), 0 deletions(-)
diff --git a/target-i386/cpuid.c b/target-i386/cpuid.c
index 04ba8d5..3fcf78f 100644
--- a/target-i386/cpuid.c
+++ b/target-i386/cpuid.c
@@ -788,6 +788,8 @@ int cpu_x86_register (CPUX86State *env, const char *cpu_model)
{
x86_def_t def1, *def = &def1;
+ memset(def, 0, sizeof(*def));
+
if (cpu_x86_find_by_name(def, cpu_model) < 0)
return -1;
if (def->vendor1) {
--
1.7.2.1
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [Qemu-devel] [PATCH 02/10] Add svm cpuid features
2010-10-19 10:40 [Qemu-devel] [PATCH 00/10] [PULL] qemu-kvm.git uq/master queue Marcelo Tosatti
2010-10-19 10:40 ` [Qemu-devel] [PATCH 01/10] Set cpuid definition to 0 before initializing it Marcelo Tosatti
@ 2010-10-19 10:40 ` Marcelo Tosatti
2010-10-19 10:40 ` [Qemu-devel] [PATCH 03/10] signalfd compatibility Marcelo Tosatti
` (8 subsequent siblings)
10 siblings, 0 replies; 15+ messages in thread
From: Marcelo Tosatti @ 2010-10-19 10:40 UTC (permalink / raw)
To: Anthony Liguori; +Cc: Joerg Roedel, qemu-devel, kvm, Avi Kivity
From: Joerg Roedel <joerg.roedel@amd.com>
This patch adds the svm cpuid feature flags to the qemu
intialization path. It also adds the svm features available
on phenom to its cpu-definition and extends the host cpu
type to support all svm features KVM can provide.
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
target-i386/cpu.h | 12 ++++++++
target-i386/cpuid.c | 77 +++++++++++++++++++++++++++++++++++++++-----------
target-i386/kvm.c | 3 ++
3 files changed, 75 insertions(+), 17 deletions(-)
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 1144d4e..77eeab1 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -405,6 +405,17 @@
#define CPUID_EXT3_IBS (1 << 10)
#define CPUID_EXT3_SKINIT (1 << 12)
+#define CPUID_SVM_NPT (1 << 0)
+#define CPUID_SVM_LBRV (1 << 1)
+#define CPUID_SVM_SVMLOCK (1 << 2)
+#define CPUID_SVM_NRIPSAVE (1 << 3)
+#define CPUID_SVM_TSCSCALE (1 << 4)
+#define CPUID_SVM_VMCBCLEAN (1 << 5)
+#define CPUID_SVM_FLUSHASID (1 << 6)
+#define CPUID_SVM_DECODEASSIST (1 << 7)
+#define CPUID_SVM_PAUSEFILTER (1 << 10)
+#define CPUID_SVM_PFTHRESHOLD (1 << 12)
+
#define CPUID_VENDOR_INTEL_1 0x756e6547 /* "Genu" */
#define CPUID_VENDOR_INTEL_2 0x49656e69 /* "ineI" */
#define CPUID_VENDOR_INTEL_3 0x6c65746e /* "ntel" */
@@ -702,6 +713,7 @@ typedef struct CPUX86State {
uint8_t has_error_code;
uint32_t sipi_vector;
uint32_t cpuid_kvm_features;
+ uint32_t cpuid_svm_features;
/* in order to simplify APIC support, we leave this pointer to the
user */
diff --git a/target-i386/cpuid.c b/target-i386/cpuid.c
index 3fcf78f..0e0bf60 100644
--- a/target-i386/cpuid.c
+++ b/target-i386/cpuid.c
@@ -79,6 +79,17 @@ static const char *kvm_feature_name[] = {
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
};
+static const char *svm_feature_name[] = {
+ "npt", "lbrv", "svm_lock", "nrip_save",
+ "tsc_scale", "vmcb_clean", "flushbyasid", "decodeassists",
+ NULL, NULL, "pause_filter", NULL,
+ "pfthreshold", NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+};
+
/* collects per-function cpuid data
*/
typedef struct model_features_t {
@@ -192,13 +203,15 @@ static void add_flagname_to_bitmaps(const char *flagname, uint32_t *features,
uint32_t *ext_features,
uint32_t *ext2_features,
uint32_t *ext3_features,
- uint32_t *kvm_features)
+ uint32_t *kvm_features,
+ uint32_t *svm_features)
{
if (!lookup_feature(features, flagname, NULL, feature_name) &&
!lookup_feature(ext_features, flagname, NULL, ext_feature_name) &&
!lookup_feature(ext2_features, flagname, NULL, ext2_feature_name) &&
!lookup_feature(ext3_features, flagname, NULL, ext3_feature_name) &&
- !lookup_feature(kvm_features, flagname, NULL, kvm_feature_name))
+ !lookup_feature(kvm_features, flagname, NULL, kvm_feature_name) &&
+ !lookup_feature(svm_features, flagname, NULL, svm_feature_name))
fprintf(stderr, "CPU feature %s not found\n", flagname);
}
@@ -210,7 +223,8 @@ typedef struct x86_def_t {
int family;
int model;
int stepping;
- uint32_t features, ext_features, ext2_features, ext3_features, kvm_features;
+ uint32_t features, ext_features, ext2_features, ext3_features;
+ uint32_t kvm_features, svm_features;
uint32_t xlevel;
char model_id[48];
int vendor_override;
@@ -253,6 +267,7 @@ typedef struct x86_def_t {
CPUID_EXT2_PDPE1GB */
#define TCG_EXT3_FEATURES (CPUID_EXT3_LAHF_LM | CPUID_EXT3_SVM | \
CPUID_EXT3_CR8LEG | CPUID_EXT3_ABM | CPUID_EXT3_SSE4A)
+#define TCG_SVM_FEATURES 0
/* maintains list of cpu model definitions
*/
@@ -305,6 +320,7 @@ static x86_def_t builtin_x86_defs[] = {
CPUID_EXT3_OSVW, CPUID_EXT3_IBS */
.ext3_features = CPUID_EXT3_LAHF_LM | CPUID_EXT3_SVM |
CPUID_EXT3_ABM | CPUID_EXT3_SSE4A,
+ .svm_features = CPUID_SVM_NPT | CPUID_SVM_LBRV,
.xlevel = 0x8000001A,
.model_id = "AMD Phenom(tm) 9550 Quad-Core Processor"
},
@@ -505,6 +521,15 @@ static int cpu_x86_fill_host(x86_def_t *x86_cpu_def)
cpu_x86_fill_model_id(x86_cpu_def->model_id);
x86_cpu_def->vendor_override = 0;
+
+ /*
+ * Every SVM feature requires emulation support in KVM - so we can't just
+ * read the host features here. KVM might even support SVM features not
+ * available on the host hardware. Just set all bits and mask out the
+ * unsupported ones later.
+ */
+ x86_cpu_def->svm_features = -1;
+
return 0;
}
@@ -560,8 +585,14 @@ static int cpu_x86_find_by_name(x86_def_t *x86_cpu_def, const char *cpu_model)
char *s = strdup(cpu_model);
char *featurestr, *name = strtok(s, ",");
- uint32_t plus_features = 0, plus_ext_features = 0, plus_ext2_features = 0, plus_ext3_features = 0, plus_kvm_features = 0;
- uint32_t minus_features = 0, minus_ext_features = 0, minus_ext2_features = 0, minus_ext3_features = 0, minus_kvm_features = 0;
+ /* Features to be added*/
+ uint32_t plus_features = 0, plus_ext_features = 0;
+ uint32_t plus_ext2_features = 0, plus_ext3_features = 0;
+ uint32_t plus_kvm_features = 0, plus_svm_features = 0;
+ /* Features to be removed */
+ uint32_t minus_features = 0, minus_ext_features = 0;
+ uint32_t minus_ext2_features = 0, minus_ext3_features = 0;
+ uint32_t minus_kvm_features = 0, minus_svm_features = 0;
uint32_t numvalue;
for (def = x86_defs; def; def = def->next)
@@ -579,16 +610,22 @@ static int cpu_x86_find_by_name(x86_def_t *x86_cpu_def, const char *cpu_model)
add_flagname_to_bitmaps("hypervisor", &plus_features,
&plus_ext_features, &plus_ext2_features, &plus_ext3_features,
- &plus_kvm_features);
+ &plus_kvm_features, &plus_svm_features);
featurestr = strtok(NULL, ",");
while (featurestr) {
char *val;
if (featurestr[0] == '+') {
- add_flagname_to_bitmaps(featurestr + 1, &plus_features, &plus_ext_features, &plus_ext2_features, &plus_ext3_features, &plus_kvm_features);
+ add_flagname_to_bitmaps(featurestr + 1, &plus_features,
+ &plus_ext_features, &plus_ext2_features,
+ &plus_ext3_features, &plus_kvm_features,
+ &plus_svm_features);
} else if (featurestr[0] == '-') {
- add_flagname_to_bitmaps(featurestr + 1, &minus_features, &minus_ext_features, &minus_ext2_features, &minus_ext3_features, &minus_kvm_features);
+ add_flagname_to_bitmaps(featurestr + 1, &minus_features,
+ &minus_ext_features, &minus_ext2_features,
+ &minus_ext3_features, &minus_kvm_features,
+ &minus_svm_features);
} else if ((val = strchr(featurestr, '='))) {
*val = 0; val++;
if (!strcmp(featurestr, "family")) {
@@ -670,11 +707,13 @@ static int cpu_x86_find_by_name(x86_def_t *x86_cpu_def, const char *cpu_model)
x86_cpu_def->ext2_features |= plus_ext2_features;
x86_cpu_def->ext3_features |= plus_ext3_features;
x86_cpu_def->kvm_features |= plus_kvm_features;
+ x86_cpu_def->svm_features |= plus_svm_features;
x86_cpu_def->features &= ~minus_features;
x86_cpu_def->ext_features &= ~minus_ext_features;
x86_cpu_def->ext2_features &= ~minus_ext2_features;
x86_cpu_def->ext3_features &= ~minus_ext3_features;
x86_cpu_def->kvm_features &= ~minus_kvm_features;
+ x86_cpu_def->svm_features &= ~minus_svm_features;
if (check_cpuid) {
if (check_features_against_host(x86_cpu_def) && enforce_cpuid)
goto error;
@@ -816,6 +855,7 @@ int cpu_x86_register (CPUX86State *env, const char *cpu_model)
env->cpuid_ext3_features = def->ext3_features;
env->cpuid_xlevel = def->xlevel;
env->cpuid_kvm_features = def->kvm_features;
+ env->cpuid_svm_features = def->svm_features;
if (!kvm_enabled()) {
env->cpuid_features &= TCG_FEATURES;
env->cpuid_ext_features &= TCG_EXT_FEATURES;
@@ -825,6 +865,7 @@ int cpu_x86_register (CPUX86State *env, const char *cpu_model)
#endif
);
env->cpuid_ext3_features &= TCG_EXT3_FEATURES;
+ env->cpuid_svm_features &= TCG_SVM_FEATURES;
}
{
const char *model_id = def->model_id;
@@ -1135,11 +1176,6 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
*ecx |= 1 << 1; /* CmpLegacy bit */
}
}
-
- if (kvm_enabled()) {
- /* Nested SVM not yet supported in upstream QEMU */
- *ecx &= ~CPUID_EXT3_SVM;
- }
break;
case 0x80000002:
case 0x80000003:
@@ -1184,10 +1220,17 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
}
break;
case 0x8000000A:
- *eax = 0x00000001; /* SVM Revision */
- *ebx = 0x00000010; /* nr of ASIDs */
- *ecx = 0;
- *edx = 0; /* optional features */
+ if (env->cpuid_ext3_features & CPUID_EXT3_SVM) {
+ *eax = 0x00000001; /* SVM Revision */
+ *ebx = 0x00000010; /* nr of ASIDs */
+ *ecx = 0;
+ *edx = env->cpuid_svm_features; /* optional features */
+ } else {
+ *eax = 0;
+ *ebx = 0;
+ *ecx = 0;
+ *edx = 0;
+ }
break;
default:
/* reserved values: zero */
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index a33d2fa..74e7b4f 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -192,6 +192,9 @@ int kvm_arch_init_vcpu(CPUState *env)
0, R_EDX);
env->cpuid_ext3_features &= kvm_arch_get_supported_cpuid(env, 0x80000001,
0, R_ECX);
+ env->cpuid_svm_features &= kvm_arch_get_supported_cpuid(env, 0x8000000A,
+ 0, R_EDX);
+
cpuid_i = 0;
--
1.7.2.1
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [Qemu-devel] [PATCH 03/10] signalfd compatibility
2010-10-19 10:40 [Qemu-devel] [PATCH 00/10] [PULL] qemu-kvm.git uq/master queue Marcelo Tosatti
2010-10-19 10:40 ` [Qemu-devel] [PATCH 01/10] Set cpuid definition to 0 before initializing it Marcelo Tosatti
2010-10-19 10:40 ` [Qemu-devel] [PATCH 02/10] Add svm cpuid features Marcelo Tosatti
@ 2010-10-19 10:40 ` Marcelo Tosatti
2010-10-19 10:40 ` [Qemu-devel] [PATCH 04/10] iothread: use signalfd Marcelo Tosatti
` (7 subsequent siblings)
10 siblings, 0 replies; 15+ messages in thread
From: Marcelo Tosatti @ 2010-10-19 10:40 UTC (permalink / raw)
To: Anthony Liguori; +Cc: Marcelo Tosatti, qemu-devel, kvm, Avi Kivity
Port qemu-kvm's signalfd compat code.
commit 5a7fdd0abd7cd24dac205317a4195446ab8748b5
Author: Anthony Liguori <aliguori@us.ibm.com>
Date: Wed May 7 11:55:47 2008 -0500
Use signalfd() in io-thread
This patch reworks the IO thread to use signalfd() instead of sigtimedwait()
This will eliminate the need to use SIGIO everywhere.
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
Makefile.objs | 1 +
compatfd.c | 117 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
compatfd.h | 43 +++++++++++++++++++++
configure | 18 +++++++++
4 files changed, 179 insertions(+), 0 deletions(-)
create mode 100644 compatfd.c
create mode 100644 compatfd.h
diff --git a/Makefile.objs b/Makefile.objs
index 816194a..d73002d 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -125,6 +125,7 @@ common-obj-y += $(addprefix ui/, $(ui-obj-y))
common-obj-y += iov.o acl.o
common-obj-$(CONFIG_THREAD) += qemu-thread.o
+common-obj-$(CONFIG_IOTHREAD) += compatfd.o
common-obj-y += notify.o event_notifier.o
common-obj-y += qemu-timer.o
diff --git a/compatfd.c b/compatfd.c
new file mode 100644
index 0000000..a7cebc4
--- /dev/null
+++ b/compatfd.c
@@ -0,0 +1,117 @@
+/*
+ * signalfd/eventfd compatibility
+ *
+ * Copyright IBM, Corp. 2008
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu-common.h"
+#include "compatfd.h"
+
+#include <sys/syscall.h>
+#include <pthread.h>
+
+struct sigfd_compat_info
+{
+ sigset_t mask;
+ int fd;
+};
+
+static void *sigwait_compat(void *opaque)
+{
+ struct sigfd_compat_info *info = opaque;
+ int err;
+ sigset_t all;
+
+ sigfillset(&all);
+ sigprocmask(SIG_BLOCK, &all, NULL);
+
+ do {
+ siginfo_t siginfo;
+
+ err = sigwaitinfo(&info->mask, &siginfo);
+ if (err == -1 && errno == EINTR) {
+ err = 0;
+ continue;
+ }
+
+ if (err > 0) {
+ char buffer[128];
+ size_t offset = 0;
+
+ memcpy(buffer, &err, sizeof(err));
+ while (offset < sizeof(buffer)) {
+ ssize_t len;
+
+ len = write(info->fd, buffer + offset,
+ sizeof(buffer) - offset);
+ if (len == -1 && errno == EINTR)
+ continue;
+
+ if (len <= 0) {
+ err = -1;
+ break;
+ }
+
+ offset += len;
+ }
+ }
+ } while (err >= 0);
+
+ return NULL;
+}
+
+static int qemu_signalfd_compat(const sigset_t *mask)
+{
+ pthread_attr_t attr;
+ pthread_t tid;
+ struct sigfd_compat_info *info;
+ int fds[2];
+
+ info = malloc(sizeof(*info));
+ if (info == NULL) {
+ errno = ENOMEM;
+ return -1;
+ }
+
+ if (pipe(fds) == -1) {
+ free(info);
+ return -1;
+ }
+
+ qemu_set_cloexec(fds[0]);
+ qemu_set_cloexec(fds[1]);
+
+ memcpy(&info->mask, mask, sizeof(*mask));
+ info->fd = fds[1];
+
+ pthread_attr_init(&attr);
+ pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
+
+ pthread_create(&tid, &attr, sigwait_compat, info);
+
+ pthread_attr_destroy(&attr);
+
+ return fds[0];
+}
+
+int qemu_signalfd(const sigset_t *mask)
+{
+#if defined(CONFIG_SIGNALFD)
+ int ret;
+
+ ret = syscall(SYS_signalfd, -1, mask, _NSIG / 8);
+ if (ret != -1) {
+ qemu_set_cloexec(ret);
+ return ret;
+ }
+#endif
+
+ return qemu_signalfd_compat(mask);
+}
diff --git a/compatfd.h b/compatfd.h
new file mode 100644
index 0000000..fc37915
--- /dev/null
+++ b/compatfd.h
@@ -0,0 +1,43 @@
+/*
+ * signalfd/eventfd compatibility
+ *
+ * Copyright IBM, Corp. 2008
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef QEMU_COMPATFD_H
+#define QEMU_COMPATFD_H
+
+#include <signal.h>
+
+struct qemu_signalfd_siginfo {
+ uint32_t ssi_signo; /* Signal number */
+ int32_t ssi_errno; /* Error number (unused) */
+ int32_t ssi_code; /* Signal code */
+ uint32_t ssi_pid; /* PID of sender */
+ uint32_t ssi_uid; /* Real UID of sender */
+ int32_t ssi_fd; /* File descriptor (SIGIO) */
+ uint32_t ssi_tid; /* Kernel timer ID (POSIX timers) */
+ uint32_t ssi_band; /* Band event (SIGIO) */
+ uint32_t ssi_overrun; /* POSIX timer overrun count */
+ uint32_t ssi_trapno; /* Trap number that caused signal */
+ int32_t ssi_status; /* Exit status or signal (SIGCHLD) */
+ int32_t ssi_int; /* Integer sent by sigqueue(2) */
+ uint64_t ssi_ptr; /* Pointer sent by sigqueue(2) */
+ uint64_t ssi_utime; /* User CPU time consumed (SIGCHLD) */
+ uint64_t ssi_stime; /* System CPU time consumed (SIGCHLD) */
+ uint64_t ssi_addr; /* Address that generated signal
+ (for hardware-generated signals) */
+ uint8_t pad[48]; /* Pad size to 128 bytes (allow for
+ additional fields in the future) */
+};
+
+int qemu_signalfd(const sigset_t *mask);
+
+#endif
diff --git a/configure b/configure
index a079a49..ae8188e 100755
--- a/configure
+++ b/configure
@@ -1957,6 +1957,21 @@ if compile_prog "" "" ; then
splice=yes
fi
+##########################################
+# signalfd probe
+signalfd="no"
+cat > $TMPC << EOF
+#define _GNU_SOURCE
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <signal.h>
+int main(void) { return syscall(SYS_signalfd, -1, NULL, _NSIG / 8); }
+EOF
+
+if compile_prog "" "" ; then
+ signalfd=yes
+fi
+
# check if eventfd is supported
eventfd=no
cat > $TMPC << EOF
@@ -2559,6 +2574,9 @@ fi
if test "$fdt" = "yes" ; then
echo "CONFIG_FDT=y" >> $config_host_mak
fi
+if test "$signalfd" = "yes" ; then
+ echo "CONFIG_SIGNALFD=y" >> $config_host_mak
+fi
if test "$need_offsetof" = "yes" ; then
echo "CONFIG_NEED_OFFSETOF=y" >> $config_host_mak
fi
--
1.7.2.1
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [Qemu-devel] [PATCH 04/10] iothread: use signalfd
2010-10-19 10:40 [Qemu-devel] [PATCH 00/10] [PULL] qemu-kvm.git uq/master queue Marcelo Tosatti
` (2 preceding siblings ...)
2010-10-19 10:40 ` [Qemu-devel] [PATCH 03/10] signalfd compatibility Marcelo Tosatti
@ 2010-10-19 10:40 ` Marcelo Tosatti
2010-10-19 10:40 ` [Qemu-devel] [PATCH 05/10] Expose thread_id in info cpus Marcelo Tosatti
` (6 subsequent siblings)
10 siblings, 0 replies; 15+ messages in thread
From: Marcelo Tosatti @ 2010-10-19 10:40 UTC (permalink / raw)
To: Anthony Liguori; +Cc: Marcelo Tosatti, qemu-devel, kvm, Avi Kivity
Block SIGALRM, SIGIO and consume them via signalfd.
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
cpus.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----
1 files changed, 69 insertions(+), 5 deletions(-)
diff --git a/cpus.c b/cpus.c
index b09f5e3..3875657 100644
--- a/cpus.c
+++ b/cpus.c
@@ -33,6 +33,7 @@
#include "exec-all.h"
#include "cpus.h"
+#include "compatfd.h"
#ifdef SIGRTMIN
#define SIG_IPI (SIGRTMIN+4)
@@ -329,14 +330,75 @@ static QemuCond qemu_work_cond;
static void tcg_init_ipi(void);
static void kvm_init_ipi(CPUState *env);
-static void unblock_io_signals(void);
+static sigset_t block_io_signals(void);
+
+/* If we have signalfd, we mask out the signals we want to handle and then
+ * use signalfd to listen for them. We rely on whatever the current signal
+ * handler is to dispatch the signals when we receive them.
+ */
+static void sigfd_handler(void *opaque)
+{
+ int fd = (unsigned long) opaque;
+ struct qemu_signalfd_siginfo info;
+ struct sigaction action;
+ ssize_t len;
+
+ while (1) {
+ do {
+ len = read(fd, &info, sizeof(info));
+ } while (len == -1 && errno == EINTR);
+
+ if (len == -1 && errno == EAGAIN) {
+ break;
+ }
+
+ if (len != sizeof(info)) {
+ printf("read from sigfd returned %zd: %m\n", len);
+ return;
+ }
+
+ sigaction(info.ssi_signo, NULL, &action);
+ if ((action.sa_flags & SA_SIGINFO) && action.sa_sigaction) {
+ action.sa_sigaction(info.ssi_signo,
+ (siginfo_t *)&info, NULL);
+ } else if (action.sa_handler) {
+ action.sa_handler(info.ssi_signo);
+ }
+ }
+}
+
+static int qemu_signalfd_init(sigset_t mask)
+{
+ int sigfd;
+
+ sigfd = qemu_signalfd(&mask);
+ if (sigfd == -1) {
+ fprintf(stderr, "failed to create signalfd\n");
+ return -errno;
+ }
+
+ fcntl_setfl(sigfd, O_NONBLOCK);
+
+ qemu_set_fd_handler2(sigfd, NULL, sigfd_handler, NULL,
+ (void *)(unsigned long) sigfd);
+
+ return 0;
+}
int qemu_init_main_loop(void)
{
int ret;
+ sigset_t blocked_signals;
cpu_set_debug_excp_handler(cpu_debug_handler);
+ blocked_signals = block_io_signals();
+
+ ret = qemu_signalfd_init(blocked_signals);
+ if (ret)
+ return ret;
+
+ /* Note eventfd must be drained before signalfd handlers run */
ret = qemu_event_init();
if (ret)
return ret;
@@ -347,7 +409,6 @@ int qemu_init_main_loop(void)
qemu_mutex_init(&qemu_global_mutex);
qemu_mutex_lock(&qemu_global_mutex);
- unblock_io_signals();
qemu_thread_self(&io_thread);
return 0;
@@ -586,19 +647,22 @@ static void kvm_init_ipi(CPUState *env)
}
}
-static void unblock_io_signals(void)
+static sigset_t block_io_signals(void)
{
sigset_t set;
+ /* SIGUSR2 used by posix-aio-compat.c */
sigemptyset(&set);
sigaddset(&set, SIGUSR2);
- sigaddset(&set, SIGIO);
- sigaddset(&set, SIGALRM);
pthread_sigmask(SIG_UNBLOCK, &set, NULL);
sigemptyset(&set);
+ sigaddset(&set, SIGIO);
+ sigaddset(&set, SIGALRM);
sigaddset(&set, SIG_IPI);
pthread_sigmask(SIG_BLOCK, &set, NULL);
+
+ return set;
}
void qemu_mutex_lock_iothread(void)
--
1.7.2.1
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [Qemu-devel] [PATCH 05/10] Expose thread_id in info cpus
2010-10-19 10:40 [Qemu-devel] [PATCH 00/10] [PULL] qemu-kvm.git uq/master queue Marcelo Tosatti
` (3 preceding siblings ...)
2010-10-19 10:40 ` [Qemu-devel] [PATCH 04/10] iothread: use signalfd Marcelo Tosatti
@ 2010-10-19 10:40 ` Marcelo Tosatti
2010-10-19 10:40 ` [Qemu-devel] [PATCH 06/10] kvm: x86: add mce support Marcelo Tosatti
` (5 subsequent siblings)
10 siblings, 0 replies; 15+ messages in thread
From: Marcelo Tosatti @ 2010-10-19 10:40 UTC (permalink / raw)
To: Anthony Liguori; +Cc: Marcelo Tosatti, qemu-devel, kvm, Avi Kivity
commit ce6325ff1af34dbaee91c8d28e792277e43f1227
Author: Glauber Costa <gcosta@redhat.com>
Date: Wed Mar 5 17:01:10 2008 -0300
Augment info cpus
This patch exposes the thread id associated with each
cpu through the already well known 'info cpus' interface.
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
cpu-defs.h | 1 +
cpus.c | 5 +++++
exec.c | 1 +
monitor.c | 4 ++++
osdep.c | 15 +++++++++++++++
osdep.h | 1 +
6 files changed, 27 insertions(+), 0 deletions(-)
diff --git a/cpu-defs.h b/cpu-defs.h
index 8d4bf86..eaed43e 100644
--- a/cpu-defs.h
+++ b/cpu-defs.h
@@ -197,6 +197,7 @@ typedef struct CPUWatchpoint {
int nr_cores; /* number of cores within this CPU package */ \
int nr_threads;/* number of threads within this CPU */ \
int running; /* Nonzero if cpu is currently running(usermode). */ \
+ int thread_id; \
/* user data */ \
void *opaque; \
\
diff --git a/cpus.c b/cpus.c
index 3875657..429993a 100644
--- a/cpus.c
+++ b/cpus.c
@@ -539,6 +539,7 @@ static void *kvm_cpu_thread_fn(void *arg)
qemu_mutex_lock(&qemu_global_mutex);
qemu_thread_self(env->thread);
+ env->thread_id = get_thread_id();
if (kvm_enabled())
kvm_init_vcpu(env);
@@ -578,6 +579,10 @@ static void *tcg_cpu_thread_fn(void *arg)
while (!qemu_system_ready)
qemu_cond_timedwait(&qemu_system_cond, &qemu_global_mutex, 100);
+ for (env = first_cpu; env != NULL; env = env->next_cpu) {
+ env->thread_id = get_thread_id();
+ }
+
while (1) {
cpu_exec_all();
qemu_tcg_wait_io_event();
diff --git a/exec.c b/exec.c
index 1fbe91c..c09051d 100644
--- a/exec.c
+++ b/exec.c
@@ -637,6 +637,7 @@ void cpu_exec_init(CPUState *env)
env->numa_node = 0;
QTAILQ_INIT(&env->breakpoints);
QTAILQ_INIT(&env->watchpoints);
+ env->thread_id = get_thread_id();
*penv = env;
#if defined(CONFIG_USER_ONLY)
cpu_list_unlock();
diff --git a/monitor.c b/monitor.c
index 260cc02..709d0fd 100644
--- a/monitor.c
+++ b/monitor.c
@@ -849,6 +849,9 @@ static void print_cpu_iter(QObject *obj, void *opaque)
monitor_printf(mon, " (halted)");
}
+ monitor_printf(mon, " thread_id=%" PRId64 " ",
+ qdict_get_int(cpu, "thread_id"));
+
monitor_printf(mon, "\n");
}
@@ -893,6 +896,7 @@ static void do_info_cpus(Monitor *mon, QObject **ret_data)
#elif defined(TARGET_MIPS)
qdict_put(cpu, "PC", qint_from_int(env->active_tc.PC));
#endif
+ qdict_put(cpu, "thread_id", qint_from_int(env->thread_id));
qlist_append(cpu_list, cpu);
}
diff --git a/osdep.c b/osdep.c
index 2e05b21..dda0f43 100644
--- a/osdep.c
+++ b/osdep.c
@@ -44,6 +44,10 @@
extern int madvise(caddr_t, size_t, int);
#endif
+#ifdef CONFIG_LINUX
+#include <sys/syscall.h>
+#endif
+
#ifdef CONFIG_EVENTFD
#include <sys/eventfd.h>
#endif
@@ -200,6 +204,17 @@ int qemu_create_pidfile(const char *filename)
return 0;
}
+int get_thread_id(void)
+{
+#if defined (_WIN32)
+ return GetCurrentThreadId();
+#elif defined (__linux__)
+ return syscall(SYS_gettid);
+#else
+ return getpid();
+#endif
+}
+
#ifdef _WIN32
/* mingw32 needs ffs for compilations without optimization. */
diff --git a/osdep.h b/osdep.h
index 6716281..9b3bc2e 100644
--- a/osdep.h
+++ b/osdep.h
@@ -126,6 +126,7 @@ void qemu_vfree(void *ptr);
int qemu_madvise(void *addr, size_t len, int advice);
int qemu_create_pidfile(const char *filename);
+int get_thread_id(void);
#ifdef _WIN32
int ffs(int i);
--
1.7.2.1
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [Qemu-devel] [PATCH 06/10] kvm: x86: add mce support
2010-10-19 10:40 [Qemu-devel] [PATCH 00/10] [PULL] qemu-kvm.git uq/master queue Marcelo Tosatti
` (4 preceding siblings ...)
2010-10-19 10:40 ` [Qemu-devel] [PATCH 05/10] Expose thread_id in info cpus Marcelo Tosatti
@ 2010-10-19 10:40 ` Marcelo Tosatti
2010-10-19 10:40 ` [Qemu-devel] [PATCH 07/10] Export qemu_ram_addr_from_host Marcelo Tosatti
` (4 subsequent siblings)
10 siblings, 0 replies; 15+ messages in thread
From: Marcelo Tosatti @ 2010-10-19 10:40 UTC (permalink / raw)
To: Anthony Liguori; +Cc: Marcelo Tosatti, qemu-devel, kvm, Avi Kivity
Port qemu-kvm's MCE support
commit c68b2374c9048812f488e00ffb95db66c0bc07a7
Author: Huang Ying <ying.huang@intel.com>
Date: Mon Jul 20 10:00:53 2009 +0800
Add MCE simulation support to qemu/kvm
KVM ioctls are used to initialize MCE simulation and inject MCE. The
real MCE simulation is implemented in Linux kernel. The Kernel part
has been merged.
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
target-i386/helper.c | 6 +++
target-i386/kvm.c | 84 +++++++++++++++++++++++++++++++++++++++++++++++++
target-i386/kvm_x86.h | 21 ++++++++++++
3 files changed, 111 insertions(+), 0 deletions(-)
create mode 100644 target-i386/kvm_x86.h
diff --git a/target-i386/helper.c b/target-i386/helper.c
index e134340..4b430dd 100644
--- a/target-i386/helper.c
+++ b/target-i386/helper.c
@@ -27,6 +27,7 @@
#include "exec-all.h"
#include "qemu-common.h"
#include "kvm.h"
+#include "kvm_x86.h"
//#define DEBUG_MMU
@@ -1030,6 +1031,11 @@ void cpu_inject_x86_mce(CPUState *cenv, int bank, uint64_t status,
if (bank >= bank_num || !(status & MCI_STATUS_VAL))
return;
+ if (kvm_enabled()) {
+ kvm_inject_x86_mce(cenv, bank, status, mcg_status, addr, misc);
+ return;
+ }
+
/*
* if MSR_MCG_CTL is not all 1s, the uncorrected error
* reporting is disabled
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 74e7b4f..343fb02 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -27,6 +27,7 @@
#include "hw/pc.h"
#include "hw/apic.h"
#include "ioport.h"
+#include "kvm_x86.h"
#ifdef CONFIG_KVM_PARA
#include <linux/kvm_para.h>
@@ -167,6 +168,67 @@ static int get_para_features(CPUState *env)
}
#endif
+#ifdef KVM_CAP_MCE
+static int kvm_get_mce_cap_supported(KVMState *s, uint64_t *mce_cap,
+ int *max_banks)
+{
+ int r;
+
+ r = kvm_ioctl(s, KVM_CHECK_EXTENSION, KVM_CAP_MCE);
+ if (r > 0) {
+ *max_banks = r;
+ return kvm_ioctl(s, KVM_X86_GET_MCE_CAP_SUPPORTED, mce_cap);
+ }
+ return -ENOSYS;
+}
+
+static int kvm_setup_mce(CPUState *env, uint64_t *mcg_cap)
+{
+ return kvm_vcpu_ioctl(env, KVM_X86_SETUP_MCE, mcg_cap);
+}
+
+static int kvm_set_mce(CPUState *env, struct kvm_x86_mce *m)
+{
+ return kvm_vcpu_ioctl(env, KVM_X86_SET_MCE, m);
+}
+
+struct kvm_x86_mce_data
+{
+ CPUState *env;
+ struct kvm_x86_mce *mce;
+};
+
+static void kvm_do_inject_x86_mce(void *_data)
+{
+ struct kvm_x86_mce_data *data = _data;
+ int r;
+
+ r = kvm_set_mce(data->env, data->mce);
+ if (r < 0)
+ perror("kvm_set_mce FAILED");
+}
+#endif
+
+void kvm_inject_x86_mce(CPUState *cenv, int bank, uint64_t status,
+ uint64_t mcg_status, uint64_t addr, uint64_t misc)
+{
+#ifdef KVM_CAP_MCE
+ struct kvm_x86_mce mce = {
+ .bank = bank,
+ .status = status,
+ .mcg_status = mcg_status,
+ .addr = addr,
+ .misc = misc,
+ };
+ struct kvm_x86_mce_data data = {
+ .env = cenv,
+ .mce = &mce,
+ };
+
+ run_on_cpu(cenv, kvm_do_inject_x86_mce, &data);
+#endif
+}
+
int kvm_arch_init_vcpu(CPUState *env)
{
struct {
@@ -277,6 +339,28 @@ int kvm_arch_init_vcpu(CPUState *env)
cpuid_data.cpuid.nent = cpuid_i;
+#ifdef KVM_CAP_MCE
+ if (((env->cpuid_version >> 8)&0xF) >= 6
+ && (env->cpuid_features&(CPUID_MCE|CPUID_MCA)) == (CPUID_MCE|CPUID_MCA)
+ && kvm_check_extension(env->kvm_state, KVM_CAP_MCE) > 0) {
+ uint64_t mcg_cap;
+ int banks;
+
+ if (kvm_get_mce_cap_supported(env->kvm_state, &mcg_cap, &banks))
+ perror("kvm_get_mce_cap_supported FAILED");
+ else {
+ if (banks > MCE_BANKS_DEF)
+ banks = MCE_BANKS_DEF;
+ mcg_cap &= MCE_CAP_DEF;
+ mcg_cap |= banks;
+ if (kvm_setup_mce(env, &mcg_cap))
+ perror("kvm_setup_mce FAILED");
+ else
+ env->mcg_cap = mcg_cap;
+ }
+ }
+#endif
+
return kvm_vcpu_ioctl(env, KVM_SET_CPUID2, &cpuid_data);
}
diff --git a/target-i386/kvm_x86.h b/target-i386/kvm_x86.h
new file mode 100644
index 0000000..c1ebd24
--- /dev/null
+++ b/target-i386/kvm_x86.h
@@ -0,0 +1,21 @@
+/*
+ * QEMU KVM support
+ *
+ * Copyright (C) 2009 Red Hat Inc.
+ * Copyright IBM, Corp. 2008
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef __KVM_X86_H__
+#define __KVM_X86_H__
+
+void kvm_inject_x86_mce(CPUState *cenv, int bank, uint64_t status,
+ uint64_t mcg_status, uint64_t addr, uint64_t misc);
+
+#endif
--
1.7.2.1
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [Qemu-devel] [PATCH 07/10] Export qemu_ram_addr_from_host
2010-10-19 10:40 [Qemu-devel] [PATCH 00/10] [PULL] qemu-kvm.git uq/master queue Marcelo Tosatti
` (5 preceding siblings ...)
2010-10-19 10:40 ` [Qemu-devel] [PATCH 06/10] kvm: x86: add mce support Marcelo Tosatti
@ 2010-10-19 10:40 ` Marcelo Tosatti
2010-10-19 10:40 ` [Qemu-devel] [PATCH 08/10] Add RAM -> physical addr mapping in MCE simulation Marcelo Tosatti
` (3 subsequent siblings)
10 siblings, 0 replies; 15+ messages in thread
From: Marcelo Tosatti @ 2010-10-19 10:40 UTC (permalink / raw)
To: Anthony Liguori; +Cc: Marcelo Tosatti, qemu-devel, kvm, Avi Kivity
To be used by next patches.
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
cpu-common.h | 3 ++-
exec-all.h | 2 +-
exec.c | 26 +++++++++++++++++---------
3 files changed, 20 insertions(+), 11 deletions(-)
diff --git a/cpu-common.h b/cpu-common.h
index 0426bc8..a543b5d 100644
--- a/cpu-common.h
+++ b/cpu-common.h
@@ -47,7 +47,8 @@ void qemu_ram_free(ram_addr_t addr);
/* This should only be used for ram local to a device. */
void *qemu_get_ram_ptr(ram_addr_t addr);
/* This should not be used by devices. */
-ram_addr_t qemu_ram_addr_from_host(void *ptr);
+int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr);
+ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr);
int cpu_register_io_memory(CPUReadMemoryFunc * const *mem_read,
CPUWriteMemoryFunc * const *mem_write,
diff --git a/exec-all.h b/exec-all.h
index 3a53fe6..c457058 100644
--- a/exec-all.h
+++ b/exec-all.h
@@ -334,7 +334,7 @@ static inline tb_page_addr_t get_page_addr_code(CPUState *env1, target_ulong add
}
p = (void *)(unsigned long)addr
+ env1->tlb_table[mmu_idx][page_index].addend;
- return qemu_ram_addr_from_host(p);
+ return qemu_ram_addr_from_host_nofail(p);
}
#endif
diff --git a/exec.c b/exec.c
index c09051d..9991203 100644
--- a/exec.c
+++ b/exec.c
@@ -2086,7 +2086,7 @@ static inline void tlb_update_dirty(CPUTLBEntry *tlb_entry)
if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == IO_MEM_RAM) {
p = (void *)(unsigned long)((tlb_entry->addr_write & TARGET_PAGE_MASK)
+ tlb_entry->addend);
- ram_addr = qemu_ram_addr_from_host(p);
+ ram_addr = qemu_ram_addr_from_host_nofail(p);
if (!cpu_physical_memory_is_dirty(ram_addr)) {
tlb_entry->addr_write |= TLB_NOTDIRTY;
}
@@ -2939,23 +2939,31 @@ void *qemu_get_ram_ptr(ram_addr_t addr)
return NULL;
}
-/* Some of the softmmu routines need to translate from a host pointer
- (typically a TLB entry) back to a ram offset. */
-ram_addr_t qemu_ram_addr_from_host(void *ptr)
+int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
{
RAMBlock *block;
uint8_t *host = ptr;
QLIST_FOREACH(block, &ram_list.blocks, next) {
if (host - block->host < block->length) {
- return block->offset + (host - block->host);
+ *ram_addr = block->offset + (host - block->host);
+ return 0;
}
}
+ return -1;
+}
- fprintf(stderr, "Bad ram pointer %p\n", ptr);
- abort();
+/* Some of the softmmu routines need to translate from a host pointer
+ (typically a TLB entry) back to a ram offset. */
+ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
+{
+ ram_addr_t ram_addr;
- return 0;
+ if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
+ fprintf(stderr, "Bad ram pointer %p\n", ptr);
+ abort();
+ }
+ return ram_addr;
}
static uint32_t unassigned_mem_readb(void *opaque, target_phys_addr_t addr)
@@ -3704,7 +3712,7 @@ void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
{
if (buffer != bounce.buffer) {
if (is_write) {
- ram_addr_t addr1 = qemu_ram_addr_from_host(buffer);
+ ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
while (access_len) {
unsigned l;
l = TARGET_PAGE_SIZE;
--
1.7.2.1
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [Qemu-devel] [PATCH 08/10] Add RAM -> physical addr mapping in MCE simulation
2010-10-19 10:40 [Qemu-devel] [PATCH 00/10] [PULL] qemu-kvm.git uq/master queue Marcelo Tosatti
` (6 preceding siblings ...)
2010-10-19 10:40 ` [Qemu-devel] [PATCH 07/10] Export qemu_ram_addr_from_host Marcelo Tosatti
@ 2010-10-19 10:40 ` Marcelo Tosatti
2010-10-19 10:40 ` [Qemu-devel] [PATCH 09/10] MCE: Relay UCR MCE to guest Marcelo Tosatti
` (2 subsequent siblings)
10 siblings, 0 replies; 15+ messages in thread
From: Marcelo Tosatti @ 2010-10-19 10:40 UTC (permalink / raw)
To: Anthony Liguori; +Cc: Avi Kivity, Marcelo Tosatti, qemu-devel, kvm, Huang Ying
From: Huang Ying <ying.huang@intel.com>
In QEMU-KVM, physical address != RAM address. While MCE simulation
needs physical address instead of RAM address. So
kvm_physical_memory_addr_from_ram() is implemented to do the
conversion, and it is invoked before being filled in the IA32_MCi_ADDR
MSR.
Reported-by: Dean Nelson <dnelson@redhat.com>
Signed-off-by: Huang Ying <ying.huang@intel.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
kvm-all.c | 18 ++++++++++++++++++
kvm.h | 3 +++
2 files changed, 21 insertions(+), 0 deletions(-)
diff --git a/kvm-all.c b/kvm-all.c
index 1cc696f..37b99c7 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -137,6 +137,24 @@ static KVMSlot *kvm_lookup_overlapping_slot(KVMState *s,
return found;
}
+int kvm_physical_memory_addr_from_ram(KVMState *s, ram_addr_t ram_addr,
+ target_phys_addr_t *phys_addr)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
+ KVMSlot *mem = &s->slots[i];
+
+ if (ram_addr >= mem->phys_offset &&
+ ram_addr < mem->phys_offset + mem->memory_size) {
+ *phys_addr = mem->start_addr + (ram_addr - mem->phys_offset);
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
static int kvm_set_user_memory_region(KVMState *s, KVMSlot *slot)
{
struct kvm_userspace_memory_region mem;
diff --git a/kvm.h b/kvm.h
index 50b6c01..8f5a754 100644
--- a/kvm.h
+++ b/kvm.h
@@ -174,6 +174,9 @@ static inline void cpu_synchronize_post_init(CPUState *env)
}
}
+int kvm_physical_memory_addr_from_ram(KVMState *s, ram_addr_t ram_addr,
+ target_phys_addr_t *phys_addr);
+
#endif
int kvm_set_ioeventfd_mmio_long(int fd, uint32_t adr, uint32_t val, bool assign);
--
1.7.2.1
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [Qemu-devel] [PATCH 09/10] MCE: Relay UCR MCE to guest
2010-10-19 10:40 [Qemu-devel] [PATCH 00/10] [PULL] qemu-kvm.git uq/master queue Marcelo Tosatti
` (7 preceding siblings ...)
2010-10-19 10:40 ` [Qemu-devel] [PATCH 08/10] Add RAM -> physical addr mapping in MCE simulation Marcelo Tosatti
@ 2010-10-19 10:40 ` Marcelo Tosatti
2010-10-19 14:35 ` [Qemu-devel] [PATCH 09/10] MCE: Relay UCR MCE to guest (v2) Marcelo Tosatti
2010-10-19 10:40 ` [Qemu-devel] [PATCH 10/10] Add savevm/loadvm support for MCE Marcelo Tosatti
2010-10-19 13:40 ` [Qemu-devel] [PATCH 00/10] [PULL] qemu-kvm.git uq/master queue Anthony Liguori
10 siblings, 1 reply; 15+ messages in thread
From: Marcelo Tosatti @ 2010-10-19 10:40 UTC (permalink / raw)
To: Anthony Liguori; +Cc: Marcelo Tosatti, qemu-devel, kvm, Avi Kivity
Port qemu-kvm's
commit 4b62fff1101a7ad77553147717a8bd3bf79df7ef
Author: Huang Ying <ying.huang@intel.com>
Date: Mon Sep 21 10:43:25 2009 +0800
MCE: Relay UCR MCE to guest
UCR (uncorrected recovery) MCE is supported in recent Intel CPUs,
where some hardware error such as some memory error can be reported
without PCC (processor context corrupted). To recover from such MCE,
the corresponding memory will be unmapped, and all processes accessing
the memory will be killed via SIGBUS.
For KVM, if QEMU/KVM is killed, all guest processes will be killed
too. So we relay SIGBUS from host OS to guest system via a UCR MCE
injection. Then guest OS can isolate corresponding memory and kill
necessary guest processes only. SIGBUS sent to main thread (not VCPU
threads) will be broadcast to all VCPU threads as UCR MCE.
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
cpus.c | 82 ++++++++++++++++++++--
kvm-stub.c | 5 ++
kvm.h | 3 +
target-i386/cpu.h | 20 +++++-
target-i386/helper.c | 2 +-
target-i386/kvm.c | 178 ++++++++++++++++++++++++++++++++++++++++++++++++-
target-i386/kvm_x86.h | 3 +-
7 files changed, 279 insertions(+), 14 deletions(-)
diff --git a/cpus.c b/cpus.c
index 429993a..62de0bc 100644
--- a/cpus.c
+++ b/cpus.c
@@ -34,6 +34,10 @@
#include "cpus.h"
#include "compatfd.h"
+#ifdef CONFIG_LINUX
+#include <sys/prctl.h>
+#include <sys/signalfd.h>
+#endif
#ifdef SIGRTMIN
#define SIG_IPI (SIGRTMIN+4)
@@ -41,6 +45,10 @@
#define SIG_IPI SIGUSR1
#endif
+#ifndef PR_MCE_KILL
+#define PR_MCE_KILL 33
+#endif
+
static CPUState *next_cpu;
/***********************************************************/
@@ -498,28 +506,77 @@ static void qemu_tcg_wait_io_event(void)
}
}
+static void sigbus_reraise(void)
+{
+ sigset_t set;
+ struct sigaction action;
+
+ memset(&action, 0, sizeof(action));
+ action.sa_handler = SIG_DFL;
+ if (!sigaction(SIGBUS, &action, NULL)) {
+ raise(SIGBUS);
+ sigemptyset(&set);
+ sigaddset(&set, SIGBUS);
+ sigprocmask(SIG_UNBLOCK, &set, NULL);
+ }
+ perror("Failed to re-raise SIGBUS!\n");
+ abort();
+}
+
+static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
+ void *ctx)
+{
+#if defined(TARGET_I386)
+ if (kvm_on_sigbus(siginfo->ssi_code, (void *)(intptr_t)siginfo->ssi_addr))
+#endif
+ sigbus_reraise();
+}
+
static void qemu_kvm_eat_signal(CPUState *env, int timeout)
{
struct timespec ts;
int r, e;
siginfo_t siginfo;
sigset_t waitset;
+ sigset_t chkset;
ts.tv_sec = timeout / 1000;
ts.tv_nsec = (timeout % 1000) * 1000000;
sigemptyset(&waitset);
sigaddset(&waitset, SIG_IPI);
+ sigaddset(&waitset, SIGBUS);
- qemu_mutex_unlock(&qemu_global_mutex);
- r = sigtimedwait(&waitset, &siginfo, &ts);
- e = errno;
- qemu_mutex_lock(&qemu_global_mutex);
+ do {
+ qemu_mutex_unlock(&qemu_global_mutex);
- if (r == -1 && !(e == EAGAIN || e == EINTR)) {
- fprintf(stderr, "sigtimedwait: %s\n", strerror(e));
- exit(1);
- }
+ r = sigtimedwait(&waitset, &siginfo, &ts);
+ e = errno;
+
+ qemu_mutex_lock(&qemu_global_mutex);
+
+ if (r == -1 && !(e == EAGAIN || e == EINTR)) {
+ fprintf(stderr, "sigtimedwait: %s\n", strerror(e));
+ exit(1);
+ }
+
+ switch (r) {
+ case SIGBUS:
+#ifdef TARGET_I386
+ if (kvm_on_sigbus_vcpu(env, siginfo.si_code, siginfo.si_addr))
+#endif
+ sigbus_reraise();
+ break;
+ default:
+ break;
+ }
+
+ r = sigpending(&chkset);
+ if (r == -1) {
+ fprintf(stderr, "sigpending: %s\n", strerror(e));
+ exit(1);
+ }
+ } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
}
static void qemu_kvm_wait_io_event(CPUState *env)
@@ -645,6 +702,7 @@ static void kvm_init_ipi(CPUState *env)
pthread_sigmask(SIG_BLOCK, NULL, &set);
sigdelset(&set, SIG_IPI);
+ sigdelset(&set, SIGBUS);
r = kvm_set_signal_mask(env, &set);
if (r) {
fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(r));
@@ -655,6 +713,7 @@ static void kvm_init_ipi(CPUState *env)
static sigset_t block_io_signals(void)
{
sigset_t set;
+ struct sigaction action;
/* SIGUSR2 used by posix-aio-compat.c */
sigemptyset(&set);
@@ -665,8 +724,15 @@ static sigset_t block_io_signals(void)
sigaddset(&set, SIGIO);
sigaddset(&set, SIGALRM);
sigaddset(&set, SIG_IPI);
+ sigaddset(&set, SIGBUS);
pthread_sigmask(SIG_BLOCK, &set, NULL);
+ memset(&action, 0, sizeof(action));
+ action.sa_flags = SA_SIGINFO;
+ action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
+ sigaction(SIGBUS, &action, NULL);
+ prctl(PR_MCE_KILL, 1, 1, 0, 0);
+
return set;
}
diff --git a/kvm-stub.c b/kvm-stub.c
index d45f9fa..5384a4b 100644
--- a/kvm-stub.c
+++ b/kvm-stub.c
@@ -141,3 +141,8 @@ int kvm_set_ioeventfd_mmio_long(int fd, uint32_t adr, uint32_t val, bool assign)
{
return -ENOSYS;
}
+
+int kvm_on_sigbus(int code, void *addr)
+{
+ return 1;
+}
diff --git a/kvm.h b/kvm.h
index 8f5a754..eb6fcec 100644
--- a/kvm.h
+++ b/kvm.h
@@ -110,6 +110,9 @@ int kvm_arch_init_vcpu(CPUState *env);
void kvm_arch_reset_vcpu(CPUState *env);
+int kvm_on_sigbus_vcpu(CPUState *env, int code, void *addr);
+int kvm_on_sigbus(int code, void *addr);
+
struct kvm_guest_debug;
struct kvm_debug_exit_arch;
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 77eeab1..85ed30f 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -250,16 +250,32 @@
#define PG_ERROR_RSVD_MASK 0x08
#define PG_ERROR_I_D_MASK 0x10
-#define MCG_CTL_P (1UL<<8) /* MCG_CAP register available */
+#define MCG_CTL_P (1ULL<<8) /* MCG_CAP register available */
+#define MCG_SER_P (1ULL<<24) /* MCA recovery/new status bits */
-#define MCE_CAP_DEF MCG_CTL_P
+#define MCE_CAP_DEF (MCG_CTL_P|MCG_SER_P)
#define MCE_BANKS_DEF 10
+#define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */
+#define MCG_STATUS_EIPV (1ULL<<1) /* ip points to correct instruction */
#define MCG_STATUS_MCIP (1ULL<<2) /* machine check in progress */
#define MCI_STATUS_VAL (1ULL<<63) /* valid error */
#define MCI_STATUS_OVER (1ULL<<62) /* previous errors lost */
#define MCI_STATUS_UC (1ULL<<61) /* uncorrected error */
+#define MCI_STATUS_EN (1ULL<<60) /* error enabled */
+#define MCI_STATUS_MISCV (1ULL<<59) /* misc error reg. valid */
+#define MCI_STATUS_ADDRV (1ULL<<58) /* addr reg. valid */
+#define MCI_STATUS_PCC (1ULL<<57) /* processor context corrupt */
+#define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */
+#define MCI_STATUS_AR (1ULL<<55) /* Action required */
+
+/* MISC register defines */
+#define MCM_ADDR_SEGOFF 0 /* segment offset */
+#define MCM_ADDR_LINEAR 1 /* linear address */
+#define MCM_ADDR_PHYS 2 /* physical address */
+#define MCM_ADDR_MEM 3 /* memory address */
+#define MCM_ADDR_GENERIC 7 /* generic */
#define MSR_IA32_TSC 0x10
#define MSR_IA32_APICBASE 0x1b
diff --git a/target-i386/helper.c b/target-i386/helper.c
index 4b430dd..4fff4a8 100644
--- a/target-i386/helper.c
+++ b/target-i386/helper.c
@@ -1032,7 +1032,7 @@ void cpu_inject_x86_mce(CPUState *cenv, int bank, uint64_t status,
return;
if (kvm_enabled()) {
- kvm_inject_x86_mce(cenv, bank, status, mcg_status, addr, misc);
+ kvm_inject_x86_mce(cenv, bank, status, mcg_status, addr, misc, 0);
return;
}
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 343fb02..84bd400 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -46,6 +46,13 @@
#define MSR_KVM_WALL_CLOCK 0x11
#define MSR_KVM_SYSTEM_TIME 0x12
+#ifndef BUS_MCEERR_AR
+#define BUS_MCEERR_AR 4
+#endif
+#ifndef BUS_MCEERR_AO
+#define BUS_MCEERR_AO 5
+#endif
+
#ifdef KVM_CAP_EXT_CPUID
static struct kvm_cpuid2 *try_get_cpuid(KVMState *s, int max)
@@ -192,10 +199,39 @@ static int kvm_set_mce(CPUState *env, struct kvm_x86_mce *m)
return kvm_vcpu_ioctl(env, KVM_X86_SET_MCE, m);
}
+static int kvm_get_msr(CPUState *env, struct kvm_msr_entry *msrs, int n)
+{
+ struct kvm_msrs *kmsrs = qemu_malloc(sizeof *kmsrs + n * sizeof *msrs);
+ int r;
+
+ kmsrs->nmsrs = n;
+ memcpy(kmsrs->entries, msrs, n * sizeof *msrs);
+ r = kvm_vcpu_ioctl(env, KVM_GET_MSRS, kmsrs);
+ memcpy(msrs, kmsrs->entries, n * sizeof *msrs);
+ free(kmsrs);
+ return r;
+}
+
+/* FIXME: kill this and kvm_get_msr, use env->mcg_status instead */
+static int kvm_mce_in_exception(CPUState *env)
+{
+ struct kvm_msr_entry msr_mcg_status = {
+ .index = MSR_MCG_STATUS,
+ };
+ int r;
+
+ r = kvm_get_msr(env, &msr_mcg_status, 1);
+ if (r == -1 || r == 0) {
+ return -1;
+ }
+ return !!(msr_mcg_status.data & MCG_STATUS_MCIP);
+}
+
struct kvm_x86_mce_data
{
CPUState *env;
struct kvm_x86_mce *mce;
+ int abort_on_error;
};
static void kvm_do_inject_x86_mce(void *_data)
@@ -203,14 +239,26 @@ static void kvm_do_inject_x86_mce(void *_data)
struct kvm_x86_mce_data *data = _data;
int r;
+ /* If there is an MCE excpetion being processed, ignore this SRAO MCE */
+ r = kvm_mce_in_exception(data->env);
+ if (r == -1)
+ fprintf(stderr, "Failed to get MCE status\n");
+ else if (r && !(data->mce->status & MCI_STATUS_AR))
+ return;
+
r = kvm_set_mce(data->env, data->mce);
- if (r < 0)
+ if (r < 0) {
perror("kvm_set_mce FAILED");
+ if (data->abort_on_error) {
+ abort();
+ }
+ }
}
#endif
void kvm_inject_x86_mce(CPUState *cenv, int bank, uint64_t status,
- uint64_t mcg_status, uint64_t addr, uint64_t misc)
+ uint64_t mcg_status, uint64_t addr, uint64_t misc,
+ int abort_on_error)
{
#ifdef KVM_CAP_MCE
struct kvm_x86_mce mce = {
@@ -225,7 +273,15 @@ void kvm_inject_x86_mce(CPUState *cenv, int bank, uint64_t status,
.mce = &mce,
};
+ if (!cenv->mcg_cap) {
+ fprintf(stderr, "MCE support is not enabled!\n");
+ return;
+ }
+
run_on_cpu(cenv, kvm_do_inject_x86_mce, &data);
+#else
+ if (abort_on_error)
+ abort();
#endif
}
@@ -1528,3 +1584,121 @@ bool kvm_arch_stop_on_emulation_error(CPUState *env)
((env->segs[R_CS].selector & 3) != 3);
}
+static void hardware_memory_error(void)
+{
+ fprintf(stderr, "Hardware memory error!\n");
+ exit(1);
+}
+
+int kvm_on_sigbus_vcpu(CPUState *env, int code, void *addr)
+{
+#if defined(KVM_CAP_MCE)
+ struct kvm_x86_mce mce = {
+ .bank = 9,
+ };
+ void *vaddr;
+ ram_addr_t ram_addr;
+ unsigned long paddr;
+ int r;
+
+ if ((env->mcg_cap & MCG_SER_P) && addr
+ && (code == BUS_MCEERR_AR
+ || code == BUS_MCEERR_AO)) {
+ if (code == BUS_MCEERR_AR) {
+ /* Fake an Intel architectural Data Load SRAR UCR */
+ mce.status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN
+ | MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S
+ | MCI_STATUS_AR | 0x134;
+ mce.misc = (MCM_ADDR_PHYS << 6) | 0xc;
+ mce.mcg_status = MCG_STATUS_MCIP | MCG_STATUS_EIPV;
+ } else {
+ /*
+ * If there is an MCE excpetion being processed, ignore
+ * this SRAO MCE
+ */
+ r = kvm_mce_in_exception(env);
+ if (r == -1) {
+ fprintf(stderr, "Failed to get MCE status\n");
+ } else if (r) {
+ return 0;
+ }
+ /* Fake an Intel architectural Memory scrubbing UCR */
+ mce.status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN
+ | MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S
+ | 0xc0;
+ mce.misc = (MCM_ADDR_PHYS << 6) | 0xc;
+ mce.mcg_status = MCG_STATUS_MCIP | MCG_STATUS_RIPV;
+ }
+ vaddr = (void *)addr;
+ if (qemu_ram_addr_from_host(vaddr, &ram_addr) ||
+ !kvm_physical_memory_addr_from_ram(env->kvm_state, ram_addr, &paddr)) {
+ fprintf(stderr, "Hardware memory error for memory used by "
+ "QEMU itself instead of guest system!\n");
+ /* Hope we are lucky for AO MCE */
+ if (code == BUS_MCEERR_AO) {
+ return 0;
+ } else {
+ hardware_memory_error();
+ }
+ }
+ mce.addr = paddr;
+ r = kvm_set_mce(env, &mce);
+ if (r < 0) {
+ fprintf(stderr, "kvm_set_mce: %s\n", strerror(errno));
+ abort();
+ }
+ } else
+#endif
+ {
+ if (code == BUS_MCEERR_AO) {
+ return 0;
+ } else if (code == BUS_MCEERR_AR) {
+ hardware_memory_error();
+ } else {
+ return 1;
+ }
+ }
+ return 0;
+}
+
+int kvm_on_sigbus(int code, void *addr)
+{
+#if defined(KVM_CAP_MCE)
+ if ((first_cpu->mcg_cap & MCG_SER_P) && addr && code == BUS_MCEERR_AO) {
+ uint64_t status;
+ void *vaddr;
+ ram_addr_t ram_addr;
+ unsigned long paddr;
+ CPUState *cenv;
+
+ /* Hope we are lucky for AO MCE */
+ vaddr = addr;
+ if (qemu_ram_addr_from_host(vaddr, &ram_addr) ||
+ !kvm_physical_memory_addr_from_ram(first_cpu->kvm_state, ram_addr, &paddr)) {
+ fprintf(stderr, "Hardware memory error for memory used by "
+ "QEMU itself instead of guest system!: %p\n", addr);
+ return 0;
+ }
+ status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN
+ | MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S
+ | 0xc0;
+ kvm_inject_x86_mce(first_cpu, 9, status,
+ MCG_STATUS_MCIP | MCG_STATUS_RIPV, paddr,
+ (MCM_ADDR_PHYS << 6) | 0xc, 1);
+ for (cenv = first_cpu->next_cpu; cenv != NULL; cenv = cenv->next_cpu) {
+ kvm_inject_x86_mce(cenv, 1, MCI_STATUS_VAL | MCI_STATUS_UC,
+ MCG_STATUS_MCIP | MCG_STATUS_RIPV, 0, 0, 1);
+ }
+ } else
+#endif
+ {
+ if (code == BUS_MCEERR_AO) {
+ return 0;
+ } else if (code == BUS_MCEERR_AR) {
+ hardware_memory_error();
+ } else {
+ return 1;
+ }
+ }
+ return 0;
+}
diff --git a/target-i386/kvm_x86.h b/target-i386/kvm_x86.h
index c1ebd24..04932cf 100644
--- a/target-i386/kvm_x86.h
+++ b/target-i386/kvm_x86.h
@@ -16,6 +16,7 @@
#define __KVM_X86_H__
void kvm_inject_x86_mce(CPUState *cenv, int bank, uint64_t status,
- uint64_t mcg_status, uint64_t addr, uint64_t misc);
+ uint64_t mcg_status, uint64_t addr, uint64_t misc,
+ int abort_on_error);
#endif
--
1.7.2.1
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [Qemu-devel] [PATCH 09/10] MCE: Relay UCR MCE to guest (v2)
2010-10-19 10:40 ` [Qemu-devel] [PATCH 09/10] MCE: Relay UCR MCE to guest Marcelo Tosatti
@ 2010-10-19 14:35 ` Marcelo Tosatti
0 siblings, 0 replies; 15+ messages in thread
From: Marcelo Tosatti @ 2010-10-19 14:35 UTC (permalink / raw)
To: Anthony Liguori; +Cc: qemu-devel, kvm, Avi Kivity
Port qemu-kvm's
commit 4b62fff1101a7ad77553147717a8bd3bf79df7ef
Author: Huang Ying <ying.huang@intel.com>
Date: Mon Sep 21 10:43:25 2009 +0800
MCE: Relay UCR MCE to guest
UCR (uncorrected recovery) MCE is supported in recent Intel CPUs,
where some hardware error such as some memory error can be reported
without PCC (processor context corrupted). To recover from such MCE,
the corresponding memory will be unmapped, and all processes accessing
the memory will be killed via SIGBUS.
For KVM, if QEMU/KVM is killed, all guest processes will be killed
too. So we relay SIGBUS from host OS to guest system via a UCR MCE
injection. Then guest OS can isolate corresponding memory and kill
necessary guest processes only. SIGBUS sent to main thread (not VCPU
threads) will be broadcast to all VCPU threads as UCR MCE.
v2: use target_phys_addr_t type for paddr.
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
cpus.c | 82 ++++++++++++++++++++--
kvm-stub.c | 5 ++
kvm.h | 3 +
target-i386/cpu.h | 20 +++++-
target-i386/helper.c | 2 +-
target-i386/kvm.c | 178 ++++++++++++++++++++++++++++++++++++++++++++++++-
target-i386/kvm_x86.h | 3 +-
7 files changed, 279 insertions(+), 14 deletions(-)
diff --git a/cpus.c b/cpus.c
index 429993a..62de0bc 100644
--- a/cpus.c
+++ b/cpus.c
@@ -34,6 +34,10 @@
#include "cpus.h"
#include "compatfd.h"
+#ifdef CONFIG_LINUX
+#include <sys/prctl.h>
+#include <sys/signalfd.h>
+#endif
#ifdef SIGRTMIN
#define SIG_IPI (SIGRTMIN+4)
@@ -41,6 +45,10 @@
#define SIG_IPI SIGUSR1
#endif
+#ifndef PR_MCE_KILL
+#define PR_MCE_KILL 33
+#endif
+
static CPUState *next_cpu;
/***********************************************************/
@@ -498,28 +506,77 @@ static void qemu_tcg_wait_io_event(void)
}
}
+static void sigbus_reraise(void)
+{
+ sigset_t set;
+ struct sigaction action;
+
+ memset(&action, 0, sizeof(action));
+ action.sa_handler = SIG_DFL;
+ if (!sigaction(SIGBUS, &action, NULL)) {
+ raise(SIGBUS);
+ sigemptyset(&set);
+ sigaddset(&set, SIGBUS);
+ sigprocmask(SIG_UNBLOCK, &set, NULL);
+ }
+ perror("Failed to re-raise SIGBUS!\n");
+ abort();
+}
+
+static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
+ void *ctx)
+{
+#if defined(TARGET_I386)
+ if (kvm_on_sigbus(siginfo->ssi_code, (void *)(intptr_t)siginfo->ssi_addr))
+#endif
+ sigbus_reraise();
+}
+
static void qemu_kvm_eat_signal(CPUState *env, int timeout)
{
struct timespec ts;
int r, e;
siginfo_t siginfo;
sigset_t waitset;
+ sigset_t chkset;
ts.tv_sec = timeout / 1000;
ts.tv_nsec = (timeout % 1000) * 1000000;
sigemptyset(&waitset);
sigaddset(&waitset, SIG_IPI);
+ sigaddset(&waitset, SIGBUS);
- qemu_mutex_unlock(&qemu_global_mutex);
- r = sigtimedwait(&waitset, &siginfo, &ts);
- e = errno;
- qemu_mutex_lock(&qemu_global_mutex);
+ do {
+ qemu_mutex_unlock(&qemu_global_mutex);
- if (r == -1 && !(e == EAGAIN || e == EINTR)) {
- fprintf(stderr, "sigtimedwait: %s\n", strerror(e));
- exit(1);
- }
+ r = sigtimedwait(&waitset, &siginfo, &ts);
+ e = errno;
+
+ qemu_mutex_lock(&qemu_global_mutex);
+
+ if (r == -1 && !(e == EAGAIN || e == EINTR)) {
+ fprintf(stderr, "sigtimedwait: %s\n", strerror(e));
+ exit(1);
+ }
+
+ switch (r) {
+ case SIGBUS:
+#ifdef TARGET_I386
+ if (kvm_on_sigbus_vcpu(env, siginfo.si_code, siginfo.si_addr))
+#endif
+ sigbus_reraise();
+ break;
+ default:
+ break;
+ }
+
+ r = sigpending(&chkset);
+ if (r == -1) {
+ fprintf(stderr, "sigpending: %s\n", strerror(e));
+ exit(1);
+ }
+ } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
}
static void qemu_kvm_wait_io_event(CPUState *env)
@@ -645,6 +702,7 @@ static void kvm_init_ipi(CPUState *env)
pthread_sigmask(SIG_BLOCK, NULL, &set);
sigdelset(&set, SIG_IPI);
+ sigdelset(&set, SIGBUS);
r = kvm_set_signal_mask(env, &set);
if (r) {
fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(r));
@@ -655,6 +713,7 @@ static void kvm_init_ipi(CPUState *env)
static sigset_t block_io_signals(void)
{
sigset_t set;
+ struct sigaction action;
/* SIGUSR2 used by posix-aio-compat.c */
sigemptyset(&set);
@@ -665,8 +724,15 @@ static sigset_t block_io_signals(void)
sigaddset(&set, SIGIO);
sigaddset(&set, SIGALRM);
sigaddset(&set, SIG_IPI);
+ sigaddset(&set, SIGBUS);
pthread_sigmask(SIG_BLOCK, &set, NULL);
+ memset(&action, 0, sizeof(action));
+ action.sa_flags = SA_SIGINFO;
+ action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
+ sigaction(SIGBUS, &action, NULL);
+ prctl(PR_MCE_KILL, 1, 1, 0, 0);
+
return set;
}
diff --git a/kvm-stub.c b/kvm-stub.c
index d45f9fa..5384a4b 100644
--- a/kvm-stub.c
+++ b/kvm-stub.c
@@ -141,3 +141,8 @@ int kvm_set_ioeventfd_mmio_long(int fd, uint32_t adr, uint32_t val, bool assign)
{
return -ENOSYS;
}
+
+int kvm_on_sigbus(int code, void *addr)
+{
+ return 1;
+}
diff --git a/kvm.h b/kvm.h
index 8f5a754..eb6fcec 100644
--- a/kvm.h
+++ b/kvm.h
@@ -110,6 +110,9 @@ int kvm_arch_init_vcpu(CPUState *env);
void kvm_arch_reset_vcpu(CPUState *env);
+int kvm_on_sigbus_vcpu(CPUState *env, int code, void *addr);
+int kvm_on_sigbus(int code, void *addr);
+
struct kvm_guest_debug;
struct kvm_debug_exit_arch;
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 77eeab1..85ed30f 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -250,16 +250,32 @@
#define PG_ERROR_RSVD_MASK 0x08
#define PG_ERROR_I_D_MASK 0x10
-#define MCG_CTL_P (1UL<<8) /* MCG_CAP register available */
+#define MCG_CTL_P (1ULL<<8) /* MCG_CAP register available */
+#define MCG_SER_P (1ULL<<24) /* MCA recovery/new status bits */
-#define MCE_CAP_DEF MCG_CTL_P
+#define MCE_CAP_DEF (MCG_CTL_P|MCG_SER_P)
#define MCE_BANKS_DEF 10
+#define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */
+#define MCG_STATUS_EIPV (1ULL<<1) /* ip points to correct instruction */
#define MCG_STATUS_MCIP (1ULL<<2) /* machine check in progress */
#define MCI_STATUS_VAL (1ULL<<63) /* valid error */
#define MCI_STATUS_OVER (1ULL<<62) /* previous errors lost */
#define MCI_STATUS_UC (1ULL<<61) /* uncorrected error */
+#define MCI_STATUS_EN (1ULL<<60) /* error enabled */
+#define MCI_STATUS_MISCV (1ULL<<59) /* misc error reg. valid */
+#define MCI_STATUS_ADDRV (1ULL<<58) /* addr reg. valid */
+#define MCI_STATUS_PCC (1ULL<<57) /* processor context corrupt */
+#define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */
+#define MCI_STATUS_AR (1ULL<<55) /* Action required */
+
+/* MISC register defines */
+#define MCM_ADDR_SEGOFF 0 /* segment offset */
+#define MCM_ADDR_LINEAR 1 /* linear address */
+#define MCM_ADDR_PHYS 2 /* physical address */
+#define MCM_ADDR_MEM 3 /* memory address */
+#define MCM_ADDR_GENERIC 7 /* generic */
#define MSR_IA32_TSC 0x10
#define MSR_IA32_APICBASE 0x1b
diff --git a/target-i386/helper.c b/target-i386/helper.c
index 4b430dd..4fff4a8 100644
--- a/target-i386/helper.c
+++ b/target-i386/helper.c
@@ -1032,7 +1032,7 @@ void cpu_inject_x86_mce(CPUState *cenv, int bank, uint64_t status,
return;
if (kvm_enabled()) {
- kvm_inject_x86_mce(cenv, bank, status, mcg_status, addr, misc);
+ kvm_inject_x86_mce(cenv, bank, status, mcg_status, addr, misc, 0);
return;
}
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 343fb02..84bd400 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -46,6 +46,13 @@
#define MSR_KVM_WALL_CLOCK 0x11
#define MSR_KVM_SYSTEM_TIME 0x12
+#ifndef BUS_MCEERR_AR
+#define BUS_MCEERR_AR 4
+#endif
+#ifndef BUS_MCEERR_AO
+#define BUS_MCEERR_AO 5
+#endif
+
#ifdef KVM_CAP_EXT_CPUID
static struct kvm_cpuid2 *try_get_cpuid(KVMState *s, int max)
@@ -192,10 +199,39 @@ static int kvm_set_mce(CPUState *env, struct kvm_x86_mce *m)
return kvm_vcpu_ioctl(env, KVM_X86_SET_MCE, m);
}
+static int kvm_get_msr(CPUState *env, struct kvm_msr_entry *msrs, int n)
+{
+ struct kvm_msrs *kmsrs = qemu_malloc(sizeof *kmsrs + n * sizeof *msrs);
+ int r;
+
+ kmsrs->nmsrs = n;
+ memcpy(kmsrs->entries, msrs, n * sizeof *msrs);
+ r = kvm_vcpu_ioctl(env, KVM_GET_MSRS, kmsrs);
+ memcpy(msrs, kmsrs->entries, n * sizeof *msrs);
+ free(kmsrs);
+ return r;
+}
+
+/* FIXME: kill this and kvm_get_msr, use env->mcg_status instead */
+static int kvm_mce_in_exception(CPUState *env)
+{
+ struct kvm_msr_entry msr_mcg_status = {
+ .index = MSR_MCG_STATUS,
+ };
+ int r;
+
+ r = kvm_get_msr(env, &msr_mcg_status, 1);
+ if (r == -1 || r == 0) {
+ return -1;
+ }
+ return !!(msr_mcg_status.data & MCG_STATUS_MCIP);
+}
+
struct kvm_x86_mce_data
{
CPUState *env;
struct kvm_x86_mce *mce;
+ int abort_on_error;
};
static void kvm_do_inject_x86_mce(void *_data)
@@ -203,14 +239,26 @@ static void kvm_do_inject_x86_mce(void *_data)
struct kvm_x86_mce_data *data = _data;
int r;
+ /* If there is an MCE excpetion being processed, ignore this SRAO MCE */
+ r = kvm_mce_in_exception(data->env);
+ if (r == -1)
+ fprintf(stderr, "Failed to get MCE status\n");
+ else if (r && !(data->mce->status & MCI_STATUS_AR))
+ return;
+
r = kvm_set_mce(data->env, data->mce);
- if (r < 0)
+ if (r < 0) {
perror("kvm_set_mce FAILED");
+ if (data->abort_on_error) {
+ abort();
+ }
+ }
}
#endif
void kvm_inject_x86_mce(CPUState *cenv, int bank, uint64_t status,
- uint64_t mcg_status, uint64_t addr, uint64_t misc)
+ uint64_t mcg_status, uint64_t addr, uint64_t misc,
+ int abort_on_error)
{
#ifdef KVM_CAP_MCE
struct kvm_x86_mce mce = {
@@ -225,7 +273,15 @@ void kvm_inject_x86_mce(CPUState *cenv, int bank, uint64_t status,
.mce = &mce,
};
+ if (!cenv->mcg_cap) {
+ fprintf(stderr, "MCE support is not enabled!\n");
+ return;
+ }
+
run_on_cpu(cenv, kvm_do_inject_x86_mce, &data);
+#else
+ if (abort_on_error)
+ abort();
#endif
}
@@ -1528,3 +1584,121 @@ bool kvm_arch_stop_on_emulation_error(CPUState *env)
((env->segs[R_CS].selector & 3) != 3);
}
+static void hardware_memory_error(void)
+{
+ fprintf(stderr, "Hardware memory error!\n");
+ exit(1);
+}
+
+int kvm_on_sigbus_vcpu(CPUState *env, int code, void *addr)
+{
+#if defined(KVM_CAP_MCE)
+ struct kvm_x86_mce mce = {
+ .bank = 9,
+ };
+ void *vaddr;
+ ram_addr_t ram_addr;
+ target_phys_addr_t paddr;
+ int r;
+
+ if ((env->mcg_cap & MCG_SER_P) && addr
+ && (code == BUS_MCEERR_AR
+ || code == BUS_MCEERR_AO)) {
+ if (code == BUS_MCEERR_AR) {
+ /* Fake an Intel architectural Data Load SRAR UCR */
+ mce.status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN
+ | MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S
+ | MCI_STATUS_AR | 0x134;
+ mce.misc = (MCM_ADDR_PHYS << 6) | 0xc;
+ mce.mcg_status = MCG_STATUS_MCIP | MCG_STATUS_EIPV;
+ } else {
+ /*
+ * If there is an MCE excpetion being processed, ignore
+ * this SRAO MCE
+ */
+ r = kvm_mce_in_exception(env);
+ if (r == -1) {
+ fprintf(stderr, "Failed to get MCE status\n");
+ } else if (r) {
+ return 0;
+ }
+ /* Fake an Intel architectural Memory scrubbing UCR */
+ mce.status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN
+ | MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S
+ | 0xc0;
+ mce.misc = (MCM_ADDR_PHYS << 6) | 0xc;
+ mce.mcg_status = MCG_STATUS_MCIP | MCG_STATUS_RIPV;
+ }
+ vaddr = (void *)addr;
+ if (qemu_ram_addr_from_host(vaddr, &ram_addr) ||
+ !kvm_physical_memory_addr_from_ram(env->kvm_state, ram_addr, &paddr)) {
+ fprintf(stderr, "Hardware memory error for memory used by "
+ "QEMU itself instead of guest system!\n");
+ /* Hope we are lucky for AO MCE */
+ if (code == BUS_MCEERR_AO) {
+ return 0;
+ } else {
+ hardware_memory_error();
+ }
+ }
+ mce.addr = paddr;
+ r = kvm_set_mce(env, &mce);
+ if (r < 0) {
+ fprintf(stderr, "kvm_set_mce: %s\n", strerror(errno));
+ abort();
+ }
+ } else
+#endif
+ {
+ if (code == BUS_MCEERR_AO) {
+ return 0;
+ } else if (code == BUS_MCEERR_AR) {
+ hardware_memory_error();
+ } else {
+ return 1;
+ }
+ }
+ return 0;
+}
+
+int kvm_on_sigbus(int code, void *addr)
+{
+#if defined(KVM_CAP_MCE)
+ if ((first_cpu->mcg_cap & MCG_SER_P) && addr && code == BUS_MCEERR_AO) {
+ uint64_t status;
+ void *vaddr;
+ ram_addr_t ram_addr;
+ target_phys_addr_t paddr;
+ CPUState *cenv;
+
+ /* Hope we are lucky for AO MCE */
+ vaddr = addr;
+ if (qemu_ram_addr_from_host(vaddr, &ram_addr) ||
+ !kvm_physical_memory_addr_from_ram(first_cpu->kvm_state, ram_addr, &paddr)) {
+ fprintf(stderr, "Hardware memory error for memory used by "
+ "QEMU itself instead of guest system!: %p\n", addr);
+ return 0;
+ }
+ status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN
+ | MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S
+ | 0xc0;
+ kvm_inject_x86_mce(first_cpu, 9, status,
+ MCG_STATUS_MCIP | MCG_STATUS_RIPV, paddr,
+ (MCM_ADDR_PHYS << 6) | 0xc, 1);
+ for (cenv = first_cpu->next_cpu; cenv != NULL; cenv = cenv->next_cpu) {
+ kvm_inject_x86_mce(cenv, 1, MCI_STATUS_VAL | MCI_STATUS_UC,
+ MCG_STATUS_MCIP | MCG_STATUS_RIPV, 0, 0, 1);
+ }
+ } else
+#endif
+ {
+ if (code == BUS_MCEERR_AO) {
+ return 0;
+ } else if (code == BUS_MCEERR_AR) {
+ hardware_memory_error();
+ } else {
+ return 1;
+ }
+ }
+ return 0;
+}
diff --git a/target-i386/kvm_x86.h b/target-i386/kvm_x86.h
index c1ebd24..04932cf 100644
--- a/target-i386/kvm_x86.h
+++ b/target-i386/kvm_x86.h
@@ -16,6 +16,7 @@
#define __KVM_X86_H__
void kvm_inject_x86_mce(CPUState *cenv, int bank, uint64_t status,
- uint64_t mcg_status, uint64_t addr, uint64_t misc);
+ uint64_t mcg_status, uint64_t addr, uint64_t misc,
+ int abort_on_error);
#endif
--
1.7.2.1
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [Qemu-devel] [PATCH 10/10] Add savevm/loadvm support for MCE
2010-10-19 10:40 [Qemu-devel] [PATCH 00/10] [PULL] qemu-kvm.git uq/master queue Marcelo Tosatti
` (8 preceding siblings ...)
2010-10-19 10:40 ` [Qemu-devel] [PATCH 09/10] MCE: Relay UCR MCE to guest Marcelo Tosatti
@ 2010-10-19 10:40 ` Marcelo Tosatti
2010-10-19 13:40 ` [Qemu-devel] [PATCH 00/10] [PULL] qemu-kvm.git uq/master queue Anthony Liguori
10 siblings, 0 replies; 15+ messages in thread
From: Marcelo Tosatti @ 2010-10-19 10:40 UTC (permalink / raw)
To: Anthony Liguori; +Cc: Marcelo Tosatti, qemu-devel, kvm, Avi Kivity
Port qemu-kvm's
commit 1bab5d11545d8de5facf46c28630085a2f9651ae
Author: Huang Ying <ying.huang@intel.com>
Date: Wed Mar 3 16:52:46 2010 +0800
Add savevm/loadvm support for MCE
MCE registers are saved/load into/from CPUState in
kvm_arch_save/load_regs. To simulate the MCG_STATUS clearing upon
reset, MSR_MCG_STATUS is set to 0 for KVM_PUT_RESET_STATE.
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
target-i386/kvm.c | 39 ++++++++++++++++++++++++++++++++++++++-
1 files changed, 38 insertions(+), 1 deletions(-)
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 84bd400..d940175 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -777,7 +777,7 @@ static int kvm_put_msrs(CPUState *env, int level)
struct kvm_msr_entry entries[100];
} msr_data;
struct kvm_msr_entry *msrs = msr_data.entries;
- int n = 0;
+ int i, n = 0;
kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_CS, env->sysenter_cs);
kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_ESP, env->sysenter_esp);
@@ -797,6 +797,18 @@ static int kvm_put_msrs(CPUState *env, int level)
env->system_time_msr);
kvm_msr_entry_set(&msrs[n++], MSR_KVM_WALL_CLOCK, env->wall_clock_msr);
}
+#ifdef KVM_CAP_MCE
+ if (env->mcg_cap) {
+ if (level == KVM_PUT_RESET_STATE)
+ kvm_msr_entry_set(&msrs[n++], MSR_MCG_STATUS, env->mcg_status);
+ else if (level == KVM_PUT_FULL_STATE) {
+ kvm_msr_entry_set(&msrs[n++], MSR_MCG_STATUS, env->mcg_status);
+ kvm_msr_entry_set(&msrs[n++], MSR_MCG_CTL, env->mcg_ctl);
+ for (i = 0; i < (env->mcg_cap & 0xff) * 4; i++)
+ kvm_msr_entry_set(&msrs[n++], MSR_MC0_CTL + i, env->mce_banks[i]);
+ }
+ }
+#endif
msr_data.info.nmsrs = n;
@@ -1004,6 +1016,15 @@ static int kvm_get_msrs(CPUState *env)
msrs[n++].index = MSR_KVM_SYSTEM_TIME;
msrs[n++].index = MSR_KVM_WALL_CLOCK;
+#ifdef KVM_CAP_MCE
+ if (env->mcg_cap) {
+ msrs[n++].index = MSR_MCG_STATUS;
+ msrs[n++].index = MSR_MCG_CTL;
+ for (i = 0; i < (env->mcg_cap & 0xff) * 4; i++)
+ msrs[n++].index = MSR_MC0_CTL + i;
+ }
+#endif
+
msr_data.info.nmsrs = n;
ret = kvm_vcpu_ioctl(env, KVM_GET_MSRS, &msr_data);
if (ret < 0)
@@ -1046,6 +1067,22 @@ static int kvm_get_msrs(CPUState *env)
case MSR_KVM_WALL_CLOCK:
env->wall_clock_msr = msrs[i].data;
break;
+#ifdef KVM_CAP_MCE
+ case MSR_MCG_STATUS:
+ env->mcg_status = msrs[i].data;
+ break;
+ case MSR_MCG_CTL:
+ env->mcg_ctl = msrs[i].data;
+ break;
+#endif
+ default:
+#ifdef KVM_CAP_MCE
+ if (msrs[i].index >= MSR_MC0_CTL &&
+ msrs[i].index < MSR_MC0_CTL + (env->mcg_cap & 0xff) * 4) {
+ env->mce_banks[msrs[i].index - MSR_MC0_CTL] = msrs[i].data;
+ break;
+ }
+#endif
}
}
--
1.7.2.1
^ permalink raw reply related [flat|nested] 15+ messages in thread
* Re: [Qemu-devel] [PATCH 00/10] [PULL] qemu-kvm.git uq/master queue
2010-10-19 10:40 [Qemu-devel] [PATCH 00/10] [PULL] qemu-kvm.git uq/master queue Marcelo Tosatti
` (9 preceding siblings ...)
2010-10-19 10:40 ` [Qemu-devel] [PATCH 10/10] Add savevm/loadvm support for MCE Marcelo Tosatti
@ 2010-10-19 13:40 ` Anthony Liguori
10 siblings, 0 replies; 15+ messages in thread
From: Anthony Liguori @ 2010-10-19 13:40 UTC (permalink / raw)
To: Marcelo Tosatti; +Cc: qemu-devel, kvm
On 10/19/2010 05:40 AM, Marcelo Tosatti wrote:
> The following changes since commit 38cc9b607f85017b095793cab6c129bc9844f441:
>
> issue snd_pcm_start() when capturing audio (2010-10-18 00:39:06 +0400)
>
> are available in the git repository at:
> git://git.kernel.org/pub/scm/virt/kvm/qemu-kvm.git uq/master
>
This breaks the build.
cc1: warnings being treated as errors
/home/anthony/git/qemu/target-i386/kvm.c: In function ‘kvm_on_sigbus_vcpu’:
/home/anthony/git/qemu/target-i386/kvm.c:1671: error: passing argument 3
of ‘kvm_physical_memory_addr_from_ram’ from incompatible pointer type
/home/anthony/git/qemu/kvm.h:180: note: expected ‘target_phys_addr_t *’
but argument is of type ‘long unsigned int *’
/home/anthony/git/qemu/target-i386/kvm.c: In function ‘kvm_on_sigbus’:
/home/anthony/git/qemu/target-i386/kvm.c:1714: error: passing argument 3
of ‘kvm_physical_memory_addr_from_ram’ from incompatible pointer type
/home/anthony/git/qemu/kvm.h:180: note: expected ‘target_phys_addr_t *’
but argument is of type ‘long unsigned int *’
make[1]: *** [kvm.o] Error 1
make: *** [subdir-i386-softmmu] Error 2
I've pushed my tree to http://repo.or.cz/w/qemu/aliguori.git
qemu-kvm-20101019 but the merge is a fast-forward so you should have no
trouble reproducing.
anthony@titi:~/build/qemu$ uname -a
Linux titi 2.6.32.11+drm33.2-x201 #1 SMP Sat May 22 09:58:34 PDT 2010
x86_64 GNU/Linux
anthony@titi:~/build/qemu$ gcc -v
Using built-in specs.
Target: x86_64-linux-gnu
Configured with: ../src/configure -v --with-pkgversion='Ubuntu
4.4.3-4ubuntu5' --with-bugurl=file:///usr/share/doc/gcc-4.4/README.Bugs
--enable-languages=c,c++,fortran,objc,obj-c++ --prefix=/usr
--enable-shared --enable-multiarch --enable-linker-build-id
--with-system-zlib --libexecdir=/usr/lib --without-included-gettext
--enable-threads=posix --with-gxx-include-dir=/usr/include/c++/4.4
--program-suffix=-4.4 --enable-nls --enable-clocale=gnu
--enable-libstdcxx-debug --enable-plugin --enable-objc-gc
--disable-werror --with-arch-32=i486 --with-tune=generic
--enable-checking=release --build=x86_64-linux-gnu
--host=x86_64-linux-gnu --target=x86_64-linux-gnu
Thread model: posix
gcc version 4.4.3 (Ubuntu 4.4.3-4ubuntu5)
Regards,
Anthony Liguori
> Huang Ying (1):
> Add RAM -> physical addr mapping in MCE simulation
>
> Joerg Roedel (2):
> Set cpuid definition to 0 before initializing it
> Add svm cpuid features
>
> Marcelo Tosatti (7):
> signalfd compatibility
> iothread: use signalfd
> Expose thread_id in info cpus
> kvm: x86: add mce support
> Export qemu_ram_addr_from_host
> MCE: Relay UCR MCE to guest
> Add savevm/loadvm support for MCE
>
> Makefile.objs | 1 +
> compatfd.c | 117 +++++++++++++++++++
> compatfd.h | 43 +++++++
> configure | 18 +++
> cpu-common.h | 3 +-
> cpu-defs.h | 1 +
> cpus.c | 161 ++++++++++++++++++++++++--
> exec-all.h | 2 +-
> exec.c | 27 +++--
> kvm-all.c | 18 +++
> kvm-stub.c | 5 +
> kvm.h | 6 +
> monitor.c | 4 +
> osdep.c | 15 +++
> osdep.h | 1 +
> target-i386/cpu.h | 32 +++++-
> target-i386/cpuid.c | 79 ++++++++++---
> target-i386/helper.c | 6 +
> target-i386/kvm.c | 300 ++++++++++++++++++++++++++++++++++++++++++++++++-
> target-i386/kvm_x86.h | 22 ++++
> 20 files changed, 817 insertions(+), 44 deletions(-)
> create mode 100644 compatfd.c
> create mode 100644 compatfd.h
> create mode 100644 target-i386/kvm_x86.h
>
>
>
^ permalink raw reply [flat|nested] 15+ messages in thread