From mboxrd@z Thu Jan 1 00:00:00 1970 From: Marcelo Tosatti Subject: [patch 1/4] QEMU/KVM: self-disabling C2 emulation Date: Sat, 24 May 2008 20:43:43 -0300 Message-ID: <20080525000036.493605190@localhost.localdomain> References: <20080524234342.983197667@localhost.localdomain> Cc: kvm-devel To: Avi Kivity , Chris Wright Return-path: Received: from mx1.redhat.com ([66.187.233.31]:56114 "EHLO mx1.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1753749AbYEYADA (ORCPT ); Sat, 24 May 2008 20:03:00 -0400 Content-Disposition: inline; filename=acpi-c2-fake Sender: kvm-owner@vger.kernel.org List-ID: Advertise C2 state support via ACPI's per-processor _CST packages, but write an invalid latency value the first time the guest attempts to idle via the P_LVL2 port. This way the TSC is considered unreliable, and we avoid the costs associated with the APIC timer broadcasts on enter/exit necessary for C1+. It would be nice to fall back to plain hlt idle instead of C1, which does not use the pmtimer for idle measurement, but Linux guests with CONFIG_CPUIDLE enabled fall back to poll_idle instead, which is very inefficient. 
Index: kvm-userspace.tip/bios/acpi-dsdt.dsl =================================================================== --- kvm-userspace.tip.orig/bios/acpi-dsdt.dsl +++ kvm-userspace.tip/bios/acpi-dsdt.dsl @@ -33,8 +33,20 @@ DefinitionBlock ( PRU, 8, PRD, 8, } + OperationRegion(PWNO, SystemIO, 0xb040, 0x02) + Field (PWNO, WordAcc, NoLock, WriteAsZeros) + { + PWC, 16, + } - Processor (CPU0, 0x00, 0x0000b010, 0x06) {Method (_STA) { Return(0xF)}} + Processor (CPU0, 0x00, 0x0000b010, 0x06) { + Method (_STA) { Return(0xF)} + Name(_CST, Package() { + 1, Package() { + ResourceTemplate() {Register(SystemIO, 8, 0, 0xb014)}, + 2, 2, 300}, + }) + } Processor (CPU1, 0x01, 0x0000b010, 0x06) { Name (TMP, Buffer(0x8) {0x0, 0x8, 0x01, 0x01, 0x1, 0x0, 0x0, 0x0}) Method(_MAT, 0) { @@ -44,6 +56,11 @@ DefinitionBlock ( Method (_STA) { Return(0xF) } + Name(_CST, Package() { + 1, Package() { + ResourceTemplate() {Register(SystemIO, 8, 0, 0xb014)}, + 2, 2, 300}, + }) } Processor (CPU2, 0x02, 0x0000b010, 0x06) { Name (TMP, Buffer(0x8) {0x0, 0x8, 0x02, 0x02, 0x1, 0x0, 0x0, 0x0}) @@ -54,6 +71,11 @@ DefinitionBlock ( Method (_STA) { Return(0xF) } + Name(_CST, Package() { + 1, Package() { + ResourceTemplate() {Register(SystemIO, 8, 0, 0xb014)}, + 2, 2, 300}, + }) } Processor (CPU3, 0x03, 0x0000b010, 0x06) { Name (TMP, Buffer(0x8) {0x0, 0x8, 0x03, 0x03, 0x1, 0x0, 0x0, 0x0}) @@ -64,6 +86,11 @@ DefinitionBlock ( Method (_STA) { Return(0xF) } + Name(_CST, Package() { + 1, Package() { + ResourceTemplate() {Register(SystemIO, 8, 0, 0xb014)}, + 2, 2, 300}, + }) } Processor (CPU4, 0x04, 0x0000b010, 0x06) { Name (TMP, Buffer(0x8) {0x0, 0x8, 0x04, 0x04, 0x1, 0x0, 0x0, 0x0}) @@ -74,6 +101,11 @@ DefinitionBlock ( Method (_STA) { Return(0xF) } + Name(_CST, Package() { + 1, Package() { + ResourceTemplate() {Register(SystemIO, 8, 0, 0xb014)}, + 2, 2, 300}, + }) } Processor (CPU5, 0x05, 0x0000b010, 0x06) { Name (TMP, Buffer(0x8) {0x0, 0x8, 0x05, 0x05, 0x1, 0x0, 0x0, 0x0}) @@ -84,6 +116,11 @@ DefinitionBlock ( 
Method (_STA) { Return(0xF) } + Name(_CST, Package() { + 1, Package() { + ResourceTemplate() {Register(SystemIO, 8, 0, 0xb014)}, + 2, 2, 300}, + }) } Processor (CPU6, 0x06, 0x0000b010, 0x06) { Name (TMP, Buffer(0x8) {0x0, 0x8, 0x06, 0x06, 0x1, 0x0, 0x0, 0x0}) @@ -94,6 +131,11 @@ DefinitionBlock ( Method (_STA) { Return(0xF) } + Name(_CST, Package() { + 1, Package() { + ResourceTemplate() {Register(SystemIO, 8, 0, 0xb014)}, + 2, 2, 300}, + }) } Processor (CPU7, 0x07, 0x0000b010, 0x06) { Name (TMP, Buffer(0x8) {0x0, 0x8, 0x07, 0x07, 0x1, 0x0, 0x0, 0x0}) @@ -104,6 +146,11 @@ DefinitionBlock ( Method (_STA) { Return(0xF) } + Name(_CST, Package() { + 1, Package() { + ResourceTemplate() {Register(SystemIO, 8, 0, 0xb014)}, + 2, 2, 300}, + }) } Processor (CPU8, 0x08, 0x0000b010, 0x06) { Name (TMP, Buffer(0x8) {0x0, 0x8, 0x08, 0x08, 0x1, 0x0, 0x0, 0x0}) @@ -114,6 +161,11 @@ DefinitionBlock ( Method (_STA) { Return(0xF) } + Name(_CST, Package() { + 1, Package() { + ResourceTemplate() {Register(SystemIO, 8, 0, 0xb014)}, + 2, 2, 300}, + }) } Processor (CPU9, 0x09, 0x0000b010, 0x06) { Name (TMP, Buffer(0x8) {0x0, 0x8, 0x09, 0x09, 0x1, 0x0, 0x0, 0x0}) @@ -124,6 +176,11 @@ DefinitionBlock ( Method (_STA) { Return(0xF) } + Name(_CST, Package() { + 1, Package() { + ResourceTemplate() {Register(SystemIO, 8, 0, 0xb014)}, + 2, 2, 300}, + }) } Processor (CPUA, 0x0a, 0x0000b010, 0x06) { Name (TMP, Buffer(0x8) {0x0, 0x8, 0x0A, 0x0A, 0x1, 0x0, 0x0, 0x0}) @@ -134,6 +191,11 @@ DefinitionBlock ( Method (_STA) { Return(0xF) } + Name(_CST, Package() { + 1, Package() { + ResourceTemplate() {Register(SystemIO, 8, 0, 0xb014)}, + 2, 2, 300}, + }) } Processor (CPUB, 0x0b, 0x0000b010, 0x06) { Name (TMP, Buffer(0x8) {0x0, 0x8, 0x0B, 0x0B, 0x1, 0x0, 0x0, 0x0}) @@ -144,6 +206,11 @@ DefinitionBlock ( Method (_STA) { Return(0xF) } + Name(_CST, Package() { + 1, Package() { + ResourceTemplate() {Register(SystemIO, 8, 0, 0xb014)}, + 2, 2, 300}, + }) } Processor (CPUC, 0x0c, 0x0000b010, 0x06) { Name (TMP, 
Buffer(0x8) {0x0, 0x8, 0x0C, 0x0C, 0x1, 0x0, 0x0, 0x0}) @@ -154,6 +221,11 @@ DefinitionBlock ( Method (_STA) { Return(0xF) } + Name(_CST, Package() { + 1, Package() { + ResourceTemplate() {Register(SystemIO, 8, 0, 0xb014)}, + 2, 2, 300}, + }) } Processor (CPUD, 0x0d, 0x0000b010, 0x06) { Name (TMP, Buffer(0x8) {0x0, 0x8, 0x0D, 0x0D, 0x1, 0x0, 0x0, 0x0}) @@ -164,6 +236,11 @@ DefinitionBlock ( Method (_STA) { Return(0xF) } + Name(_CST, Package() { + 1, Package() { + ResourceTemplate() {Register(SystemIO, 8, 0, 0xb014)}, + 2, 2, 300}, + }) } Processor (CPUE, 0x0e, 0x0000b010, 0x06) { Name (TMP, Buffer(0x8) {0x0, 0x8, 0x0E, 0x0E, 0x1, 0x0, 0x0, 0x0}) @@ -174,6 +251,11 @@ DefinitionBlock ( Method (_STA) { Return(0xF) } + Name(_CST, Package() { + 1, Package() { + ResourceTemplate() {Register(SystemIO, 8, 0, 0xb014)}, + 2, 2, 300}, + }) } } @@ -1544,6 +1626,81 @@ DefinitionBlock ( Return(0x01) } Method(_L06) { + If (And(\_PR.PWC, 0x1)) { + Store (0xfffff, Index (DeRefOf (Index (\_PR.CPU0._CST, 1)), 2)) + Notify(\_PR.CPU0, 0x81) + } + + If (And(\_PR.PWC, 0x2)) { + Store (0xfffff, Index (DeRefOf (Index (\_PR.CPU1._CST, 1)), 2)) + Notify(\_PR.CPU1, 0x81) + } + + If (And(\_PR.PWC, 0x4)) { + Store (0xfffff, Index (DeRefOf (Index (\_PR.CPU2._CST, 1)), 2)) + Notify(\_PR.CPU2, 0x81) + } + + If (And(\_PR.PWC, 0x8)) { + Store (0xfffff, Index (DeRefOf (Index (\_PR.CPU3._CST, 1)), 2)) + Notify(\_PR.CPU3, 0x81) + } + + If (And(\_PR.PWC, 0x10)) { + Store (0xfffff, Index (DeRefOf (Index (\_PR.CPU4._CST, 1)), 2)) + Notify(\_PR.CPU4, 0x81) + } + + If (And(\_PR.PWC, 0x20)) { + Store (0xfffff, Index (DeRefOf (Index (\_PR.CPU5._CST, 1)), 2)) + Notify(\_PR.CPU5, 0x81) + } + + If (And(\_PR.PWC, 0x40)) { + Store (0xfffff, Index (DeRefOf (Index (\_PR.CPU6._CST, 1)), 2)) + Notify(\_PR.CPU6, 0x81) + } + + If (And(\_PR.PWC, 0x80)) { + Store (0xfffff, Index (DeRefOf (Index (\_PR.CPU7._CST, 1)), 2)) + Notify(\_PR.CPU7, 0x81) + } + + If (And(\_PR.PWC, 0x100)) { + Store (0xfffff, Index (DeRefOf (Index 
(\_PR.CPU8._CST, 1)), 2)) + Notify(\_PR.CPU8, 0x81) + } + + If (And(\_PR.PWC, 0x200)) { + Store (0xfffff, Index (DeRefOf (Index (\_PR.CPU9._CST, 1)), 2)) + Notify(\_PR.CPU9, 0x81) + } + + If (And(\_PR.PWC, 0x400)) { + Store (0xfffff, Index (DeRefOf (Index (\_PR.CPUA._CST, 1)), 2)) + Notify(\_PR.CPUA, 0x81) + } + + If (And(\_PR.PWC, 0x800)) { + Store (0xfffff, Index (DeRefOf (Index (\_PR.CPUB._CST, 1)), 2)) + Notify(\_PR.CPUB, 0x81) + } + + If (And(\_PR.PWC, 0x1000)) { + Store (0xfffff, Index (DeRefOf (Index (\_PR.CPUC._CST, 1)), 2)) + Notify(\_PR.CPUC, 0x81) + } + + If (And(\_PR.PWC, 0x2000)) { + Store (0xfffff, Index (DeRefOf (Index (\_PR.CPUD._CST, 1)), 2)) + Notify(\_PR.CPUD, 0x81) + } + + If (And(\_PR.PWC, 0x4000)) { + Store (0xfffff, Index (DeRefOf (Index (\_PR.CPUE._CST, 1)), 2)) + Notify(\_PR.CPUE, 0x81) + } + Return(0x01) } Method(_L07) { Index: kvm-userspace.tip/qemu/hw/acpi.c =================================================================== --- kvm-userspace.tip.orig/qemu/hw/acpi.c +++ kvm-userspace.tip/qemu/hw/acpi.c @@ -121,6 +121,31 @@ static void pm_tmr_timer(void *opaque) pm_update_sci(s); } +/* + * Fake C2 emulation, so the OS will consider the TSC unreliable + * an fallback to C1 after the latency is updated to a high value + * in acpi-dsdt.dsl. 
+ */ +static void qemu_system_cpu_power_notify(int cpu); +static uint32_t pm_ioport_readb(void *opaque, uint32_t addr) +{ + CPUState *env = cpu_single_env; + + addr &= 0x3f; + switch (addr) { + case 0x14: /* P_LVL2 */ + qemu_system_cpu_power_notify(env->cpu_index); + } +#ifdef DEBUG + printf("pm_ioport_readb addr=%x\n", addr); +#endif + return 0; +} + +static void pm_ioport_writeb(void *opaque, uint32_t addr, uint32_t val) +{ +} + static void pm_ioport_writew(void *opaque, uint32_t addr, uint32_t val) { PIIX4PMState *s = opaque; @@ -420,6 +445,8 @@ static void pm_io_space_update(PIIX4PMSt #if defined(DEBUG) printf("PM: mapping to 0x%x\n", pm_io_base); #endif + register_ioport_write(pm_io_base, 64, 1, pm_ioport_writeb, s); + register_ioport_read(pm_io_base, 64, 1, pm_ioport_readb, s); register_ioport_write(pm_io_base, 64, 2, pm_ioport_writew, s); register_ioport_read(pm_io_base, 64, 2, pm_ioport_readw, s); register_ioport_write(pm_io_base, 64, 4, pm_ioport_writel, s); @@ -538,6 +565,7 @@ void qemu_system_powerdown(void) } #endif #define GPE_BASE 0xafe0 +#define POWER_GPE_BASE 0xb040 #define PROC_BASE 0xaf00 #define PCI_BASE 0xae00 #define PCI_EJ_BASE 0xae08 @@ -554,7 +582,12 @@ struct pci_status { uint32_t down; }; +struct power_gpe_regs { + uint8_t cpus; +}; + static struct gpe_regs gpe; +static struct power_gpe_regs power_gpe; static struct pci_status pci0_status; static uint32_t gpe_readb(void *opaque, uint32_t addr) @@ -623,6 +656,23 @@ static void gpe_writeb(void *opaque, uin #endif } +static uint32_t cpu_power_read(void *opaque, uint32_t addr) +{ + struct power_gpe_regs *p = opaque; + +#if defined(DEBUG) + printf("cpu power read %lx == %lx\n", addr, p->cpus); +#endif + return p->cpus; +} + +static void cpu_power_write(void *opaque, uint32_t addr, uint32_t val) +{ +#if defined(DEBUG) + printf("cpu power write %lx <== %lx\n", addr, val); +#endif +} + static uint32_t pcihotplug_read(void *opaque, uint32_t addr) { uint32_t val = 0; @@ -696,6 +746,9 @@ void 
qemu_system_hot_add_init(const char register_ioport_write(PCI_EJ_BASE, 4, 4, pciej_write, NULL); register_ioport_read(PCI_EJ_BASE, 4, 4, pciej_read, NULL); + register_ioport_write(POWER_GPE_BASE, 4, 2, cpu_power_write, &power_gpe); + register_ioport_read(POWER_GPE_BASE, 4, 2, cpu_power_read, &power_gpe); + model = cpu_model; } @@ -738,6 +791,16 @@ void qemu_system_cpu_hot_add(int cpu, in disable_processor(&gpe, cpu); qemu_set_irq(pm_state->irq, 0); } + +static void qemu_system_cpu_power_notify(int cpu) +{ + power_gpe.cpus = 0; + + qemu_set_irq(pm_state->irq, 1); + power_gpe.cpus |= (1 << cpu); + qemu_set_irq(pm_state->irq, 0); +} + #endif static void enable_device(struct pci_status *p, struct gpe_regs *g, int slot) --