kvm.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/2] Make BIOS irq0->inti2 override configurable from userspace
@ 2009-04-09 22:20 Beth Kon
  2009-04-09 22:20 ` [PATCH 2/2] Finish HPET implementation for KVM Beth Kon
  2009-04-16 12:55 ` [PATCH 1/2] Make BIOS irq0->inti2 override configurable from userspace Avi Kivity
  0 siblings, 2 replies; 3+ messages in thread
From: Beth Kon @ 2009-04-09 22:20 UTC (permalink / raw)
  To: kvm; +Cc: Beth Kon

These patches resolve the irq0->inti2 override issue, and get the hpet working
on kvm with and without -no-kvm-irqchip (i.e., when hpet takes over, it 
disables userspace or in-kernel pit as appropriate).

The irq0->inti2 override is always used unless the kernel cannot do irq
routing (in that case it is left off, for compatibility with old kernels).
If the kernel is capable, userspace sets up irq0->inti2 via the irq routing
interface and adds the irq0->inti2 override to both the MADT interrupt
source override table and the mp table (for the no-acpi case).

A couple of months ago, Marcelo was seeing RHEL5 guests complain of invalid
checksum with these patches, but later he couldn't reproduce it, and I'm not 
seeing it now. While all guests still need to be fully tested, everything 
appears to be in order.  I've tested on win2k8 64-bit, win2k8 32-bit,
RHEL5.3 32-bit, and ubuntu 8.10 64-bit.

Signed-off-by: Beth Kon <eak@us.ibm.com>

diff --git a/bios/rombios32.c b/bios/rombios32.c
index 4dea066..5cf1f54 100755
--- a/bios/rombios32.c
+++ b/bios/rombios32.c
@@ -443,6 +443,7 @@ uint32_t cpuid_ext_features;
 unsigned long ram_size;
 uint64_t ram_end;
 uint8_t bios_uuid[16];
+uint8_t irq0_override;
 #ifdef BX_USE_EBDA_TABLES
 unsigned long ebda_cur_addr;
 #endif
@@ -475,6 +476,7 @@ void wrmsr_smp(uint32_t index, uint64_t val)
 #define QEMU_CFG_SIGNATURE  0x00
 #define QEMU_CFG_ID         0x01
 #define QEMU_CFG_UUID       0x02
+#define QEMU_CFG_IRQ0_OVERRIDE 0x0d
 
 int qemu_cfg_port;
 
@@ -516,6 +518,18 @@ void uuid_probe(void)
     memset(bios_uuid, 0, 16);
 }
 
+void irq0_override_probe(void)
+{
+#ifdef BX_QEMU
+    if(qemu_cfg_port) {
+        qemu_cfg_select(QEMU_CFG_IRQ0_OVERRIDE);
+        qemu_cfg_read(&irq0_override, 1);
+        return;
+    }
+#endif
+    memset(&irq0_override, 0, 1);
+}
+
 void cpu_probe(void)
 {
     uint32_t eax, ebx, ecx, edx;
@@ -1152,6 +1166,8 @@ static void mptable_init(void)
 
     /* irqs */
     for(i = 0; i < 16; i++) {
+        if (irq0_override && i == 2)
+            continue;
         putb(&q, 3); /* entry type = I/O interrupt */
         putb(&q, 0); /* interrupt type = vectored interrupt */
         putb(&q, 0); /* flags: po=0, el=0 */
@@ -1159,7 +1175,10 @@ static void mptable_init(void)
         putb(&q, 0); /* source bus ID = ISA */
         putb(&q, i); /* source bus IRQ */
         putb(&q, ioapic_id); /* dest I/O APIC ID */
-        putb(&q, i); /* dest I/O APIC interrupt in */
+        if (irq0_override && i == 0)
+            putb(&q, 2); /* dest I/O APIC interrupt in */
+        else
+            putb(&q, i); /* dest I/O APIC interrupt in */
     }
     /* patch length */
     len = q - mp_config_table;
@@ -1508,6 +1527,11 @@ void acpi_bios_init(void)
         sizeof(struct madt_processor_apic) * MAX_CPUS +
         sizeof(struct madt_io_apic);
     madt = (void *)(addr);
+    for (i = 0; i < 16; i++)
+        if (PCI_ISA_IRQ_MASK & (1U << i))
+            madt_size += sizeof(struct madt_intsrcovr);
+    if (irq0_override)
+        madt_size += sizeof(struct madt_intsrcovr);
     addr += madt_size;
 
     acpi_tables_size = addr - base_addr;
@@ -1597,8 +1621,15 @@ void acpi_bios_init(void)
         io_apic->interrupt = cpu_to_le32(0);
 
         intsrcovr = (struct madt_intsrcovr*)(io_apic + 1);
-        for ( i = 0; i < 16; i++ ) {
-            if ( PCI_ISA_IRQ_MASK & (1U << i) ) {
+        for (i = 0; i < 16; i++) {
+            if (irq0_override && i == 0) {
+                memset(intsrcovr, 0, sizeof(*intsrcovr));
+                intsrcovr->type   = APIC_XRUPT_OVERRIDE;
+                intsrcovr->length = sizeof(*intsrcovr);
+                intsrcovr->source = i;
+                intsrcovr->gsi    = 2;
+                intsrcovr->flags  = 0;  //conforms to bus specifications
+            } else if (PCI_ISA_IRQ_MASK & (1U << i)) {
                 memset(intsrcovr, 0, sizeof(*intsrcovr));
                 intsrcovr->type   = APIC_XRUPT_OVERRIDE;
                 intsrcovr->length = sizeof(*intsrcovr);
@@ -1610,7 +1641,6 @@ void acpi_bios_init(void)
                 continue;
             }
             intsrcovr++;
-            madt_size += sizeof(struct madt_intsrcovr);
         }
         acpi_build_table_header((struct acpi_table_header *)madt,
                                 "APIC", madt_size, 1);
@@ -2230,6 +2260,8 @@ void rombios32_init(uint32_t *s3_resume_vector, uint8_t *shutdown_flag)
 
     if (bios_table_cur_addr != 0) {
 
+        irq0_override_probe();
+
         mptable_init();
 
         uuid_probe();
diff --git a/qemu/hw/fw_cfg.c b/qemu/hw/fw_cfg.c
index e324e8d..f06dc3c 100644
--- a/qemu/hw/fw_cfg.c
+++ b/qemu/hw/fw_cfg.c
@@ -279,6 +279,7 @@ void *fw_cfg_init(uint32_t ctl_port, uint32_t data_port,
     fw_cfg_add_bytes(s, FW_CFG_UUID, qemu_uuid, 16);
     fw_cfg_add_i16(s, FW_CFG_NOGRAPHIC, (uint16_t)nographic);
     fw_cfg_add_i16(s, FW_CFG_NB_CPUS, (uint16_t)smp_cpus);
+    fw_cfg_add_i16(s, FW_CFG_IRQ0_OVERRIDE, (uint16_t)irq0override);
 
     register_savevm("fw_cfg", -1, 1, fw_cfg_save, fw_cfg_load, s);
     qemu_register_reset(fw_cfg_reset, s);
diff --git a/qemu/hw/fw_cfg.h b/qemu/hw/fw_cfg.h
index 41a3dd0..ef6e8c6 100644
--- a/qemu/hw/fw_cfg.h
+++ b/qemu/hw/fw_cfg.h
@@ -14,6 +14,7 @@
 #define FW_CFG_INITRD_ADDR      0x0a
 #define FW_CFG_INITRD_SIZE      0x0b
 #define FW_CFG_BOOT_DEVICE      0x0c
+#define FW_CFG_IRQ0_OVERRIDE    0x0d
 #define FW_CFG_MAX_ENTRY        0x10
 
 #define FW_CFG_WRITE_CHANNEL    0x4000
diff --git a/qemu/hw/ioapic.c b/qemu/hw/ioapic.c
index 0b70cf6..4dafb52 100644
--- a/qemu/hw/ioapic.c
+++ b/qemu/hw/ioapic.c
@@ -23,6 +23,7 @@
 
 #include "hw.h"
 #include "pc.h"
+#include "sysemu.h"
 #include "qemu-timer.h"
 #include "host-utils.h"
 
@@ -95,14 +96,12 @@ void ioapic_set_irq(void *opaque, int vector, int level)
 {
     IOAPICState *s = opaque;
 
-#if 0
     /* ISA IRQs map to GSI 1-1 except for IRQ0 which maps
      * to GSI 2.  GSI maps to ioapic 1-1.  This is not
      * the cleanest way of doing it but it should work. */
 
-    if (vector == 0)
+    if (vector == 0 && irq0override)
         vector = 2;
-#endif
 
     if (vector >= 0 && vector < IOAPIC_NUM_PINS) {
         uint32_t mask = 1 << vector;
diff --git a/qemu/pc-bios/bios.bin b/qemu/pc-bios/bios.bin
index 04c1f86..c0b07b5 100644
Binary files a/qemu/pc-bios/bios.bin and b/qemu/pc-bios/bios.bin differ
diff --git a/qemu/qemu-kvm.c b/qemu/qemu-kvm.c
index 4164368..2755b5e 100644
--- a/qemu/qemu-kvm.c
+++ b/qemu/qemu-kvm.c
@@ -812,7 +812,10 @@ int kvm_qemu_create_context(void)
                 return r;
         }
         for (i = 0; i < 24; ++i) {
-            r = kvm_add_irq_route(kvm_context, i, KVM_IRQCHIP_IOAPIC, i);
+            if (i == 0)
+                r = kvm_add_irq_route(kvm_context, i, KVM_IRQCHIP_IOAPIC, 2);
+            else if (i != 2)
+                r = kvm_add_irq_route(kvm_context, i, KVM_IRQCHIP_IOAPIC, i);
             if (r < 0)
                 return r;
         }
diff --git a/qemu/sysemu.h b/qemu/sysemu.h
index d765465..fd127bd 100644
--- a/qemu/sysemu.h
+++ b/qemu/sysemu.h
@@ -94,6 +94,7 @@ extern int graphic_width;
 extern int graphic_height;
 extern int graphic_depth;
 extern int nographic;
+extern int irq0override;
 extern const char *keyboard_layout;
 extern int win2k_install_hack;
 extern int rtc_td_hack;
diff --git a/qemu/vl.c b/qemu/vl.c
index b3da7ad..b982b53 100644
--- a/qemu/vl.c
+++ b/qemu/vl.c
@@ -205,6 +205,7 @@ static int vga_ram_size;
 enum vga_retrace_method vga_retrace_method = VGA_RETRACE_DUMB;
 static DisplayState *display_state;
 int nographic;
+int irq0override;
 static int curses;
 static int sdl;
 const char* keyboard_layout = NULL;
@@ -4516,6 +4517,7 @@ int main(int argc, char **argv, char **envp)
 #endif
     snapshot = 0;
     nographic = 0;
+    irq0override = 1;
     curses = 0;
     kernel_filename = NULL;
     kernel_cmdline = "";
@@ -5536,8 +5538,12 @@ int main(int argc, char **argv, char **envp)
         }
     }
 
-    if (kvm_enabled())
-	kvm_init_ap();
+    if (kvm_enabled()) {
+       kvm_init_ap();
+        if (kvm_irqchip && !kvm_has_gsi_routing(kvm_context)) {
+           irq0override = 0;
+        }
+    }
 
     machine->init(ram_size, vga_ram_size, boot_devices,
                   kernel_filename, kernel_cmdline, initrd_filename, cpu_model);

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH 2/2] Finish HPET implementation for KVM
  2009-04-09 22:20 [PATCH 1/2] Make BIOS irq0->inti2 override configurable from userspace Beth Kon
@ 2009-04-09 22:20 ` Beth Kon
  2009-04-16 12:55 ` [PATCH 1/2] Make BIOS irq0->inti2 override configurable from userspace Avi Kivity
  1 sibling, 0 replies; 3+ messages in thread
From: Beth Kon @ 2009-04-09 22:20 UTC (permalink / raw)
  To: kvm; +Cc: Beth Kon

Signed-off-by: Beth Kon <eak@us.ibm.com>

diff --git a/bios/acpi-dsdt.dsl b/bios/acpi-dsdt.dsl
index 06ab25d..84697db 100755
--- a/bios/acpi-dsdt.dsl
+++ b/bios/acpi-dsdt.dsl
@@ -307,6 +307,24 @@ DefinitionBlock (
                     ,, , AddressRangeMemory, TypeStatic)
             })
         }
+        Device(HPET) {
+            Name(_HID,  EISAID("PNP0103"))
+            Name(_UID, 0)
+            Method (_STA, 0, NotSerialized) {
+                    Return(0x0F)
+            }
+            Name(_CRS, ResourceTemplate() {
+                DWordMemory(
+                    ResourceConsumer, PosDecode, MinFixed, MaxFixed,
+                    NonCacheable, ReadWrite,
+                    0x00000000,
+                    0xFED00000,
+                    0xFED003FF,
+                    0x00000000,
+                    0x00000400 /* 1K memory: FED00000 - FED003FF */
+                )
+            })
+        }
     }
 
     Scope(\_SB.PCI0) {
diff --git a/bios/rombios32.c b/bios/rombios32.c
index 5cf1f54..959a784 100755
--- a/bios/rombios32.c
+++ b/bios/rombios32.c
@@ -1275,7 +1275,7 @@ struct rsdp_descriptor         /* Root System Descriptor Pointer */
 struct rsdt_descriptor_rev1
 {
 	ACPI_TABLE_HEADER_DEF                           /* ACPI common table header */
-	uint32_t                             table_offset_entry [2]; /* Array of pointers to other */
+	uint32_t                             table_offset_entry [3]; /* Array of pointers to other */
 			 /* ACPI tables */
 } __attribute__((__packed__));
 
@@ -1415,6 +1415,31 @@ struct madt_processor_apic
 #endif
 } __attribute__((__packed__));
 
+/*
+ *  * ACPI 2.0 Generic Address Space definition.
+ *   */
+struct acpi_20_generic_address {
+    uint8_t  address_space_id;
+    uint8_t  register_bit_width;
+    uint8_t  register_bit_offset;
+    uint8_t  reserved;
+    uint64_t address;
+} __attribute__((__packed__));
+
+/*
+ *  * HPET Description Table
+ *   */
+struct acpi_20_hpet {
+    ACPI_TABLE_HEADER_DEF                        /* ACPI common table header */
+    uint32_t           timer_block_id;
+    struct acpi_20_generic_address addr;
+    uint8_t            hpet_number;
+    uint16_t           min_tick;
+    uint8_t            page_protect;
+} __attribute__((__packed__));
+
+#define ACPI_HPET_ADDRESS 0xFED00000UL
+
 struct madt_io_apic
 {
 	APIC_HEADER_DEF
@@ -1487,6 +1512,8 @@ void acpi_bios_init(void)
     struct facs_descriptor_rev1 *facs;
     struct multiple_apic_table *madt;
     uint8_t *dsdt;
+    struct acpi_20_hpet *hpet;
+    uint32_t hpet_addr;
     uint32_t base_addr, rsdt_addr, fadt_addr, addr, facs_addr, dsdt_addr;
     uint32_t acpi_tables_size, madt_addr, madt_size;
     int i;
@@ -1534,6 +1561,11 @@ void acpi_bios_init(void)
         madt_size += sizeof(struct madt_intsrcovr);
     addr += madt_size;
 
+    addr = (addr + 7) & ~7;
+    hpet_addr = addr;
+    hpet = (void *)(addr);
+    addr += sizeof(*hpet);
+
     acpi_tables_size = addr - base_addr;
 
     BX_INFO("ACPI tables: RSDP addr=0x%08lx ACPI DATA addr=0x%08lx size=0x%x\n",
@@ -1555,6 +1587,7 @@ void acpi_bios_init(void)
     memset(rsdt, 0, sizeof(*rsdt));
     rsdt->table_offset_entry[0] = cpu_to_le32(fadt_addr);
     rsdt->table_offset_entry[1] = cpu_to_le32(madt_addr);
+    rsdt->table_offset_entry[2] = cpu_to_le32(hpet_addr);
     acpi_build_table_header((struct acpi_table_header *)rsdt,
                             "RSDT", sizeof(*rsdt), 1);
 
@@ -1644,6 +1677,15 @@ void acpi_bios_init(void)
         }
         acpi_build_table_header((struct acpi_table_header *)madt,
                                 "APIC", madt_size, 1);
+        /* HPET */
+        memset(hpet, 0, sizeof(*hpet));
+        /* Note: timer_block_id value must be kept in sync with value 
+         * advertised by emulated hpet in hpet.c
+         */
+        hpet->timer_block_id = cpu_to_le32(0x8086a201);
+        hpet->addr.address = cpu_to_le32(ACPI_HPET_ADDRESS);
+        acpi_build_table_header((struct  acpi_table_header *)hpet,
+                                 "HPET", sizeof(*hpet), 1);
     }
 }
 
diff --git a/qemu/hw/hpet.c b/qemu/hw/hpet.c
index 7df2d05..2b817a6 100644
--- a/qemu/hw/hpet.c
+++ b/qemu/hw/hpet.c
@@ -30,6 +30,7 @@
 #include "console.h"
 #include "qemu-timer.h"
 #include "hpet_emul.h"
+#include "qemu-kvm.h"
 
 //#define HPET_DEBUG
 #ifdef HPET_DEBUG
@@ -48,6 +49,43 @@ uint32_t hpet_in_legacy_mode(void)
         return 0;
 }
 
+static void hpet_kpit_enable(void)
+{
+    struct kvm_pit_state ps;
+    kvm_get_pit(kvm_context, &ps);
+    kvm_set_pit(kvm_context, &ps);
+}
+
+static void hpet_kpit_disable(void)
+{
+    struct kvm_pit_state ps;
+    kvm_get_pit(kvm_context, &ps);
+    ps.channels[0].mode = 0xff;
+    kvm_set_pit(kvm_context, &ps);
+}
+
+static void hpet_legacy_enable(void)
+{
+    if (qemu_kvm_pit_in_kernel()) {
+       hpet_kpit_disable();
+       dprintf("qemu: hpet disabled kernel pit\n");
+    } else {
+       hpet_pit_disable();
+       dprintf("qemu: hpet disabled userspace pit\n");
+    }
+}
+
+static void hpet_legacy_disable(void)
+{
+    if (qemu_kvm_pit_in_kernel()) {
+       hpet_kpit_enable();
+       dprintf("qemu: hpet enabled kernel pit\n");
+    } else {
+       hpet_pit_enable();
+       dprintf("qemu: hpet enabled userspace pit\n");
+    }
+}
+
 static uint32_t timer_int_route(struct HPETTimer *timer)
 {
     uint32_t route;
@@ -475,9 +513,9 @@ static void hpet_ram_writel(void *opaque, target_phys_addr_t addr,
                 }
                 /* i8254 and RTC are disabled when HPET is in legacy mode */
                 if (activating_bit(old_val, new_val, HPET_CFG_LEGACY)) {
-                    hpet_pit_disable();
+                    hpet_legacy_enable();
                 } else if (deactivating_bit(old_val, new_val, HPET_CFG_LEGACY)) {
-                    hpet_pit_enable();
+                    hpet_legacy_disable();
                 }
                 break;
             case HPET_CFG + 4:
@@ -560,7 +598,7 @@ static void hpet_reset(void *opaque) {
          * hpet_reset is called due to system reset. At this point control must
          * be returned to pit until SW reenables hpet.
          */
-        hpet_pit_enable();
+        hpet_legacy_disable();
     count = 1;
 }
 
diff --git a/qemu/vl.c b/qemu/vl.c
index b982b53..d662fba 100644
--- a/qemu/vl.c
+++ b/qemu/vl.c
@@ -5539,9 +5539,14 @@ int main(int argc, char **argv, char **envp)
     }
 
     if (kvm_enabled()) {
-       kvm_init_ap();
+        kvm_init_ap();
         if (kvm_irqchip && !kvm_has_gsi_routing(kvm_context)) {
            irq0override = 0;
+           /* if kernel can't do irq routing, interrupt source
+            * override 0->2 can not be set up as required by hpet,
+            * so disable hpet.
+            */
+           no_hpet=1;
         }
     }
 

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH 1/2] Make BIOS irq0->inti2 override configurable from userspace
  2009-04-09 22:20 [PATCH 1/2] Make BIOS irq0->inti2 override configurable from userspace Beth Kon
  2009-04-09 22:20 ` [PATCH 2/2] Finish HPET implementation for KVM Beth Kon
@ 2009-04-16 12:55 ` Avi Kivity
  1 sibling, 0 replies; 3+ messages in thread
From: Avi Kivity @ 2009-04-16 12:55 UTC (permalink / raw)
  To: Beth Kon; +Cc: kvm

Beth Kon wrote:
> These patches resolve the irq0->inti2 override issue, and get the hpet working
> on kvm with and without -no-kvm-irqchip (i.e., when hpet takes over, it 
> disables userspace or in-kernel pit as appropriate).
>
> The irq0->inti2 override will always be used unless the kernel cannot do irq 
> routing (i.e., compatibility with old kernels). So if the kernel is capable, 
> userspace sets up irq0->inti2 via the irq routing interface, and adds the 
> irq0->inti2 override to the MADT interrupt source override table, 
> and the mp table (for the no-acpi case).
>
> A couple of months ago, Marcelo was seeing RHEL5 guests complain of invalid
> checksum with these patches, but later he couldn't reproduce it, and I'm not 
> seeing it now. While all guests still need to be fully tested, everything 
> appears to be in order.  I've tested on win2k864, win2k832, RHEL5.3 32 bit, 
> and ubuntu 8.10 64 bit. 
>   

Both patches look good.  However, the bios in kvm-userspace.git is 
pretty old.  To ease later merging, I'd like to update the bios first, 
so please rebase the patches once I've done that.  It will take a few 
days since large merges are blocked while I struggle to get a working 
kvm-85 out.

Please also separate the bios patches from qemu patches.

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.


^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2009-04-16 12:55 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-04-09 22:20 [PATCH 1/2] Make BIOS irq0->inti2 override configurable from userspace Beth Kon
2009-04-09 22:20 ` [PATCH 2/2] Finish HPET implementation for KVM Beth Kon
2009-04-16 12:55 ` [PATCH 1/2] Make BIOS irq0->inti2 override configurable from userspace Avi Kivity

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).