qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Chao Peng <chao.p.peng@linux.intel.com>
To: qemu-devel@nongnu.org
Cc: "Michael S. Tsirkin" <mst@redhat.com>,
	gor Mammedov <imammedo@redhat.com>,
	Xiao Guangrong <guangrong.xiao@linux.intel.com>,
	Paolo Bonzini <pbonzini@redhat.com>,
	Richard Henderson <rth@twiddle.net>,
	Eduardo Habkost <ehabkost@redhat.com>,
	Haozhong Zhang <haozhong.zhang@intel.com>
Subject: [Qemu-devel] [RFC 8/9] pc: support direct loading protected/long mode kernel
Date: Fri, 17 Jun 2016 04:14:16 -0400	[thread overview]
Message-ID: <1466151257-96318-9-git-send-email-chao.p.peng@linux.intel.com> (raw)
In-Reply-To: <1466151257-96318-1-git-send-email-chao.p.peng@linux.intel.com>

Traditionally a PC follows the following booting steps:

    QEMU -> BIOS -> bootloader -> kernel realmode code
         -> kernel protected/longmode code.

This process takes lots of time. For platform like pc-lite we let QEMU
loads protected/long mode kernel directly, hence skipping several steps
and speed up the whole booting. We do this by filling the zero page per
Linux booting protocol and then jumping to the kernel protected/long
mode entry. Registers and paging should also be put in the correct
state.

Signed-off-by: Jim Kukunas <james.t.kukunas@linux.intel.com>
Signed-off-by: Chao Peng <chao.p.peng@linux.intel.com>
---
 hw/i386/pc.c | 166 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 163 insertions(+), 3 deletions(-)

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 46ca0e3..64ce65c 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -85,6 +85,31 @@
 #define FW_CFG_E820_TABLE (FW_CFG_ARCH_LOCAL + 3)
 #define FW_CFG_HPET (FW_CFG_ARCH_LOCAL + 4)
 
+#define BOOT_GDT                0x500
+#define BOOT_IDT                0x520
+#define BOOT_GDT_NULL           0
+#define BOOT_GDT_CODE           1
+#define BOOT_GDT_DATA           2
+#define BOOT_GDT_TSS            3
+#define BOOT_GDT_MAX            4
+#define BOOT_GDT_FLAGS_CODE     (DESC_P_MASK | DESC_S_MASK | DESC_CS_MASK | \
+                                 DESC_R_MASK | DESC_A_MASK | DESC_G_MASK )
+#define BOOT_GDT_FLAGS_DATA     (DESC_P_MASK | DESC_S_MASK | DESC_W_MASK |  \
+                                 DESC_A_MASK | DESC_B_MASK | DESC_G_MASK)
+#define BOOT_GDT_FLAGS_TSS      DESC_P_MASK | (11 << DESC_TYPE_SHIFT)
+#define BOOT_PML4               0x9000
+#define BOOT_PDPTE              0xA000
+#define BOOT_LOADER_SP          0x8000
+#define BOOT_CMDLINE_OFFSET     0x20000
+#define BOOT_ZEROPAGE_OFFSET    0x7000
+
+#define GDT_ENTRY(flags, base, limit)               \
+       ((((base)  & 0xff000000ULL) << (56-24)) |    \
+       (((flags) & 0x0000f0ffULL) << 40) |          \
+       (((limit) & 0x000f0000ULL) << (48-16)) |     \
+       (((base)  & 0x00ffffffULL) << 16) |          \
+       (((limit) & 0x0000ffffULL)))
+
 #define E820_NR_ENTRIES		16
 
 struct e820_entry {
@@ -98,6 +123,13 @@ struct e820_table {
     struct e820_entry entry[E820_NR_ENTRIES];
 } QEMU_PACKED __attribute((__aligned__(4)));
 
+struct kernel_boot_info {
+    uint64_t entry;
+    bool protected_mode;
+    bool long_mode;
+};
+
+static struct kernel_boot_info boot_info;
 static struct e820_table e820_reserve;
 static struct e820_entry *e820_table;
 static unsigned e820_entries;
@@ -667,6 +699,124 @@ bool e820_get_entry(int idx, uint32_t type, uint64_t *address, uint64_t *length)
     return false;
 }
 
+static void reset_cpu(CPUX86State *env)
+{
+    unsigned int flags = BOOT_GDT_FLAGS_CODE;
+
+    if (boot_info.long_mode) {
+        flags |= DESC_L_MASK;
+    }
+    cpu_x86_load_seg_cache(env, R_CS, BOOT_GDT_CODE * 8, 0, 0xfffff, flags);
+
+    cpu_x86_load_seg_cache(env, R_DS, BOOT_GDT_DATA * 8, 0, 0xfffff,
+                           BOOT_GDT_FLAGS_DATA);
+    cpu_x86_load_seg_cache(env, R_ES, BOOT_GDT_DATA * 8, 0, 0xfffff,
+                           BOOT_GDT_FLAGS_DATA);
+    cpu_x86_load_seg_cache(env, R_FS, BOOT_GDT_DATA * 8, 0, 0xfffff,
+                           BOOT_GDT_FLAGS_DATA);
+    cpu_x86_load_seg_cache(env, R_GS, BOOT_GDT_DATA * 8, 0, 0xfffff,
+                           BOOT_GDT_FLAGS_DATA);
+    cpu_x86_load_seg_cache(env, R_SS, BOOT_GDT_DATA * 8, 0, 0xfffff,
+                           BOOT_GDT_FLAGS_DATA);
+
+    env->gdt.base = BOOT_GDT;
+    env->gdt.limit = BOOT_GDT_MAX * 8 - 1;
+
+    env->idt.base = BOOT_IDT;
+
+    env->tr.selector = BOOT_GDT_TSS * 8;
+    env->tr.flags = BOOT_GDT_FLAGS_TSS;
+
+    env->cr[3] = BOOT_PML4;
+    env->cr[0] |= (CR0_PG_MASK | CR0_PE_MASK);
+
+    if (boot_info.long_mode) {
+        env->cr[4] |= CR4_PAE_MASK;
+        cpu_load_efer(env, env->efer | MSR_EFER_LME | MSR_EFER_LMA);
+    }
+
+    env->regs[R_ESP] = BOOT_LOADER_SP;
+    env->regs[R_ESI] = BOOT_ZEROPAGE_OFFSET;
+    env->eip = boot_info.entry;
+}
+
+static void setup_seg_desc_tables(void)
+{
+    uint64_t idt = 0;
+    uint64_t gdt[BOOT_GDT_MAX] = {
+             [BOOT_GDT_NULL] = GDT_ENTRY(0, 0, 0),
+             [BOOT_GDT_CODE] = GDT_ENTRY(BOOT_GDT_FLAGS_CODE, 0, 0xFFFFF),
+             [BOOT_GDT_DATA] = GDT_ENTRY(BOOT_GDT_FLAGS_DATA, 0, 0xFFFFF),
+             [BOOT_GDT_TSS ] = GDT_ENTRY(BOOT_GDT_FLAGS_TSS, 0, 0xFFFFF)
+            };
+
+    if (boot_info.long_mode) {
+        gdt[BOOT_GDT_CODE] |= (1UL << (32 + DESC_L_SHIFT));
+    }
+
+    cpu_physical_memory_write((hwaddr)BOOT_GDT, gdt, sizeof(gdt));
+    cpu_physical_memory_write((hwaddr)BOOT_IDT, &idt, sizeof(idt));
+}
+
+static void setup_page_tables(void)
+{
+    void *p;
+    size_t len = 4096;
+
+    p = cpu_physical_memory_map(BOOT_PML4, &len, 1);
+    memset(p, 0, 4096);
+    *(uint64_t*)p = (uint64_t)(BOOT_PDPTE | 3);
+    cpu_physical_memory_unmap(p, len, 1, len);
+
+    len = 4096;
+    p = cpu_physical_memory_map(BOOT_PDPTE, &len, 1);
+    memset(p, 0, 4096);
+    *(uint64_t*)p = 0x83;
+    cpu_physical_memory_unmap(p, len, 1, len);
+}
+
+static void setup_kernel_zero_page(void)
+{
+    int i;
+    uint8_t *zero_page;
+    void *e820_map;
+    size_t zero_page_size = 4096;
+    MachineState *machine = MACHINE(qdev_get_machine());
+    size_t cmdline_size = strlen(machine->kernel_cmdline) + 1;
+
+    cpu_physical_memory_write((hwaddr)BOOT_CMDLINE_OFFSET,
+                               machine->kernel_cmdline, cmdline_size);
+
+    zero_page = cpu_physical_memory_map((hwaddr)BOOT_ZEROPAGE_OFFSET,
+                                        &zero_page_size, 1);
+    memset(zero_page, 0, zero_page_size);
+
+    /* hdr.type_of_loader */
+    zero_page[0x210] = 0xFF;
+    /* hdr.boot_flag */
+    stw_p(zero_page + 0x1fe, 0xAA55);
+    /* hdr.header */
+    stl_p(zero_page + 0x202, 0x53726448);
+    /* hdr.cmd_line_ptr */
+    stl_p(zero_page + 0x228, BOOT_CMDLINE_OFFSET);
+    /* hdr.cmdline_size */
+    stl_p(zero_page + 0x238, cmdline_size);
+    /* e820_entries */
+    zero_page[0x1e8] = e820_entries;
+    /* e820_map */
+    e820_map = zero_page + 0x2d0;
+    for (i = 0; i < e820_entries; i++) {
+        stq_p(e820_map, e820_table[i].address);
+        e820_map += 8;
+        stq_p(e820_map, e820_table[i].length);
+        e820_map += 8;
+        stl_p(e820_map, e820_table[i].type);
+        e820_map += 4;
+    }
+
+    cpu_physical_memory_unmap(zero_page, zero_page_size, 1, zero_page_size);
+}
+
 /* Enables contiguous-apic-ID mode, for compatibility */
 static bool compat_apic_id_mode;
 
@@ -1928,15 +2078,25 @@ static void pc_machine_reset(void)
 
     qemu_devices_reset();
 
-    /* Reset APIC after devices have been reset to cancel
-     * any changes that qemu_devices_reset() might have done.
-     */
     CPU_FOREACH(cs) {
         cpu = X86_CPU(cs);
 
+        /* Reset APIC after devices have been reset to cancel
+         * any changes that qemu_devices_reset() might have done.
+         */
         if (cpu->apic_state) {
             device_reset(cpu->apic_state);
         }
+
+        if (boot_info.protected_mode && cpu_is_bsp(cpu)) {
+            reset_cpu(&cpu->env);
+        }
+    }
+
+    if (boot_info.protected_mode) {
+        setup_seg_desc_tables();
+        setup_page_tables();
+        setup_kernel_zero_page();
     }
 }
 
-- 
1.8.3.1

  parent reply	other threads:[~2016-06-17  8:20 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-06-17  8:14 [Qemu-devel] [RFC 0/9] Introduce light weight PC platform pc-lite Chao Peng
2016-06-17  8:14 ` [Qemu-devel] [RFC 1/9] acpi: introduce light weight ACPI PM emulation pm-lite Chao Peng
2016-06-17  8:14 ` [Qemu-devel] [RFC 2/9] pci: introduce light weight PCIE Host emulation pci-lite Chao Peng
2016-06-17  8:14 ` [Qemu-devel] [RFC 3/9] acpi: add support for pc-lite platform Chao Peng
2016-06-17  8:14 ` [Qemu-devel] [RFC 4/9] acpi: expose data structurs and functions of BIOS linker loader Chao Peng
2016-06-17  8:14 ` [Qemu-devel] [RFC 5/9] acpi: expose acpi_checksum() Chao Peng
2016-06-17  8:14 ` [Qemu-devel] [RFC 6/9] acpi: patch guest ACPI for pc-lite Chao Peng
2016-06-17  8:14 ` [Qemu-devel] [RFC 7/9] pc: skip setting CMOS data when RTC device is unavailable Chao Peng
2016-06-17  8:14 ` Chao Peng [this message]
2016-06-17  8:14 ` [Qemu-devel] [RFC 9/9] pc: introduce light weight PC board pc-lite Chao Peng
2016-06-17 13:24 ` [Qemu-devel] [RFC 0/9] Introduce light weight PC platform pc-lite Paolo Bonzini
2016-06-20  6:01   ` Chao Peng
2016-06-20  6:54     ` Paolo Bonzini
2016-06-20 12:31       ` Stefan Hajnoczi
2016-06-20 13:00         ` Paolo Bonzini
2016-06-21  1:23       ` Chao Peng
2016-06-21 16:44       ` Michael S. Tsirkin
2016-06-23  8:32       ` Chao Peng
2016-06-23 12:44         ` Paolo Bonzini
2016-06-24  6:39           ` Claudio Fontana
2016-06-24  6:41             ` Claudio Fontana
2016-06-24  6:53               ` Paolo Bonzini
2016-06-27  2:55                 ` Claudio Fontana
2016-06-28  9:27           ` Chao Peng
2016-06-20 10:36   ` Dr. David Alan Gilbert
2016-06-19  3:51 ` Michael S. Tsirkin
2016-06-20  6:12   ` Chao Peng
2016-06-23 12:55     ` Daniel P. Berrange
2016-06-28 10:10       ` Chao Peng
2016-06-28 10:26         ` Daniel P. Berrange
2016-06-19  8:21 ` Claudio Fontana
2016-06-20  6:30   ` Chao Peng

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1466151257-96318-9-git-send-email-chao.p.peng@linux.intel.com \
    --to=chao.p.peng@linux.intel.com \
    --cc=ehabkost@redhat.com \
    --cc=guangrong.xiao@linux.intel.com \
    --cc=haozhong.zhang@intel.com \
    --cc=imammedo@redhat.com \
    --cc=mst@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=rth@twiddle.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).