qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Paolo Bonzini <pbonzini@redhat.com>
To: qemu-devel@nongnu.org
Cc: Liam Merwick <liam.merwick@oracle.com>,
	George Kennedy <George.Kennedy@oracle.com>
Subject: [Qemu-devel] [PULL 14/76] pvh: Boot uncompressed kernel using direct boot ABI
Date: Tue,  5 Feb 2019 19:14:24 +0100	[thread overview]
Message-ID: <1549390526-24246-15-git-send-email-pbonzini@redhat.com> (raw)
In-Reply-To: <1549390526-24246-1-git-send-email-pbonzini@redhat.com>

From: Liam Merwick <liam.merwick@oracle.com>

These changes (along with corresponding Linux kernel and qboot changes)
enable a guest to be booted using the x86/HVM direct boot ABI.

This commit adds a load_elfboot() routine to pass the size and
location of the kernel entry point to qboot (which will fill in
the start_info struct information needed to to boot the guest).
Having loaded the ELF binary, load_linux() will run qboot
which continues the boot.

The address for the kernel entry point is read from an ELF Note
in the uncompressed kernel binary by a helper routine passed
to load_elf().

Co-developed-by: George Kennedy <George.Kennedy@oracle.com>
Signed-off-by: George Kennedy <George.Kennedy@oracle.com>
Signed-off-by: Liam Merwick <liam.merwick@oracle.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 hw/i386/pc.c  | 135 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 include/elf.h |  10 +++++
 2 files changed, 145 insertions(+)

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 1690b19..e39ef5c 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -54,6 +54,7 @@
 #include "sysemu/qtest.h"
 #include "kvm_i386.h"
 #include "hw/xen/xen.h"
+#include "hw/xen/start_info.h"
 #include "ui/qemu-spice.h"
 #include "exec/memory.h"
 #include "exec/address-spaces.h"
@@ -110,6 +111,9 @@ static struct e820_entry *e820_table;
 static unsigned e820_entries;
 struct hpet_fw_config hpet_cfg = {.count = UINT8_MAX};
 
+/* Physical Address of PVH entry point read from kernel ELF NOTE */
+static size_t pvh_start_addr;
+
 GlobalProperty pc_compat_3_1[] = {
     { "intel-iommu", "dma-drain", "off" },
     { "Opteron_G3" "-" TYPE_X86_CPU, "rdtscp", "off" },
@@ -1069,6 +1073,109 @@ struct setup_data {
     uint8_t data[0];
 } __attribute__((packed));
 
+
+/*
+ * The entry point into the kernel for PVH boot is different from
+ * the native entry point.  The PVH entry is defined by the x86/HVM
+ * direct boot ABI and is available in an ELFNOTE in the kernel binary.
+ *
+ * This function is passed to load_elf() when it is called from
+ * load_elfboot() which then additionally checks for an ELF Note of
+ * type XEN_ELFNOTE_PHYS32_ENTRY and passes it to this function to
+ * parse the PVH entry address from the ELF Note.
+ *
+ * Due to trickery in elf_opts.h, load_elf() is actually available as
+ * load_elf32() or load_elf64() and this routine needs to be able
+ * to deal with being called as 32 or 64 bit.
+ *
+ * The address of the PVH entry point is saved to the 'pvh_start_addr'
+ * global variable.  (although the entry point is 32-bit, the kernel
+ * binary can be either 32-bit or 64-bit).
+ */
+static uint64_t read_pvh_start_addr(void *arg1, void *arg2, bool is64)
+{
+    size_t *elf_note_data_addr;
+
+    /* Check if ELF Note header passed in is valid */
+    if (arg1 == NULL) {
+        return 0;
+    }
+
+    if (is64) {
+        struct elf64_note *nhdr64 = (struct elf64_note *)arg1;
+        uint64_t nhdr_size64 = sizeof(struct elf64_note);
+        uint64_t phdr_align = *(uint64_t *)arg2;
+        uint64_t nhdr_namesz = nhdr64->n_namesz;
+
+        elf_note_data_addr =
+            ((void *)nhdr64) + nhdr_size64 +
+            QEMU_ALIGN_UP(nhdr_namesz, phdr_align);
+    } else {
+        struct elf32_note *nhdr32 = (struct elf32_note *)arg1;
+        uint32_t nhdr_size32 = sizeof(struct elf32_note);
+        uint32_t phdr_align = *(uint32_t *)arg2;
+        uint32_t nhdr_namesz = nhdr32->n_namesz;
+
+        elf_note_data_addr =
+            ((void *)nhdr32) + nhdr_size32 +
+            QEMU_ALIGN_UP(nhdr_namesz, phdr_align);
+    }
+
+    pvh_start_addr = *elf_note_data_addr;
+
+    return pvh_start_addr;
+}
+
+static bool load_elfboot(const char *kernel_filename,
+                   int kernel_file_size,
+                   uint8_t *header,
+                   size_t pvh_xen_start_addr,
+                   FWCfgState *fw_cfg)
+{
+    uint32_t flags = 0;
+    uint32_t mh_load_addr = 0;
+    uint32_t elf_kernel_size = 0;
+    uint64_t elf_entry;
+    uint64_t elf_low, elf_high;
+    int kernel_size;
+
+    if (ldl_p(header) != 0x464c457f) {
+        return false; /* no elfboot */
+    }
+
+    bool elf_is64 = header[EI_CLASS] == ELFCLASS64;
+    flags = elf_is64 ?
+        ((Elf64_Ehdr *)header)->e_flags : ((Elf32_Ehdr *)header)->e_flags;
+
+    if (flags & 0x00010004) { /* LOAD_ELF_HEADER_HAS_ADDR */
+        error_report("elfboot unsupported flags = %x", flags);
+        exit(1);
+    }
+
+    uint64_t elf_note_type = XEN_ELFNOTE_PHYS32_ENTRY;
+    kernel_size = load_elf(kernel_filename, read_pvh_start_addr,
+                           NULL, &elf_note_type, &elf_entry,
+                           &elf_low, &elf_high, 0, I386_ELF_MACHINE,
+                           0, 0);
+
+    if (kernel_size < 0) {
+        error_report("Error while loading elf kernel");
+        exit(1);
+    }
+    mh_load_addr = elf_low;
+    elf_kernel_size = elf_high - elf_low;
+
+    if (pvh_start_addr == 0) {
+        error_report("Error loading uncompressed kernel without PVH ELF Note");
+        exit(1);
+    }
+    fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ENTRY, pvh_start_addr);
+    fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, mh_load_addr);
+    fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, elf_kernel_size);
+
+    return true;
+}
+
 static void load_linux(PCMachineState *pcms,
                        FWCfgState *fw_cfg)
 {
@@ -1108,6 +1215,34 @@ static void load_linux(PCMachineState *pcms,
     if (ldl_p(header+0x202) == 0x53726448) {
         protocol = lduw_p(header+0x206);
     } else {
+        /*
+         * Check if the file is an uncompressed kernel file (ELF) and load it,
+         * saving the PVH entry point used by the x86/HVM direct boot ABI.
+         * If load_elfboot() is successful, populate the fw_cfg info.
+         */
+        if (load_elfboot(kernel_filename, kernel_size,
+                         header, pvh_start_addr, fw_cfg)) {
+            struct hvm_modlist_entry ramdisk_mod = { 0 };
+
+            fclose(f);
+
+            fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE,
+                strlen(kernel_cmdline) + 1);
+            fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline);
+
+            assert(machine->device_memory != NULL);
+            ramdisk_mod.paddr = machine->device_memory->base;
+            ramdisk_mod.size =
+                memory_region_size(&machine->device_memory->mr);
+
+            fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, &ramdisk_mod,
+                             sizeof(ramdisk_mod));
+            fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, sizeof(header));
+            fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA,
+                             header, sizeof(header));
+
+            return;
+        }
         /* This looks like a multiboot kernel. If it is, let's stop
            treating it like a Linux kernel. */
         if (load_multiboot(fw_cfg, f, kernel_filename, initrd_filename,
diff --git a/include/elf.h b/include/elf.h
index e816fb4..b35347e 100644
--- a/include/elf.h
+++ b/include/elf.h
@@ -1640,6 +1640,16 @@ typedef struct elf64_shdr {
 #define NT_ARM_HW_WATCH 0x403           /* ARM hardware watchpoint registers */
 #define NT_ARM_SYSTEM_CALL      0x404   /* ARM system call number */
 
+/*
+ * Physical entry point into the kernel.
+ *
+ * 32bit entry point into the kernel. When requested to launch the
+ * guest kernel, use this entry point to launch the guest in 32-bit
+ * protected mode with paging disabled.
+ *
+ * [ Corresponding definition in Linux kernel: include/xen/interface/elfnote.h ]
+ */
+#define XEN_ELFNOTE_PHYS32_ENTRY    18  /* 0x12 */
 
 /* Note header in a PT_NOTE section */
 typedef struct elf32_note {
-- 
1.8.3.1

  parent reply	other threads:[~2019-02-05 18:16 UTC|newest]

Thread overview: 100+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-02-05 18:14 [Qemu-devel] [PULL v2 00/76] misc patches for 2019-02-04 Paolo Bonzini
2019-02-05 18:14 ` [Qemu-devel] [PULL 01/76] vhost-user-test: use g_cond_broadcast Paolo Bonzini
2019-02-05 18:14 ` [Qemu-devel] [PULL 02/76] vhost-user-test: signal data_cond when s->rings changes Paolo Bonzini
2019-02-05 18:14 ` [Qemu-devel] [PULL 03/76] vhost-user-test: support VHOST_USER_PROTOCOL_F_CROSS_ENDIAN Paolo Bonzini
2019-02-05 18:14 ` [Qemu-devel] [PULL 04/76] vhost-user-test: skip if there is no memory at address 0 Paolo Bonzini
2019-02-05 18:14 ` [Qemu-devel] [PULL 05/76] vhost-user-test: reduce usage of global_qtest Paolo Bonzini
2019-02-05 18:14 ` [Qemu-devel] [PULL 06/76] tap: flush STDOUT on newline Paolo Bonzini
2019-02-05 18:14 ` [Qemu-devel] [PULL 07/76] cpu-exec: add assert_no_pages_locked() after longjmp Paolo Bonzini
2019-02-05 18:14 ` [Qemu-devel] [PULL 08/76] cpu-exec: reset BQL after longjmp in cpu_exec_step_atomic Paolo Bonzini
2019-02-05 18:14 ` [Qemu-devel] [PULL 09/76] configure: Add a proper check for openpty() in libutil Paolo Bonzini
2019-02-05 18:14 ` [Qemu-devel] [PULL 10/76] cpus: ignore ESRCH in qemu_cpu_kick_thread() Paolo Bonzini
2019-02-05 18:14 ` [Qemu-devel] [PULL 11/76] elf: Add optional function ptr to load_elf() to parse ELF notes Paolo Bonzini
2019-02-05 18:14 ` [Qemu-devel] [PULL 12/76] elf-ops.h: Add get_elf_note_type() Paolo Bonzini
2019-02-05 18:14 ` [Qemu-devel] [PULL 13/76] pvh: Add x86/HVM direct boot ABI header file Paolo Bonzini
2019-03-01 16:53   ` Alex Bennée
2019-03-01 17:13     ` Stefano Garzarella
2019-03-01 17:56       ` Alex Bennée
2019-03-02 10:17         ` Stefano Garzarella
2019-02-05 18:14 ` Paolo Bonzini [this message]
2019-02-05 18:14 ` [Qemu-devel] [PULL 15/76] pvh: load initrd and expose it through fw_cfg Paolo Bonzini
2019-02-05 18:14 ` [Qemu-devel] [PULL 16/76] linuxboot_dma: remove duplicate definitions of FW_CFG Paolo Bonzini
2019-02-05 18:14 ` [Qemu-devel] [PULL 17/76] linuxboot_dma: move common functions in a new header Paolo Bonzini
2019-02-05 18:14 ` [Qemu-devel] [PULL 18/76] optionrom: add new PVH option rom Paolo Bonzini
2021-03-19 14:06   ` Philippe Mathieu-Daudé
2021-03-19 14:37     ` Stefano Garzarella
2021-03-19 15:51     ` Stefano Garzarella
2021-03-19 17:03     ` Paolo Bonzini
2021-03-19 17:35       ` Stefano Garzarella
2021-03-19 17:52         ` Paolo Bonzini
2021-03-19 18:20           ` Stefano Garzarella
2021-03-19 18:25             ` Stefano Garzarella
2021-03-22 10:59               ` Stefano Garzarella
2021-03-22 11:52                 ` Paolo Bonzini
2021-03-22 13:57                   ` Stefano Garzarella
2021-03-22 16:26                     ` Paolo Bonzini
2019-02-05 18:14 ` [Qemu-devel] [PULL 19/76] hw/i386/pc: use " Paolo Bonzini
2019-02-05 18:14 ` [Qemu-devel] [PULL 20/76] optionrom/pvh: load initrd from fw_cfg Paolo Bonzini
2019-02-05 18:14 ` [Qemu-devel] [PULL 21/76] hw/i386/pc: enable PVH only for machine type >= 4.0 Paolo Bonzini
2019-02-05 18:14 ` [Qemu-devel] [PULL 22/76] i386: remove the new CPUID 'PCONFIG' from Icelake-Server CPU model Paolo Bonzini
2019-02-05 18:14 ` [Qemu-devel] [PULL 23/76] i386: remove the 'INTEL_PT' CPUID bit from named CPU models Paolo Bonzini
2019-02-05 18:14 ` [Qemu-devel] [PULL 24/76] Revert "i386: Add CPUID bit for PCONFIG" Paolo Bonzini
2019-02-05 18:14 ` [Qemu-devel] [PULL 25/76] target-i386: hvf: remove MPX support Paolo Bonzini
2019-02-05 18:14 ` [Qemu-devel] [PULL 26/76] contrib/elf2dmp: fix elf.h including Paolo Bonzini
2019-02-05 18:14 ` [Qemu-devel] [PULL 27/76] contrib/elf2dmp: use GLib in ELF processing Paolo Bonzini
2019-02-05 18:14 ` [Qemu-devel] [PULL 28/76] contrib/elf2dmp: use GLib in PDB processing Paolo Bonzini
2019-02-05 18:14 ` [Qemu-devel] [PULL 29/76] contrib/elf2dmp: fix structures definitions Paolo Bonzini
2019-02-05 18:14 ` [Qemu-devel] [PULL 31/76] configure: enable elf2dmp build for Windows hosts Paolo Bonzini
2019-02-05 18:14 ` [Qemu-devel] [PULL 32/76] monitor: do not use QTAILQ_FOREACH_SAFE across critical sections Paolo Bonzini
2019-02-05 18:14 ` [Qemu-devel] [PULL 33/76] crypto: finish removing TABs Paolo Bonzini
2019-02-05 18:14 ` [Qemu-devel] [PULL 34/76] ui: vnc: " Paolo Bonzini
2019-02-05 18:14 ` [Qemu-devel] [PULL 35/76] unify len and addr type for memory/address APIs Paolo Bonzini
2019-02-05 18:14 ` [Qemu-devel] [PULL 36/76] hw/core/loader.c: Read as long as possible in load_image_size() Paolo Bonzini
2019-02-05 18:14 ` [Qemu-devel] [PULL 37/76] i386: import & use bootparam.h Paolo Bonzini
2019-02-14 16:40   ` Peter Maydell
2019-02-14 16:50     ` Paolo Bonzini
2019-02-05 18:14 ` [Qemu-devel] [PULL 38/76] i386: allow to load initrd below 4 GB for recent linux Paolo Bonzini
2019-02-05 18:14 ` [Qemu-devel] [PULL 39/76] docker: adjust Xen repository for CentOS 7 Paolo Bonzini
2019-02-05 18:14 ` [Qemu-devel] [PULL 40/76] i386: hvf: Don't miss 16-bit displacement Paolo Bonzini
2019-02-05 18:14 ` [Qemu-devel] [PULL 41/76] gdbstub: Fix i386/x86_64 machine description and add control registers Paolo Bonzini
2019-02-05 18:14 ` [Qemu-devel] [PULL 42/76] scsi-generic: avoid possible out-of-bounds access to r->buf Paolo Bonzini
2019-02-05 18:14 ` [Qemu-devel] [PULL 43/76] Remove deprecated -enable-hax option Paolo Bonzini
2019-02-05 18:14 ` [Qemu-devel] [PULL 44/76] qemu-options: Remove deprecated "-virtioconsole" option Paolo Bonzini
2019-02-05 18:14 ` [Qemu-devel] [PULL 45/76] arm: disable CONFIG_SERIAL_ISA Paolo Bonzini
2019-02-05 18:14 ` [Qemu-devel] [PULL 46/76] ide: split ioport registration to a separate file Paolo Bonzini
2019-02-05 18:14 ` [Qemu-devel] [PULL 47/76] vfio: move conditional up to hw/Makefile.objs Paolo Bonzini
2019-02-05 18:14 ` [Qemu-devel] [PULL 48/76] hw/pci-host/Makefile.objs: make CONFIGS clear for PCI EXPRESS Paolo Bonzini
2019-02-05 18:14 ` [Qemu-devel] [PULL 49/76] build: actually use CONFIG_PAM Paolo Bonzini
2019-02-05 18:15 ` [Qemu-devel] [PULL 50/76] hw/i386/Makefile.objs: Build pc_piix* and pc_q35 boards Paolo Bonzini
2019-02-05 18:15 ` [Qemu-devel] [PULL 51/76] hw/arm/Makefile.objs: CONFIG_ARM_VIRT created for virt board Paolo Bonzini
2019-02-05 18:15 ` [Qemu-devel] [PULL 52/76] hw/m68k/Makefile.objs: Conditionally build boards Paolo Bonzini
2019-02-05 18:15 ` [Qemu-devel] [PULL 53/76] hw/microblaze/Makefile.objs: Create configs for petalogix and xilinx boards Paolo Bonzini
2019-02-05 18:15 ` [Qemu-devel] [PULL 54/76] hw/mips/Makefile.objs: Create CONFIG_* for r4k, malta, mipssim boards Paolo Bonzini
2019-02-05 18:15 ` [Qemu-devel] [PULL 55/76] hw/ppc/Makefile.objs: Build all boards conditinally with CONFIG_* Paolo Bonzini
2019-02-05 18:15 ` [Qemu-devel] [PULL 56/76] hw/sh4/Makefile.objs: New CONFIG_* varibales created for sh4 boards and device Paolo Bonzini
2019-02-05 18:15 ` [Qemu-devel] [PULL 57/76] hw/s390/Makefile.objs: Create new CONFIG_* variables for s390x boards and devices Paolo Bonzini
2019-02-05 18:15 ` [Qemu-devel] [PULL 58/76] hw/sparc/Makefile.objs: CONFIG_* for sun4m and leon3 created Paolo Bonzini
2019-02-05 18:15 ` [Qemu-devel] [PULL 59/76] hw/lm32/Makefile.objs: Conditionally build lm32 and milkmyst Paolo Bonzini
2019-02-05 18:15 ` [Qemu-devel] [PULL 60/76] hw/xtensa/Makefile.objs: Build xtensa_sim and xtensa_fpga conditionally Paolo Bonzini
2019-02-05 18:15 ` [Qemu-devel] [PULL 61/76] hw/nios2/Makefile.objs: Conditionally build nios2 Paolo Bonzini
2019-02-05 18:15 ` [Qemu-devel] [PULL 62/76] hw/riscv/Makefile.objs: Create CONFIG_* for riscv boards Paolo Bonzini
2019-02-05 18:15 ` [Qemu-devel] [PULL 63/76] hw/sparc64/Makefile.objs: Create CONFIG_* for sparc64 Paolo Bonzini
2019-02-05 18:15 ` [Qemu-devel] [PULL 64/76] hw/alpha/Makefile.objs: Create CONFIG_* for alpha Paolo Bonzini
2019-02-05 18:15 ` [Qemu-devel] [PULL 65/76] hw/cris/Makefile.objs: Create CONFIG_* for cris Paolo Bonzini
2019-02-05 18:15 ` [Qemu-devel] [PULL 66/76] hw/hppa/Makefile.objs: Create CONFIG_* for hppa Paolo Bonzini
2019-02-05 18:15 ` [Qemu-devel] [PULL 67/76] hw/moxie/Makefile.objs: Conditionally build moxie Paolo Bonzini
2019-02-05 18:15 ` [Qemu-devel] [PULL 68/76] hw/openrisc/Makefile.objs: Create CONFIG_* for openrisc Paolo Bonzini
2019-02-05 18:15 ` [Qemu-devel] [PULL 69/76] hw/tricore/Makefile.objs: Create CONFIG_* for tricore Paolo Bonzini
2019-02-05 18:15 ` [Qemu-devel] [PULL 70/76] hw/i2c/Makefile.objs: Create new CONFIG_* variables for EEPROM and ACPI controller Paolo Bonzini
2019-02-05 18:15 ` [Qemu-devel] [PULL 71/76] hw/vfio/Makefile.objs: Create new CONFIG_* variables for VFIO core and PCI Paolo Bonzini
2019-02-05 18:15 ` [Qemu-devel] [PULL 72/76] i386: hvf: Fix smp boot hangs Paolo Bonzini
2019-02-05 18:15 ` [Qemu-devel] [PULL 73/76] pc: Use hotplug_handler_(plug|unplug|unplug_request) Paolo Bonzini
2019-02-05 18:15 ` [Qemu-devel] [PULL 74/76] scsi-disk: Convert from DPRINTF() macro to trace events Paolo Bonzini
2019-02-05 18:15 ` [Qemu-devel] [PULL 76/76] queue: fix QTAILQ_FOREACH_REVERSE_SAFE Paolo Bonzini
2019-02-05 20:01 ` [Qemu-devel] [PULL v2 00/76] misc patches for 2019-02-04 Peter Maydell
2019-02-05 20:05   ` Peter Maydell
2019-02-05 20:26     ` Paolo Bonzini
2019-02-11 16:29     ` David Hildenbrand
2019-02-11 16:33       ` Peter Maydell
2019-02-11 16:50         ` David Hildenbrand
2019-02-07 10:34 ` Peter Maydell

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1549390526-24246-15-git-send-email-pbonzini@redhat.com \
    --to=pbonzini@redhat.com \
    --cc=George.Kennedy@oracle.com \
    --cc=liam.merwick@oracle.com \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).