* Re: PATCH 1/3: Support booting relocatable kernels
2008-01-31 17:48 PATCH 0/3: Direct linux kernel boot for HVM Daniel P. Berrange
@ 2008-01-31 17:52 ` Daniel P. Berrange
2008-01-31 17:53 ` PATCH 2/3: Support boot of NON-relocatable kernels Daniel P. Berrange
2008-01-31 17:55 ` PATCH 3/3: XenD changes for HVM kernel boot Daniel P. Berrange
2 siblings, 0 replies; 4+ messages in thread
From: Daniel P. Berrange @ 2008-01-31 17:52 UTC (permalink / raw)
To: xen-devel
[-- Attachment #1: Type: text/plain, Size: 2122 bytes --]
This patch introduces the basic infrastructure for direct kernel
boot in the ioemu copy of QEMU. The current #ifdef disabled
code is actually obsolete wrt upstream QEMU code. So this
is removed entirely. In its place I have imported the latest
upstream QEMU code. The QEMU code assumes that the guest RAM
is directly mapped into the QEMU process, so there were some
changes necessary. Instead of strcpy/memcpy'ing the args
and kernel header into guest RAM, cpu_physical_memory_rw is
used. Instead of fread()ing the initrd and kernel into guest RAM,
a helper function, fread2guest, is used, which reads into a small
buffer and then uses cpu_physical_memory_rw.
NB in reading the following, Documentation/i386/boot.txt is
a useful reference for what's going on.
Next, instead of loading the kernel at 0x100000, this code
loads it at 0x200000. This is far enough away that there's
no risk of it overlapping with the HVM firmware image. If the
Linux kernel boot protocol is 0x205 or later, and the flag
at offset 0x234 in the kernel header is 1, then the guest
kernel was built with CONFIG_RELOCATABLE=y.
In this scenario we merely need to tell the kernel what address
it has been relocated to by writing 0x200000 into the kernel
header at offset 0x214. When switching from real mode into
protected mode the kernel will immediately start executing at
0x200000 and be happy with life. This should work for 2.6.20 or
later on i386, and 2.6.22 or later on x86_64.
This has been verified with Fedora 7 and Fedora 8 bare metal kernels
on i386 and x86_64 from the $TREE/images/pxeboot of the install trees.
pc.c | 352 ++++++++++++++++++++++++++++++++++++++++++++++++++-----------------
1 file changed, 265 insertions(+), 87 deletions(-)
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Regards,
Dan.
--
|=- Red Hat, Engineering, Emerging Technologies, Boston. +1 978 392 2496 -=|
|=- Perl modules: http://search.cpan.org/~danberr/ -=|
|=- Projects: http://freshmeat.net/~danielpb/ -=|
|=- GnuPG: 7D3B9505 F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 -=|
[-- Attachment #2: xen-hvm-kernel-boot-relocatable.patch --]
[-- Type: text/plain, Size: 12230 bytes --]
diff -rup xen-unstable-16606.orig/tools/ioemu/hw/pc.c xen-unstable-16606.new/tools/ioemu/hw/pc.c
--- xen-unstable-16606.orig/tools/ioemu/hw/pc.c 2007-12-17 17:52:29.000000000 -0500
+++ xen-unstable-16606.new/tools/ioemu/hw/pc.c 2007-12-17 17:53:51.000000000 -0500
@@ -31,10 +31,8 @@
#define VGABIOS_CIRRUS_FILENAME "vgabios-cirrus.bin"
#define LINUX_BOOT_FILENAME "linux_boot.bin"
-#define KERNEL_LOAD_ADDR 0x00100000
-#define INITRD_LOAD_ADDR 0x00600000
-#define KERNEL_PARAMS_ADDR 0x00090000
-#define KERNEL_CMDLINE_ADDR 0x00099000
+/* Leave a chunk of memory at the top of RAM for the BIOS ACPI tables. */
+#define ACPI_DATA_SIZE 0x10000
static fdctrl_t *floppy_controller;
static RTCState *rtc_state;
@@ -363,36 +361,271 @@ void bochs_bios_init(void)
register_ioport_write(0x503, 1, 1, bochs_bios_write, NULL);
}
+/* Generate an initial boot sector which sets state and jump to
+ a specified vector */
+static void generate_bootsect(uint32_t gpr[8], uint16_t segs[6], uint16_t ip)
+{
+ uint8_t bootsect[512], *p;
+ int i;
+
+ if (bs_table[0] == NULL) {
+ fprintf(stderr, "A disk image must be given for 'hda' when booting "
+ "a Linux kernel\n");
+ exit(1);
+ }
+
+ memset(bootsect, 0, sizeof(bootsect));
+
+ /* Copy the MSDOS partition table if possible */
+ bdrv_read(bs_table[0], 0, bootsect, 1);
+
+ /* Make sure we have a partition signature */
+ bootsect[510] = 0x55;
+ bootsect[511] = 0xaa;
+
+ /* Actual code */
+ p = bootsect;
+ *p++ = 0xfa; /* CLI */
+ *p++ = 0xfc; /* CLD */
+
+ for (i = 0; i < 6; i++) {
+ if (i == 1) /* Skip CS */
+ continue;
-int load_kernel(const char *filename, uint8_t *addr,
- uint8_t *real_addr)
+ *p++ = 0xb8; /* MOV AX,imm16 */
+ *p++ = segs[i];
+ *p++ = segs[i] >> 8;
+ *p++ = 0x8e; /* MOV <seg>,AX */
+ *p++ = 0xc0 + (i << 3);
+ }
+
+ for (i = 0; i < 8; i++) {
+ *p++ = 0x66; /* 32-bit operand size */
+ *p++ = 0xb8 + i; /* MOV <reg>,imm32 */
+ *p++ = gpr[i];
+ *p++ = gpr[i] >> 8;
+ *p++ = gpr[i] >> 16;
+ *p++ = gpr[i] >> 24;
+ }
+
+ *p++ = 0xea; /* JMP FAR */
+ *p++ = ip; /* IP */
+ *p++ = ip >> 8;
+ *p++ = segs[1]; /* CS */
+ *p++ = segs[1] >> 8;
+
+ bdrv_set_boot_sector(bs_table[0], bootsect, sizeof(bootsect));
+}
+
+
+static long get_file_size(FILE *f)
{
- int fd, size;
- int setup_sects;
+ long where, size;
+
+ /* XXX: on Unix systems, using fstat() probably makes more sense */
+
+ where = ftell(f);
+ fseek(f, 0, SEEK_END);
+ size = ftell(f);
+ fseek(f, where, SEEK_SET);
- fd = open(filename, O_RDONLY | O_BINARY);
- if (fd < 0)
- return -1;
-
- /* load 16 bit code */
- if (read(fd, real_addr, 512) != 512)
- goto fail;
- setup_sects = real_addr[0x1F1];
- if (!setup_sects)
- setup_sects = 4;
- if (read(fd, real_addr + 512, setup_sects * 512) !=
- setup_sects * 512)
- goto fail;
-
- /* load 32 bit code */
- size = read(fd, addr, 16 * 1024 * 1024);
- if (size < 0)
- goto fail;
- close(fd);
return size;
- fail:
- close(fd);
- return -1;
+}
+
+static int fread2guest(target_phys_addr_t dst_addr, size_t nbytes, FILE *f)
+{
+ size_t offset = 0;
+ while (nbytes) {
+ uint8_t buf[4096];
+ size_t count = nbytes > sizeof(buf) ? sizeof(buf) : nbytes;
+ if (fread(buf, 1, count, f) != count)
+ return -1;
+
+ cpu_physical_memory_rw(dst_addr+offset, buf, count, 1);
+ offset += count;
+ nbytes -= count;
+ }
+ return 0;
+}
+
+static void load_linux(const char *kernel_filename,
+ const char *initrd_filename,
+ const char *kernel_cmdline)
+{
+ uint16_t protocol;
+ uint32_t gpr[8];
+ uint16_t seg[6];
+ uint16_t real_seg;
+ int setup_size, kernel_size, initrd_size, cmdline_size;
+ uint32_t initrd_max;
+ uint8_t header[1024];
+ target_phys_addr_t real_addr, reloc_prot_addr, prot_addr, cmdline_addr, initrd_addr;
+ size_t ncmdline;
+ FILE *f, *fi;
+
+ /* Align to 16 bytes as a paranoia measure */
+ cmdline_size = (strlen(kernel_cmdline)+16) & ~15;
+
+ /* load the kernel header */
+ f = fopen(kernel_filename, "rb");
+ if (!f || !(kernel_size = get_file_size(f)) ||
+ fread(header, 1, 1024, f) != 1024) {
+ fprintf(stderr, "qemu: could not load kernel '%s'\n",
+ kernel_filename);
+ exit(1);
+ }
+
+ /* kernel protocol version */
+ fprintf(stderr, "header magic: %#x\n", ldl_p(header+0x202));
+ if (ldl_p(header+0x202) == 0x53726448)
+ protocol = lduw_p(header+0x206);
+ else
+ protocol = 0;
+ fprintf(stderr, "header protocol: %x\n", protocol);
+ if (protocol < 0x200 || !(header[0x211] & 0x01)) {
+ /* Low kernel */
+ real_addr = 0x90000;
+ cmdline_addr = 0x9a000 - cmdline_size;
+ prot_addr = 0x10000;
+ reloc_prot_addr = prot_addr;
+ } else if (protocol < 0x202) {
+ /* High but ancient kernel */
+ real_addr = 0x90000;
+ cmdline_addr = 0x9a000 - cmdline_size;
+ prot_addr = 0x100000;
+ reloc_prot_addr = 0x200000;
+ } else {
+ /* High and recent kernel */
+ real_addr = 0x10000;
+ cmdline_addr = 0x20000;
+ prot_addr = 0x100000;
+ reloc_prot_addr = 0x200000;
+ }
+
+ fprintf(stderr,
+ "qemu: real_addr = %#zx\n"
+ "qemu: cmdline_addr = %#zx\n"
+ "qemu: prot_addr = %#zx\n",
+ real_addr,
+ cmdline_addr,
+ prot_addr);
+
+ /* highest address for loading the initrd */
+ if (protocol >= 0x203)
+ initrd_max = ldl_p(header+0x22c);
+ else
+ initrd_max = 0x37ffffff;
+
+ if (initrd_max >= ram_size-ACPI_DATA_SIZE)
+ initrd_max = ram_size-ACPI_DATA_SIZE-1;
+
+
+ /* kernel command line */
+ ncmdline = strlen(kernel_cmdline);
+ if (ncmdline > 4095) {
+ ncmdline = 4095;
+ ((uint8_t*)kernel_cmdline)[4095] = '\0';
+ }
+ fprintf(stderr, "qemu: kernel_cmdline: %#zx ('%s')\n", ncmdline, kernel_cmdline);
+ cpu_physical_memory_rw(cmdline_addr, (uint8_t*)kernel_cmdline, ncmdline+1, 1);
+
+ if (protocol >= 0x202) {
+ stl_p(header+0x228, cmdline_addr);
+ } else {
+ stw_p(header+0x20, 0xA33F);
+ stw_p(header+0x22, cmdline_addr-real_addr);
+ }
+
+ /* loader type */
+ /* High nybble = B reserved for Qemu; low nybble is revision number.
+ If this code is substantially changed, you may want to consider
+ incrementing the revision. */
+ if (protocol >= 0x200)
+ header[0x210] = 0xB0;
+
+ /* heap */
+ if (protocol >= 0x201) {
+ header[0x211] |= 0x80; /* CAN_USE_HEAP */
+ stw_p(header+0x224, cmdline_addr-real_addr-0x200);
+ }
+
+ /* load initrd */
+ if (initrd_filename) {
+ if (protocol < 0x200) {
+ fprintf(stderr, "qemu: linux kernel too old to load a ram disk\n");
+ exit(1);
+ }
+
+ fi = fopen(initrd_filename, "rb");
+ if (!fi) {
+ fprintf(stderr, "qemu: could not load initial ram disk '%s'\n",
+ initrd_filename);
+ exit(1);
+ }
+
+ initrd_size = get_file_size(fi);
+ initrd_addr = ((initrd_max-initrd_size) & ~4095);
+
+ fprintf(stderr, "qemu: loading initrd (%#x bytes) at %#zx\n",
+ initrd_size, initrd_addr);
+
+ if (fread2guest(initrd_addr, initrd_size, fi) < 0) {
+ fprintf(stderr, "qemu: read error on initial ram disk '%s'\n",
+ initrd_filename);
+ exit(1);
+ }
+ fclose(fi);
+
+ stl_p(header+0x218, initrd_addr);
+ stl_p(header+0x21c, initrd_size);
+ }
+
+
+ setup_size = header[0x1f1];
+ if (setup_size == 0)
+ setup_size = 4;
+
+ setup_size = (setup_size+1)*512;
+ kernel_size -= setup_size; /* Size of protected-mode code */
+
+ /* Urgh, Xen's HVM firmware lives at 0x100000, but that's also the
+ * address Linux wants to start life at prior to relocatable support
+ */
+ if (prot_addr != reloc_prot_addr) {
+ if (protocol >= 0x205 && (header[0x234] & 1)) {
+ /* Relocatable automatically */
+ stl_p(header+0x214, reloc_prot_addr);
+ fprintf(stderr, "qemu: kernel is relocatable\n");
+ } else {
+ fprintf(stderr, "qemu: unable to load non-relocatable kernel\n");
+ exit(1);
+ }
+ }
+
+
+ fprintf(stderr, "qemu: loading kernel real mode (%#x bytes) at %#zx\n",
+ setup_size-1024, real_addr);
+ fprintf(stderr, "qemu: loading kernel protected mode (%#x bytes) at %#zx\n",
+ kernel_size, reloc_prot_addr);
+
+ /* store the finalized header and load the rest of the kernel */
+ cpu_physical_memory_rw(real_addr, header, 1024, 1);
+ if (fread2guest(real_addr+1024, setup_size-1024, f) < 0 ||
+ fread2guest(reloc_prot_addr, kernel_size, f) < 0) {
+ fprintf(stderr, "qemu: loading kernel protected mode (%#x bytes) at %#zx\n",
+ kernel_size, reloc_prot_addr);
+ exit(1);
+ }
+ fclose(f);
+
+ /* generate bootsector to set up the initial register state */
+ real_seg = (real_addr) >> 4;
+ seg[0] = seg[2] = seg[3] = seg[4] = seg[4] = real_seg;
+ seg[1] = real_seg+0x20; /* CS */
+ memset(gpr, 0, sizeof gpr);
+ gpr[4] = cmdline_addr-real_addr-16; /* SP (-16 is paranoia) */
+
+ generate_bootsect(gpr, seg, 0);
}
static void main_cpu_reset(void *opaque)
@@ -577,63 +810,8 @@ static void pc_init1(uint64_t ram_size,
bochs_bios_init();
-#ifndef CONFIG_DM
- if (linux_boot) {
- uint8_t bootsect[512];
- uint8_t old_bootsect[512];
-
- if (bs_table[0] == NULL) {
- fprintf(stderr, "A disk image must be given for 'hda' when booting a Linux kernel\n");
- exit(1);
- }
- snprintf(buf, sizeof(buf), "%s/%s", bios_dir, LINUX_BOOT_FILENAME);
- ret = load_image(buf, bootsect);
- if (ret != sizeof(bootsect)) {
- fprintf(stderr, "qemu: could not load linux boot sector '%s'\n",
- buf);
- exit(1);
- }
-
- if (bdrv_read(bs_table[0], 0, old_bootsect, 1) >= 0) {
- /* copy the MSDOS partition table */
- memcpy(bootsect + 0x1be, old_bootsect + 0x1be, 0x40);
- }
-
- bdrv_set_boot_sector(bs_table[0], bootsect, sizeof(bootsect));
-
- /* now we can load the kernel */
- ret = load_kernel(kernel_filename,
- phys_ram_base + KERNEL_LOAD_ADDR,
- phys_ram_base + KERNEL_PARAMS_ADDR);
- if (ret < 0) {
- fprintf(stderr, "qemu: could not load kernel '%s'\n",
- kernel_filename);
- exit(1);
- }
-
- /* load initrd */
- initrd_size = 0;
- if (initrd_filename) {
- initrd_size = load_image(initrd_filename, phys_ram_base + INITRD_LOAD_ADDR);
- if (initrd_size < 0) {
- fprintf(stderr, "qemu: could not load initial ram disk '%s'\n",
- initrd_filename);
- exit(1);
- }
- }
- if (initrd_size > 0) {
- stl_raw(phys_ram_base + KERNEL_PARAMS_ADDR + 0x218, INITRD_LOAD_ADDR);
- stl_raw(phys_ram_base + KERNEL_PARAMS_ADDR + 0x21c, initrd_size);
- }
- pstrcpy(phys_ram_base + KERNEL_CMDLINE_ADDR, 4096,
- kernel_cmdline);
- stw_raw(phys_ram_base + KERNEL_PARAMS_ADDR + 0x20, 0xA33F);
- stw_raw(phys_ram_base + KERNEL_PARAMS_ADDR + 0x22,
- KERNEL_CMDLINE_ADDR - KERNEL_PARAMS_ADDR);
- /* loader type */
- stw_raw(phys_ram_base + KERNEL_PARAMS_ADDR + 0x210, 0x01);
- }
-#endif /* !CONFIG_DM */
+ if (linux_boot)
+ load_linux(kernel_filename, initrd_filename, kernel_cmdline);
if (pci_enabled) {
pci_bus = i440fx_init(&i440fx_state);
[-- Attachment #3: Type: text/plain, Size: 138 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
^ permalink raw reply [flat|nested] 4+ messages in thread* Re: PATCH 2/3: Support boot of NON-relocatable kernels
2008-01-31 17:48 PATCH 0/3: Direct linux kernel boot for HVM Daniel P. Berrange
2008-01-31 17:52 ` PATCH 1/3: Support booting relocatable kernels Daniel P. Berrange
@ 2008-01-31 17:53 ` Daniel P. Berrange
2008-01-31 17:55 ` PATCH 3/3: XenD changes for HVM kernel boot Daniel P. Berrange
2 siblings, 0 replies; 4+ messages in thread
From: Daniel P. Berrange @ 2008-01-31 17:53 UTC (permalink / raw)
To: xen-devel
[-- Attachment #1: Type: text/plain, Size: 1929 bytes --]
This patch introduces a hack to make non-relocatable kernels
bootable too. Non-relocatable kernels absolutely want to run
at 0x100000 and are not at all happy about being at 0x200000.
Fortunately, thanks to crazy programs like LOADLIN, Linux has
a couple of hooks in its boot process which can be used to
play games. The 'code32_start' hook is executed immediately
following the switch to protected mode. To quote the kernel
docs
[quote Documentation/i386/boot.txt]
code32_start:
A 32-bit flat-mode routine *jumped* to immediately after the
transition to protected mode, but before the kernel is
uncompressed. No segments, except CS, are set up; you should
set them up to KERNEL_DS (0x18) yourself.
After completing your hook, you should jump to the address
that was in this field before your boot loader overwrote it.
IMPORTANT: All the hooks are required to preserve %esp, %ebp, %esi and
%edi across invocation.
[/quote]
So, this patch installs a hook at 0x200000+kernel_size. The hook
is hand crafted assembly which sets up all the segments as needed,
then essentially does memmove(0x100000,0x200000,kernel_size) and
finally does an unconditional jmp to 0x100000.
Amazingly this actually really does work. It has been successfully
tested with RHEL-2.1 and Fedora Core 6 install kernels on i386, and
Fedora Core 6 and 7 kernels on x86_64.
pc.c | 95 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 93 insertions(+), 2 deletions(-)
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Regards,
Dan.
--
|=- Red Hat, Engineering, Emerging Technologies, Boston. +1 978 392 2496 -=|
|=- Perl modules: http://search.cpan.org/~danberr/ -=|
|=- Projects: http://freshmeat.net/~danielpb/ -=|
|=- GnuPG: 7D3B9505 F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 -=|
[-- Attachment #2: xen-hvm-kernel-boot-nonrelocatable.patch --]
[-- Type: text/plain, Size: 4735 bytes --]
--- xen-unstable-16606.orig/tools/ioemu/hw/pc.c 2007-12-18 14:15:17.000000000 -0500
+++ xen-unstable-16606/tools/ioemu/hw/pc.c 2007-12-18 23:53:56.000000000 -0500
@@ -417,6 +417,90 @@ static void generate_bootsect(uint32_t g
bdrv_set_boot_sector(bs_table[0], bootsect, sizeof(bootsect));
}
+/*
+ * Evil helper for non-relocatable kernels
+ *
+ * So it works out like this:
+ *
+ * 0x100000 - Xen HVM firmware lives here. Kernel wants to boot here
+ *
+ * You can't both live there and HVM firmware is needed first, thus
+ * our plan is
+ *
+ * 0x200000 - kernel is loaded here by QEMU
+ * 0x200000+kernel_size - helper code is put here by QEMU
+ *
+ * code32_start in kernel header is set to point at our helper
+ * code at 0x200000+kernel_size
+ *
+ * Our helper basically does memmove(0x100000,0x200000,kernel_size)
+ * and then jmps to 0x100000.
+ *
+ * So we've overwritten the HVM firmware (which was no longer
+ * needed) and the non-relocatable kernel can happily boot
+ * at its usual address.
+ *
+ * Simple, eh ?
+ *
+ * Well the assembler needed to do this is fairly short:
+ *
+ * # Load segments
+ * cld
+ * cli
+ * movl $0x18,%eax
+ * mov %ax,%ds
+ * mov %ax,%es
+ * mov %ax,%fs
+ * mov %ax,%gs
+ * mov %ax,%ss
+ *
+ * # Move the kernel into position
+ * xor %edx,%edx
+ *_doloop:
+ * movzbl 0x600000(%edx),%eax
+ * mov %al,0x100000(%edx)
+ * add $0x1,%edx
+ * cmp $0x500000,%edx
+ * jne _doloop
+ *
+ * # start kernel
+ * xorl %ebx,%ebx
+ * mov $0x100000,%ecx
+ * jmp *%ecx
+ *
+ */
+static void setup_relocator(target_phys_addr_t addr, target_phys_addr_t src, target_phys_addr_t dst, size_t len)
+{
+ /* Now this assembler corresponds to follow machine code, with our args from QEMU spliced in :-) */
+ unsigned char buf[] = {
+ /* Load segments */
+ 0xfc, /* cld */
+ 0xfa, /* cli */
+ 0xb8, 0x18, 0x00, 0x00, 0x00, /* mov $0x18,%eax */
+ 0x8e, 0xd8, /* mov %eax,%ds */
+ 0x8e, 0xc0, /* mov %eax,%es */
+ 0x8e, 0xe0, /* mov %eax,%fs */
+ 0x8e, 0xe8, /* mov %eax,%gs */
+ 0x8e, 0xd0, /* mov %eax,%ss */
+ 0x31, 0xd2, /* xor %edx,%edx */
+
+ /* Move the kernel into position */
+ 0x0f, 0xb6, 0x82, (src&0xff), ((src>>8)&0xff), ((src>>16)&0xff), ((src>>24)&0xff), /* movzbl $src(%edx),%eax */
+ 0x88, 0x82, (dst&0xff), ((dst>>8)&0xff), ((dst>>16)&0xff), ((dst>>24)&0xff), /* mov %al,$dst(%edx) */
+ 0x83, 0xc2, 0x01, /* add $0x1,%edx */
+ 0x81, 0xfa, (len&0xff), ((len>>8)&0xff), ((len>>16)&0xff), ((len>>24)&0xff), /* cmp $len,%edx */
+ 0x75, 0xe8, /* jne 13 <_doloop> */
+
+ /* Start kernel */
+ 0x31, 0xdb, /* xor %ebx,%ebx */
+ 0xb9, (dst&0xff), ((dst>>8)&0xff), ((dst>>16)&0xff), ((dst>>24)&0xff), /* mov $dst,%ecx */
+ 0xff, 0xe1, /* jmp *%ecx */
+ };
+ cpu_physical_memory_rw(addr, buf, sizeof(buf), 1);
+ fprintf(stderr, "qemu: helper at 0x%x of size %d bytes, to move kernel of %d bytes from 0x%x to 0x%x\n",
+ (int)addr, (int)sizeof(buf), (int)len, (int)src, (int)dst);
+}
+
static long get_file_size(FILE *f)
{
@@ -597,8 +681,15 @@ static void load_linux(const char *kerne
stl_p(header+0x214, reloc_prot_addr);
fprintf(stderr, "qemu: kernel is relocatable\n");
} else {
- fprintf(stderr, "qemu: unable to load non-relocatable kernel\n");
- exit(1);
+ /* Setup a helper which moves kernel back to
+ * its expected addr after firmware has got out
+ * of the way. We put a helper at reloc_prot_addr+kernel_size.
+ * It moves kernel from reloc_prot_addr to prot_addr and
+ * then jumps to prot_addr. Yes this is sick.
+ */
+ fprintf(stderr, "qemu: kernel is NOT relocatable\n");
+ stl_p(header+0x214, reloc_prot_addr + kernel_size);
+ setup_relocator(reloc_prot_addr + kernel_size, reloc_prot_addr, prot_addr, kernel_size);
}
}
[-- Attachment #3: Type: text/plain, Size: 138 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
^ permalink raw reply [flat|nested] 4+ messages in thread* Re: PATCH 3/3: XenD changes for HVM kernel boot
2008-01-31 17:48 PATCH 0/3: Direct linux kernel boot for HVM Daniel P. Berrange
2008-01-31 17:52 ` PATCH 1/3: Support booting relocatable kernels Daniel P. Berrange
2008-01-31 17:53 ` PATCH 2/3: Support boot of NON-relocatable kernels Daniel P. Berrange
@ 2008-01-31 17:55 ` Daniel P. Berrange
2 siblings, 0 replies; 4+ messages in thread
From: Daniel P. Berrange @ 2008-01-31 17:55 UTC (permalink / raw)
To: xen-devel
[-- Attachment #1: Type: text/plain, Size: 2564 bytes --]
This patch provides the tools support for direct kernel boot of
HVM guests. Currently the config files in /etc/xen support the
args 'kernel', 'ramdisk' and 'extra'. For PV guests these have
the obvious meaning.
Unfortunately HVM guest configs hijacked the 'kernel' parameter
and use it to refer to the path of the HVM firmware. So, this
patch adds a new config file parameter called 'loader' which is
used to refer to the HVM firmware instead.
The conventions for loading the initrd image say that it should
live at the end of memory. This requires QEMU to know the size
of the guest's initial RAM allocation, so image.py is changed
to pass the '-m' flag to QEMU.
The HVMImageHandler class in image.py is changed so that if the
'kernel', 'ramdisk' or 'extra' params were given in the config
these are passed to QEMU with the '-kernel', '-initrd' and
'-append' flags respectively.
Finally, the 'loader' param is used as the arg to 'xc_hvm_build'
instead of the old 'kernel' param.
For the sake of compatibility with old HVM guest config files,
if the config file has a 'kernel' param whose path matches that
of the HVM firmware, then we automatically convert this 'kernel'
param into the 'loader' param. This ensures existing HVM guests
work without changes required.
For the purposes of testing, my guest looks like this:
name = "hvmdemo"
builder = "hvm"
memory = "500"
disk = [ "file:/var/lib/xen/images/hvmdemo.img,hda,w" ]
uuid = "0a696059-d2e8-2691-86e7-1daeed939649"
device_model = "/usr/lib/xen/bin/qemu-dm"
loader = "/usr/lib/xen/boot/hvmloader"
kernel = "/root/install/vmlinuz-f8-i386"
ramdisk = "/root/install/initrd.img-f8-i386"
extra = "console=ttyS0 console=tty0"
serial = "file:/tmp/hvmdemo.log"
vnc=1
vncunused=1
apic=0
acpi=0
pae=0
Note, here we demonstrate a useful advantage of direct kernel
boot by telling the guest kernel to send its output to the
first serial device, which we then connect to a file for logging.
xend/XendConfig.py | 13 ++++++++++++-
xend/image.py | 24 +++++++++++++++++++-----
xm/create.py | 6 ++++++
3 files changed, 37 insertions(+), 6 deletions(-)
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Regards,
Dan.
--
|=- Red Hat, Engineering, Emerging Technologies, Boston. +1 978 392 2496 -=|
|=- Perl modules: http://search.cpan.org/~danberr/ -=|
|=- Projects: http://freshmeat.net/~danielpb/ -=|
|=- GnuPG: 7D3B9505 F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 -=|
[-- Attachment #2: xen-hvm-kernel-boot-xend.patch --]
[-- Type: text/plain, Size: 5899 bytes --]
diff -r 5f997b5b8a58 tools/python/xen/xend/XendConfig.py
--- a/tools/python/xen/xend/XendConfig.py Wed Jan 30 15:19:22 2008 +0000
+++ b/tools/python/xen/xend/XendConfig.py Thu Jan 31 12:50:30 2008 -0500
@@ -124,7 +124,7 @@ LEGACY_CFG_TO_XENAPI_CFG = reverse_dict(
LEGACY_CFG_TO_XENAPI_CFG = reverse_dict(XENAPI_CFG_TO_LEGACY_CFG)
# Platform configuration keys.
-XENAPI_PLATFORM_CFG = [ 'acpi', 'apic', 'boot', 'device_model', 'display',
+XENAPI_PLATFORM_CFG = [ 'acpi', 'apic', 'boot', 'device_model', 'loader', 'display',
'fda', 'fdb', 'keymap', 'isa', 'localtime', 'monitor',
'nographic', 'pae', 'rtc_timeoffset', 'serial', 'sdl',
'soundhw','stdvga', 'usb', 'usbdevice', 'vnc',
@@ -404,6 +404,17 @@ class XendConfig(dict):
self['platform']['device_model'] = xen.util.auxbin.pathTo("qemu-dm")
if self.is_hvm():
+ if 'loader' not in self['platform']:
+ log.debug("No loader present")
+ # Old configs may have hvmloader set as PV_kernel param,
+ # so let's migrate them....
+ if self['PV_kernel'] == "/usr/lib/xen/boot/hvmloader":
+ self['platform']['loader'] = self['PV_kernel']
+ log.debug("Loader copied from kernel %s" % str(self['platform']['loader']))
+ else:
+ self['platform']['loader'] = "/usr/lib/xen/boot/hvmloader"
+ log.debug("Loader %s" % str(self['platform']['loader']))
+
# Compatibility hack, can go away soon.
if 'soundhw' not in self['platform'] and \
self['platform'].get('enable_audio'):
diff -r 5f997b5b8a58 tools/python/xen/xend/image.py
--- a/tools/python/xen/xend/image.py Wed Jan 30 15:19:22 2008 +0000
+++ b/tools/python/xen/xend/image.py Thu Jan 31 12:50:30 2008 -0500
@@ -127,7 +127,7 @@ class ImageHandler:
"""
# Set params and call buildDomain().
- if not os.path.isfile(self.kernel):
+ if self.kernel and not os.path.isfile(self.kernel):
raise VmError('Kernel image does not exist: %s' % self.kernel)
if self.ramdisk and not os.path.isfile(self.ramdisk):
raise VmError('Kernel ramdisk does not exist: %s' % self.ramdisk)
@@ -186,6 +186,10 @@ class ImageHandler:
# xm config file
def parseDeviceModelArgs(self, vmConfig):
ret = ["-domain-name", str(self.vm.info['name_label'])]
+
+ # Tell QEMU how large the guest's memory allocation is
+ # to help it when loading the initrd (if necessary)
+ ret += ["-m", str(self.getRequiredInitialReservation() / 1024)]
# Find RFB console device, and if it exists, make QEMU enable
# the VNC console.
@@ -420,8 +424,7 @@ class HVMImageHandler(ImageHandler):
def configure(self, vmConfig):
ImageHandler.configure(self, vmConfig)
- if not self.kernel:
- self.kernel = '/usr/lib/xen/boot/hvmloader'
+ self.loader = vmConfig['platform'].get('loader')
info = xc.xeninfo()
if 'hvm' not in info['xen_caps']:
@@ -445,6 +448,17 @@ class HVMImageHandler(ImageHandler):
def parseDeviceModelArgs(self, vmConfig):
ret = ImageHandler.parseDeviceModelArgs(self, vmConfig)
ret = ret + ['-vcpus', str(self.vm.getVCpuCount())]
+
+ if self.kernel and self.kernel != "/usr/lib/xen/boot/hvmloader":
+ log.debug("kernel = %s", self.kernel)
+ ret = ret + ['-kernel', self.kernel]
+ if self.ramdisk:
+ log.debug("ramdisk = %s", self.ramdisk)
+ ret = ret + ['-initrd', self.ramdisk]
+ if self.cmdline:
+ log.debug("cmdline = %s", self.cmdline)
+ ret = ret + ['-append', self.cmdline]
+
dmargs = [ 'boot', 'fda', 'fdb', 'soundhw',
'localtime', 'serial', 'stdvga', 'isa',
@@ -521,7 +535,7 @@ class HVMImageHandler(ImageHandler):
mem_mb = self.getRequiredInitialReservation() / 1024
log.debug("domid = %d", self.vm.getDomid())
- log.debug("image = %s", self.kernel)
+ log.debug("image = %s", self.loader)
log.debug("store_evtchn = %d", store_evtchn)
log.debug("memsize = %d", mem_mb)
log.debug("vcpus = %d", self.vm.getVCpuCount())
@@ -529,7 +543,7 @@ class HVMImageHandler(ImageHandler):
log.debug("apic = %d", self.apic)
rc = xc.hvm_build(domid = self.vm.getDomid(),
- image = self.kernel,
+ image = self.loader,
memsize = mem_mb,
vcpus = self.vm.getVCpuCount(),
acpi = self.acpi,
diff -r 5f997b5b8a58 tools/python/xen/xm/create.py
--- a/tools/python/xen/xm/create.py Wed Jan 30 15:19:22 2008 +0000
+++ b/tools/python/xen/xm/create.py Thu Jan 31 12:50:30 2008 -0500
@@ -158,6 +158,10 @@ gopts.var('ramdisk', val='FILE',
fn=set_value, default='',
use="Path to ramdisk.")
+gopts.var('loader', val='FILE',
+ fn=set_value, default='',
+ use="Path to HVM firmware.")
+
gopts.var('features', val='FEATURES',
fn=set_value, default='',
use="Features to enable in guest kernel")
@@ -561,6 +565,8 @@ def configure_image(vals):
config_image.append([ 'kernel', os.path.abspath(vals.kernel) ])
if vals.ramdisk:
config_image.append([ 'ramdisk', os.path.abspath(vals.ramdisk) ])
+ if vals.loader:
+ config_image.append([ 'loader', os.path.abspath(vals.loader) ])
if vals.cmdline_ip:
cmdline_ip = strip('ip=', vals.cmdline_ip)
config_image.append(['ip', cmdline_ip])
[-- Attachment #3: Type: text/plain, Size: 138 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
^ permalink raw reply [flat|nested] 4+ messages in thread