From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from mailman by lists.gnu.org with tmda-scanned (Exim 4.43) id 1LU7Vb-0006Xu-1J for qemu-devel@nongnu.org; Mon, 02 Feb 2009 17:43:15 -0500 Received: from exim by lists.gnu.org with spam-scanned (Exim 4.43) id 1LU7Va-0006XF-7c for qemu-devel@nongnu.org; Mon, 02 Feb 2009 17:43:14 -0500 Received: from [199.232.76.173] (port=37387 helo=monty-python.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1LU7Va-0006X3-1R for qemu-devel@nongnu.org; Mon, 02 Feb 2009 17:43:14 -0500 Received: from yw-out-1718.google.com ([74.125.46.152]:11414) by monty-python.gnu.org with esmtp (Exim 4.60) (envelope-from ) id 1LU7VZ-0007Ys-JZ for qemu-devel@nongnu.org; Mon, 02 Feb 2009 17:43:13 -0500 Received: by yw-out-1718.google.com with SMTP id 6so819894ywa.82 for ; Mon, 02 Feb 2009 14:43:12 -0800 (PST) Message-ID: <498776EC.1050208@codemonkey.ws> Date: Mon, 02 Feb 2009 16:42:52 -0600 From: Anthony Liguori MIME-Version: 1.0 Subject: Re: [Qemu-devel] [PATCH] Add multi-boot kernel loading support References: <49873680.2040603@exactcode.de> In-Reply-To: <49873680.2040603@exactcode.de> Content-Type: text/plain; charset=ISO-8859-1; format=flowed Content-Transfer-Encoding: 8bit Reply-To: qemu-devel@nongnu.org List-Id: qemu-devel.nongnu.org List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: qemu-devel@nongnu.org Cc: =?ISO-8859-1?Q?Ren=E9_Rebe?= , Alexander Graf Rene Rebe wrote: > Hi all, > > Alexander Graf implemented multi-boot kernel loading during > his work to run Darwin inside Qemu/KVM. As the boot loader > expects to load the kernel in an EFI environment a custom > booter is used to load the kernel using a legacy BIOS. > > This is a port of the patch to the new extload / INT 19 > machinery (including minor cleanups). > > Signed-off-by: René Rebe > > Index: elf_ops.h > =================================================================== > --- elf_ops.h (revision 6501) > +++ elf_ops.h (working copy) > @@ -195,6 +195,10 @@ > } > > if (ELF_MACHINE != ehdr.e_machine) > +#if (ELF_MACHINE == EM_X86_64) && !defined(CONFIG_USER_ONLY) > + /* x86_64 systems can run i386 code as well */ > + if(ehdr.e_machine != EM_386) > +#endif > goto fail; > > if (pentry) Seems like an unrelated fix. > Index: hw/pc.c > =================================================================== > --- hw/pc.c (revision 6501) > +++ hw/pc.c (working copy) > @@ -533,6 +533,438 @@ > return size; > } > > +/* Generate an initial boot sector which sets state and jump to > + a specified vector */ > +static void generate_bootsect_multiboot(uint8_t *option_rom, > + uint32_t mh_entry_addr, > uint32_t bootinfo) > +{ > + uint8_t rom[512], *p, *reloc, *pgdt, *pmmaploop; > + uint8_t sum; > + uint32_t ip; > + int i; > + int mmaploop; > + > + memset(rom, 0, sizeof(rom)); > + > + p = rom; > + /* Make sure we have an option rom signature */ > + *p++ = 0x55; > + *p++ = 0xaa; > + > + /* ROM size in sectors*/ > + *p++ = 1; > + > + /* Hook int19 */ > + > + *p++ = 0x50; /* push ax */ > + *p++ = 0x1e; /* push ds */ > + *p++ = 0x31; *p++ = 0xc0; /* xor ax, ax */ > + *p++ = 0x8e; *p++ = 0xd8; /* mov ax, ds */ > + > + *p++ = 0xc7; *p++ = 0x06; /* movvw _start,0x64 */ > + *p++ = 0x64; *p++ = 0x00; > + reloc = p; > + *p++ = 0x00; *p++ = 0x00; > + > + *p++ = 0x8c; *p++ = 0x0e; /* mov cs,0x66 */ > + *p++ = 0x66; *p++ = 0x00; > + > + *p++ = 0x1f; /* pop ds */ > + *p++ = 0x58; /* pop ax */ > + *p++ = 0xcb; /* lret */ > + /* Actual code */ > + *reloc = (p - rom); The above could be refactored to common code. > + *p++ = 0xfa; /* CLI */ > + *p++ = 0xfc; /* CLD */ > + > + /* 660f011528000000 lgdt [0x28] */ > + *p++ = 0x66; /* 32-bit operand size */ > + *p++ = 0x67; /* 32-bit addr size */ > + *p++ = 0x0f; /* LGDT [0x128] */ > + *p++ = 0x01; > + *p++ = 0x15; > + pgdt=p; /* we calculate the gdt position later */ > + p+=4; > + > + /* Initialize multiboot mmap structs using the 0x15(e820) */ > + *p++ = 0x31; /* XOR BX,BX */ > + *p++ = 0xdb; > + > + *p++ = 0x66; /* 32-bit operand size */ > + *p++ = 0x67; /* 32-bit addr size */ > + *p++ = 0xbf; /* MOV EDI,0x9004 */ > + *p++ = 0x04; > + *p++ = 0x90; > + *p++ = 0x00; > + *p++ = 0x00; > + > + pmmaploop = p; > + *p++ = 0x66; /* 32-bit operand size */ > + *p++ = 0x67; /* 32-bit addr size */ > + *p++ = 0xb8; /* MOV EAX,0x20 */ > + *p++ = 0x20; > + *p++ = 0x00; > + *p++ = 0x00; > + *p++ = 0x00; > + > + *p++ = 0x66; /* 32-bit operand size */ > + *p++ = 0x67; /* 32-bit addr size */ > + *p++ = 0x89; /* MOV -4(EDI),EAX */ > + *p++ = 0x47; > + *p++ = 0xfc; > + > + *p++ = 0x66; /* 32-bit operand size */ > + *p++ = 0x67; /* 32-bit addr size */ > + *p++ = 0xb8; /* MOV EAX,0x0000e820 */ > + *p++ = 0x20; > + *p++ = 0xe8; > + *p++ = 0x00; > + *p++ = 0x00; > + > + *p++ = 0x66; /* 32-bit operand size */ > + *p++ = 0x67; /* 32-bit addr size */ > + *p++ = 0xba; /* MOV EDX,0x534d4150 */ > + *p++ = 0x50; > + *p++ = 0x41; > + *p++ = 0x4d; > + *p++ = 0x53; > + > + *p++ = 0x66; /* 32-bit operand size */ > + *p++ = 0x67; /* 32-bit addr size */ > + *p++ = 0xb9; /* MOV ECX,0x20 */ > + *p++ = 0x20; > + *p++ = 0x00; > + *p++ = 0x00; > + *p++ = 0x00; > + > + *p++ = 0xcd; /* INT 0x15 */ > + *p++ = 0x15; > + > + *p++ = 0x66; /* 32-bit operand size */ > + *p++ = 0x67; /* 32-bit addr size */ > + *p++ = 0xb8; /* MOV EAX, 0x24 */ > + *p++ = 0x24; > + *p++ = 0x00; > + *p++ = 0x00; > + *p++ = 0x00; > + > + *p++ = 0xf7; /* MUL AX, BX */ > + *p++ = 0xe3; > + > + *p++ = 0x66; /* 32-bit operand size */ > + *p++ = 0x67; /* 32-bit addr size */ > + *p++ = 0x21; /* AND EBX, EBX */ > + *p++ = 0xdb; > + > + /* don't store if bx = 0 */ > + *p++ = 0x66; /* 32-bit operand size */ > + *p++ = 0x67; /* 32-bit addr size */ > + *p++ = 0x0f; /* JZ next instruction */ > + *p++ = 0x84; > + *p++ = 0x07; > + *p++ = 0x00; > + *p++ = 0x00; > + *p++ = 0x00; > + > + /* store the amount of blocks in the bootinfo struct */ > + *p++ = 0x66; /* 32-bit operand size */ > + *p++ = 0x67; /* 32-bit addr size */ > + *p++ = 0xa3; /* MOV [bootinfo+0x2c], EAX */ > + *p++ = (bootinfo+0x2c); > + *p++ = (bootinfo+0x2c) >> 8; > + *p++ = (bootinfo+0x2c) >> 16; > + *p++ = (bootinfo+0x2c) >> 24; > + > + *p++ = 0x66; /* 32-bit operand size */ > + *p++ = 0x67; /* 32-bit addr size */ > + *p++ = 0x05; /* ADD EAX, 0x9004 */ > + *p++ = 0x04; > + *p++ = 0x90; > + *p++ = 0x00; > + *p++ = 0x00; > + > + *p++ = 0x89; /* MOV DI, AX */ > + *p++ = 0xc7; > + > + *p++ = 0x66; /* 32-bit operand size */ > + *p++ = 0x67; /* 32-bit addr size */ > + *p++ = 0x21; /* AND EBX, EBX */ > + *p++ = 0xdb; > + > + /* process next entry */ > + *p++ = 0x66; /* 32-bit operand size */ > + *p++ = 0x67; /* 32-bit addr size */ > + *p++ = 0x0f; /* JNZ mmaploop */ > + *p++ = 0x85; > + mmaploop = (int)((long)pmmaploop) - ((long)p) - 4; > + *p++ = mmaploop; > + *p++ = mmaploop >> 8; > + *p++ = mmaploop >> 16; > + *p++ = mmaploop >> 24; > + > + /* get us to protected mode now */ > + > + *p++ = 0x66; > + *p++ = 0xb8; /* MOV EAX,0x01 */ > + *p++ = 0x01; > + *p++ = 0x00; > + *p++ = 0x00; > + *p++ = 0x00; > + > + *p++ = 0x0f; /* MOV CR0,EAX */ > + *p++ = 0x22; > + *p++ = 0xc0; > + > + /* the JMP sets CS for us and gets us to 32-bit */ > + ip = 0x000d0000 + (p - rom) + 8; /* set i to the IP after the JMP */ > + *p++ = 0x66; /* 32-bit operand size */ > + *p++ = 0xea; /* JMP */ > + *p++ = ip; /* IP */ > + *p++ = ip >> 8; > + *p++ = ip >> 16; > + *p++ = ip >> 24; > + *p++ = 0x08; > + *p++ = 0x00; > + > + /* initialize all other segments */ > + *p++ = 0xb8; /* MOV EAX,0x10 */ > + *p++ = 0x10; > + *p++ = 0x00; > + *p++ = 0x00; > + *p++ = 0x00; > + for (i = 0; i < 6; i++) { > + if (i == 1) /* Skip CS */ > + continue; > + > + *p++ = 0x8e; /* MOV ,EAX */ > + *p++ = 0xc0 + (i << 3); > + } > + > + /* EBX contains a pointer to the bootinfo struct */ > + *p++ = 0xbb; /* MOV EBX,imm32 */ > + *p++ = bootinfo; > + *p++ = bootinfo >> 8; > + *p++ = bootinfo >> 16; > + *p++ = bootinfo >> 24; > + > + /* EAX has to contain the following magic */ > + *p++ = 0xb8; /* MOV EAX,0x2badb002 */ > + *p++ = 0x02; > + *p++ = 0xb0; > + *p++ = 0xad; > + *p++ = 0x2b; > + > + /* Jump off to the kernel */ > + *p++ = 0xea; /* JMP */ > + *p++ = mh_entry_addr; /* IP */ > + *p++ = mh_entry_addr >> 8; > + *p++ = mh_entry_addr >> 16; > + *p++ = mh_entry_addr >> 24; > + *p++ = 0x08; > + *p++ = 0x00; > + > + { /* GDT loading */ > + uint32_t gdt_base = 0x000d0000 + (p - rom); /* 0x00007c00 is > the first IP */ > + uint32_t gdtr = gdt_base + 0x28; > + uint8_t gdt[] = { /* GDT base: 0x00000100 */ > + /* 0x00 */ > + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, > + /* 0x08: code segment (base=0, limit=0xfffff, type=32bit > code exec/read, DPL=0, 4k) */ > + 0xff, 0xff, 0x00, 0x00, 0x00, 0x9a, 0xcf, 0x00, > + /* 0x10: data segment (base=0, limit=0xfffff, type=32bit > data read/write, DPL=0, 4k) */ > + 0xff, 0xff, 0x00, 0x00, 0x00, 0x92, 0xcf, 0x00, > + /* 0x18: code segment (base=0, limit=0x0ffff, type=16bit > code exec/read/conf, DPL=0, 1b) */ > + 0xff, 0xff, 0x00, 0x00, 0x00, 0x9e, 0x00, 0x00, > + /* 0x20: data segment (base=0, limit=0x0ffff, type=16bit > data read/write, DPL=0, 1b) */ > + 0xff, 0xff, 0x00, 0x00, 0x00, 0x92, 0x00, 0x00, > + /* 0x28: gdtdesc */ > + 0x27, 0x00, gdt_base, gdt_base >> 8, gdt_base >> 16, > gdt_base >> 24 > + }; > + > + memcpy(p, gdt, sizeof(gdt)); > + p+=sizeof(gdt); > + *pgdt++ = gdtr; > + *pgdt++ = gdtr >> 8; > + *pgdt++ = gdtr >> 16; > + *pgdt++ = gdtr >> 24; > + } It was already pushing the limit to embed the linux loader. This is a bit too much. Perhaps we should split it to a separate .S file? I guess we could apply it as is but I'd like to see this code (along with the Linux code) moved to another file at least. > + fprintf(stderr, "qemu: multiboot loader code is %d bytes > long.\n", (int)(p-rom)); > + > + /* sign rom */ > + sum = 0; > + for (i = 0; i < (sizeof(rom) - 1); i++) > + sum += rom[i]; > + rom[sizeof(rom) - 1] = -sum; > + > + memcpy(option_rom, rom, sizeof(rom)); This too could be common code. > +} > + > +static int load_multiboot(uint8_t *option_rom, > + FILE *f, > + const char *kernel_filename, > + const char *initrd_filename, > + const char *kernel_cmdline, > + uint8_t *header) > +{ > + int i, is_multiboot = 0; > + uint32_t flags = 0; > + uint32_t mh_entry_addr; > + uint32_t mh_load_addr; > + uint32_t mb_kernel_size; > + uint32_t mb_bootinfo = 0x90000; > + uint32_t tmp_size; > + > + /* Ok, let's see if it is a multiboot image */ > + for(i = 0; i < 8144; i += 4) { /* the header is 12x32bit long, so */ > + /* the latest entry may be 8192 - > 48 */ > + if(ldl_p(header+i) == 0x1BADB002) { > + uint32_t checksum = ldl_p(header+i+8); > + flags = ldl_p(header+i+4); > + checksum += flags; > + checksum += (uint32_t)0x1BADB002; > + if(!checksum) { > + is_multiboot = 1; > + break; > + } > + } > + } > + > + if(!is_multiboot) return 0; /* no multiboot */ > + fprintf(stderr, "qemu: I believe we found a multiboot image!\n"); > + > + if(flags & 0x00000004) { /* MULTIBOOT_HEADER_HAS_VBE */ > + fprintf(stderr, "qemu: multiboot knows VBE. we don't.\n"); > + } > + if(!(flags & 0x00010000)) { /* MULTIBOOT_HEADER_HAS_ADDR */ > + uint64_t elf_entry; > + int kernel_size; > + fclose(f); > + kernel_size = load_elf(kernel_filename, 0, &elf_entry, NULL, > NULL); > + if(kernel_size < 0) { > + fprintf(stderr, "Error while loading elf kernel\n"); > + exit(1); > + } > + mh_load_addr = mh_entry_addr = elf_entry; > + mb_kernel_size = kernel_size; > + > + fprintf(stderr, "qemu: loading multiboot-elf kernel (%#x > bytes) with entry %#zx\n", > + mb_kernel_size, (size_t)mh_entry_addr); > + } else { > + /* Valid if mh_flags sets MULTIBOOT_HEADER_HAS_ADDR. */ > + uint32_t mh_header_addr = ldl_p(header+i+12); > + mh_load_addr = ldl_p(header+i+16); Mixing variable definitions and code. > + uint32_t mh_load_end_addr = ldl_p(header+i+20); > + uint32_t mh_bss_end_addr = ldl_p(header+i+24); > + uint8_t *mb_kernel_addr = phys_ram_base + (mh_load_addr); > + uint32_t mb_kernel_text_offset = i - (mh_header_addr - > mh_load_addr); > + > + mh_entry_addr = ldl_p(header+i+28); > + mb_kernel_size = get_file_size(f) - mb_kernel_text_offset; > + > + /* Valid if mh_flags sets MULTIBOOT_HEADER_HAS_VBE. > + uint32_t mh_mode_type = ldl_p(header+i+32); > + uint32_t mh_width = ldl_p(header+i+36); > + uint32_t mh_height = ldl_p(header+i+40); > + uint32_t mh_depth = ldl_p(header+i+44); */ > + > + fprintf(stderr, "multiboot: mh_header_addr = %#x\n", > mh_header_addr); > + fprintf(stderr, "multiboot: mh_load_addr = %#x\n", > mh_load_addr); > + fprintf(stderr, "multiboot: mh_load_end_addr = %#x\n", > mh_load_end_addr); > + fprintf(stderr, "multiboot: mh_bss_end_addr = %#x\n", > mh_bss_end_addr); It's a bit chatty by default. > + fseek(f, mb_kernel_text_offset, SEEK_SET); > + > + fprintf(stderr, "qemu: loading multiboot kernel (%#x bytes) > at %#zx\n", > + mb_kernel_size, mb_kernel_addr - phys_ram_base); > + > + if ((tmp_size=fread(mb_kernel_addr, 1, mb_kernel_size, f)) != > mb_kernel_size) { > + fprintf(stderr, "qemu: read error on multiboot kernel > '%s' (%#x != %#x)\n", kernel_filename, tmp_size, mb_kernel_size); > + exit(1); > + } > + fclose(f); > + } > + > + > + /* load modules */ > + > + stl_p(phys_ram_base + mb_bootinfo + 20, 0x0); /* mods_count */ > + if(initrd_filename) { > + uint32_t mb_mod_info = mb_bootinfo + 0x100; > + uint32_t mb_mod_cmdline = mb_bootinfo+ 0x300; > + uint32_t mb_mod_start = mh_load_addr; > + uint32_t mb_mod_length = mb_kernel_size; > + char *next_initrd; > + char *next_space; > + int mb_mod_count = 0; > + > + do { > + next_initrd = strchr(initrd_filename, ','); > + if(next_initrd) > + *next_initrd = '\0'; > + /* if a space comes after the module filename, treat > everything after that as parameters */ > + strcpy(phys_ram_base + mb_mod_cmdline, initrd_filename); > + stl_p(phys_ram_base + mb_mod_info + 8, mb_mod_cmdline); > /* string */ > + mb_mod_cmdline += strlen(initrd_filename) + 1; > + if(next_space = strchr(initrd_filename, ' ')) > + *next_space = '\0'; > +printf("multiboot loading module: %s\n", initrd_filename); Weird indent. What all uses multiboot at this point? I know Xen does. Some additional documentation would be nice too. Regards, Anthony Liguori