qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Anthony Liguori <anthony@codemonkey.ws>
To: qemu-devel@nongnu.org
Cc: "René Rebe" <rene@exactcode.de>, "Alexander Graf" <agraf@suse.de>
Subject: Re: [Qemu-devel] [PATCH] Add multi-boot kernel loading support
Date: Mon, 02 Feb 2009 16:42:52 -0600	[thread overview]
Message-ID: <498776EC.1050208@codemonkey.ws> (raw)
In-Reply-To: <49873680.2040603@exactcode.de>

Rene Rebe wrote:
> Hi all,
>
> Alexander Graf implemented multi-boot kernel loading during
> his work to run Darwin inside Qemu/KVM. As the boot loader
> expects to load the kernel in an EFI environment a custom
> booter is used to load the kernel using a legacy BIOS.
>
> This is a port of the patch to the new extload / INT 19
> machinery (including minor cleanups).
>
> Signed-off-by: René Rebe <rene@exactcode.de>
>
> Index: elf_ops.h
> ===================================================================
> --- elf_ops.h    (revision 6501)
> +++ elf_ops.h    (working copy)
> @@ -195,6 +195,10 @@
>      }
>
>      if (ELF_MACHINE != ehdr.e_machine)
> +#if (ELF_MACHINE == EM_X86_64) && !defined(CONFIG_USER_ONLY)
> +      /* x86_64 systems can run i386 code as well */
> +      if(ehdr.e_machine != EM_386)
> +#endif
>          goto fail;
>
>      if (pentry)

Seems like an unrelated fix.

> Index: hw/pc.c
> ===================================================================
> --- hw/pc.c    (revision 6501)
> +++ hw/pc.c    (working copy)
> @@ -533,6 +533,438 @@
>      return size;
>  }
>
> +/* Generate an initial boot sector which sets state and jump to
> +   a specified vector */
> +static void generate_bootsect_multiboot(uint8_t *option_rom,
> +                                        uint32_t mh_entry_addr, 
> uint32_t bootinfo)
> +{
> +    uint8_t rom[512], *p, *reloc, *pgdt, *pmmaploop;
> +    uint8_t sum;
> +    uint32_t ip;
> +    int i;
> +    int mmaploop;
> +
> +    memset(rom, 0, sizeof(rom));
> +
> +    p = rom;
> +    /* Make sure we have an option rom signature */
> +    *p++ = 0x55;
> +    *p++ = 0xaa;
> +
> +    /* ROM size in sectors*/
> +    *p++ = 1;
> +
> +    /* Hook int19 */
> +
> +    *p++ = 0x50;                /* push ax */
> +    *p++ = 0x1e;                /* push ds */
> +    *p++ = 0x31; *p++ = 0xc0;   /* xor ax, ax */
> +    *p++ = 0x8e; *p++ = 0xd8;   /* mov ax, ds */
> +
> +    *p++ = 0xc7; *p++ = 0x06;   /* movvw _start,0x64 */
> +    *p++ = 0x64; *p++ = 0x00;
> +    reloc = p;
> +    *p++ = 0x00; *p++ = 0x00;
> +
> +    *p++ = 0x8c; *p++ = 0x0e;   /* mov cs,0x66 */
> +    *p++ = 0x66; *p++ = 0x00;
> +
> +    *p++ = 0x1f;                /* pop ds */
> +    *p++ = 0x58;                /* pop ax */
> +    *p++ = 0xcb;                /* lret */
> +    /* Actual code */
> +    *reloc = (p - rom);

The above could be refactored to common code.

> +    *p++ = 0xfa;                /* CLI */
> +    *p++ = 0xfc;                /* CLD */
> +
> +    /* 660f011528000000            lgdt [0x28] */
> +    *p++ = 0x66;                /* 32-bit operand size */
> +    *p++ = 0x67;                /* 32-bit addr size */
> +    *p++ = 0x0f;                /* LGDT [0x128] */
> +    *p++ = 0x01;
> +    *p++ = 0x15;
> +    pgdt=p; /* we calculate the gdt position later */
> +    p+=4;
> +
> +    /* Initialize multiboot mmap structs using the 0x15(e820) */
> +    *p++ = 0x31;                /* XOR BX,BX */
> +    *p++ = 0xdb;
> +
> +    *p++ = 0x66;                /* 32-bit operand size */
> +    *p++ = 0x67;                /* 32-bit addr size */
> +    *p++ = 0xbf;                /* MOV EDI,0x9004 */
> +    *p++ = 0x04;
> +    *p++ = 0x90;
> +    *p++ = 0x00;
> +    *p++ = 0x00;
> +
> +    pmmaploop = p;
> +    *p++ = 0x66;                /* 32-bit operand size */
> +    *p++ = 0x67;                /* 32-bit addr size */
> +    *p++ = 0xb8;                /* MOV EAX,0x20 */
> +    *p++ = 0x20;
> +    *p++ = 0x00;
> +    *p++ = 0x00;
> +    *p++ = 0x00;
> +
> +    *p++ = 0x66;                /* 32-bit operand size */
> +    *p++ = 0x67;                /* 32-bit addr size */
> +    *p++ = 0x89;                /* MOV -4(EDI),EAX */
> +    *p++ = 0x47;
> +    *p++ = 0xfc;
> +
> +    *p++ = 0x66;                /* 32-bit operand size */
> +    *p++ = 0x67;                /* 32-bit addr size */
> +    *p++ = 0xb8;                /* MOV EAX,0x0000e820 */
> +    *p++ = 0x20;
> +    *p++ = 0xe8;
> +    *p++ = 0x00;
> +    *p++ = 0x00;
> +
> +    *p++ = 0x66;                /* 32-bit operand size */
> +    *p++ = 0x67;                /* 32-bit addr size */
> +    *p++ = 0xba;                /* MOV EDX,0x534d4150 */
> +    *p++ = 0x50;
> +    *p++ = 0x41;
> +    *p++ = 0x4d;
> +    *p++ = 0x53;
> +
> +    *p++ = 0x66;                /* 32-bit operand size */
> +    *p++ = 0x67;                /* 32-bit addr size */
> +    *p++ = 0xb9;                /* MOV ECX,0x20 */
> +    *p++ = 0x20;
> +    *p++ = 0x00;
> +    *p++ = 0x00;
> +    *p++ = 0x00;
> +
> +    *p++ = 0xcd;                /* INT 0x15 */
> +    *p++ = 0x15;
> +
> +    *p++ = 0x66;                /* 32-bit operand size */
> +    *p++ = 0x67;                /* 32-bit addr size */
> +    *p++ = 0xb8;                /* MOV EAX, 0x24 */
> +    *p++ = 0x24;
> +    *p++ = 0x00;
> +    *p++ = 0x00;
> +    *p++ = 0x00;
> +
> +    *p++ = 0xf7;                /* MUL AX, BX */
> +    *p++ = 0xe3;
> +
> +    *p++ = 0x66;                /* 32-bit operand size */
> +    *p++ = 0x67;                /* 32-bit addr size */
> +    *p++ = 0x21;                /* AND EBX, EBX */
> +    *p++ = 0xdb;
> +
> +    /* don't store if bx = 0 */
> +    *p++ = 0x66;                /* 32-bit operand size */
> +    *p++ = 0x67;                /* 32-bit addr size */
> +    *p++ = 0x0f;                /* JZ next instruction */
> +    *p++ = 0x84;
> +    *p++ = 0x07;
> +    *p++ = 0x00;
> +    *p++ = 0x00;
> +    *p++ = 0x00;
> +
> +    /* store the amount of blocks in the bootinfo struct */
> +    *p++ = 0x66;                /* 32-bit operand size */
> +    *p++ = 0x67;                /* 32-bit addr size */
> +    *p++ = 0xa3;                /* MOV [bootinfo+0x2c], EAX */
> +    *p++ = (bootinfo+0x2c);
> +    *p++ = (bootinfo+0x2c) >> 8;
> +    *p++ = (bootinfo+0x2c) >> 16;
> +    *p++ = (bootinfo+0x2c) >> 24;
> +
> +    *p++ = 0x66;                /* 32-bit operand size */
> +    *p++ = 0x67;                /* 32-bit addr size */
> +    *p++ = 0x05;                /* ADD EAX, 0x9004 */
> +    *p++ = 0x04;
> +    *p++ = 0x90;
> +    *p++ = 0x00;
> +    *p++ = 0x00;
> +
> +    *p++ = 0x89;                /* MOV DI, AX */
> +    *p++ = 0xc7;
> +
> +    *p++ = 0x66;                /* 32-bit operand size */
> +    *p++ = 0x67;                /* 32-bit addr size */
> +    *p++ = 0x21;                /* AND EBX, EBX */
> +    *p++ = 0xdb;
> +
> +    /* process next entry */
> +    *p++ = 0x66;                /* 32-bit operand size */
> +    *p++ = 0x67;                /* 32-bit addr size */
> +    *p++ = 0x0f;                /* JNZ mmaploop */
> +    *p++ = 0x85;
> +    mmaploop = (int)((long)pmmaploop) - ((long)p) - 4;
> +    *p++ = mmaploop;
> +    *p++ = mmaploop >> 8;
> +    *p++ = mmaploop >> 16;
> +    *p++ = mmaploop >> 24;
> +
> +    /* get us to protected mode now */
> +
> +    *p++ = 0x66;
> +    *p++ = 0xb8;                /* MOV EAX,0x01 */
> +    *p++ = 0x01;
> +    *p++ = 0x00;
> +    *p++ = 0x00;
> +    *p++ = 0x00;
> +
> +    *p++ = 0x0f;                /* MOV CR0,EAX */
> +    *p++ = 0x22;
> +    *p++ = 0xc0;
> +
> +    /* the JMP sets CS for us and gets us to 32-bit */
> +    ip = 0x000d0000 + (p - rom) + 8; /* set i to the IP after the JMP */
> +    *p++ = 0x66;                /* 32-bit operand size */
> +    *p++ = 0xea;                /* JMP */
> +    *p++ = ip;        /* IP */
> +    *p++ = ip >> 8;
> +    *p++ = ip >> 16;
> +    *p++ = ip >> 24;
> +    *p++ = 0x08;
> +    *p++ = 0x00;
> +
> +    /* initialize all other segments */
> +    *p++ = 0xb8;                /* MOV EAX,0x10 */
> +    *p++ = 0x10;
> +    *p++ = 0x00;
> +    *p++ = 0x00;
> +    *p++ = 0x00;
> +    for (i = 0; i < 6; i++) {
> +        if (i == 1)                /* Skip CS */
> +            continue;
> +
> +        *p++ = 0x8e;                /* MOV <seg>,EAX */
> +        *p++ = 0xc0 + (i << 3);
> +    }
> +
> +    /* EBX contains a pointer to the bootinfo struct */
> +    *p++ = 0xbb;                /* MOV EBX,imm32 */
> +    *p++ = bootinfo;
> +    *p++ = bootinfo >> 8;
> +    *p++ = bootinfo >> 16;
> +    *p++ = bootinfo >> 24;
> +
> +    /* EAX has to contain the following magic */
> +    *p++ = 0xb8;                /* MOV EAX,0x2badb002 */
> +    *p++ = 0x02;
> +    *p++ = 0xb0;
> +    *p++ = 0xad;
> +    *p++ = 0x2b;
> +
> +    /* Jump off to the kernel */
> +    *p++ = 0xea;                /* JMP */
> +    *p++ = mh_entry_addr;        /* IP */
> +    *p++ = mh_entry_addr >> 8;
> +    *p++ = mh_entry_addr >> 16;
> +    *p++ = mh_entry_addr >> 24;
> +    *p++ = 0x08;
> +    *p++ = 0x00;
> +
> +    { /* GDT loading */
> +        uint32_t gdt_base = 0x000d0000 + (p - rom); /* 0x00007c00 is 
> the first IP */
> +        uint32_t gdtr = gdt_base + 0x28;
> +        uint8_t gdt[] = { /* GDT base: 0x00000100 */
> +            /* 0x00 */
> +            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
> +            /* 0x08: code segment (base=0, limit=0xfffff, type=32bit 
> code exec/read, DPL=0, 4k) */
> +            0xff, 0xff, 0x00, 0x00, 0x00, 0x9a, 0xcf, 0x00,
> +            /* 0x10: data segment (base=0, limit=0xfffff, type=32bit 
> data read/write, DPL=0, 4k) */
> +            0xff, 0xff, 0x00, 0x00, 0x00, 0x92, 0xcf, 0x00,
> +            /* 0x18: code segment (base=0, limit=0x0ffff, type=16bit 
> code exec/read/conf, DPL=0, 1b) */
> +            0xff, 0xff, 0x00, 0x00, 0x00, 0x9e, 0x00, 0x00,
> +            /* 0x20: data segment (base=0, limit=0x0ffff, type=16bit 
> data read/write, DPL=0, 1b) */
> +            0xff, 0xff, 0x00, 0x00, 0x00, 0x92, 0x00, 0x00,
> +            /* 0x28: gdtdesc */
> +            0x27, 0x00, gdt_base, gdt_base >> 8, gdt_base >> 16, 
> gdt_base >> 24
> +        };
> +
> +        memcpy(p, gdt, sizeof(gdt));
> +        p+=sizeof(gdt);
> +        *pgdt++ = gdtr;
> +        *pgdt++ = gdtr >> 8;
> +        *pgdt++ = gdtr >> 16;
> +        *pgdt++ = gdtr >> 24;
> +    }

It was already pushing the limit to embed the Linux loader.  This is a 
bit too much.  Perhaps we should split it into a separate .S file?  I 
guess we could apply it as is, but I'd like to see this code (along with 
the Linux code) moved to another file at least.

> +    fprintf(stderr, "qemu: multiboot loader code is %d bytes 
> long.\n", (int)(p-rom));
> +
> +    /* sign rom */
> +    sum = 0;
> +    for (i = 0; i < (sizeof(rom) - 1); i++)
> +        sum += rom[i];
> +    rom[sizeof(rom) - 1] = -sum;
> +
> +    memcpy(option_rom, rom, sizeof(rom));

This too could be common code.

> +}
> +
> +static int load_multiboot(uint8_t *option_rom,
> +                          FILE *f,
> +                          const char *kernel_filename,
> +                          const char *initrd_filename,
> +                          const char *kernel_cmdline,
> +                          uint8_t *header)
> +{
> +    int i, is_multiboot = 0;
> +    uint32_t flags = 0;
> +    uint32_t mh_entry_addr;
> +    uint32_t mh_load_addr;
> +    uint32_t mb_kernel_size;
> +    uint32_t mb_bootinfo = 0x90000;
> +    uint32_t tmp_size;
> +
> +    /* Ok, let's see if it is a multiboot image */
> +    for(i = 0; i < 8144; i += 4) { /* the header is 12x32bit long, so */
> +                                   /* the latest entry may be 8192 - 
> 48 */
> +        if(ldl_p(header+i) == 0x1BADB002) {
> +            uint32_t checksum = ldl_p(header+i+8);
> +            flags = ldl_p(header+i+4);
> +            checksum += flags;
> +            checksum += (uint32_t)0x1BADB002;
> +            if(!checksum) {
> +                is_multiboot = 1;
> +                break;
> +            }
> +        }
> +    }
> +
> +    if(!is_multiboot) return 0; /* no multiboot */
> +    fprintf(stderr, "qemu: I believe we found a multiboot image!\n");
> +
> +    if(flags & 0x00000004) { /* MULTIBOOT_HEADER_HAS_VBE */
> +        fprintf(stderr, "qemu: multiboot knows VBE. we don't.\n");
> +    }
> +    if(!(flags & 0x00010000)) { /* MULTIBOOT_HEADER_HAS_ADDR */
> +        uint64_t elf_entry;
> +        int kernel_size;
> +        fclose(f);
> +        kernel_size = load_elf(kernel_filename, 0, &elf_entry, NULL, 
> NULL);
> +        if(kernel_size < 0) {
> +            fprintf(stderr, "Error while loading elf kernel\n");
> +            exit(1);
> +        }
> +        mh_load_addr = mh_entry_addr = elf_entry;
> +        mb_kernel_size = kernel_size;
> +
> +        fprintf(stderr, "qemu: loading multiboot-elf kernel (%#x 
> bytes) with entry %#zx\n",
> +                mb_kernel_size, (size_t)mh_entry_addr);
> +    } else {
> +        /* Valid if mh_flags sets MULTIBOOT_HEADER_HAS_ADDR. */
> +        uint32_t mh_header_addr = ldl_p(header+i+12);
> +        mh_load_addr = ldl_p(header+i+16);

Mixing variable definitions and code.

> +        uint32_t mh_load_end_addr = ldl_p(header+i+20);
> +        uint32_t mh_bss_end_addr = ldl_p(header+i+24);
> +        uint8_t *mb_kernel_addr = phys_ram_base + (mh_load_addr);
> +        uint32_t mb_kernel_text_offset = i - (mh_header_addr - 
> mh_load_addr);
> +
> +        mh_entry_addr = ldl_p(header+i+28);
> +        mb_kernel_size = get_file_size(f) - mb_kernel_text_offset;
> +
> +        /* Valid if mh_flags sets MULTIBOOT_HEADER_HAS_VBE.
> +        uint32_t mh_mode_type = ldl_p(header+i+32);
> +        uint32_t mh_width = ldl_p(header+i+36);
> +        uint32_t mh_height = ldl_p(header+i+40);
> +        uint32_t mh_depth = ldl_p(header+i+44); */
> +
> +        fprintf(stderr, "multiboot: mh_header_addr = %#x\n", 
> mh_header_addr);
> +        fprintf(stderr, "multiboot: mh_load_addr = %#x\n", 
> mh_load_addr);
> +        fprintf(stderr, "multiboot: mh_load_end_addr = %#x\n", 
> mh_load_end_addr);
> +        fprintf(stderr, "multiboot: mh_bss_end_addr = %#x\n", 
> mh_bss_end_addr);

It's a bit chatty by default.

> +        fseek(f, mb_kernel_text_offset, SEEK_SET);
> +
> +        fprintf(stderr, "qemu: loading multiboot kernel (%#x bytes) 
> at %#zx\n",
> +                mb_kernel_size, mb_kernel_addr - phys_ram_base);
> +
> +        if ((tmp_size=fread(mb_kernel_addr, 1, mb_kernel_size, f)) != 
> mb_kernel_size) {
> +            fprintf(stderr, "qemu: read error on multiboot kernel 
> '%s' (%#x != %#x)\n", kernel_filename, tmp_size, mb_kernel_size);
> +            exit(1);
> +        }
> +        fclose(f);
> +    }
> +
> +
> +    /* load modules */
> +
> +    stl_p(phys_ram_base + mb_bootinfo + 20, 0x0); /* mods_count */
> +    if(initrd_filename) {
> +        uint32_t mb_mod_info = mb_bootinfo + 0x100;
> +        uint32_t mb_mod_cmdline = mb_bootinfo+ 0x300;
> +        uint32_t mb_mod_start = mh_load_addr;
> +        uint32_t mb_mod_length = mb_kernel_size;
> +        char *next_initrd;
> +        char *next_space;
> +        int mb_mod_count = 0;
> +
> +        do {
> +            next_initrd = strchr(initrd_filename, ',');
> +            if(next_initrd)
> +                *next_initrd = '\0';
> +            /* if a space comes after the module filename, treat 
> everything after that as parameters */
> +            strcpy(phys_ram_base + mb_mod_cmdline, initrd_filename);
> +            stl_p(phys_ram_base + mb_mod_info + 8, mb_mod_cmdline); 
> /* string */
> +            mb_mod_cmdline += strlen(initrd_filename) + 1;
> +            if(next_space = strchr(initrd_filename, ' '))
> +                *next_space = '\0';
> +printf("multiboot loading module: %s\n", initrd_filename);

Weird indent.

What all uses multiboot at this point?  I know Xen does.

Some additional documentation would be nice too.

Regards,

Anthony Liguori

  parent reply	other threads:[~2009-02-02 22:43 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-02-02 18:08 [Qemu-devel] [PATCH] Add multi-boot kernel loading support Rene Rebe
2009-02-02 18:15 ` Nathan Froyd
2009-02-02 18:17   ` Alexander Graf
2009-02-02 18:25     ` Nathan Froyd
2009-02-02 22:42 ` Anthony Liguori [this message]
2009-02-03  8:15   ` Rene Rebe
2009-02-03  8:23     ` Alexander Graf
2009-02-03  8:27       ` Rene Rebe
2009-02-03  8:25   ` Alexander Graf
2009-02-03  8:29     ` Rene Rebe
2009-02-03 10:53     ` Jamie Lokier
2009-02-03 11:12       ` Alexander Graf
2009-02-03 12:58 ` Kevin Wolf
2009-02-03 13:57   ` Rene Rebe
2009-02-03 14:31     ` Kevin Wolf
2009-02-04 10:36       ` Rene Rebe
2009-02-04 13:05         ` Kevin Wolf
2009-02-03 13:01 ` Paul Brook
2009-02-03 14:00   ` Rene Rebe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=498776EC.1050208@codemonkey.ws \
    --to=anthony@codemonkey.ws \
    --cc=agraf@suse.de \
    --cc=qemu-devel@nongnu.org \
    --cc=rene@exactcode.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).