All of lore.kernel.org
 help / color / mirror / Atom feed
From: Anthony Liguori <anthony@codemonkey.ws>
To: qemu-devel@nongnu.org
Cc: "René Rebe" <rene@exactcode.de>, "Alexander Graf" <agraf@suse.de>
Subject: Re: [Qemu-devel] [PATCH] Add multi-boot kernel loading support
Date: Mon, 02 Feb 2009 16:42:52 -0600	[thread overview]
Message-ID: <498776EC.1050208@codemonkey.ws> (raw)
In-Reply-To: <49873680.2040603@exactcode.de>

Rene Rebe wrote:
> Hi all,
>
> Alexander Graf implemented multi-boot kernel loading during
> his work to run Darwin inside Qemu/KVM. As the boot loader
> expects to load the kernel in an EFI environment a custom
> booter is used to load the kernel using a legacy BIOS.
>
> This is a port of the patch to the new extload / INT 19
> machinery (including minor cleanups).
>
> Signed-off-by: René Rebe <rene@exactcode.de>
>
> Index: elf_ops.h
> ===================================================================
> --- elf_ops.h    (revision 6501)
> +++ elf_ops.h    (working copy)
> @@ -195,6 +195,10 @@
>      }
>
>      if (ELF_MACHINE != ehdr.e_machine)
> +#if (ELF_MACHINE == EM_X86_64) && !defined(CONFIG_USER_ONLY)
> +      /* x86_64 systems can run i386 code as well */
> +      if(ehdr.e_machine != EM_386)
> +#endif
>          goto fail;
>
>      if (pentry)

Seems like an unrelated fix.

> Index: hw/pc.c
> ===================================================================
> --- hw/pc.c    (revision 6501)
> +++ hw/pc.c    (working copy)
> @@ -533,6 +533,438 @@
>      return size;
>  }
>
> +/* Generate an initial boot sector which sets state and jump to
> +   a specified vector */
> +static void generate_bootsect_multiboot(uint8_t *option_rom,
> +                                        uint32_t mh_entry_addr, 
> uint32_t bootinfo)
> +{
> +    uint8_t rom[512], *p, *reloc, *pgdt, *pmmaploop;
> +    uint8_t sum;
> +    uint32_t ip;
> +    int i;
> +    int mmaploop;
> +
> +    memset(rom, 0, sizeof(rom));
> +
> +    p = rom;
> +    /* Make sure we have an option rom signature */
> +    *p++ = 0x55;
> +    *p++ = 0xaa;
> +
> +    /* ROM size in sectors*/
> +    *p++ = 1;
> +
> +    /* Hook int19 */
> +
> +    *p++ = 0x50;                /* push ax */
> +    *p++ = 0x1e;                /* push ds */
> +    *p++ = 0x31; *p++ = 0xc0;   /* xor ax, ax */
> +    *p++ = 0x8e; *p++ = 0xd8;   /* mov ax, ds */
> +
> +    *p++ = 0xc7; *p++ = 0x06;   /* movvw _start,0x64 */
> +    *p++ = 0x64; *p++ = 0x00;
> +    reloc = p;
> +    *p++ = 0x00; *p++ = 0x00;
> +
> +    *p++ = 0x8c; *p++ = 0x0e;   /* mov cs,0x66 */
> +    *p++ = 0x66; *p++ = 0x00;
> +
> +    *p++ = 0x1f;                /* pop ds */
> +    *p++ = 0x58;                /* pop ax */
> +    *p++ = 0xcb;                /* lret */
> +    /* Actual code */
> +    *reloc = (p - rom);

The above could be refactored into common code.

> +    *p++ = 0xfa;                /* CLI */
> +    *p++ = 0xfc;                /* CLD */
> +
> +    /* 660f011528000000            lgdt [0x28] */
> +    *p++ = 0x66;                /* 32-bit operand size */
> +    *p++ = 0x67;                /* 32-bit addr size */
> +    *p++ = 0x0f;                /* LGDT [0x128] */
> +    *p++ = 0x01;
> +    *p++ = 0x15;
> +    pgdt=p; /* we calculate the gdt position later */
> +    p+=4;
> +
> +    /* Initialize multiboot mmap structs using the 0x15(e820) */
> +    *p++ = 0x31;                /* XOR BX,BX */
> +    *p++ = 0xdb;
> +
> +    *p++ = 0x66;                /* 32-bit operand size */
> +    *p++ = 0x67;                /* 32-bit addr size */
> +    *p++ = 0xbf;                /* MOV EDI,0x9004 */
> +    *p++ = 0x04;
> +    *p++ = 0x90;
> +    *p++ = 0x00;
> +    *p++ = 0x00;
> +
> +    pmmaploop = p;
> +    *p++ = 0x66;                /* 32-bit operand size */
> +    *p++ = 0x67;                /* 32-bit addr size */
> +    *p++ = 0xb8;                /* MOV EAX,0x20 */
> +    *p++ = 0x20;
> +    *p++ = 0x00;
> +    *p++ = 0x00;
> +    *p++ = 0x00;
> +
> +    *p++ = 0x66;                /* 32-bit operand size */
> +    *p++ = 0x67;                /* 32-bit addr size */
> +    *p++ = 0x89;                /* MOV -4(EDI),EAX */
> +    *p++ = 0x47;
> +    *p++ = 0xfc;
> +
> +    *p++ = 0x66;                /* 32-bit operand size */
> +    *p++ = 0x67;                /* 32-bit addr size */
> +    *p++ = 0xb8;                /* MOV EAX,0x0000e820 */
> +    *p++ = 0x20;
> +    *p++ = 0xe8;
> +    *p++ = 0x00;
> +    *p++ = 0x00;
> +
> +    *p++ = 0x66;                /* 32-bit operand size */
> +    *p++ = 0x67;                /* 32-bit addr size */
> +    *p++ = 0xba;                /* MOV EDX,0x534d4150 */
> +    *p++ = 0x50;
> +    *p++ = 0x41;
> +    *p++ = 0x4d;
> +    *p++ = 0x53;
> +
> +    *p++ = 0x66;                /* 32-bit operand size */
> +    *p++ = 0x67;                /* 32-bit addr size */
> +    *p++ = 0xb9;                /* MOV ECX,0x20 */
> +    *p++ = 0x20;
> +    *p++ = 0x00;
> +    *p++ = 0x00;
> +    *p++ = 0x00;
> +
> +    *p++ = 0xcd;                /* INT 0x15 */
> +    *p++ = 0x15;
> +
> +    *p++ = 0x66;                /* 32-bit operand size */
> +    *p++ = 0x67;                /* 32-bit addr size */
> +    *p++ = 0xb8;                /* MOV EAX, 0x24 */
> +    *p++ = 0x24;
> +    *p++ = 0x00;
> +    *p++ = 0x00;
> +    *p++ = 0x00;
> +
> +    *p++ = 0xf7;                /* MUL AX, BX */
> +    *p++ = 0xe3;
> +
> +    *p++ = 0x66;                /* 32-bit operand size */
> +    *p++ = 0x67;                /* 32-bit addr size */
> +    *p++ = 0x21;                /* AND EBX, EBX */
> +    *p++ = 0xdb;
> +
> +    /* don't store if bx = 0 */
> +    *p++ = 0x66;                /* 32-bit operand size */
> +    *p++ = 0x67;                /* 32-bit addr size */
> +    *p++ = 0x0f;                /* JZ next instruction */
> +    *p++ = 0x84;
> +    *p++ = 0x07;
> +    *p++ = 0x00;
> +    *p++ = 0x00;
> +    *p++ = 0x00;
> +
> +    /* store the amount of blocks in the bootinfo struct */
> +    *p++ = 0x66;                /* 32-bit operand size */
> +    *p++ = 0x67;                /* 32-bit addr size */
> +    *p++ = 0xa3;                /* MOV [bootinfo+0x2c], EAX */
> +    *p++ = (bootinfo+0x2c);
> +    *p++ = (bootinfo+0x2c) >> 8;
> +    *p++ = (bootinfo+0x2c) >> 16;
> +    *p++ = (bootinfo+0x2c) >> 24;
> +
> +    *p++ = 0x66;                /* 32-bit operand size */
> +    *p++ = 0x67;                /* 32-bit addr size */
> +    *p++ = 0x05;                /* ADD EAX, 0x9004 */
> +    *p++ = 0x04;
> +    *p++ = 0x90;
> +    *p++ = 0x00;
> +    *p++ = 0x00;
> +
> +    *p++ = 0x89;                /* MOV DI, AX */
> +    *p++ = 0xc7;
> +
> +    *p++ = 0x66;                /* 32-bit operand size */
> +    *p++ = 0x67;                /* 32-bit addr size */
> +    *p++ = 0x21;                /* AND EBX, EBX */
> +    *p++ = 0xdb;
> +
> +    /* process next entry */
> +    *p++ = 0x66;                /* 32-bit operand size */
> +    *p++ = 0x67;                /* 32-bit addr size */
> +    *p++ = 0x0f;                /* JNZ mmaploop */
> +    *p++ = 0x85;
> +    mmaploop = (int)((long)pmmaploop) - ((long)p) - 4;
> +    *p++ = mmaploop;
> +    *p++ = mmaploop >> 8;
> +    *p++ = mmaploop >> 16;
> +    *p++ = mmaploop >> 24;
> +
> +    /* get us to protected mode now */
> +
> +    *p++ = 0x66;
> +    *p++ = 0xb8;                /* MOV EAX,0x01 */
> +    *p++ = 0x01;
> +    *p++ = 0x00;
> +    *p++ = 0x00;
> +    *p++ = 0x00;
> +
> +    *p++ = 0x0f;                /* MOV CR0,EAX */
> +    *p++ = 0x22;
> +    *p++ = 0xc0;
> +
> +    /* the JMP sets CS for us and gets us to 32-bit */
> +    ip = 0x000d0000 + (p - rom) + 8; /* set i to the IP after the JMP */
> +    *p++ = 0x66;                /* 32-bit operand size */
> +    *p++ = 0xea;                /* JMP */
> +    *p++ = ip;        /* IP */
> +    *p++ = ip >> 8;
> +    *p++ = ip >> 16;
> +    *p++ = ip >> 24;
> +    *p++ = 0x08;
> +    *p++ = 0x00;
> +
> +    /* initialize all other segments */
> +    *p++ = 0xb8;                /* MOV EAX,0x10 */
> +    *p++ = 0x10;
> +    *p++ = 0x00;
> +    *p++ = 0x00;
> +    *p++ = 0x00;
> +    for (i = 0; i < 6; i++) {
> +        if (i == 1)                /* Skip CS */
> +            continue;
> +
> +        *p++ = 0x8e;                /* MOV <seg>,EAX */
> +        *p++ = 0xc0 + (i << 3);
> +    }
> +
> +    /* EBX contains a pointer to the bootinfo struct */
> +    *p++ = 0xbb;                /* MOV EBX,imm32 */
> +    *p++ = bootinfo;
> +    *p++ = bootinfo >> 8;
> +    *p++ = bootinfo >> 16;
> +    *p++ = bootinfo >> 24;
> +
> +    /* EAX has to contain the following magic */
> +    *p++ = 0xb8;                /* MOV EAX,0x2badb002 */
> +    *p++ = 0x02;
> +    *p++ = 0xb0;
> +    *p++ = 0xad;
> +    *p++ = 0x2b;
> +
> +    /* Jump off to the kernel */
> +    *p++ = 0xea;                /* JMP */
> +    *p++ = mh_entry_addr;        /* IP */
> +    *p++ = mh_entry_addr >> 8;
> +    *p++ = mh_entry_addr >> 16;
> +    *p++ = mh_entry_addr >> 24;
> +    *p++ = 0x08;
> +    *p++ = 0x00;
> +
> +    { /* GDT loading */
> +        uint32_t gdt_base = 0x000d0000 + (p - rom); /* 0x00007c00 is 
> the first IP */
> +        uint32_t gdtr = gdt_base + 0x28;
> +        uint8_t gdt[] = { /* GDT base: 0x00000100 */
> +            /* 0x00 */
> +            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
> +            /* 0x08: code segment (base=0, limit=0xfffff, type=32bit 
> code exec/read, DPL=0, 4k) */
> +            0xff, 0xff, 0x00, 0x00, 0x00, 0x9a, 0xcf, 0x00,
> +            /* 0x10: data segment (base=0, limit=0xfffff, type=32bit 
> data read/write, DPL=0, 4k) */
> +            0xff, 0xff, 0x00, 0x00, 0x00, 0x92, 0xcf, 0x00,
> +            /* 0x18: code segment (base=0, limit=0x0ffff, type=16bit 
> code exec/read/conf, DPL=0, 1b) */
> +            0xff, 0xff, 0x00, 0x00, 0x00, 0x9e, 0x00, 0x00,
> +            /* 0x20: data segment (base=0, limit=0x0ffff, type=16bit 
> data read/write, DPL=0, 1b) */
> +            0xff, 0xff, 0x00, 0x00, 0x00, 0x92, 0x00, 0x00,
> +            /* 0x28: gdtdesc */
> +            0x27, 0x00, gdt_base, gdt_base >> 8, gdt_base >> 16, 
> gdt_base >> 24
> +        };
> +
> +        memcpy(p, gdt, sizeof(gdt));
> +        p+=sizeof(gdt);
> +        *pgdt++ = gdtr;
> +        *pgdt++ = gdtr >> 8;
> +        *pgdt++ = gdtr >> 16;
> +        *pgdt++ = gdtr >> 24;
> +    }

It was already pushing the limit to embed the Linux loader.  This is a 
bit too much.  Perhaps we should split it out into a separate .S file?  I 
guess we could apply it as is, but I'd like to see this code (along with 
the Linux code) moved to another file at least.

> +    fprintf(stderr, "qemu: multiboot loader code is %d bytes 
> long.\n", (int)(p-rom));
> +
> +    /* sign rom */
> +    sum = 0;
> +    for (i = 0; i < (sizeof(rom) - 1); i++)
> +        sum += rom[i];
> +    rom[sizeof(rom) - 1] = -sum;
> +
> +    memcpy(option_rom, rom, sizeof(rom));

This too could be common code.

> +}
> +
> +static int load_multiboot(uint8_t *option_rom,
> +                          FILE *f,
> +                          const char *kernel_filename,
> +                          const char *initrd_filename,
> +                          const char *kernel_cmdline,
> +                          uint8_t *header)
> +{
> +    int i, is_multiboot = 0;
> +    uint32_t flags = 0;
> +    uint32_t mh_entry_addr;
> +    uint32_t mh_load_addr;
> +    uint32_t mb_kernel_size;
> +    uint32_t mb_bootinfo = 0x90000;
> +    uint32_t tmp_size;
> +
> +    /* Ok, let's see if it is a multiboot image */
> +    for(i = 0; i < 8144; i += 4) { /* the header is 12x32bit long, so */
> +                                   /* the latest entry may be 8192 - 
> 48 */
> +        if(ldl_p(header+i) == 0x1BADB002) {
> +            uint32_t checksum = ldl_p(header+i+8);
> +            flags = ldl_p(header+i+4);
> +            checksum += flags;
> +            checksum += (uint32_t)0x1BADB002;
> +            if(!checksum) {
> +                is_multiboot = 1;
> +                break;
> +            }
> +        }
> +    }
> +
> +    if(!is_multiboot) return 0; /* no multiboot */
> +    fprintf(stderr, "qemu: I believe we found a multiboot image!\n");
> +
> +    if(flags & 0x00000004) { /* MULTIBOOT_HEADER_HAS_VBE */
> +        fprintf(stderr, "qemu: multiboot knows VBE. we don't.\n");
> +    }
> +    if(!(flags & 0x00010000)) { /* MULTIBOOT_HEADER_HAS_ADDR */
> +        uint64_t elf_entry;
> +        int kernel_size;
> +        fclose(f);
> +        kernel_size = load_elf(kernel_filename, 0, &elf_entry, NULL, 
> NULL);
> +        if(kernel_size < 0) {
> +            fprintf(stderr, "Error while loading elf kernel\n");
> +            exit(1);
> +        }
> +        mh_load_addr = mh_entry_addr = elf_entry;
> +        mb_kernel_size = kernel_size;
> +
> +        fprintf(stderr, "qemu: loading multiboot-elf kernel (%#x 
> bytes) with entry %#zx\n",
> +                mb_kernel_size, (size_t)mh_entry_addr);
> +    } else {
> +        /* Valid if mh_flags sets MULTIBOOT_HEADER_HAS_ADDR. */
> +        uint32_t mh_header_addr = ldl_p(header+i+12);
> +        mh_load_addr = ldl_p(header+i+16);

This mixes variable declarations and code: the mh_load_addr assignment sits between declarations.

> +        uint32_t mh_load_end_addr = ldl_p(header+i+20);
> +        uint32_t mh_bss_end_addr = ldl_p(header+i+24);
> +        uint8_t *mb_kernel_addr = phys_ram_base + (mh_load_addr);
> +        uint32_t mb_kernel_text_offset = i - (mh_header_addr - 
> mh_load_addr);
> +
> +        mh_entry_addr = ldl_p(header+i+28);
> +        mb_kernel_size = get_file_size(f) - mb_kernel_text_offset;
> +
> +        /* Valid if mh_flags sets MULTIBOOT_HEADER_HAS_VBE.
> +        uint32_t mh_mode_type = ldl_p(header+i+32);
> +        uint32_t mh_width = ldl_p(header+i+36);
> +        uint32_t mh_height = ldl_p(header+i+40);
> +        uint32_t mh_depth = ldl_p(header+i+44); */
> +
> +        fprintf(stderr, "multiboot: mh_header_addr = %#x\n", 
> mh_header_addr);
> +        fprintf(stderr, "multiboot: mh_load_addr = %#x\n", 
> mh_load_addr);
> +        fprintf(stderr, "multiboot: mh_load_end_addr = %#x\n", 
> mh_load_end_addr);
> +        fprintf(stderr, "multiboot: mh_bss_end_addr = %#x\n", 
> mh_bss_end_addr);

It's a bit chatty by default.

> +        fseek(f, mb_kernel_text_offset, SEEK_SET);
> +
> +        fprintf(stderr, "qemu: loading multiboot kernel (%#x bytes) 
> at %#zx\n",
> +                mb_kernel_size, mb_kernel_addr - phys_ram_base);
> +
> +        if ((tmp_size=fread(mb_kernel_addr, 1, mb_kernel_size, f)) != 
> mb_kernel_size) {
> +            fprintf(stderr, "qemu: read error on multiboot kernel 
> '%s' (%#x != %#x)\n", kernel_filename, tmp_size, mb_kernel_size);
> +            exit(1);
> +        }
> +        fclose(f);
> +    }
> +
> +
> +    /* load modules */
> +
> +    stl_p(phys_ram_base + mb_bootinfo + 20, 0x0); /* mods_count */
> +    if(initrd_filename) {
> +        uint32_t mb_mod_info = mb_bootinfo + 0x100;
> +        uint32_t mb_mod_cmdline = mb_bootinfo+ 0x300;
> +        uint32_t mb_mod_start = mh_load_addr;
> +        uint32_t mb_mod_length = mb_kernel_size;
> +        char *next_initrd;
> +        char *next_space;
> +        int mb_mod_count = 0;
> +
> +        do {
> +            next_initrd = strchr(initrd_filename, ',');
> +            if(next_initrd)
> +                *next_initrd = '\0';
> +            /* if a space comes after the module filename, treat 
> everything after that as parameters */
> +            strcpy(phys_ram_base + mb_mod_cmdline, initrd_filename);
> +            stl_p(phys_ram_base + mb_mod_info + 8, mb_mod_cmdline); 
> /* string */
> +            mb_mod_cmdline += strlen(initrd_filename) + 1;
> +            if(next_space = strchr(initrd_filename, ' '))
> +                *next_space = '\0';
> +printf("multiboot loading module: %s\n", initrd_filename);

Weird indent: this printf is not indented like the surrounding code.

What all uses multiboot at this point?  I know Xen does.

Some additional documentation would be nice too.

Regards,

Anthony Liguori

  parent reply	other threads:[~2009-02-02 22:43 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-02-02 18:08 [Qemu-devel] [PATCH] Add multi-boot kernel loading support Rene Rebe
2009-02-02 18:15 ` Nathan Froyd
2009-02-02 18:17   ` Alexander Graf
2009-02-02 18:25     ` Nathan Froyd
2009-02-02 22:42 ` Anthony Liguori [this message]
2009-02-03  8:15   ` Rene Rebe
2009-02-03  8:23     ` Alexander Graf
2009-02-03  8:27       ` Rene Rebe
2009-02-03  8:25   ` Alexander Graf
2009-02-03  8:29     ` Rene Rebe
2009-02-03 10:53     ` Jamie Lokier
2009-02-03 11:12       ` Alexander Graf
2009-02-03 12:58 ` Kevin Wolf
2009-02-03 13:57   ` Rene Rebe
2009-02-03 14:31     ` Kevin Wolf
2009-02-04 10:36       ` Rene Rebe
2009-02-04 13:05         ` Kevin Wolf
2009-02-03 13:01 ` Paul Brook
2009-02-03 14:00   ` Rene Rebe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=498776EC.1050208@codemonkey.ws \
    --to=anthony@codemonkey.ws \
    --cc=agraf@suse.de \
    --cc=qemu-devel@nongnu.org \
    --cc=rene@exactcode.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.