From mboxrd@z Thu Jan 1 00:00:00 1970 From: Avi Kivity Subject: Re: [PATCH 5/5] kvm-lite qemu patch Date: Wed, 05 Sep 2007 20:02:31 +0300 Message-ID: <46DEE127.80702@qumranet.com> References: <1189005567.10802.127.camel@localhost.localdomain> <1189005638.10802.129.camel@localhost.localdomain> <1189005692.10802.132.camel@localhost.localdomain> <1189006973.10802.140.camel@localhost.localdomain> <1189007087.10802.144.camel@localhost.localdomain> Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Cc: kvm-devel To: Rusty Russell Return-path: In-Reply-To: <1189007087.10802.144.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org> List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Sender: kvm-devel-bounces-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org Errors-To: kvm-devel-bounces-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org List-Id: kvm.vger.kernel.org Rusty Russell wrote: > "-M pclite" requires -kernel, requires kvm. Also changes --enable-kvm > to --disable-kvm (it's enabled on supported archs, so we need a disable > not an enable flag). > +#ifdef USE_KVM > +/* FIXME: Run without paging and then turn it on, like normal Guest. */ > +/* Once we know how much memory we have, we can construct simple linear page > + * tables which set virtual == physical which will get the Guest far enough > + * into the boot to create its own. > + * > + * We lay them out of the way, just below the initrd, which is why we need to > + * know its offset (== mem if no initrd). */ > +static unsigned long linear_pagetables(unsigned long mem, > + unsigned long initrd_offset) > +{ > + unsigned long pgdir, linear; > + unsigned int mapped_pages, i, linear_pages; > + unsigned int ptes_per_page = 1024; > + unsigned long pageflags = PG_PRESENT_MASK|PG_RW_MASK; > + > + mapped_pages = mem/TARGET_PAGE_SIZE; > + > + /* Each PTE page can map ptes_per_page pages: how many do we need? */ > + linear_pages = (mapped_pages + ptes_per_page-1)/ptes_per_page; > + > + /* We put the toplevel page directory page at the top of memory. */ > + pgdir = initrd_offset - TARGET_PAGE_SIZE; > + > + /* Now we use the next linear_pages pages as pte pages */ > + linear = pgdir - linear_pages*TARGET_PAGE_SIZE; > + > + /* Linear mapping is easy: put every page's address into the mapping in > + * order. */ > + for (i = 0; i < mapped_pages; i++) > + stl_raw(phys_ram_base + linear + i * sizeof(long), > + (i * TARGET_PAGE_SIZE) | pageflags); > + > + /* The top level points to the linear page table pages above. */ > + for (i = 0; i < mapped_pages; i += ptes_per_page) > + stl_raw(phys_ram_base + pgdir + i/ptes_per_page * sizeof(long), > + (linear + i*4) | pageflags); > + > + /* We return the top level (guest-physical) address: the kernel needs > + * to know where it is. */ > + return (unsigned long)pgdir; > +} > + > +static void hack_console_write(void *opaque, uint32_t addr, uint32_t val) > +{ > + printf("%c", val); fflush(stdout); > +} > + > +static void hack_timer_write(void *opaque, uint32_t addr, uint32_t val) > +{ > + QEMUTimer *irq_timer = opaque; > + > + if (val == 0) > + qemu_del_timer(irq_timer); > + else > + qemu_mod_timer(irq_timer, qemu_get_clock(vm_clock) + val); > +} > + > +/* FIXME: I don't think this is the right way to do an interrupt. */ > +static void hack_timer_tick(void *opaque) > +{ > + first_cpu->hflags &= ~HF_HALTED_MASK; > + if (kvm_inject_irq(kvm_context, 0, 32) != 0) > + fprintf(stderr, "Failed to inject timer irq: %m\n"); > +} > + > +static void pc_init_lite(int ram_size, int vga_ram_size, int boot_device, > + DisplayState *ds, const char **fd_filename, > + int snapshot, > + const char *kernel_filename, > + const char *kernel_cmdline, > + const char *initrd_filename) > +{ > + int ret, initrd_size, i; > + ram_addr_t initrd_offset; > + CPUState *env; > + uint8_t *video_mem; > + QEMUTimer *irq_timer; > + > + printf("Starting pc lite init\n"); > + > + if (!kernel_filename) { > + fprintf(stderr, "qemu: pclite needs -kernel\n"); > + exit(1); > + } > + > + /* init CPUs */ > + for(i = 0; i < smp_cpus; i++) { > + env = cpu_init(); > + if (i != 0) > + env->hflags |= HF_HALTED_MASK; > + if (smp_cpus > 1) { > + /* XXX: enable it in all cases */ > + env->cpuid_features |= CPUID_APIC; > + /* FIXME */ > + fprintf(stderr, "Sorry, SMP not supported"); > + exit(1); > + } > + apic_init(env); > + register_savevm("cpu", i, 4, cpu_save, cpu_load, env); > + qemu_register_reset(main_cpu_reset, env); > + } > + > + printf("pc lite init: %u\n", __LINE__); > + /* allocate RAM */ > + cpu_register_physical_memory(0, ram_size, 0); > + > + /* FIXME: Wire this in somewhere? */ > + ioapic_init(); > + > +#if 0 > + isa_pic = pic_init(pic_irq_request, first_cpu); > + pit = pit_init(0x40, 0); > + for(i = 0; i < MAX_SERIAL_PORTS; i++) { > + if (serial_hds[i]) { > + serial_init(&pic_set_irq_new, isa_pic, > + serial_io[i], serial_irq[i], serial_hds[i]); > + } > + } > +#endif > + > + irq_timer = qemu_new_timer(vm_clock, hack_timer_tick, NULL); > + > + register_ioport_write(0x1, 1, 1, hack_console_write, NULL); > + register_ioport_write(0x2, 1, 4, hack_timer_write, irq_timer); > + > + /* FIXME: kvm_create() "knows" the layout of x86 and doesn't map > + * all the memory. Map the video memory here. */ > + video_mem = kvm_create_phys_mem(kvm_context, 0xA0000, 0x20000, 2, 0, 1); > + if (!video_mem) > + exit(1); > + > +#if 0 > + register_ioport_write(0x3d4, 1, 1, hack_vga_console_write, > + phys_ram_base + 0xB8000); > + register_ioport_write(0x3d5, 1, 1, hack_vga_console_write, > + phys_ram_base + 0xB8000); > +#endif > + > + /* now we can load the kernel */ > + ret = load_kernel(kernel_filename, > + phys_ram_base + KERNEL_LOAD_ADDR, > + phys_ram_base + KERNEL_PARAMS_ADDR); > + if (ret < 0) { > + fprintf(stderr, "qemu: could not load kernel '%s'\n", kernel_filename); > + exit(1); > + } > + > + /* load initrd */ > + initrd_size = 0; > + initrd_offset = ram_size; > + if (initrd_filename) { > + initrd_size = get_image_size (initrd_filename); > + if (initrd_size > 0) { > + initrd_offset = (ram_size - initrd_size) & TARGET_PAGE_MASK; > + if (initrd_offset > MAX_INITRD_LOAD_ADDR) > + initrd_offset = MAX_INITRD_LOAD_ADDR; > + > + if (initrd_size > ram_size > + || initrd_offset < KERNEL_LOAD_ADDR + ret) { > + fprintf(stderr, > + "qemu: memory too small for initial ram disk '%s'\n", > + initrd_filename); > + exit(1); > + } > + initrd_size = load_image(initrd_filename, > + phys_ram_base + initrd_offset); > + } > + if (initrd_size < 0) { > + fprintf(stderr, "qemu: could not load initial ram disk '%s'\n", > + initrd_filename); > + exit(1); > + } > + } > + if (initrd_size > 0) { > + stl_raw(phys_ram_base + KERNEL_PARAMS_ADDR + 0x218, initrd_offset); > + stl_raw(phys_ram_base + KERNEL_PARAMS_ADDR + 0x21c, initrd_size); > + } > + printf("pc lite init: %u\n", __LINE__); > + > + /* Mug screen_info */ > + memset(phys_ram_base + KERNEL_PARAMS_ADDR, 0, 0x40); > + pstrcpy(phys_ram_base + KERNEL_CMDLINE_ADDR, 4096, > + kernel_cmdline); > + stw_raw(phys_ram_base + KERNEL_PARAMS_ADDR + 0x20, 0xA33F); > + stw_raw(phys_ram_base + KERNEL_PARAMS_ADDR + 0x22, > + KERNEL_CMDLINE_ADDR - KERNEL_PARAMS_ADDR); > + /* loader type */ > + stw_raw(phys_ram_base + KERNEL_PARAMS_ADDR + 0x210, 0x01); > + > + /* Platform: kvm-lite */ > + stl_raw(phys_ram_base + KERNEL_PARAMS_ADDR + 0x23c, 0x02); > + > + env->eip = KERNEL_LOAD_ADDR; > + env->eflags = 0x002; > + env->regs[R_ESI] = KERNEL_PARAMS_ADDR; > + > + /* The Linux boot header contains an "E820" memory map: ours is a simple, > + * single region. */ > + stb_raw(phys_ram_base + KERNEL_PARAMS_ADDR + 0x1e8, 1); > + /* Zero it out first. */ > + memset(phys_ram_base + KERNEL_PARAMS_ADDR + 0x2d0, 0, 20); > + stl_raw(phys_ram_base + KERNEL_PARAMS_ADDR + 0x2d8, ram_size); > + stl_raw(phys_ram_base + KERNEL_PARAMS_ADDR + 0x2e0, 1 /* RAM */); > + > + /* KERNEL_CS ring 1 */ > + cpu_x86_load_seg_cache(env, R_CS, 12*8+1, 0, 0xffffffff, 0x00cf9b00); > + /* KERNEL_DS ring 1 */ > + cpu_x86_load_seg_cache(env, R_DS, 13*8+1, 0, 0xffffffff, 0x00cf9300); > + cpu_x86_load_seg_cache(env, R_SS, 13*8+1, 0, 0xffffffff, 0x00cf9300); > + cpu_x86_load_seg_cache(env, R_ES, 13*8+1, 0, 0xffffffff, 0x00cf9300); > + > + /* Page table pointer. */ > + env->cr[3] = linear_pagetables(ram_size, initrd_offset); > + > + /* KVM code cares that CR0 is set correctly (lite doesn't). */ > + env->cr[0] = CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK; > + > + /* We seem to need to force KVM to get these regs now. */ > + kvm_load_registers(env); > + printf("Done pc lite init\n"); > +} > + > +QEMUMachine pclite_machine = { > + "pclite", > + "kvm-lite PC", > + pc_init_lite, > +}; > +#endif /* USE_KVM */ > + > Most of this hunk should go into a separate file. It would also be nice to support the hypercalls in qemu, so we can run without kernel support (on non-x86 hardware, for example). -- Any sufficiently difficult bug is indistinguishable from a feature. ------------------------------------------------------------------------- This SF.net email is sponsored by: Splunk Inc. Still grepping through log files to find problems? Stop. Now Search log events and configuration files using AJAX and a browser. Download your FREE copy of Splunk now >> http://get.splunk.com/