public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
* [RFC] kvm-s390: userspace snapshot
@ 2008-06-06 15:54 Carsten Otte
  2008-06-10  5:55 ` Oliver Paukstadt
  0 siblings, 1 reply; 8+ messages in thread
From: Carsten Otte @ 2008-06-06 15:54 UTC (permalink / raw)
  To: kvm
  Cc: Avi Kivity, aliguori, jblunck, ihno, rvdheij, rusty,
	oliver.paukstadt, Olaf Schnapper

This patch is a full snapshot of "kuli", our current userspace for kvm.
It is <3000 lines of code, and it contains a bootloader as well as
virtio backends for console, network, and block. Its command-line syntax
is the same as the common kvm userspace, but not all options are supported.
See --help for details.
Our next step will be to get rid of the guest phys == user virt address
mapping, so that we can have the guest memory anywhere on a megabyte
boundary in userland. After that, we'd like to integrate this into the
common kvm userspace, it is not intended for customer production use but
is a good starting point to explore what kvm can look like on s390.

Have fun reading
---
diff -ruN empty/core/cpu.c kuli/core/cpu.c
--- empty/core/cpu.c	1970-01-01 01:00:00.000000000 +0100
+++ kuli/core/cpu.c	2008-06-05 13:54:32.000000000 +0200
@@ -0,0 +1,392 @@
+/*
+ * guest cpu related functions
+ * Copyright IBM Corp. 2007,2008
+ * Author: Carsten Otte <cotte@de.ibm.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License(GPL)
+ */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <signal.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <linux/kvm.h>
+#include <unistd.h>
+
+#include <kuli.h>
+#include <vcpu.h>
+
+/* sigp order codes */
+#define SIGP_RESTART           0x06
+#define SIGP_STORE_STATUS_ADDR 0x0e
+#define SIGP_SET_ARCH          0x12
+
+static pthread_t *cputhreads[64];
+static unsigned long cpu_threads_map;
+static unsigned long cpu_running_map;
+static pthread_mutex_t cpu_map_lock = PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t cpu_map_upd_cond = PTHREAD_COND_INITIALIZER;
+static struct vcpu *vcpus[64];
+unsigned int glo_numcpu;
+int glo_stopcpus;
+
+static struct vcpu *get_cpuno(int i)
+{
+	struct vcpu *cpu = vcpus[i];
+	pthread_mutex_lock(&cpu->cpu_lock);
+	return cpu;
+}
+
+static void get_cpu(struct vcpu *cpu)
+{
+	pthread_mutex_lock(&cpu->cpu_lock);
+}
+
+static void put_cpu(struct vcpu *cpu)
+{
+	pthread_mutex_unlock(&cpu->cpu_lock);
+}
+
+/*
+ * mark a CPU in map
+ */
+static void cpumap_set(unsigned int cpuno, unsigned long *map)
+{
+	if (cpuno > 63 || cpuno >= glo_numcpu) /* valid cpus: 0..glo_numcpu-1 */
+		report_it("trying to set invalid cpu");
+	pthread_mutex_lock(&cpu_map_lock);
+	*map |= 1UL<<cpuno;
+	pthread_cond_signal(&cpu_map_upd_cond);
+	pthread_mutex_unlock(&cpu_map_lock);
+}
+
+/*
+ * clear a CPU in map
+ */
+static void cpumap_clear(unsigned int cpuno, unsigned long *map)
+{
+	if (cpuno > 63 || cpuno >= glo_numcpu) /* valid cpus: 0..glo_numcpu-1 */
+		report_it("trying to clear invalid cpu");
+	pthread_mutex_lock(&cpu_map_lock);
+	*map &= ~(1UL<<cpuno);
+	pthread_cond_signal(&cpu_map_upd_cond);
+	pthread_mutex_unlock(&cpu_map_lock);
+}
+
+/*
+ * wait for a map to reach zero/nonzero state
+ */
+static void __cpumap_wait(unsigned long *map, int event, unsigned long val)
+{
+	pthread_mutex_lock(&cpu_map_lock);
+ retry:
+	switch (event) {
+	case 0: /* zero */
+		if (*map != 0)
+			goto wait;
+		break;
+	case 1: /* at least one */
+		if (*map == 0)
+			goto wait;
+		break;
+	case 2: /* specific value */
+		if (*map != val)
+			goto wait;
+		break;
+	}
+	pthread_mutex_unlock(&cpu_map_lock);
+	return;
+ wait:
+	pthread_cond_wait(&cpu_map_upd_cond, &cpu_map_lock);
+	goto retry;
+}
+
+static void cpumap_wait_zero(unsigned long *map)
+{
+	__cpumap_wait(map, 0, 0);
+}
+
+static void cpumap_wait_one(unsigned long *map)
+{
+	__cpumap_wait(map, 1, 0);
+}
+
+static void cpumap_wait_val(unsigned long *map, unsigned long val)
+{
+	__cpumap_wait(map, 2, val);
+}
+
+/*
+ * interpretive execution loop
+ */
+static int __run_cpu(struct vcpu *cpu)
+{
+	int rc = 0;
+	while (rc == 0 && !glo_stopcpus) {
+		kvm_run(cpu);
+		switch (cpu->run->exit_reason) {
+		case KVM_EXIT_S390_SIEIC:
+			rc = handle_intercept(cpu);
+			break;
+		case KVM_EXIT_S390_RESET:
+			rc = handle_reset(cpu);
+			break;
+		default:
+			report_it("unsupported exit reason %d",
+			  cpu->run->exit_reason);
+		}
+	}
+	return rc;
+}
+
+/*
+ * cpu thread
+ */
+static void *run_cpu(void *arg)
+{
+	struct vcpu *cpu = (struct vcpu *)arg;
+	sigset_t sigs;
+	int rc;
+	struct kvm_s390_interrupt kvmint;
+
+	get_cpu(cpu);
+
+	kvm_create_vcpu(cpu);
+	kvm_s390_initial_reset(cpu);
+
+	cpu->cpu_status = CPU_STATUS_STOPPED;
+	cpu->run = (struct kvm_run *) mmap(NULL, 2*getpagesize(),
+			PROT_READ|PROT_WRITE, MAP_SHARED, cpu->cpufd, 0);
+	if (cpu->run == MAP_FAILED) {
+		screen("failed to mmap cpu data for virtual cpu %d - rc %d",
+			cpu->cpuno, errno);
+		exit(1);
+	}
+	put_cpu(cpu);
+
+	/* block signals: SIGINT, SIGUSR1 */
+	sigemptyset(&sigs);
+	sigaddset(&sigs, SIGINT);
+	sigaddset(&sigs, SIGUSR1);
+	sigprocmask(SIG_BLOCK, &sigs, NULL);
+
+	get_cpu(cpu);
+	rc = 0;
+	cpumap_set(cpu->cpuno, &cpu_threads_map);
+	while (!rc && !glo_stopcpus) {
+		switch (cpu->cpu_status) {
+		case CPU_STATUS_RESTART:
+			kvmint.type = KVM_S390_RESTART;
+			kvm_s390_interrupt(cpu, &kvmint);
+			cpu->cpu_status = CPU_STATUS_RUNNING;
+			/* fall through */
+		case CPU_STATUS_RUNNING:
+			/* run work */
+			cpumap_set(cpu->cpuno, &cpu_running_map);
+			put_cpu(cpu);
+			log("cpu %d: activated, running work...", cpu->cpuno);
+			rc = __run_cpu(cpu);
+			if (rc == CPU_STATUS_STOPPED)
+				rc = 0;
+			get_cpu(cpu);
+			cpumap_clear(cpu->cpuno, &cpu_running_map);
+			cpu->cpu_status = CPU_STATUS_STOPPED;
+			/* fall through */
+		case CPU_STATUS_STOPPED:
+			rc = pthread_cond_wait(&cpu->cpu_sleeping,
+					       &cpu->cpu_lock);
+			break;
+		default:
+			report_it("illegal cpu status %d", cpu->cpu_status);
+		}
+	}
+	kvm_s390_store_status(cpu, KVM_S390_STORE_STATUS_PREFIXED);
+	cpumap_clear(cpu->cpuno, &cpu_threads_map);
+	put_cpu(cpu);
+	return (void *)cpu;
+}
+
+/*
+ * after everything is set up proper, this function
+ * launches the ipl cpu and initiates IPL
+ */
+void launch_cpu_ipl(uint64_t address)
+{
+	psw_t ipl_psw;
+
+	struct vcpu *cpu = get_cpuno(0);
+	log("starting guest (ipl)");
+	ipl_psw.mask = 0x0000000180000000UL;
+	ipl_psw.addr = address;
+	kvm_s390_set_initial_psw(cpu, ipl_psw);
+	cpu->cpu_status = CPU_STATUS_RUNNING;
+	pthread_cond_signal(&cpu->cpu_sleeping);
+	put_cpu(cpu);
+}
+
+/*
+ * wait until all cpus are down. note that you gonna wait
+ * forever if you did not initiate cpus becoming down
+ */
+void wait_for_cpusdown(void)
+{
+	unsigned int i;
+	void *tr;
+	struct vcpu *cpu;
+
+	/* wait for cpus to start up, and shut down again */
+	cpumap_wait_one(&cpu_running_map);
+	cpumap_wait_zero(&cpu_running_map);
+	glo_stopcpus = 1;
+
+	for (i = 0; i < glo_numcpu; i++) {
+		cpu = get_cpuno(i);
+		pthread_cond_signal(&cpu->cpu_sleeping);
+		put_cpu(cpu);
+	}
+
+	for (i = 0; i < glo_numcpu; i++) {
+		if (cputhreads[i]) {
+			pthread_join(*cputhreads[i], &tr);
+			cputhreads[i] = NULL;
+		}
+	}
+}
+
+static void __stop_cpu(unsigned int cpuno)
+{
+	struct kvm_s390_interrupt kvmint;
+	struct vcpu *cpu;
+
+	if (cpuno >= glo_numcpu)
+		report_it("called for nonexistent cpu");
+	cpu = get_cpuno(cpuno);
+	kvm_s390_store_status(cpu, KVM_S390_STORE_STATUS_PREFIXED);
+	kvmint.type = KVM_S390_SIGP_STOP;
+	kvm_s390_interrupt(cpu, &kvmint);
+	put_cpu(cpu);
+}
+
+void stop_all_cpus(void)
+{
+	int i;
+	struct vcpu *cpu;
+
+	glo_stopcpus = 1;
+	for (i = 0; i < (int)glo_numcpu; i++) {
+		__stop_cpu(i);
+		cpu = get_cpuno(i);
+		pthread_cond_signal(&vcpus[i]->cpu_sleeping);
+		put_cpu(cpu);
+	}
+}
+
+/*
+ * init_cpus initializes all cpus, called on ipl
+ */
+void init_cpus(void)
+{
+	int rc;
+	struct vcpu *this_pu;
+	unsigned int this_id;
+
+	if (glo_numcpu > 64)
+		glo_numcpu = 64;
+
+	if (glo_numcpu < 1)
+		glo_numcpu = 1;
+
+	for (this_id = 0; this_id < glo_numcpu; this_id++) {
+		this_pu = malloc(sizeof(*this_pu));
+		/* set cpu number */
+		this_pu->cpuno = this_id;
+		/* add this pu to global cpus array */
+		vcpus[this_id] = this_pu;
+		/* init lock */
+		pthread_mutex_init(&this_pu->cpu_lock, NULL);
+		/* init cpu idle condition */
+		pthread_cond_init(&this_pu->cpu_sleeping, NULL);
+
+		cputhreads[this_id] = malloc(sizeof(pthread_t));
+		do {
+			rc = pthread_create(cputhreads[this_id], NULL,
+					    run_cpu, vcpus[this_id]);
+		} while (rc == -EAGAIN);
+	}
+	if (glo_numcpu == 64)
+		cpumap_wait_val(&cpu_threads_map, -1ul);
+	else
+		cpumap_wait_val(&cpu_threads_map, (1ul<<glo_numcpu)-1);
+}
+
+void cpu_restart(unsigned int cpuno)
+{
+	struct vcpu *cpu = get_cpuno(cpuno);
+
+	cpu->cpu_status = CPU_STATUS_RESTART;
+	pthread_cond_signal(&cpu->cpu_sleeping);
+	put_cpu(cpu);
+}
+
+static uint64_t decode_base_displacement(struct vcpu *cpu,
+		struct kvm_regs *regs)
+{
+	uint64_t reg;
+
+	reg = cpu->run->s390_sieic.ipb >> 28;
+	/* register 0 is special*/
+	if (reg > 0)
+		reg = regs->gprs[reg];
+	reg += (cpu->run->s390_sieic.ipb & 0x0fff0000) >> 16;
+	return reg;
+}
+
+int handle_sigp(struct vcpu *cpu)
+{
+	struct kvm_regs regs;
+	uint64_t *status;
+	uint32_t parameter;
+	uint16_t cpu_addr;
+	uint8_t order_code;
+	uint8_t ipa1 = cpu->run->s390_sieic.ipa & 0x00ff;
+	struct vcpu *target;
+
+	kvm_get_regs(cpu, &regs);
+	status = &regs.gprs[(ipa1 & 0xf0)>>4];
+	parameter = ((ipa1 & 0xf0)>>4)%2 ?
+		*status : regs.gprs[((ipa1 & 0xf0)>>4)+1];
+	cpu_addr = regs.gprs[ipa1 & 0x0f];
+	order_code = decode_base_displacement(cpu, &regs);
+
+	switch (order_code) {
+	case SIGP_RESTART:
+		if (cpu_addr >= glo_numcpu)
+			goto no_cpu;
+		cpu_restart(cpu_addr);
+		setcc(cpu, 0);
+		return 0;
+	case SIGP_STORE_STATUS_ADDR:
+		if (cpu_addr >= glo_numcpu)
+			goto no_cpu;
+		parameter = parameter & 0x7ffffe00;
+		target = get_cpuno(cpu_addr);
+		kvm_s390_store_status(target, parameter);
+		put_cpu(target);
+		setcc(cpu, 0);
+		return 0;
+	case SIGP_SET_ARCH:
+		log("cpu %d: received SIGP_SET_ARCH, parameter %d",
+			     cpu->cpuno, parameter & 0xff);
+		return CPU_STATUS_PANIC;
+	default:
+		log("cpu %d: unsupported sigp order code %d",
+			cpu->cpuno, order_code);
+		return CPU_STATUS_PANIC;
+	}
+no_cpu:
+	setcc(cpu, 3);
+	return 0;
+}
+
diff -ruN empty/core/instruction.c kuli/core/instruction.c
--- empty/core/instruction.c	1970-01-01 01:00:00.000000000 +0100
+++ kuli/core/instruction.c	2008-06-05 14:10:45.000000000 +0200
@@ -0,0 +1,63 @@
+/*
+ * instruction interceptions
+ * Copyright IBM Corp. 2007,2008
+ * Author: Carsten Otte <cotte@de.ibm.com>
+ *         Christian Borntraeger <cborntra@de.ibm.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License(GPL)
+ */
+
+#include <string.h>
+
+#include <kuli.h>
+#include <vcpu.h>
+#include <virtio.h>
+
+#define DIAG_MAX		0x500
+
+static const intercept_handler_t priv_handlers[256] = {
+	[PRIV_SCLP_CALL] = sclp_service_call,
+};
+
+static intercept_handler_t diag_handlers[DIAG_MAX+1] = {
+	[DIAG_KVM_HYPERCALL] = handle_kvm_hypercall,
+};
+
+static int handle_priv(struct vcpu *cpu)
+{
+	uint8_t ipa1 = cpu->run->s390_sieic.ipa & 0x00ff;
+
+	if (priv_handlers[ipa1])
+		return priv_handlers[ipa1](cpu);
+	log("cpu %d: unknown privileged instruction b2%02x at addr %lx, "
+	    "sending prog 1", cpu->cpuno, ipa1, cpu->run->s390_sieic.mask);
+	return enter_pgmcheck(cpu, 0x0001);
+}
+
+static int handle_diag(struct vcpu *cpu)
+{
+	int code = (cpu->run->s390_sieic.ipb & 0x0fff0000) >> 16;
+
+	if ((code <= DIAG_MAX) && (diag_handlers[code]))
+		return diag_handlers[code](cpu);
+	log("cpu %d: unknown diagnose %x at addr %lx, sending prog 1",
+	     cpu->cpuno, code, cpu->run->s390_sieic.addr);
+	return enter_pgmcheck(cpu, 0x0001);
+}
+
+static const intercept_handler_t instruction_handlers[256] = {
+	[OPCODE_MAJOR_PRIV] = handle_priv,
+	[OPCODE_MAJOR_DIAG] = handle_diag,
+	[OPCODE_MAJOR_SIGP] = handle_sigp,
+};
+
+int handle_instruction(struct vcpu *cpu)
+{
+	unsigned int ipa0 = (cpu->run->s390_sieic.ipa & 0xff00) >> 8;
+
+	if (instruction_handlers[ipa0])
+		return instruction_handlers[ipa0](cpu);
+	log("cpu %d: unknown instruction %x at addr %lx, sending prog 1",
+	     cpu->cpuno, cpu->run->s390_sieic.ipa, cpu->run->s390_sieic.addr);
+	return enter_pgmcheck(cpu, 0x0001);
+}
diff -ruN empty/core/intercept.c kuli/core/intercept.c
--- empty/core/intercept.c	1970-01-01 01:00:00.000000000 +0100
+++ kuli/core/intercept.c	2008-06-03 17:22:49.000000000 +0200
@@ -0,0 +1,91 @@
+/*
+ * sie intercept handling
+ * Copyright IBM Corp. 2007,2008
+ * Author: Carsten Otte <cotte@de.ibm.com>
+ *         Christian Borntraeger <cborntra@de.ibm.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License(GPL)
+ */
+
+#include <linux/errno.h>
+#include <linux/kvm.h>
+#include <sys/time.h>
+
+#include <kuli.h>
+#include <vcpu.h>
+
+static int handle_should_not_happen(struct vcpu *cpu)
+{
+	log("PANIC:cpu %d caught unexpected intercept. Magic number: 0x%x",
+	     cpu->cpuno, cpu->run->s390_sieic.icptcode);
+	return CPU_STATUS_PANIC;
+}
+
+static int handle_waitpsw(struct vcpu *cpu)
+{
+	if (cpu->run->s390_sieic.mask & 0x4300000000000000ul)
+		report_it("received enabled wait in userland\n");
+
+	log("cpu %d: entered disabled wait PSW at %lx",
+	    cpu->cpuno, cpu->run->s390_sieic.addr);
+	return CPU_STATUS_STOPPED;
+}
+
+static int handle_software_intercept(struct vcpu *cpu)
+{
+	return 0;
+}
+
+static int handle_cpu_stop(struct vcpu *cpu)
+{
+	log("cpu %d: stopped at %lx", cpu->cpuno, cpu->run->s390_sieic.addr);
+	return CPU_STATUS_STOPPED;
+}
+
+static int handle_io_instruction(struct vcpu *cpu)
+{
+	setcc(cpu, 3);
+	return 0;
+}
+
+static const intercept_handler_t intercept_funcs[] = {
+	handle_should_not_happen,      /* 0x00 */
+	handle_instruction,            /* 0x04 */
+	handle_should_not_happen,      /* 0x08 */
+	handle_should_not_happen,      /* 0x0C */
+	handle_should_not_happen,      /* 0x10 */
+	handle_should_not_happen,      /* 0x14 */
+	handle_should_not_happen,      /* 0x18 */
+	handle_waitpsw,                /* 0x1C */
+	handle_should_not_happen,      /* 0x20 */
+	handle_software_intercept,     /* 0x24 */
+	handle_cpu_stop,               /* 0x28 */
+	handle_should_not_happen,      /* 0x2C */
+	handle_should_not_happen,      /* 0x30 */
+	handle_should_not_happen,      /* 0x34 */
+	handle_should_not_happen,      /* 0x38 */
+	handle_should_not_happen,      /* 0x3C */
+	handle_io_instruction,         /* 0x40 */
+	handle_should_not_happen,      /* 0x44 */
+	handle_should_not_happen       /* 0x48 */
+};
+
+int handle_intercept(struct vcpu *cpu)
+{
+	if ((cpu->run->s390_sieic.icptcode & 3)
+	  || (cpu->run->s390_sieic.icptcode > 0x48))
+		return handle_should_not_happen(cpu);
+	return intercept_funcs[(cpu->run->s390_sieic.icptcode)>>2](cpu);
+}
+
+int enter_pgmcheck(struct vcpu *cpu, uint16_t code)
+{
+	struct kvm_s390_interrupt kvmint;
+
+	kvmint.type = KVM_S390_PROGRAM_INT;
+	kvmint.parm = code;
+	kvm_s390_interrupt(cpu, &kvmint);
+	log("cpu: %d: sending program check %x",
+			cpu->cpuno, code);
+	return 0;
+}
diff -ruN empty/core/Makefile kuli/core/Makefile
--- empty/core/Makefile	1970-01-01 01:00:00.000000000 +0100
+++ kuli/core/Makefile	2008-06-05 14:10:45.000000000 +0200
@@ -0,0 +1,12 @@
+# core makefile
+# Copyright IBM Corp. 2007,2008
+# Author: Carsten Otte <cotte@de.ibm.com>
+# This file is licensed under the terms of the GNU General Public License(GPL)
+
+%.o : %.c ; echo "	Compiling	" $<; $(CC) $(CFLAGS) -c $<
+
+OBJS := cpu.o instruction.o intercept.o reset.o sclp.o
+
+all: $(OBJS)
+clean:
+	rm -f *.o
diff -ruN empty/core/reset.c kuli/core/reset.c
--- empty/core/reset.c	1970-01-01 01:00:00.000000000 +0100
+++ kuli/core/reset.c	2008-06-04 10:07:54.000000000 +0200
@@ -0,0 +1,86 @@
+/*
+ * reboot/reset related functions
+ * Copyright (C) IBM Corp. 2007,2008
+ * Author: Christian Borntraeger <cborntra@de.ibm.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License(GPL)
+ */
+
+#include <kuli.h>
+#include <linux/ptrace.h>
+#include <linux/kvm.h>
+
+#include <string.h>
+#include <list.h>
+#include <vcpu.h>
+
+static struct list reset_handlers = {&reset_handlers, &reset_handlers};
+
+/*
+ * Erases the guest memory
+ */
+static void reset_mem(void)
+{
+	memset(glo_origin, 0, glo_memsize); /* length is the size, not base+size */
+}
+
+/*
+ * Calls all reset callbacks
+ */
+static int reset_callbacks(void)
+{
+	struct reset_call_register *reset;
+	int rc = 0;
+
+	list_iterate(reset, &reset_handlers, head) {
+		rc = reset->handler();
+		if (rc)
+			break;
+	}
+	return rc;
+}
+
+int handle_reset(struct vcpu *cpu)
+{
+	psw_t ipl_psw;
+	struct kvm_regs regs;
+
+	if (cpu->run->s390_reset_flags & KVM_S390_RESET_SUBSYSTEM) {
+		reset_callbacks();
+		cpu->run->s390_reset_flags &= ~KVM_S390_RESET_SUBSYSTEM;
+	}
+
+	if (cpu->run->s390_reset_flags & KVM_S390_RESET_CLEAR) {
+		reset_mem();
+		cpu->run->s390_reset_flags &= ~KVM_S390_RESET_CLEAR;
+	}
+
+	if (cpu->run->s390_reset_flags & KVM_S390_RESET_CPU_INIT) {
+		kvm_s390_initial_reset(cpu);
+		cpu->run->s390_reset_flags &= ~KVM_S390_RESET_CPU_INIT;
+		memset(regs.gprs, 0, 16 * sizeof(unsigned long));
+		kvm_set_regs(cpu, &regs);
+	}
+
+	if (cpu->run->s390_reset_flags & KVM_S390_RESET_IPL) {
+		ipl_psw.mask = 0x0000000180000000UL;
+		ipl_psw.addr = load_ipl();
+		kvm_s390_set_initial_psw(cpu, ipl_psw);
+		cpu->run->s390_reset_flags &= ~KVM_S390_RESET_IPL;
+	}
+
+	if (cpu->run->s390_reset_flags)
+		report_it("unsupported reset flag %lx, exit",
+		   cpu->run->s390_reset_flags);
+
+	return 0;
+}
+
+/*
+ * registers a callback for reset
+ */
+void register_reset_handler(struct reset_call_register *reset)
+{
+	list_add(&reset->head, &reset_handlers);
+}
+
diff -ruN empty/core/sclp.c kuli/core/sclp.c
--- empty/core/sclp.c	1970-01-01 01:00:00.000000000 +0100
+++ kuli/core/sclp.c	2008-06-05 14:10:45.000000000 +0200
@@ -0,0 +1,68 @@
+/*
+ * service call instruction interceptions
+ * Copyright IBM Corp. 2007,2008
+ * Author: Carsten Otte <cotte@de.ibm.com>
+ *         Christian Borntraeger <borntraeger@de.ibm.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License(GPL)
+ */
+
+#include <linux/kvm.h>
+
+#include <kuli.h>
+#include <vcpu.h>
+
+struct sccb_scp_read_info {
+	uint16_t	length;
+	uint8_t		function_code;
+	uint8_t		control_mask[3];
+	uint16_t	response_code;
+	uint16_t	mem_code;
+	uint8_t		increment;
+} __attribute__((packed));
+
+int scp_read_info(struct vcpu *cpu, uint32_t _sccb)
+{
+	struct sccb_scp_read_info *sccb = (void *)(_sccb + glo_origin);
+	struct kvm_s390_interrupt kvmint;
+
+	sccb->mem_code = glo_memsize>>20;
+	sccb->increment = 1;
+	sccb->response_code = 0x10;
+	setcc(cpu, 0);
+
+	kvmint.type = KVM_S390_INT_SERVICE;
+	kvmint.parm = _sccb & ~3;
+	kvm_s390_interrupt(NULL, &kvmint);
+	return 0;
+}
+
+int sclp_service_call(struct vcpu *cpu)
+{
+	struct kvm_regs regs;
+	uint64_t sccb;
+	uint64_t code;
+	uint16_t ipbh0 = (cpu->run->s390_sieic.ipb & 0xffff0000) >> 16;
+
+	kvm_get_regs(cpu, &regs);
+	sccb  = regs.gprs[ipbh0 & 0xf];
+	code = regs.gprs[(ipbh0 & 0xf0) >> 4];
+
+	// FIXME: check_region(cpu, addr, addr + length - 1);
+	if (sccb & ~0x7ffffff8ul) {
+		log("cpu %d: invalid sccb address 0x%lx", cpu->cpuno, sccb);
+		goto out;
+	}
+	switch(code) {
+	case 0x00020001:
+	case 0x00120001:
+		return scp_read_info(cpu, sccb);
+	default:
+		log("cpu %d: unknown sclp service call 0x%lx, sccb 0x%lx,"
+		    "addr 0x%lx", cpu->cpuno, code, sccb,
+		    cpu->run->s390_sieic.addr);
+	}
+out:
+	setcc(cpu, 3);
+	return 0;
+}
diff -ruN empty/drivers/devices.c kuli/drivers/devices.c
--- empty/drivers/devices.c	1970-01-01 01:00:00.000000000 +0100
+++ kuli/drivers/devices.c	2008-06-04 13:29:17.000000000 +0200
@@ -0,0 +1,11 @@
+#include <kuli.h>
+
+void init_devices(void)
+{
+	virtio_init();
+	virtio_console_init();
+	virtio_rng_init();
+	virtio_block_init();
+	virtio_net_init();
+}
+
diff -ruN empty/drivers/Makefile kuli/drivers/Makefile
--- empty/drivers/Makefile	1970-01-01 01:00:00.000000000 +0100
+++ kuli/drivers/Makefile	2008-06-04 13:29:17.000000000 +0200
@@ -0,0 +1,14 @@
+# diag makefile
+# Copyright IBM Corp. 2007,2008
+# Author: Carsten Otte <cotte@de.ibm.com>
+# This file is licensed under the terms of the GNU General Public License(GPL)
+
+%.o : %.c ; echo "	Compiling	" $<; $(CC) $(CFLAGS) -c $<
+
+OBJS := devices.o virtio.o virtio_net.o virtio_blk.o virtio_rng.o virtio_console.o
+
+all: $(OBJS)
+clean:
+	rm -f *.o
+
+.PHONY: clean all
diff -ruN empty/drivers/virtio_blk.c kuli/drivers/virtio_blk.c
--- empty/drivers/virtio_blk.c	1970-01-01 01:00:00.000000000 +0100
+++ kuli/drivers/virtio_blk.c	2008-06-04 10:07:54.000000000 +0200
@@ -0,0 +1,228 @@
+/*
+ * virtio, based on Rusty Russel's descriptor based approach
+ * Copyright IBM Corp. 2007,2008
+ * Authors: Martin Peschke <mp3@de.ibm.com>
+ *          Christian Borntraeger <borntraeger@de.ibm.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License(GPL)
+ */
+
+#include <fcntl.h>
+#include <unistd.h>
+#include <signal.h>
+#include <pthread.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <kuli.h>
+#include <list.h>
+#include <vcpu.h>
+
+#include <virtio.h>
+#include <linux/virtio_ring.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_blk.h>
+
+static struct list vblk_list = EMPTY_LIST(vblk_list);
+
+/* This hangs off device->priv. */
+struct vblk_info {
+	struct list head;
+
+	char *filename;
+	int fd;
+
+	/* The size of the file. */
+	off64_t len;
+
+	/* service thread */
+	pthread_t thread;
+	pthread_cond_t cond;
+	pthread_mutex_t mutex;
+};
+
+
+static int service_io(struct device *dev)
+{
+	struct vblk_info *vblk = dev->priv;
+	unsigned int head, out_num, in_num, wlen;
+	int ret;
+	__u8 *in;
+	struct virtio_blk_outhdr *out;
+	struct iovec iov[dev->vq->vring.num];
+	off64_t off;
+
+	/* See if there's a request waiting.  If not, nothing to do. */
+	head = get_vq_desc(dev->vq, iov, &out_num, &in_num);
+	if (head == dev->vq->vring.num)
+		return 0;
+
+	if (out_num == 0 || in_num == 0)
+		errx(1, "Bad virtblk cmd %u out=%u in=%u",
+		     head, out_num, in_num);
+
+	out = convert(&iov[0], struct virtio_blk_outhdr);
+	in = convert(&iov[out_num+in_num-1], __u8);
+	off = out->sector * 512;
+
+	if (out->type & VIRTIO_BLK_T_BARRIER)
+		fdatasync(vblk->fd);
+
+	if (out->type & VIRTIO_BLK_T_SCSI_CMD) {
+		fprintf(stderr, "Scsi commands unsupported\n");
+		*in = VIRTIO_BLK_S_UNSUPP;
+		wlen = sizeof(*in);
+	} else if (out->type & VIRTIO_BLK_T_OUT) {
+		/* Write */
+
+		if (lseek64(vblk->fd, off, SEEK_SET) != off)
+			err(1, "Bad seek to sector %llu",
+				(unsigned long long) out->sector);
+
+		ret = writev(vblk->fd, iov+1, out_num-1);
+
+		if (ret > 0 && off + ret > vblk->len) {
+			ftruncate64(vblk->fd, vblk->len);
+			errx(1, "Write past end %llu+%u",
+				(unsigned long long)off, ret);
+		}
+		wlen = sizeof(*in);
+		*in = (ret >= 0 ? VIRTIO_BLK_S_OK : VIRTIO_BLK_S_IOERR);
+	} else {
+		/* Read */
+		if (lseek64(vblk->fd, off, SEEK_SET) != off)
+			err(1, "Bad seek to sector %llu", (unsigned long long) out->sector);
+
+		ret = readv(vblk->fd, iov+1, in_num-1);
+		if (ret >= 0) {
+			wlen = sizeof(*in) + ret;
+			*in = VIRTIO_BLK_S_OK;
+		} else {
+			wlen = sizeof(*in);
+			*in = VIRTIO_BLK_S_IOERR;
+		}
+	}
+
+	add_used(dev->vq, head, wlen);
+	return 1;
+}
+
+/* This is the thread which actually services the I/O. */
+static void *blk_io_thread(void *_dev)
+{
+	struct device *dev = _dev;
+	struct vblk_info *vblk = dev->priv;
+	sigset_t sigs;
+
+	/* block signals: SIGINT, SIGUSR1 */
+	sigemptyset(&sigs);
+	sigaddset(&sigs, SIGINT);
+	sigaddset(&sigs, SIGUSR1);
+	sigprocmask(SIG_BLOCK, &sigs, NULL);
+
+	while (!glo_stopcpus) {
+		while (service_io(dev)) {
+			/* It did some work, so trigger the irq. */
+			trigger_irq(dev->vq);
+		}
+		pthread_testcancel();
+		pthread_mutex_lock(&vblk->mutex);
+		/* See if there's was a request coming in, otherwise sleep */
+		if (dev->vq->vring.avail->idx == dev->vq->last_avail_idx)
+			pthread_cond_wait(&vblk->cond, &vblk->mutex);
+		pthread_mutex_unlock(&vblk->mutex);
+		pthread_testcancel();
+	}
+	pthread_exit(NULL);
+	return NULL;
+}
+
+/* When the Guest submits some I/O, we just need to wake the I/O thread. */
+static void handle_virtblk_output(struct virtqueue *vq)
+{
+	struct vblk_info *vblk = vq->dev->priv;
+	int ret;
+
+	pthread_mutex_lock(&vblk->mutex);
+	ret = pthread_cond_signal(&vblk->cond);
+	pthread_mutex_unlock(&vblk->mutex);
+	if (ret)
+		report_it("could not signal I/O thread, error: %d", ret);
+}
+
+
+static int blk_handle_activate(struct device *dev)
+{
+	return 0;
+}
+
+static void virtio_blk_setup(struct vblk_info *vblk)
+{
+	struct device *dev;
+	struct virtio_blk_config conf;
+	struct stat stat;
+
+	dev = new_device(VIRTIO_ID_BLOCK);
+	dev->activate = blk_handle_activate;
+
+	add_virtqueue(dev, VIRTQUEUE_NUM, handle_virtblk_output);
+
+	dev->priv = vblk;
+
+	vblk->fd = open(vblk->filename, O_RDWR|O_LARGEFILE);
+	if (vblk->fd == -1) {
+		log("trying to open %s readonly", vblk->filename);
+		vblk->fd = open(vblk->filename, O_RDONLY | O_LARGEFILE);
+		add_feature(dev, VIRTIO_BLK_F_RO);
+	}
+	if (vblk->fd == -1)
+		err(1, "Failed to open block device %s", vblk->filename);
+
+	vblk->len = lseek64(vblk->fd, 0, SEEK_END);
+
+	fstat(vblk->fd, &stat);
+	add_feature(dev, VIRTIO_BLK_F_BARRIER);
+	conf.capacity = vblk->len / 512;
+	add_feature(dev, VIRTIO_BLK_F_SEG_MAX);
+	add_feature(dev, VIRTIO_BLK_F_BLK_SIZE);
+	conf.seg_max = VIRTQUEUE_NUM - 2;
+	conf.blk_size = stat.st_blksize;
+
+	set_config(dev, sizeof(conf), &conf);
+
+	pthread_cond_init(&vblk->cond, NULL);
+	pthread_mutex_init(&vblk->mutex, NULL);
+	if (pthread_create(&vblk->thread, NULL, blk_io_thread, dev))
+		err(1, "Creating clone for blk");
+}
+
+
+
+/* Add new device by name*/
+void virtio_block_add(char *parm)
+{
+	struct vblk_info *vblk;
+
+	char filename[255];
+
+	if (sscanf(parm, " file = %254s ", filename) != 1) {
+		printf("Error in %s. Use -drive file=<xxx>\n", parm);
+		exit(1);
+	}
+
+	vblk = malloc(sizeof(*vblk));
+	vblk->filename = malloc(strlen(filename) + 1);
+	strcpy(vblk->filename, filename);
+	list_add_end(&vblk->head, &vblk_list);
+
+}
+
+/* initialize all devices */
+void virtio_block_init(void)
+{
+	struct vblk_info *vblk;
+
+	list_iterate(vblk, &vblk_list, head)
+		virtio_blk_setup(vblk);
+}
+
diff -ruN empty/drivers/virtio.c kuli/drivers/virtio.c
--- empty/drivers/virtio.c	1970-01-01 01:00:00.000000000 +0100
+++ kuli/drivers/virtio.c	2008-06-04 13:29:17.000000000 +0200
@@ -0,0 +1,400 @@
+/*
+ * virtio, based on Rusty Russel's descriptor based approach
+ * Copyright IBM Corp. 2007, 2008
+ * Authors: Martin Peschke <mp3@de.ibm.com>
+ *          Christian Borntraeger <borntraeger@de.ibm.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License(GPL)
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <limits.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+
+#include <linux/virtio_ring.h>
+#include <linux/virtio_config.h>
+#include <linux/kvm.h>
+#include <asm/kvm_virtio.h>
+
+#include <virtio.h>
+
+/* The list of Guest devices, based on command line arguments. */
+static struct list devices;
+static __u8 *descriptors;
+
+static unsigned long guest_limit;
+static unsigned long guest_max;
+
+/* Translate a guest physical address into our address space.  With
+ * glo_origin == 0 guest physical currently equals user virtual. */
+static inline void *from_guest_phys(unsigned long addr)
+{
+	return (void *) glo_origin + addr;
+}
+
+/* Inverse mapping: user virtual address back to guest physical. */
+static inline unsigned long to_guest_phys(const void *addr)
+{
+	return (addr - (void *) glo_origin);
+}
+
+/* The device virtqueue descriptors are followed by feature bitmasks. */
+static __u8 *device_features(struct device *dev)
+{
+	return (__u8 *)(dev->desc + 1)
+		+ dev->desc->num_vq * sizeof(struct kvm_vqconfig);
+}
+
+/* Config space sits after both feature bitmasks: the host features and
+ * the guest acknowledgment copy, hence feature_len * 2. */
+static __u8 *device_configspace(const struct device *dev)
+{
+	return (void *)(dev->desc + 1)
+		+ dev->desc->num_vq * sizeof(struct kvm_vqconfig)
+		+ dev->desc->feature_len * 2;
+}
+
+/* Return the index of the descriptor chained after i, or vq->vring.num
+ * (an invalid index) when i ends the chain. */
+static unsigned next_desc(struct virtqueue *vq, unsigned int i)
+{
+	unsigned int next;
+
+	if (!(vq->vring.desc[i].flags & VRING_DESC_F_NEXT))
+		return vq->vring.num;
+
+	next = vq->vring.desc[i].next;
+	/* Make sure compiler knows to grab that: we don't want it changing! */
+	wmb();
+
+	/* Guest-controlled value: validate before use. */
+	if (next >= vq->vring.num)
+		errx(1, "Desc next is %u", next);
+
+	return next;
+}
+
+/* This looks in the virtqueue and for the first available buffer, and converts
+ * it to an iovec for convenient access.  Since descriptors consist of some
+ * number of output then some number of input descriptors, it's actually two
+ * iovecs, but we pack them into one and note how many of each there were.
+ *
+ * This function returns the descriptor number found, or vq->vring.num (which
+ * is never a valid descriptor number) if none was found. */
+unsigned get_vq_desc(struct virtqueue *vq,
+			    struct iovec iov[],
+			    unsigned int *out_num, unsigned int *in_num)
+{
+	unsigned int i, head;
+
+	/* Catch the Guest moving avail->idx backwards or ahead by more
+	 * than a full ring (16-bit modular arithmetic). */
+	if ((__u16)(vq->vring.avail->idx - vq->last_avail_idx) > vq->vring.num)
+		errx(1, "Guest moved used index from %u to %u",
+		     vq->last_avail_idx, vq->vring.avail->idx);
+
+	/* Nothing new since we last looked? */
+	if (vq->vring.avail->idx == vq->last_avail_idx)
+		return vq->vring.num;
+
+	head = vq->vring.avail->ring[vq->last_avail_idx++ % vq->vring.num];
+
+	if (head >= vq->vring.num)
+		errx(1, "Guest says index %u is available", head);
+
+	*out_num = *in_num = 0;
+
+	i = head;
+	do {
+		/* Refuse descriptors pointing outside guest memory. */
+		if (vq->vring.desc[i].addr > guest_limit ||
+			vq->vring.desc[i].addr + vq->vring.desc[i].len
+			> guest_limit) {
+			errx(1, "invalid descriptor %d addr: %lX len: %X",
+				i, vq->vring.desc[i].addr,
+				vq->vring.desc[i].len);
+		}
+		iov[*out_num + *in_num].iov_len = vq->vring.desc[i].len;
+		iov[*out_num + *in_num].iov_base = from_guest_phys(vq->vring.
+								desc[i].addr);
+
+		/* Writable descriptors (input to us) must follow the
+		 * read-only (output) ones. */
+		if (vq->vring.desc[i].flags & VRING_DESC_F_WRITE)
+			(*in_num)++;
+		else {
+			if (*in_num)
+				errx(1, "Descriptor has out after in");
+			(*out_num)++;
+		}
+
+		/* If we've got too many, that implies a descriptor loop. */
+		if (*out_num + *in_num > vq->vring.num)
+			errx(1, "Looped descriptor");
+	} while ((i = next_desc(vq, i)) != vq->vring.num);
+
+	return head;
+}
+
+/* Put a finished buffer on the used ring; the barrier keeps the entry
+ * write ordered before the index update that the Guest polls. */
+void add_used(struct virtqueue *vq, unsigned int head, int len)
+{
+	struct vring_used_elem *used;
+
+	used = &vq->vring.used->ring[vq->vring.used->idx % vq->vring.num];
+	used->id = head;
+	used->len = len;
+	wmb();
+	vq->vring.used->idx++;
+}
+
+/* Inject the virtio external interrupt for this virtqueue into the
+ * guest, unless the Guest asked not to be interrupted. */
+void trigger_irq(struct virtqueue *vq)
+{
+	struct kvm_s390_interrupt kvmint = {
+		.type	= KVM_S390_INT_VIRTIO,
+		.parm	= 0,
+		.parm64	= vq->config->token,
+	};
+
+	/* Guest suppressed notifications for this queue. */
+	if (vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)
+		return;
+
+	kvm_s390_interrupt(NULL, &kvmint);
+}
+
+/* Convenience wrapper: complete a buffer and notify the Guest. */
+void add_used_and_trigger(struct virtqueue *vq,
+				 unsigned int head, int len)
+{
+	add_used(vq, head, len);
+	trigger_irq(vq);
+}
+
+/* Get some more pages for a device: map them directly above the current
+ * guest limit and grow the limit.  Allocation failure is fatal - no
+ * caller checks the result, so returning NULL would just defer a crash. */
+void *get_pages(unsigned int num)
+{
+	void *addr = (void *) guest_limit;
+	size_t len = (size_t) num * getpagesize();
+
+	/* Device pages are capped at guest_max (DEVICE_PAGES). */
+	if (guest_max && guest_limit + len > guest_max)
+		errx(1, "out of device pages (%u requested)", num);
+	if (MAP_FAILED == mmap(addr, len,
+			       PROT_READ | PROT_WRITE | PROT_EXEC,
+			       MAP_FIXED | MAP_SHARED | MAP_ANONYMOUS,
+			       -1, 0)) {
+		log("Failed to allocate memory");
+		err(1, "mapping %u device pages at %p", num, addr);
+	}
+	log("map guest memory at %p (pfn: %lu)", addr,
+		((unsigned long) addr) / getpagesize());
+	guest_limit += len;
+	return addr;
+}
+
+/* Put one device back into its post-boot state: run its deactivate
+ * callback (if set), clear the status byte, the guest feature
+ * acknowledgments and every virtqueue ring. */
+static void __reset_device(struct device *dev)
+{
+	struct virtqueue *vq;
+
+	if (dev->deactivate)
+		dev->deactivate(dev);
+	dev->desc->status = 0;
+
+	/* Second bitmask after the host features = guest acks; wipe it. */
+	memset(device_features(dev) + dev->desc->feature_len, 0,
+	       dev->desc->feature_len);
+
+	for (vq = dev->vq; vq; vq = vq->next) {
+		memset(vq->vring.desc, 0,
+		       vring_size(vq->vring.num, getpagesize()));
+		vq->last_avail_idx = 0;
+	}
+}
+
+/* Reset the device whose descriptor lives at guest address addr. */
+static void virtio_reset_device(unsigned long addr)
+{
+	struct device *dev;
+	void *desc = from_guest_phys(addr);
+
+	list_iterate(dev, &devices, head) {
+		if ((void *) dev->desc != desc)
+			continue;
+		__reset_device(dev);
+		break;
+	}
+}
+
+/* This is the generic routine we call when the Guest uses LHCALL_NOTIFY. */
+static void virtio_handle_notify(unsigned long addr)
+{
+	struct device *dev;
+	struct virtqueue *vq;
+
+	/* A notify on a known virtqueue address kicks its handler. */
+	list_iterate(dev, &devices, head) {
+		for (vq = dev->vq; vq; vq = vq->next) {
+			if (vq->config->address != addr)
+				continue;
+
+			/* Driver must set a status byte before using queues. */
+			if (dev->desc->status == 0) {
+				warnx("%p gave early output", dev->desc);
+				return;
+			}
+
+			if (vq->handle_output)
+				vq->handle_output(vq);
+			return;
+		}
+	}
+	/* Early console write is done using notify on a nul-terminated string
+	 * in Guest memory. */
+	if (addr >= guest_limit)
+		errx(1, "Bad NOTIFY %#lx", addr);
+
+	/* NOTE(review): write() result is ignored - best effort only. */
+	write(STDOUT_FILENO, from_guest_phys(addr),
+	      strnlen(from_guest_phys(addr), guest_limit - addr));
+
+}
+
+/* Guest wrote a device status byte: run the activate callback once the
+ * driver bit is set.
+ * NOTE(review): returns positive ENODEV (not -ENODEV) when no device
+ * matches, and the caller currently discards the result - confirm. */
+static int virtio_handle_set_status(unsigned long addr)
+{
+	struct device *dev;
+
+	list_iterate(dev, &devices, head) {
+		if (from_guest_phys(addr) == dev->desc &&
+		    dev->desc->status & VIRTIO_CONFIG_S_DRIVER)
+			return dev->activate(dev);
+	}
+	return ENODEV;
+}
+
+/* This routine allocates a new "struct kvm_device_desc" from descriptor
+ * table page just above the Guest's normal memory.  It returns a pointer to
+ * that descriptor. */
+struct kvm_device_desc *new_dev_desc(__u16 type)
+{
+
+	struct kvm_device_desc d = { .type = type };
+	void *p;
+
+	/* Descriptors are packed back to back; the most recently added
+	 * device sits at the list head, so the new descriptor goes right
+	 * after its config space. */
+	if (list_is_empty(&devices))
+		p = descriptors;
+	else {
+		struct device *dev;
+		dev = list_get(devices.next, struct device, head);
+		p = device_configspace(dev) + dev->desc->config_len;
+	}
+	if (p + sizeof(d) > (void *) descriptors + getpagesize())
+		errx(1, "Too many devices");
+
+	return memcpy(p, &d, sizeof(d));
+}
+
+/* Each device descriptor is followed by the description of its virtqueues.  We
+ * specify how many descriptors the virtqueue is to have.  Allocates the
+ * page-aligned ring in guest-visible memory and fills in the kvm_vqconfig. */
+void add_virtqueue(struct device *dev, unsigned int num_descs,
+			  void (*handle_output)(struct virtqueue *me))
+{
+	unsigned int pages;
+	struct virtqueue **i, *vq = malloc(sizeof(*vq));
+	void *p;
+
+	if (!vq)
+		err(1, "allocating virtqueue");
+
+	pages = (vring_size(num_descs, getpagesize()) + getpagesize() - 1)
+		/ getpagesize();
+	p = get_pages(pages);
+	if (!p)
+		errx(1, "allocating %u pages for virtqueue ring", pages);
+
+	vq->next = NULL;
+	vq->last_avail_idx = 0;
+	vq->dev = dev;
+
+	vring_init(&vq->vring, num_descs, p, getpagesize());
+
+	/* Queues must be added before features and config space exist,
+	 * otherwise the kvm_vqconfig slot would land inside them. */
+	assert(dev->desc->config_len == 0 && dev->desc->feature_len == 0);
+
+	vq->config = (struct kvm_vqconfig *) device_configspace(dev);
+	vq->config->num = num_descs;
+	vq->config->address = to_guest_phys(p);
+
+	dev->desc->num_vq++;
+
+	/* Link the new queue at the end of the device's list. */
+	for (i = &dev->vq; *i; i = &(*i)->next)
+		;
+	*i = vq;
+
+	vq->handle_output = handle_output;
+
+	/* Queues without an output handler never want a notify. */
+	if (!handle_output)
+		vq->vring.used->flags = VRING_USED_F_NO_NOTIFY;
+}
+
+/* Set a host feature bit in the device's feature bitmask, growing
+ * feature_len when needed (must happen before set_config()). */
+void add_feature(struct device *dev, unsigned bit)
+{
+	if (dev->desc->feature_len <= bit / CHAR_BIT) {
+		/* Growing the bitmask would shift config space, so it
+		 * must still be empty. */
+		assert(dev->desc->config_len == 0);
+		dev->desc->feature_len = (bit / CHAR_BIT) + 1;
+	}
+
+	/* Use the helper instead of duplicating its layout arithmetic. */
+	device_features(dev)[bit / CHAR_BIT] |= (1 << (bit % CHAR_BIT));
+}
+
+/* This routine sets the configuration fields for an existing device's
+ * descriptor.  It only works for the last device, but that's OK because that's
+ * how we use it. */
+void set_config(struct device *dev, unsigned len, const void *conf)
+{
+	/* All descriptors share one page; refuse to overflow it.  (The old
+	 * "Too many devices" message was misleading here.) */
+	if (device_configspace(dev) + len > descriptors + getpagesize())
+		errx(1, "Config space of %u bytes overflows descriptor page",
+		     len);
+
+	memcpy(device_configspace(dev), conf, len);
+	dev->desc->config_len = len;
+}
+
+/* This routine does all the creation and setup of a new device, including
+ * calling new_dev_desc() to allocate the descriptor and device memory.
+ * The structure is zero-initialized: __reset_device() calls dev->deactivate
+ * whenever it is non-NULL, so callbacks a device never sets must be NULL
+ * rather than malloc garbage. */
+struct device *new_device(__u16 type)
+{
+	struct device *dev = calloc(1, sizeof(*dev));
+
+	if (!dev)
+		err(1, "allocating device");
+	dev->desc = new_dev_desc(type);
+	dev->vq = NULL;
+
+	list_add(&dev->head, &devices);
+	return dev;
+}
+
+/*
+ * Entry point for the kvm hypercall: subcode in gpr1, parameter (a guest
+ * physical address) in gpr2, result returned in gpr2.
+ */
+int handle_kvm_hypercall(struct vcpu *cpu)
+{
+	unsigned long mem;
+	struct kvm_regs regs;
+
+	kvm_get_regs(cpu, &regs);
+	mem = regs.gprs[2] + glo_origin;
+	switch (regs.gprs[1]) {
+	case KVM_S390_VIRTIO_RESET:
+		virtio_reset_device(mem);
+		regs.gprs[2] = 0;
+		break;
+	case KVM_S390_VIRTIO_NOTIFY:
+		virtio_handle_notify(mem);
+		regs.gprs[2] = 0;
+		break;
+	case KVM_S390_VIRTIO_SET_STATUS:
+		/* NOTE(review): set_status result is discarded; the guest
+		 * always sees success here - confirm this is intended. */
+		virtio_handle_set_status(mem);
+		regs.gprs[2] = 0;
+		break;
+	default:
+		regs.gprs[2] = -EINVAL;
+		break;
+	}
+	kvm_set_regs(cpu, &regs);
+	return 0;
+}
+
+/* System reset: put every registered virtio device back to clean state. */
+static int virtio_reset_handler(void)
+{
+	struct device *cur;
+
+	list_iterate(cur, &devices, head)
+		__reset_device(cur);
+	return 0;
+}
+
+/* Hook device resets into the machine-wide reset mechanism. */
+static struct reset_call_register virtio_reset_register = {
+	.handler = virtio_reset_handler,
+};
+
+/* Set up virtio: device pages live directly above guest memory, and the
+ * first such page holds all device descriptors. */
+void virtio_init(void)
+{
+	guest_limit = glo_memsize;
+	guest_max = glo_memsize + DEVICE_PAGES*getpagesize();
+	list_init(&devices);
+	descriptors = get_pages(1);
+	register_reset_handler(&virtio_reset_register);
+}
diff -ruN empty/drivers/virtio_console.c kuli/drivers/virtio_console.c
--- empty/drivers/virtio_console.c	1970-01-01 01:00:00.000000000 +0100
+++ kuli/drivers/virtio_console.c	2008-06-04 13:29:17.000000000 +0200
@@ -0,0 +1,176 @@
+/*
+ * Copyright IBM Corp. 2008
+ * Authors: Christian Borntraeger <borntraeger@de.ibm.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License(GPL)
+ */
+#include <err.h>
+#include <stdlib.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <string.h>
+#include <termios.h>
+
+#include <kuli.h>
+#include <virtio.h>
+#include <linux/virtio_ring.h>
+#include <linux/virtio_console.h>
+
+/* State for the single console device. */
+struct vcon_info
+{
+	int fd;	/* tty file descriptor */
+
+	/* service thread */
+	pthread_t thread;
+	pthread_cond_t cond;
+	pthread_mutex_t mutex;
+
+};
+
+/* There is exactly one console; allocated in virtio_console_init(). */
+static struct vcon_info *vcon;
+
+/* Guest -> host console output: drain all available buffers and write
+ * them to the console fd. */
+static void handle_from_guest(struct virtqueue *vq)
+{
+	unsigned int head, out, in;
+	int len;
+	struct iovec iov[vq->vring.num];
+
+	/* Keep getting output buffers from the Guest until we run out. */
+	while ((head = get_vq_desc(vq, iov, &out, &in)) != vq->vring.num) {
+		if (in)
+			errx(1, "Input buffers in output queue?");
+retry:
+		len = writev(vcon->fd, iov, out);
+		if (len < 0) {
+			/* Don't hand a -1 length back to the Guest. */
+			if (errno == EINTR)
+				goto retry;
+			err(1, "writing console");
+		}
+		add_used_and_trigger(vq, head, len);
+	}
+}
+
+/* Wait for console input and hand one batch of it to the Guest.
+ * Returns 1 if a buffer was filled, 0 if the Guest had none queued. */
+static int vcon_work(struct device *dev)
+{
+	unsigned int head, in_num, out_num;
+	int len;
+	struct iovec iov[dev->vq->vring.num];
+	struct pollfd pfd;
+
+	/* wait until input arrives */
+	pfd.fd = vcon->fd;
+	pfd.events = POLLIN;
+	pfd.revents = 0;
+	poll(&pfd, 1, -1);
+
+	head = get_vq_desc(dev->vq, iov, &out_num, &in_num);
+	if (head == dev->vq->vring.num)
+		return 0;
+	else if (out_num)
+		errx(1, "Output buffers in console recv queue?");
+
+retry:
+	len = readv(vcon->fd, iov, in_num);
+	if (len < 0) {
+		if (errno == EINTR)
+			goto retry;
+		err(1, "reading console");
+	}
+
+	/* Tell the Guest about the new input. */
+	add_used_and_trigger(dev->vq, head, len);
+
+	/* All good. */
+	return 1;
+}
+
+
+/* This is the thread which actually services the I/O. */
+static void *vcon_io_thread(void *_dev)
+{
+	struct device *dev = _dev;
+
+	while (!glo_stopcpus) {
+		/* Sleep until handle_to_guest() signals new input buffers. */
+		pthread_mutex_lock(&vcon->mutex);
+		if (dev->vq->vring.avail->idx == dev->vq->last_avail_idx)
+			pthread_cond_wait(&vcon->cond, &vcon->mutex);
+		pthread_mutex_unlock(&vcon->mutex);
+		while (vcon_work(dev))
+			;
+		pthread_testcancel();
+	}
+	pthread_exit(NULL);
+	return NULL;
+}
+
+
+/* The Guest queued input buffers: wake the console I/O thread. */
+static void handle_to_guest(struct virtqueue *vq)
+{
+	int rc;
+
+	(void) vq;	/* single console; state lives in vcon */
+	pthread_mutex_lock(&vcon->mutex);
+	rc = pthread_cond_signal(&vcon->cond);
+	pthread_mutex_unlock(&vcon->mutex);
+	if (rc)
+		report_it("could not signal I/O thread, error: %d", rc);
+}
+
+/* Guest driver came up: start the console I/O thread exactly once.
+ * NOTE(review): the vcon->thread check is only safe if vcon was
+ * zero-initialized at allocation - confirm. */
+static int console_handle_activate(struct device *dev)
+{
+	if (vcon->thread)
+		return 0;
+	if (pthread_create(&vcon->thread, NULL, vcon_io_thread, dev))
+		err(1, "Creating clone for console failed");
+	return 0;
+}
+
+/* Saved tty settings, restored via atexit(). */
+static struct termios oldterm;
+static void restore_oldterm(void)
+{
+	tcsetattr(vcon->fd, TCSANOW, &oldterm);
+}
+
+
+/* Put the tty into raw-ish mode (no signals, no canonical input, no
+ * echo); the previous settings are restored at process exit. */
+static void console_init(void)
+{
+	struct termios term;
+
+	if (tcgetattr(vcon->fd, &oldterm) != 0)
+		return;	/* not a tty we can configure */
+
+	term = oldterm;
+	/* disable signals like ^C */
+	term.c_lflag &= ~ISIG;
+	/* non-canonical mode (don't wait for end of line etc.) */
+	term.c_lflag &= ~ICANON;
+	/* disable echo, it's done in the guest */
+	term.c_lflag &= ~ECHO;
+	tcsetattr(vcon->fd, TCSANOW, &term);
+	/* restore old console on exit */
+	atexit(restore_oldterm);
+}
+
+
+/* Create the virtio console device on /dev/tty and set up its queues. */
+void virtio_console_init(void)
+{
+	struct device *dev;
+
+	/* calloc: console_handle_activate() tests vcon->thread, which
+	 * must start out zero. */
+	vcon = calloc(1, sizeof(*vcon));
+	if (!vcon)
+		err(1, "Error allocation console");
+
+	/* open() returns -1 on failure; fd 0 is perfectly valid. */
+	vcon->fd = open("/dev/tty", O_RDWR);
+	if (vcon->fd < 0)
+		err(1, "Failed to open console device /dev/tty");
+	dev = new_device(VIRTIO_ID_CONSOLE);
+	dev->priv = vcon;
+	dev->activate = console_handle_activate;
+
+	add_virtqueue(dev, VIRTQUEUE_NUM, handle_to_guest);
+	add_virtqueue(dev, VIRTQUEUE_NUM, handle_from_guest);
+
+	console_init();
+
+	/* prepare for I/O thread */
+	pthread_cond_init(&vcon->cond, NULL);
+	pthread_mutex_init(&vcon->mutex, NULL);
+}
+
diff -ruN empty/drivers/virtio_net.c kuli/drivers/virtio_net.c
--- empty/drivers/virtio_net.c	1970-01-01 01:00:00.000000000 +0100
+++ kuli/drivers/virtio_net.c	2008-06-03 14:25:17.000000000 +0200
@@ -0,0 +1,235 @@
+/*
+ * virtio, based on Rusty Russel's descriptor based approach
+ * Copyright IBM Corp. 2007,2008
+ * Authors: Christian Borntraeger <borntraeger@de.ibm.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License(GPL)
+ */
+
+#include <string.h>
+#include <pthread.h>
+#include <errno.h>
+#include <err.h>
+#include <fcntl.h>
+#include <netinet/in.h>
+#include <net/if.h>
+#include <linux/sockios.h>
+#include <unistd.h>
+#include <linux/if_tun.h>
+#include <signal.h>
+#include <sys/ioctl.h>
+
+#include <kuli.h>
+#include <list.h>
+#include <virtio.h>
+
+#include <linux/virtio_ring.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_net.h>
+
+/* All configured -net devices, filled by virtio_net_add(). */
+static struct list vnet_list = EMPTY_LIST(vnet_list);
+
+/* Per-device state for one tap-backed virtio net device. */
+struct vnet_info
+{
+	struct list head;
+
+	char *ifname;
+	int fd;	/* tap file descriptor */
+
+	/* service thread */
+	pthread_t thread;
+	pthread_cond_t cond;
+	pthread_mutex_t mutex;
+};
+
+/* Guest -> host: push packets from the xmit queue out through the tap. */
+static void handle_net_output(struct virtqueue *vq)
+{
+	unsigned int head, out, in;
+	int len;
+	struct iovec iov[vq->vring.num];
+	struct vnet_info *vnet = vq->dev->priv;
+
+	while ((head = get_vq_desc(vq, iov, &out, &in)) != vq->vring.num) {
+		if (in)
+			errx(1, "Input buffers in output queue?");
+		/* Validate and skip the leading virtio_net_hdr: the tap
+		 * wants raw frames. */
+		(void)convert(&iov[0], struct virtio_net_hdr);
+retry:
+		len = writev(vnet->fd, iov+1, out-1);
+		if (len < 0) {
+			/* Don't hand a -1 length back to the Guest. */
+			if (errno == EINTR)
+				goto retry;
+			err(1, "writing network");
+		}
+		add_used_and_trigger(vq, head, len);
+	}
+}
+
+
+
+/* This is where we handle a packet coming in from the tun device to our
+ * Guest.  Returns 1 if a packet was delivered, 0 if no buffer was queued. */
+static int work_tun(struct device *dev)
+{
+	unsigned int head, in_num, out_num;
+	int len;
+	struct iovec iov[dev->vq->vring.num];
+	struct virtio_net_hdr *hdr;
+	struct vnet_info *vnet = dev->priv;
+
+	head = get_vq_desc(dev->vq, iov, &out_num, &in_num);
+	if (head == dev->vq->vring.num)
+		return 0;
+	else if (out_num)
+		errx(1, "Output buffers in network recv queue?");
+
+	/* First iovec is the virtio net header; the payload follows. */
+	hdr = convert(&iov[0], struct virtio_net_hdr);
+	hdr->flags = 0;
+	hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE;
+
+retry:
+	len = readv(vnet->fd, iov+1, in_num-1);
+	if (len < 0) {
+		if (errno == EINTR)
+			goto retry;
+		err(1, "reading network");
+	}
+	/* readv() == 0 is EOF: errno is stale there, don't look at it. */
+	if (len == 0)
+		errx(1, "EOF reading network");
+
+	add_used_and_trigger(dev->vq, head, sizeof(*hdr) + len);
+
+	/* All good. */
+	return 1;
+}
+
+/* This is the thread which actually services the I/O. */
+static void *net_io_thread(void *_dev)
+{
+	struct device *dev = _dev;
+	sigset_t sigs;
+	struct vnet_info *vnet = dev->priv;
+
+	/* block signals: SIGINT, SIGUSR1 - they are handled elsewhere */
+	sigemptyset (&sigs);
+	sigaddset   (&sigs, SIGINT);
+	sigaddset   (&sigs, SIGUSR1);
+	sigprocmask (SIG_BLOCK, &sigs, NULL);
+
+	while (!glo_stopcpus) {
+		/* Sleep until handle_from_host() signals new recv buffers. */
+		pthread_mutex_lock(&vnet->mutex);
+		if (dev->vq->vring.avail->idx == dev->vq->last_avail_idx)
+			pthread_cond_wait(&vnet->cond, &vnet->mutex);
+		pthread_mutex_unlock(&vnet->mutex);
+		while (work_tun(dev))
+			;
+		pthread_testcancel();
+	}
+	pthread_exit(NULL);
+	return NULL;
+}
+
+/* Device reset: stop and join the tap I/O thread, then re-arm the
+ * condition variable and mutex for a later re-activation. */
+static int net_handle_deactivate(struct device *dev)
+{
+	struct vnet_info *vnet = dev->priv;
+
+	pthread_cancel(vnet->thread);
+	pthread_join(vnet->thread, NULL);
+	vnet->thread = 0;
+	pthread_cond_init(&vnet->cond, NULL);
+	pthread_mutex_init(&vnet->mutex, NULL);
+	return 0;
+}
+
+/* Guest driver came up: start the tap I/O thread, but only once. */
+static int net_handle_activate(struct device *dev)
+{
+	struct vnet_info *vnet = dev->priv;
+
+	if (!vnet->thread) {
+		if (pthread_create(&vnet->thread, NULL, net_io_thread, dev))
+			err(1, "Creating clone for net");
+	}
+	return 0;
+}
+
+/* The Guest posted receive buffers: wake the tap I/O thread. */
+static void handle_from_host(struct virtqueue *vq)
+{
+	struct vnet_info *vnet = vq->dev->priv;
+	int rc;
+
+	pthread_mutex_lock(&vnet->mutex);
+	rc = pthread_cond_signal(&vnet->cond);
+	pthread_mutex_unlock(&vnet->mutex);
+	if (rc)
+		report_it("could not signal I/O thread, error: %d", rc);
+}
+
+/*
+ * tries to get a tap device via /dev/net/tun
+ */
+static void create_tap_device(struct vnet_info *vnet)
+{
+	struct ifreq ifr;
+
+	vnet->fd = open("/dev/net/tun", O_RDWR);
+	/* open() returns -1 on failure; fd 0 is perfectly valid. */
+	if (vnet->fd < 0)
+		err(1, "Failed to open tun device %s", vnet->ifname);
+	memset(&ifr, 0, sizeof(ifr));
+	ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
+	/* ifname comes from the command line (up to 254 chars) while
+	 * ifr_name only holds IFNAMSIZ bytes - copy with a bound. */
+	snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s", vnet->ifname);
+	if (ioctl(vnet->fd, TUNSETIFF, &ifr) != 0)
+		err(1, "configuring /dev/net/tun");
+	ioctl(vnet->fd, TUNSETNOCSUM, 1);
+}
+
+/*
+ * Creates a virtio network device based on an open file descriptor
+ */
+static void setup_net_common(struct vnet_info *vnet)
+{
+	struct device *dev;
+
+	dev = new_device(VIRTIO_ID_NET);
+
+	/* Queue 0: host -> guest (recv), queue 1: guest -> host (xmit). */
+	add_virtqueue(dev, VIRTQUEUE_NUM, handle_from_host);
+	add_virtqueue(dev, VIRTQUEUE_NUM, handle_net_output);
+
+	dev->priv = vnet;
+
+	pthread_cond_init(&vnet->cond, NULL);
+	pthread_mutex_init(&vnet->mutex, NULL);
+
+	dev->deactivate = net_handle_deactivate;
+	dev->activate = net_handle_activate;
+}
+
+/* Bring up one -net device: create the tap, then the virtio plumbing. */
+static void virtio_net_setup(struct vnet_info *vnet)
+{
+	log("Adding networking device to %s", vnet->ifname);
+	create_tap_device(vnet);
+	setup_net_common(vnet);
+}
+
+/* Add a new network device from a "-net tap,ifname=<name>" option;
+ * the device itself is created later by virtio_net_init(). */
+void virtio_net_add(char *parm)
+{
+	struct vnet_info *vnet;
+
+	char ifname[255];
+
+	if (sscanf(parm, " tap,ifname = %254s ", ifname) != 1) {
+		printf("Error in %s. Use -net tap,ifname=tap<xx>\n", parm);
+		exit(1);
+	}
+
+	/* calloc: net_handle_activate() tests vnet->thread, which must
+	 * start out zero. */
+	vnet = calloc(1, sizeof(*vnet));
+	if (!vnet)
+		err(1, "allocating net device info");
+	vnet->ifname = strdup(ifname);
+	if (!vnet->ifname)
+		err(1, "allocating net device name");
+	list_add_end(&vnet->head, &vnet_list);
+}
+
+/* Walk the list built by virtio_net_add() and create each device. */
+void virtio_net_init(void)
+{
+	struct vnet_info *cur;
+
+	list_iterate(cur, &vnet_list, head)
+		virtio_net_setup(cur);
+}
+
diff -ruN empty/drivers/virtio_rng.c kuli/drivers/virtio_rng.c
--- empty/drivers/virtio_rng.c	1970-01-01 01:00:00.000000000 +0100
+++ kuli/drivers/virtio_rng.c	2008-05-19 11:17:04.000000000 +0200
@@ -0,0 +1,76 @@
+/*
+ * virtio, based on Rusty Russel's descriptor based approach
+ * Copyright IBM Corp. 2008
+ * Authors: Christian Borntraeger <borntraeger@de.ibm.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License(GPL)
+ */
+#include <err.h>
+#include <stdlib.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <string.h>
+
+#include <kuli.h>
+#include <virtio.h>
+#include <linux/virtio_ring.h>
+#include <linux/virtio_rng.h>
+
+/* State for the rng device: just the /dev/urandom file descriptor. */
+struct vrng_info
+{
+	int fd;
+};
+
+/* The Guest posted input buffers on the rng queue: fill one chain from
+ * /dev/urandom and hand it back. */
+static void handle_rng_input(struct virtqueue *vq)
+{
+	int len;
+	unsigned int head, in_num, out_num;
+	struct iovec iov[vq->vring.num];
+	struct vrng_info *vrng = vq->dev->priv;
+
+	/* First we need a buffer from the Guests's virtqueue. */
+	head = get_vq_desc(vq, iov, &out_num, &in_num);
+
+	/* If they're not ready for input, stop listening to this file
+	 * descriptor.  We'll start again once they add an input buffer. */
+	if (head == vq->vring.num)
+		return;
+
+	if (out_num)
+		errx(1, "Output buffers in rng?");
+
+	len = readv(vrng->fd, iov, in_num);
+	/* Don't hand a -1 length back to the Guest. */
+	if (len < 0)
+		err(1, "reading random device");
+
+	/* Tell the Guest about the new input. */
+	add_used_and_trigger(vq, head, len);
+}
+
+/* Nothing to do when the Guest driver binds the rng device. */
+static int rng_handle_activate(struct device *dev)
+{
+	(void) dev;	/* unused */
+	return 0;
+}
+
+
+/* Create the virtio rng device backed by /dev/urandom. */
+void virtio_rng_init(void)
+{
+	struct device *dev;
+	struct vrng_info *vrng;
+
+	vrng = malloc(sizeof(*vrng));
+	if (!vrng)
+		err(1, "allocating rng device info");
+
+	/* open() returns -1 on failure; fd 0 is perfectly valid. */
+	vrng->fd = open("/dev/urandom", O_RDONLY);
+	if (vrng->fd < 0)
+		err(1, "Failed to open random device /dev/urandom");
+	dev = new_device(VIRTIO_ID_RNG);
+	dev->priv = vrng;
+	dev->activate = rng_handle_activate;
+
+	/* The device has one virtqueue, where the Guest places inbufs. */
+	add_virtqueue(dev, VIRTQUEUE_NUM, handle_rng_input);
+}
+
diff -ruN empty/include/kuli.h kuli/include/kuli.h
--- empty/include/kuli.h	1970-01-01 01:00:00.000000000 +0100
+++ kuli/include/kuli.h	2008-06-05 14:10:45.000000000 +0200
@@ -0,0 +1,108 @@
+/*
+ * kuli core function definitions
+ * Copyright IBM Corp. 2007,2008
+ * Author(s): Carsten Otte <cotte@de.ibm.com>
+ *            Christian Borntraeger <borntraeger@de.ibm.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License(GPL)
+ */
+
+#ifndef __KULI_H
+#define __KULI_H
+
+#include <linux/kvm.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include <vcpu.h>
+#include <list.h>
+
+
+typedef  int (*intercept_handler_t)(struct vcpu *cpu);
+typedef  int (*reset_handler_t)();
+
+#define glo_origin 0ul
+
+/* message macros */
+#define screen(f,a...) printf("%s: " f "\n" , __FUNCTION__ , ## a)
+#define log(f,a...) { \
+        fprintf(glo_logfile, "%s: " f "\n" , __FUNCTION__ , ## a); \
+}
+#define report_it(f,a...) {fprintf(stderr, "internal error>" f "< in function %s\nplease report to cotte@de.ibm.com\n", ## a, __FUNCTION__); exit(1);}
+
+/* init/bootloader.c */
+unsigned long load_ipl(void);
+
+/* init/dump.c */
+extern unsigned long   glo_memsize;
+void create_dumpfile(void);
+
+/* init/kvm.c */
+extern int glo_kvm_vm_fd;
+void kvm_create_vm();
+void kvm_run(struct vcpu *cpu);
+void kvm_create_vcpu(struct vcpu* cpu);
+void kvm_s390_initial_reset(struct vcpu *cpu);
+void kvm_s390_interrupt(struct vcpu *cpu, struct kvm_s390_interrupt *kvmint);
+void kvm_s390_set_initial_psw(struct vcpu *cpu, psw_t psw);
+void kvm_s390_store_status(struct vcpu *cpu, unsigned long addr);
+void kvm_get_regs(struct vcpu *cpu, struct kvm_regs *regs);
+void kvm_set_regs(struct vcpu *cpu, struct kvm_regs *regs);
+
+/* init/message.c */
+extern FILE* glo_logfile;
+extern pthread_mutex_t glo_logfile_lock;
+void init_logging();
+void close_logging();
+
+/* init/options.c */
+extern char* glo_kernel;
+extern char* glo_bootfrom;
+extern char* glo_initrd;
+extern char* glo_parmfile;
+extern char* glo_append;
+extern char* glo_sysdump;
+void parse_options(int argc, char *argv[]);
+
+/* core/cpu.c */
+extern unsigned int glo_numcpu;
+extern int glo_stopcpus;
+
+/* core/diag.c */
+#define DIAG_KVM_HYPERCALL	0x500 /* kvm virtio hypercall */
+
+/* core/instruction.c */
+#define OPCODE_MAJOR_DIAG 0x83
+#define OPCODE_MAJOR_SIGP 0xae
+#define OPCODE_MAJOR_PRIV 0xb2
+#define PRIV_SCLP_CALL    0x20
+void init_instruction();
+int handle_instruction(struct vcpu *cpu);
+
+/* core/intercept.c */
+int enter_pgmcheck(struct vcpu *cpu, uint16_t code);
+int handle_intercept(struct vcpu *cpu);
+
+/* core/reset.c */
+struct reset_call_register {
+	struct list head;
+	reset_handler_t handler;
+};
+void register_reset_handler(struct reset_call_register *);
+int handle_reset(struct vcpu *cpu);
+
+/* core/sclp.c */
+int sclp_service_call(struct vcpu *cpu);
+
+/* devices */
+void init_devices(void);
+void virtio_init(void);
+void virtio_net_init(void);
+void virtio_block_init(void);
+void virtio_rng_init(void);
+void virtio_console_init(void);
+void virtio_net_add(char *parm);
+void virtio_block_add(char *parm);
+
+#endif
diff -ruN empty/include/list.h kuli/include/list.h
--- empty/include/list.h	1970-01-01 01:00:00.000000000 +0100
+++ kuli/include/list.h	2008-05-13 14:34:38.000000000 +0200
@@ -0,0 +1,70 @@
+/*
+ * kuli list implementation header file
+ * Copyright IBM Corp. 2007,2008
+ * Author: Carsten Otte <cotte@de.ibm.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License(GPL)
+ */
+
+#ifndef __LIST_H
+#define __LIST_H
+/* Minimal circular doubly-linked list, kernel-style. */
+struct list {
+	struct list *next, *prev;
+};
+
+/* Static initializer: a list pointing at itself is empty. */
+#define EMPTY_LIST(list) { &(list), &(list) }
+
+/* Insert entry right after head (stack order). */
+static inline void list_add(struct list *entry, struct list *head)
+{
+	entry->next = head->next;
+	entry->next->prev = entry;
+	head->next = entry;
+	entry->prev = head;
+}
+
+/* Insert entry right before head, i.e. at the tail (queue order). */
+static inline void list_add_end(struct list *entry, struct list *head)
+{
+	entry->prev = head->prev;
+	entry->prev->next = entry;
+	head->prev = entry;
+	entry->next = head;
+}
+
+/* Unlink entry and leave it pointing at itself (safe to delete twice). */
+static inline void list_del(struct list *entry)
+{
+	entry->next->prev = entry->prev;
+	entry->prev->next = entry->next;
+	entry->next=entry;
+	entry->prev=entry;
+}
+
+static inline int list_is_empty(struct list *head)
+{
+	if ((head->next == head) && (head->prev == head))
+		return 1;
+	else
+		return 0;
+}
+
+static inline void list_init(struct list *head)
+{
+	head->next = head;
+	head->prev = head;
+}
+
+/* container_of: recover the enclosing struct from its list member. */
+#define list_get(entry, type, member) \
+	((type *)((char *)(entry)-(unsigned long)(&((type *)0)->member)))
+
+#define list_iterate(i, head, member)				\
+	for (i = list_get((head)->next, typeof(*i), member);	\
+	     &i->member != (head);				\
+	     i = list_get(i->member.next, typeof(*i), member))
+
+/* Iteration variant that tolerates deleting i inside the loop body. */
+#define list_iterate_safe(i, head, member, n)			\
+	for (i = list_get((head)->next, typeof(*i), member),	\
+	     n = list_get(i->member.next, typeof(*i), member);\
+	     &i->member != (head);				\
+	     i = n,						\
+	     n = list_get(n->member.next, typeof(*n), member))
+
+#endif
diff -ruN empty/include/vcpu.h kuli/include/vcpu.h
--- empty/include/vcpu.h	1970-01-01 01:00:00.000000000 +0100
+++ kuli/include/vcpu.h	2008-06-03 14:31:35.000000000 +0200
@@ -0,0 +1,45 @@
+/*
+ * guest cpu header file
+ * Copyright IBM Corp. 2007,2008
+ * Author: Carsten Otte <cotte@de.ibm.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License(GPL)
+ */
+
+#ifndef __VCPU_H
+#define __VCPU_H
+
+#include <pthread.h>
+#include <stdint.h>
+
+#include <linux/kvm.h>
+#include <asm/ptrace.h>
+
+#define CPU_STATUS_RUNNING 0
+#define CPU_STATUS_STOPPED 1
+#define CPU_STATUS_PANIC   2
+#define CPU_STATUS_RESTART 3
+
+/* One guest virtual cpu; fields marked (lock) are protected by cpu_lock. */
+struct vcpu {
+	int cpufd;			/* vcpu file descriptor (local) */
+	struct kvm_run* run;		/* kvm running cpu structure (local) */
+	unsigned int       cpuno;	/* this cpu number (local) */
+	pthread_mutex_t    cpu_lock;	/* lock */
+	int cpu_status;			/* cpu status (lock) */
+	pthread_cond_t     cpu_sleeping;/* sleeping condition for cpu (lock) */
+};
+
+/* Set the guest PSW condition code: the 2-bit field at bit 44 of the
+ * 64-bit PSW mask half. */
+static inline void setcc(struct vcpu *cpu, unsigned long cc)
+{
+	cpu->run->s390_sieic.mask &= ~(3ul << 44);
+	cpu->run->s390_sieic.mask |= (cc & 3) << 44;
+}
+
+int handle_sigp(struct vcpu *cpu);
+void wait_for_cpusdown();
+void launch_cpu_ipl(uint64_t address);
+void init_cpus();
+void cpu_restart(unsigned int cpunum);
+void stop_all_cpus(void);
+int cpu_store_status_address(int cpuno, uint64_t address);
+#endif
diff -ruN empty/include/virtio.h kuli/include/virtio.h
--- empty/include/virtio.h	1970-01-01 01:00:00.000000000 +0100
+++ kuli/include/virtio.h	2008-06-03 16:16:56.000000000 +0200
@@ -0,0 +1,93 @@
+/*
+ * virtio header file, based on lguests descriptor based approach
+ * Copyright IBM Corp. 2007,2008
+ * Author: Christian Borntraeger <borntraeger@de.ibm.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License(GPL)
+ */
+
+#ifndef __VIRTIO_H
+#define __VIRTIO_H
+
+#include <err.h>
+#include <string.h>
+#include <asm/kvm_virtio.h>
+#include <linux/virtio_ring.h>
+#include <sys/uio.h>
+#include <kuli.h>
+#include <list.h>
+
+/* We can have up to 256 pages for devices. */
+#define DEVICE_PAGES 256
+/* This will occupy 2 pages: it must be a power of 2. */
+#define VIRTQUEUE_NUM 128
+
+/* The device structure describes a single device. */
+struct device
+{
+	struct list head;
+
+	/* This device's descriptor, as mapped into the Guest. */
+	struct kvm_device_desc *desc;
+
+	/* Any queues attached to this device */
+	struct virtqueue *vq;
+
+	/* Device-specific data. */
+	void *priv;
+	
+	/* Callbacks for activation/deactivation/reset */
+	int (*deactivate)(struct device *device);
+	int (*activate)(struct device *device);
+};
+
+/* The virtqueue structure describes a queue attached to a device. */
+struct virtqueue
+{
+	struct virtqueue *next;
+
+	/* Which device owns me. */
+	struct device *dev;
+
+	/* The configuration for this queue. */
+	struct kvm_vqconfig *config;
+
+	/* The actual ring of buffers. */
+	struct vring vring;
+
+	/* Last available index we saw. */
+	__u16 last_avail_idx;
+
+	/* The routine to call when the Guest pings us. */
+	void (*handle_output)(struct virtqueue *me);
+};
+
+#define wmb() asm volatile("" ::: "memory")
+
+/* Check that an iovec entry has exactly the size and alignment of the
+ * expected type and return its base; use via the convert() macro. */
+static inline void *_convert(struct iovec *iov, size_t size, size_t align,
+		      const char *name)
+{
+	if (iov->iov_len != size)
+		errx(1, "Bad iovec size %zu for %s", iov->iov_len, name);
+	if ((unsigned long)iov->iov_base % align != 0)
+		errx(1, "Bad alignment %p for %s", iov->iov_base, name);
+	return iov->iov_base;
+}
+#define convert(iov, type) \
+	((type *)_convert((iov), sizeof(type), __alignof__(type), #type))
+
+unsigned get_vq_desc(struct virtqueue *vq, struct iovec iov[],
+		    unsigned int *out_num, unsigned int *in_num);
+void add_used(struct virtqueue *vq, unsigned int head, int len);
+void trigger_irq(struct virtqueue *vq);
+void add_used_and_trigger(struct virtqueue *vq, unsigned int head, int len);
+void *get_pages(unsigned int num);
+struct kvm_device_desc *new_dev_desc(__u16 type);
+void add_feature(struct device *dev, unsigned bit);
+void set_config(struct device *dev, unsigned len, const void *conf);
+void add_virtqueue(struct device *dev, unsigned int num_descs,
+			  void (*handle_output)(struct virtqueue *me));
+struct device *new_device(__u16 type);
+int handle_kvm_hypercall(struct vcpu *cpu);
+#endif
+
diff -ruN empty/init/bootloader.c kuli/init/bootloader.c
--- empty/init/bootloader.c	1970-01-01 01:00:00.000000000 +0100
+++ kuli/init/bootloader.c	2008-06-03 16:12:34.000000000 +0200
@@ -0,0 +1,289 @@
+#define _LARGEFILE64_SOURCE
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <kuli.h>
+
+/*
+ * for loading from files
+ */
+#define KERN_IMAGE_START  0x010000UL
+#define KERN_PARM_AREA    0x010480UL
+#define INITRD_START      0x800000UL
+#define INITRD_PARM_START 0x010408UL
+#define INITRD_PARM_SIZE  0x010410UL
+#define PARMFILE_START    0x001000UL
+
+static int fd;
+
+/* On-disk pointer to a run of disk blocks (zipl boot-map layout):
+ * "size" is the block size in bytes, "blockct" the number of blocks
+ * following the first one, i.e. the run covers blockct + 1 blocks. */
+struct scsi_blockptr {
+	uint64_t blockno;
+	uint16_t size;
+	uint16_t blockct;
+	uint8_t reserved[4];
+} __attribute__ ((packed));
+
+/* Component entry types of a zipl program.
+ * NOTE(review): this syntax declares an unused file-scope variable
+ * "component_entry_type" rather than an enum tag/typedef -- probably
+ * unintended, but left as-is since removing a global changes linkage. */
+enum {
+	component_execute = 0x01,
+	component_load = 0x02
+} component_entry_type;
+
+/* One component of a zipl program: where its data lives on disk and
+ * either the guest address to load it to, or (for "execute" entries)
+ * the PSW to start from. */
+struct component_entry {
+	struct scsi_blockptr data;
+	uint8_t pad[7];
+	uint8_t component_type;
+	union {
+		uint64_t load_address;
+		uint64_t load_psw;
+	} address;
+} __attribute((packed));
+
+/* Header types; see NOTE(review) on the enum above -- same pattern. */
+enum {
+	component_header_ipl = 0x00,
+	component_header_dump = 0x01
+} component_header_type;
+
+/* Header preceding the component entries; magic must be "zIPL". */
+struct component_header {
+	uint8_t magic[4];
+	uint8_t type;
+	uint8_t reserved[27];
+} __attribute((packed));
+
+#define PROGRAM_TABLE_BLOCK_SIZE        512
+/* Number of block pointers in one program-table block.  Parenthesized:
+ * the original unparenthesized division was an operator-precedence
+ * hazard when the macro is used inside a larger expression. */
+#define PROGRAM_TABLE_ENTRIES (PROGRAM_TABLE_BLOCK_SIZE / \
+				sizeof(struct scsi_blockptr))
+
+/* Pointer to a block on a disk with linear layout */
+struct program_table {
+	struct scsi_blockptr entries[PROGRAM_TABLE_ENTRIES];
+} __attribute__ ((packed));
+
+/* Master boot record as written by zipl: magic, version, and a pointer
+ * to the program-table block. */
+struct mbr {
+	char magic[4];
+	uint32_t version_id;
+	uint8_t reserved[8];
+	struct scsi_blockptr blockptr;
+} __attribute__ ((packed));
+
+/*
+ * Read "size" bytes at byte "offset" of the currently open boot device
+ * (global fd) into "buffer".  Returns the number of bytes read, or -1
+ * on seek/read failure.  Callers must detect short reads themselves.
+ */
+static int read_from(void *buffer, off_t offset, size_t size)
+{
+	off64_t pos;
+
+	/* keep the full 64-bit result: storing lseek64's return value in
+	 * an int (as the original did) truncates offsets beyond 2G and can
+	 * make a valid position look like the -1 error code */
+	pos = lseek64(fd, offset, SEEK_SET);
+	if (pos == (off64_t)-1)
+		return -1;
+	return read(fd, buffer, size);
+}
+
+/* Terminate the program unless the buffer begins with the zipl
+ * signature "zIPL". */
+static void check_magic(void *tmp)
+{
+	const char *signature = tmp;
+
+	if (memcmp(signature, "zIPL", 4) == 0)
+		return;
+	printf("Wrong magic\n");
+	exit(2);
+}
+
+/* FIXME?: zipl treats virtio as scsi, so it uses 512 byte for sector size */
+#define PHY_BLOCK_SIZE 512
+#define max_entries (PHY_BLOCK_SIZE / sizeof(struct scsi_blockptr))
+
+/*
+ * Load the data described by one segment-table block into guest memory.
+ * Entries 0 .. max_entries-2 describe runs of blocks to load at
+ * *address (guest-physical; glo_origin converts to the host mapping);
+ * the final entry chains to the next segment-table block.
+ * Returns that continuation block number, or 0 at the end of the chain.
+ */
+static uint64_t parse_segment_elements(struct scsi_blockptr *bprs,
+				       uint64_t *address)
+{
+	unsigned d;
+	int len;
+
+	for (d = 0; d < max_entries - 1; d++) {
+		/* refuse to write beyond the end of guest memory */
+		if (*address > glo_memsize) {
+			screen("bootmap points to illegal addresses");
+			exit(1);
+		}
+		/* a zero block pointer terminates the entry list */
+		if (bprs[d].blockno == 0)
+			return 0;
+		/* one run is (blockct + 1) blocks of "size" bytes each */
+		len = read_from((void *)(*address + glo_origin),
+				bprs[d].blockno * bprs[d].size,
+				bprs[d].size * (bprs[d].blockct + 1));
+		/* NOTE(review): on read failure len is -1; report_it() is
+		 * assumed to terminate, otherwise *address would go
+		 * backwards below -- confirm report_it's semantics. */
+		if (len != bprs[d].size * (bprs[d].blockct + 1))
+			report_it("Read was not completed");
+		*address += len;
+	}
+	return bprs[max_entries - 1].blockno;
+
+}
+
+/*
+ * Walk the chain of segment-table blocks starting at "blockno", loading
+ * every segment they describe into guest memory beginning at "address".
+ */
+static void parse_segment_table(uint64_t blockno, uint64_t address)
+{
+	/* exactly one 512-byte table block; the original declared
+	 * [max_entries + 1] and thus read 16 bytes past the block */
+	struct scsi_blockptr bprs[max_entries];
+
+	do {
+		/* fail loudly on a short/failed read instead of parsing
+		 * stack garbage as block pointers */
+		if (read_from(bprs, blockno * 512, sizeof(bprs)) !=
+		    (int)sizeof(bprs))
+			report_it("Read was not completed");
+		blockno = parse_segment_elements(bprs, &address);
+	} while (blockno);
+}
+
+/*
+ * Parse one zipl "program": a component header followed by component
+ * entries.  Every "load" entry's data is loaded into guest memory; the
+ * terminating "execute" entry carries the start address.
+ * Returns the execute (IPL) address; exits on dump headers or malformed
+ * programs.  NOTE(review): read_from() results are unchecked here --
+ * a failed read would parse stale stack data; confirm acceptable.
+ */
+static uint64_t parse_program(struct scsi_blockptr *blockptr)
+{
+	struct component_header header;
+	struct component_entry entry;
+	uint64_t offset = blockptr->blockno * 512;
+
+	read_from(&header, offset, sizeof(header));
+	check_magic(&header.magic);
+	switch (header.type) {
+	case component_header_ipl:
+		screen("found IPL record\n");
+		break;
+	case component_header_dump:
+		/* dump volumes cannot be booted */
+		screen("found dump header");
+		exit(1);
+	default:
+		screen("Unknown header");
+		exit(1);
+	}
+
+	offset += sizeof(header);
+	read_from(&entry, offset, sizeof(entry));
+	/* load every "load" component at its requested guest address */
+	while (entry.component_type == component_load) {
+		parse_segment_table(entry.data.blockno,
+				    entry.address.load_address);
+		offset += sizeof(entry);
+		read_from(&entry, offset, sizeof(entry));
+	}
+	/* NOTE(review): for the execute entry the union really holds a
+	 * PSW; callers mask the value with 0x7fffffff to get the address. */
+	if (entry.component_type == component_execute)
+		return entry.address.load_address;
+	else {
+		screen("No ipl address found\n");
+		exit(1);
+	}
+}
+
+/*
+ * Read the zipl program table and boot its default entry.
+ * Entry 0 holds the magic, entry 1 is the default program.
+ * Returns the IPL (execute) address of the loaded program.
+ */
+static uint64_t parse_program_table(uint64_t blockno)
+{
+	struct program_table ptb;
+	unsigned d;
+
+	/* entry 0, holds the magic, entry 1 the default */
+	read_from(&ptb, blockno * 512, sizeof(ptb));
+	check_magic(&ptb.entries[0]);
+	for (d = 1; d < PROGRAM_TABLE_ENTRIES; d++)
+		if (ptb.entries[d].blockno == 0)
+			break;
+	/* valid entries occupy slots 1 .. d-1, so the count is d - 1;
+	 * the original printed d - 2, under-reporting by one */
+	screen("Found %d entries in the boot table, starting default", d - 1);
+	/* entry 1 is default */
+	return parse_program(&ptb.entries[1]);
+	/* FIXME: menu and config file for choosing boot entry */
+}
+
+/*
+ * Read the master boot record (block 0) of the boot disk and descend
+ * into the program table it points to.  Returns the IPL address.
+ */
+static uint64_t parse_mbr(void)
+{
+	struct mbr mbr;
+
+	log("Reading MBR of disk");
+	/* a short or failed read would let check_magic() inspect stack
+	 * garbage, so fail loudly instead */
+	if (read_from(&mbr, 0, sizeof(mbr)) != (int)sizeof(mbr))
+		report_it("Read was not completed");
+	check_magic(&mbr.magic);
+	return parse_program_table(mbr.blockptr.blockno);
+}
+
+/*
+ * looks at the program tables written by the boot loader to load
+ * everything which is specified in the bootmap.
+ * Returns the 31-bit IPL address.
+ */
+static unsigned long load_from_disk(char *filename)
+{
+	uint64_t address;
+
+	fd = open(filename, O_RDONLY);
+	/* the original left open() unchecked; all subsequent reads would
+	 * have failed silently on fd == -1 */
+	if (fd < 0)
+		report_it("cannot open file %s", filename);
+	address = parse_mbr();
+	close(fd);
+	/* mask the PSW addressing-mode bits down to a 31-bit address */
+	return address & 0x7fffffff;
+}
+
+/*
+ * Load the whole content of "name", starting at file_offset, to the
+ * host address "location".  Returns the number of bytes loaded.
+ * Open, seek and read failures are reported via report_it().
+ */
+static unsigned long load_file(char* name, unsigned long location,
+	off_t file_offset)
+{
+	int fd;
+	ssize_t readct;
+	unsigned long length = 0;
+
+	fd = open (name, O_RDONLY);
+	if (fd<0)
+		report_it("cannot open file %s", name);
+
+	if (file_offset != lseek (fd, file_offset, SEEK_SET))
+		report_it("file error %s", name);
+
+	/* copy in 4K chunks until EOF */
+	while ((readct = read(fd, (void*)(location+length), 4096)) > 0) {
+		length += readct;
+	}
+	if (readct<0)
+		report_it("read error %s", name); /* stray ";;" removed */
+	close(fd);
+	return length;
+}
+
+/*
+ * load_ipl loads everything from files: the kernel image, an optional
+ * initrd and the kernel parameters (from -parmfile or -append), placed
+ * at the well-known s390 boot addresses.  Returns the IPL address.
+ */
+static unsigned long load_from_files(void)
+{
+	unsigned long length;
+	/* the guest kernel reads initrd location/size from these fixed
+	 * addresses.  NOTE(review): unlike the load_file() targets these
+	 * pointers do NOT add glo_origin -- correct only while
+	 * glo_origin == 0 (guest phys == user virt); revisit when the
+	 * mapping moves, as announced in the cover letter. */
+	unsigned long *rd_start = (unsigned long *)INITRD_PARM_START;
+	unsigned long *rd_size  = (unsigned long *)INITRD_PARM_SIZE;
+	unsigned long kernel_size;
+
+	kernel_size = load_file(glo_kernel, KERN_IMAGE_START + glo_origin,
+				KERN_IMAGE_START);
+	if (glo_initrd) {
+		*rd_start = INITRD_START;
+		/* increase initrd address if kernel + 1M would overlap */
+		while (kernel_size + 0x100000 > *rd_start)
+			*rd_start += 0x100000;
+		log("loading ramdisk file %s at %lX", glo_initrd, *rd_start);
+		*rd_size = load_file(glo_initrd, *rd_start + glo_origin, 0);
+	}
+	if (glo_parmfile) {
+		log ("loading kernel parameter file %s",glo_parmfile);
+		length = load_file(glo_parmfile, PARMFILE_START + glo_origin,
+			0);
+		/* NOTE(review): strncpy copies exactly "length" bytes and
+		 * does not NUL-terminate; relies on the parm area being
+		 * zero-filled (anonymous mapping) -- confirm. */
+		strncpy ((void*)KERN_PARM_AREA + glo_origin,
+			 (void*)PARMFILE_START + glo_origin, length);
+	} else {
+		if (!glo_append)
+			report_it ("we should have kernel parameters at this " \
+				"point");
+		log ("using kernel parameters %s",glo_append);
+		strcpy ((void*)KERN_PARM_AREA + glo_origin, glo_append);
+	}
+	return 0x10000;
+}
+
+/*
+ * depending on the command line this loads kernel, parameters, initrd
+ * from file or disk. The disk must be prepared with the zipl boot
+ * loader program.
+ * Returns the IPL address.
+ */
+unsigned long load_ipl(void)
+{
+	/* exactly one of -kernel / -bootfrom must be given */
+	if (!glo_kernel == !glo_bootfrom) {
+		screen("either -kernel or -bootfrom required, both together"
+			" are not allowed");
+		exit(1);
+	}
+	if (glo_kernel) {
+		log("Booting from kernel image file \"%s\"", glo_kernel);
+		return load_from_files() & 0x7fffffff;
+	}
+	log("Booting from disk \"%s\"", glo_bootfrom);
+	return load_from_disk(glo_bootfrom) & 0x7fffffff;
+}
diff -ruN empty/init/dump.c kuli/init/dump.c
--- empty/init/dump.c	1970-01-01 01:00:00.000000000 +0100
+++ kuli/init/dump.c	2008-06-02 15:56:04.000000000 +0200
@@ -0,0 +1,82 @@
+/*
+ * memory image related functions
+ * Copyright IBM Corp. 2007
+ * Author: Carsten Otte <cotte@de.ibm.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License(GPL)
+ */
+
+#include <linux/kvm.h>
+#include <errno.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <string.h>
+
+#include <kuli.h>
+
+unsigned long glo_memsize;
+
+/*
+ * Read the s390 TOD (time-of-day) clock with the STCK instruction.
+ * NOTE(review): the raw TOD value (not a Unix epoch time) is stored
+ * unconverted into the dump header below; the "=m"/address operand
+ * pair returns the result through memory.
+ */
+static inline unsigned long long get_clock (void)
+{
+	unsigned long long clk;
+
+	__asm__("stck 0(%1)" : "=m" (clk) : "a" (&clk) : "cc");
+	return clk;
+}
+
+/*
+ * Write the whole buffer to fd, looping over short writes.  Exits the
+ * program when the file cannot be written.
+ */
+static inline void write_loop(void *data, size_t size, int fd)
+{
+	size_t written=0;
+	int rc;
+	while (written<size) {
+		rc = write (fd, data+written, size-written);
+		if (rc<0) {
+			/* a signal (e.g. the SIGINT/SIGUSR1 that triggered
+			 * the dump) must not abort the dump halfway */
+			if (errno == EINTR)
+				continue;
+			perror ("cannot write to guest dump file");
+			exit (1);
+		}
+		written+=rc;
+	}
+}
+
+/*
+ * create dumpheader creates an lcrash compatible dump header
+ * this way we can actually debug suspended machines
+ */
+static void create_dumpheader(int fd)
+{
+	/* 512 unsigned longs == 4096 bytes; written with getpagesize()
+	 * bytes below, i.e. this assumes 4K pages (true on s390) */
+	unsigned long header[512]={
+		0xa8190173618f23fdUL,	// dump magic
+		0x0000000300001000UL,	// version/header_size
+		0x0000000400001000UL,	// level/page size
+		glo_memsize,		// 3: mem_size
+		0,			// 4: mem_start
+		glo_memsize,		// 5: mem_end
+		(glo_memsize>>12)<<32,	// 6: num_pages in first 4 byte
+		get_clock(),		// 7: dump time
+		0,			// 8: cpuid
+		0x0000000200000000UL,	// arch=s390x,volume 0
+		0x0000000200000000UL,	// arch=s390x,volume 0
+		glo_memsize,		// 11:real mem size
+	};
+	write_loop (header, getpagesize(), fd);
+}
+
+/*
+ * Write an lcrash-style dump of the whole guest memory to the file
+ * named by glo_sysdump.  Failure to create the file is logged but not
+ * fatal.
+ */
+void create_dumpfile(void)
+{
+	int fd;
+	char *name = glo_sysdump;
+
+	fd = open (name, O_CREAT|O_TRUNC|O_RDWR, S_IRWXU);
+	if (fd < 0) {
+		log ("unable to create dump file %s, errno %d", name, errno);
+		return;
+	}
+	create_dumpheader (fd);
+	/* explicit cast: elsewhere glo_origin is used as a numeric base
+	 * address (mmap casts it), while write_loop takes a pointer --
+	 * the uncast argument drew an int-to-pointer conversion warning */
+	write_loop((void *)glo_origin, glo_memsize, fd);
+	close(fd);
+}
diff -ruN empty/init/kvm.c kuli/init/kvm.c
--- empty/init/kvm.c	1970-01-01 01:00:00.000000000 +0100
+++ kuli/init/kvm.c	2008-06-02 15:56:04.000000000 +0200
@@ -0,0 +1,128 @@
+/*
+ * kvm user interface related callbacks
+ * Copyright IBM Corp. 2007
+ * Author: Carsten Otte <cotte@de.ibm.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License(GPL)
+ */
+
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <linux/kvm.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include <kuli.h>
+
+int glo_kvm_vm_fd;
+
+/*
+ * Open /dev/kvm, create the VM file descriptor (kept in glo_kvm_vm_fd)
+ * and register the guest memory: guest physical 0 maps to the fixed
+ * host address glo_origin (guest phys == user virt for now).
+ * Any failure is fatal.
+ */
+void kvm_create_vm(void) {
+	int devfd, vmfd, iorc;
+	struct kvm_userspace_memory_region memreg;
+	void* addr;
+
+	devfd = open ("/dev/kvm", O_RDWR);
+	if (devfd<0) {
+		screen ("failed to open /dev/kvm in read+write mode");
+		exit(1);
+	}
+
+	vmfd = ioctl (devfd, KVM_CREATE_VM, NULL);
+	if (vmfd<0) {
+		screen ("KVM_CREATE_VM ioctl failed with rc %d", vmfd);
+		close(devfd);
+		exit(1);
+	}
+	glo_kvm_vm_fd = vmfd;
+	/* the device fd is only needed to create the VM */
+	close (devfd);
+
+	/* map guest memory at the fixed address the guest expects;
+	 * MAP_FIXED silently replaces any mapping already there, which is
+	 * why the binary is linked high (see kuli.ld: 0x1ff00000000) */
+	addr = mmap((void *) glo_origin, glo_memsize,
+		    PROT_READ | PROT_WRITE | PROT_EXEC,
+		    MAP_FIXED | MAP_SHARED | MAP_ANONYMOUS, -1 , 0);
+	if (addr == MAP_FAILED) {
+		screen ("cannot map guest memory");
+		exit (1);
+	}
+	memreg.slot            = 0;
+	memreg.flags           = 0;
+	memreg.guest_phys_addr = 0;
+	memreg.userspace_addr  = glo_origin;
+	memreg.memory_size     = glo_memsize;
+	iorc = ioctl(glo_kvm_vm_fd, KVM_SET_USER_MEMORY_REGION, &memreg);
+	if (iorc < 0) {
+		screen("KVM_SET_USER_MEMORY_REGION failed");
+		exit (1);
+	}
+}
+
+/*
+ * Enter the guest via KVM_RUN, re-entering after signal interruptions
+ * until the exit is genuine or a stop of all cpus was requested.
+ */
+void kvm_run(struct vcpu *cpu)
+{
+	int rc;
+
+	do {
+		rc = ioctl(cpu->cpufd, KVM_RUN, NULL);
+	} while ((rc == -1) && (errno == EINTR) && (!glo_stopcpus));
+	if (rc != 0)
+		report_it("sie: rc %d errno is %d", rc, errno);
+}
+
+/* Create the in-kernel vcpu and remember its file descriptor. */
+void kvm_create_vcpu(struct vcpu* cpu)
+{
+	int cpufd;
+
+	cpufd = ioctl(glo_kvm_vm_fd, KVM_CREATE_VCPU, cpu->cpuno);
+	if (cpufd < 0) {
+		screen("KVM_CREATE_VCPU failed for virtual cpu %d with rc %d",
+			cpu->cpuno, cpufd);
+		exit(1);
+	}
+	cpu->cpufd = cpufd;
+}
+
+/* Perform an initial cpu reset on the given vcpu. */
+void kvm_s390_initial_reset(struct vcpu *cpu)
+{
+	int rc = ioctl(cpu->cpufd, KVM_S390_INITIAL_RESET, NULL);
+
+	if (rc < 0)
+		report_it("KVM_S390_INITIAL_RESET failed for cpu %d with rc %d",
+			cpu->cpuno, errno);
+}
+
+/*
+ * Inject an s390 interrupt: into a specific vcpu when cpu is non-NULL,
+ * otherwise as a floating interrupt into the whole machine.
+ */
+void kvm_s390_interrupt(struct vcpu *cpu, struct kvm_s390_interrupt *kvmint)
+{
+	int fd = cpu ? cpu->cpufd : glo_kvm_vm_fd;
+
+	if (ioctl(fd, KVM_S390_INTERRUPT, kvmint) < 0)
+		report_it("cannot inject interrupt");
+}
+
+/* Set the initial (IPL) PSW of a vcpu; psw is passed by value. */
+void kvm_s390_set_initial_psw(struct vcpu *cpu, psw_t psw)
+{
+	int rc = ioctl(cpu->cpufd, KVM_S390_SET_INITIAL_PSW, &psw);
+
+	if (rc)
+		report_it ("cannot set ipl psw");
+}
+
+/* Ask the kernel to store the vcpu status at the given guest address. */
+void kvm_s390_store_status(struct vcpu *cpu, unsigned long addr)
+{
+	int rc = ioctl(cpu->cpufd, KVM_S390_STORE_STATUS, addr);
+
+	if (rc)
+		report_it("cannot inject store status request rc %d", errno);
+}
+
+/* Fetch the general purpose registers of a vcpu. */
+void kvm_get_regs(struct vcpu *cpu, struct kvm_regs *regs)
+{
+	int rc = ioctl(cpu->cpufd, KVM_GET_REGS, regs);
+
+	if (rc)
+		report_it("call to KVM_GET_REGS failed\n");
+}
+
+/* Write back the general purpose registers of a vcpu. */
+void kvm_set_regs(struct vcpu *cpu, struct kvm_regs *regs)
+{
+	int rc = ioctl(cpu->cpufd, KVM_SET_REGS, regs);
+
+	if (rc)
+		report_it("ioctl SET_REGS failed with %d", errno);
+}
diff -ruN empty/init/Makefile kuli/init/Makefile
--- empty/init/Makefile	1970-01-01 01:00:00.000000000 +0100
+++ kuli/init/Makefile	2008-06-02 12:32:27.000000000 +0200
@@ -0,0 +1,12 @@
+# init makefile
+# Copyright IBM Corp. 2007
+# Author: Carsten Otte <cotte@de.ibm.com>
+# This file is licensed under the terms of the GNU General Public License(GPL)
+
+# quiet compile rule: print a short status line instead of the command;
+# CC and CFLAGS are exported by the top-level Makefile
+%.o : %.c ; echo "	Compiling	" $<; $(CC) $(CFLAGS) -c $<
+
+OBJS := dump.o kvm.o message.o options.o bootloader.o
+
+all: $(OBJS)
+clean:
+	rm -f *.o
diff -ruN empty/init/message.c kuli/init/message.c
--- empty/init/message.c	1970-01-01 01:00:00.000000000 +0100
+++ kuli/init/message.c	2008-06-02 12:32:27.000000000 +0200
@@ -0,0 +1,32 @@
+/*
+ * message logging
+ * Copyright IBM Corp. 2007
+ * Author: Carsten Otte <cotte@de.ibm.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License(GPL)
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <pthread.h>
+
+#include <kuli.h>
+
+FILE* glo_logfile;
+
+/* Open kuli.log for writing; all log() output goes there.  Fatal on
+ * failure. */
+void init_logging(void)
+{
+	glo_logfile = fopen ("kuli.log", "w");
+	if (!glo_logfile) {
+		perror ("cannot open log file for writing");
+		exit (1);
+	}
+	log ("KULI logging initialized");
+}
+
+/* Close the log file; safe to call even when logging was never
+ * initialized or was already closed. */
+void close_logging(void)
+{
+	/* guard added: fclose(NULL) is undefined behaviour */
+	if (glo_logfile)
+		fclose (glo_logfile);
+	glo_logfile = NULL;
+}
diff -ruN empty/init/options.c kuli/init/options.c
--- empty/init/options.c	1970-01-01 01:00:00.000000000 +0100
+++ kuli/init/options.c	2008-06-02 11:29:08.000000000 +0200
@@ -0,0 +1,121 @@
+/*
+ * kuli command line options
+ * Copyright IBM Corp. 2007,2008
+ * Author: Carsten Otte <cotte@de.ibm.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License(GPL)
+ */
+
+#include <unistd.h>
+#include <getopt.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <kuli.h>
+
+char* glo_kernel;
+char* glo_bootfrom;
+char* glo_initrd;
+char* glo_parmfile;
+char* glo_append;
+char* glo_sysdump;
+
+/* getopt_long_only option table; the flag column is 0, so getopt
+ * returns the short letter that parse_options() switches on.  All
+ * options except -h/-help take an argument. */
+static struct option long_options[] = {
+	{"append", 1, 0, 'a'},
+	{"bootfrom", 1, 0, 'b'},
+	{"smp", 1, 0, 'c'},
+	{"drive", 1, 0, 'd'},
+	{"help", 0, 0, 'h'},
+	{"initrd", 1, 0, 'i'},
+	{"kernel", 1, 0, 'k'},
+	{"megs", 1, 0, 'm'},
+	{"net", 1, 0, 'n'},
+	{"parmfile", 1, 0, 'p'},
+	{"sysdump", 1, 0, 's'},
+	{0, 0, 0, 0}
+};
+
+/* Print the command line summary and terminate. */
+static void help_and_exit(void) {
+	static const char *usage[] = {
+		"usage: kuli <OPTIONS>",
+		"where <OPTIONS> are:",
+		"-a or -append to set the kernel parameters",
+		"-b or -bootfrom to set the disk image for booting",
+		"-c or -smp to set the amount of cpus",
+		"-d or -drive to set the disk images to be used",
+		"   Example: -drive file=/dev/mapper/guestroot",
+		"-h or -help to see this message",
+		"-i or -initrd to set the initrd image",
+		"-k or -kernel to set the kernel image",
+		"-m or -megs to set the guest memory in megs",
+		"-n or -net to set the network connection",
+		"   Example: -net tap,ifname=tap2",
+		"-p or -parmfile to set the kernel parmfile",
+		"-s or -sysdump to set the system dump image",
+	};
+	unsigned int i;
+
+	for (i = 0; i < sizeof(usage) / sizeof(usage[0]); i++)
+		screen ("%s", usage[i]);
+	exit(1);
+}
+
+/*
+ * Duplicate optarg into a freshly allocated buffer for option -c,
+ * rejecting a second occurrence of the same option.  Returns the copy.
+ * (Name note: the double-underscore prefix is a reserved identifier,
+ * kept only to avoid touching callers.)
+ */
+static char* __fill_string(int c, char *string, char* optarg) {
+	if (string) {
+		screen ("Option -%c presented twice", c);
+		help_and_exit();
+	}
+	string = malloc(strlen(optarg)+1);
+	/* the original left malloc unchecked -- strcpy would have
+	 * dereferenced NULL on allocation failure */
+	if (!string) {
+		screen ("Option -%c: out of memory", c);
+		exit(1);
+	}
+	strcpy (string, optarg);
+	return string;
+}
+
+/*
+ * Parse the kuli command line (see help_and_exit() for the option
+ * list) and fill in the glo_* configuration variables.  Guest memory
+ * defaults to 256M.  Exits on unknown options or stray arguments.
+ */
+void parse_options(int argc, char *argv[])
+{
+	int c;
+
+	glo_memsize=256<<20;
+
+	while (1) {
+		/* remembered only for the "not understood" message below */
+		int this_option_optind = optind ? optind : 1;
+		int option_index = 0;
+
+		c = getopt_long_only(argc, argv, "hc:s:", long_options, &option_index);
+
+		if (c == -1) // no more getopt processable options?
+			break;
+		switch (c) {
+		case 'h':
+			/* help_and_exit() does not return */
+			help_and_exit();
+		case 'k':
+			glo_kernel = __fill_string(c, glo_kernel, optarg);
+			break;
+		case 'b':
+			glo_bootfrom = __fill_string(c, glo_bootfrom, optarg);
+			break;
+		case 'i':
+			glo_initrd = __fill_string(c, glo_initrd, optarg);
+			break;
+		case 'p':
+			glo_parmfile = __fill_string(c, glo_parmfile, optarg);
+			break;
+		case 'a':
+			glo_append = __fill_string(c, glo_append, optarg);
+			break;
+		case 's':
+			glo_sysdump = __fill_string(c, glo_sysdump, optarg);
+			break;
+		case 'n':
+			virtio_net_add(optarg);
+			break;
+		case 'd':
+			virtio_block_add(optarg);
+			break;
+		case 'm':
+			/* NOTE(review): strtoul errors are undetected; a
+			 * non-numeric argument silently yields 0 megs */
+			glo_memsize = strtoul(optarg, NULL, 10)<<20;
+			break;
+		case 'c':
+			glo_numcpu = strtoul(optarg, NULL, 10);
+			break;
+		default:
+			screen ("Option not understood: %s",argv[this_option_optind]);
+			help_and_exit();
+		}
+	}
+	/* anything left over is an unexpected positional argument */
+	if (optind != argc)
+		help_and_exit();
+}
diff -ruN empty/kuli.ld kuli/kuli.ld
--- empty/kuli.ld	1970-01-01 01:00:00.000000000 +0100
+++ kuli/kuli.ld	2008-06-02 11:23:33.000000000 +0200
@@ -0,0 +1,207 @@
+/* Script for -z combreloc: combine and sort reloc sections */
+OUTPUT_FORMAT("elf64-s390", "elf64-s390",
+	      "elf64-s390")
+OUTPUT_ARCH(s390:64-bit)
+ENTRY(_start)
+SEARCH_DIR("/usr/s390x-linux/lib"); SEARCH_DIR("/usr/local/lib64"); SEARCH_DIR("/lib64"); SEARCH_DIR("/usr/lib64"); SEARCH_DIR("/usr/lib"); SEARCH_DIR("/usr/local/lib"); SEARCH_DIR("/lib");
+/* Do we need any of these for elf?
+   __DYNAMIC = 0;    */
+SECTIONS
+{
+  /* Read-only sections, merged into text segment: */
+  PROVIDE (__executable_start = 0x1ff00000000); . = 0x1ff00000000 + SIZEOF_HEADERS;
+  .interp         : { *(.interp) }
+  .hash           : { *(.hash) }
+  .dynsym         : { *(.dynsym) }
+  .dynstr         : { *(.dynstr) }
+  .gnu.version    : { *(.gnu.version) }
+  .gnu.version_d  : { *(.gnu.version_d) }
+  .gnu.version_r  : { *(.gnu.version_r) }
+  .rel.dyn        :
+    {
+      *(.rel.init)
+      *(.rel.text .rel.text.* .rel.gnu.linkonce.t.*)
+      *(.rel.fini)
+      *(.rel.rodata .rel.rodata.* .rel.gnu.linkonce.r.*)
+      *(.rel.data .rel.data.* .rel.gnu.linkonce.d.*)
+      *(.rel.tdata .rel.tdata.* .rel.gnu.linkonce.td.*)
+      *(.rel.tbss .rel.tbss.* .rel.gnu.linkonce.tb.*)
+      *(.rel.ctors)
+      *(.rel.dtors)
+      *(.rel.got)
+      *(.rel.sdata .rel.sdata.* .rel.gnu.linkonce.s.*)
+      *(.rel.sbss .rel.sbss.* .rel.gnu.linkonce.sb.*)
+      *(.rel.sdata2 .rel.sdata2.* .rel.gnu.linkonce.s2.*)
+      *(.rel.sbss2 .rel.sbss2.* .rel.gnu.linkonce.sb2.*)
+      *(.rel.bss .rel.bss.* .rel.gnu.linkonce.b.*)
+    }
+  .rela.dyn       :
+    {
+      *(.rela.init)
+      *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*)
+      *(.rela.fini)
+      *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*)
+      *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*)
+      *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*)
+      *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*)
+      *(.rela.ctors)
+      *(.rela.dtors)
+      *(.rela.got)
+      *(.rela.sdata .rela.sdata.* .rela.gnu.linkonce.s.*)
+      *(.rela.sbss .rela.sbss.* .rela.gnu.linkonce.sb.*)
+      *(.rela.sdata2 .rela.sdata2.* .rela.gnu.linkonce.s2.*)
+      *(.rela.sbss2 .rela.sbss2.* .rela.gnu.linkonce.sb2.*)
+      *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*)
+    }
+  .rel.plt        : { *(.rel.plt) }
+  .rela.plt       : { *(.rela.plt) }
+  .init           :
+  {
+    KEEP (*(.init))
+  } =0x07070707
+  .plt            : { *(.plt) }
+  .text           :
+  {
+    *(.text .stub .text.* .gnu.linkonce.t.*)
+    /* .gnu.warning sections are handled specially by elf32.em.  */
+    *(.gnu.warning)
+  } =0x07070707
+  .fini           :
+  {
+    KEEP (*(.fini))
+  } =0x07070707
+  PROVIDE (__etext = .);
+  PROVIDE (_etext = .);
+  PROVIDE (etext = .);
+  .rodata         : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+  .rodata1        : { *(.rodata1) }
+  .sdata2         : { *(.sdata2 .sdata2.* .gnu.linkonce.s2.*) }
+  .sbss2          : { *(.sbss2 .sbss2.* .gnu.linkonce.sb2.*) }
+  .eh_frame_hdr : { *(.eh_frame_hdr) }
+  /* Adjust the address for the data segment.  We want to adjust up to
+     the same address within the page on the next page up.  */
+  . = ALIGN(0x1000) + (. & (0x1000 - 1));
+  /* Ensure the __preinit_array_start label is properly aligned.  We
+     could instead move the label definition inside the section, but
+     the linker would then create the section even if it turns out to
+     be empty, which isn't pretty.  */
+  . = ALIGN(64 / 8);
+  PROVIDE (__preinit_array_start = .);
+  .preinit_array     : { *(.preinit_array) }
+  PROVIDE (__preinit_array_end = .);
+  PROVIDE (__init_array_start = .);
+  .init_array     : { *(.init_array) }
+  PROVIDE (__init_array_end = .);
+  PROVIDE (__fini_array_start = .);
+  .fini_array     : { *(.fini_array) }
+  PROVIDE (__fini_array_end = .);
+  .data           :
+  {
+    *(.data .data.* .gnu.linkonce.d.*)
+    SORT(CONSTRUCTORS)
+  }
+  .data1          : { *(.data1) }
+  .tdata	  : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
+  .tbss		  : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
+  .eh_frame       : { KEEP (*(.eh_frame)) }
+  .gcc_except_table   : { *(.gcc_except_table) }
+  .dynamic        : { *(.dynamic) }
+  .ctors          :
+  {
+    /* gcc uses crtbegin.o to find the start of
+       the constructors, so we make sure it is
+       first.  Because this is a wildcard, it
+       doesn't matter if the user does not
+       actually link against crtbegin.o; the
+       linker won't look for a file to match a
+       wildcard.  The wildcard also means that it
+       doesn't matter which directory crtbegin.o
+       is in.  */
+    KEEP (*crtbegin*.o(.ctors))
+    /* We don't want to include the .ctor section from
+       from the crtend.o file until after the sorted ctors.
+       The .ctor section from the crtend file contains the
+       end of ctors marker and it must be last */
+    KEEP (*(EXCLUDE_FILE (*crtend*.o ) .ctors))
+    KEEP (*(SORT(.ctors.*)))
+    KEEP (*(.ctors))
+  }
+  .dtors          :
+  {
+    KEEP (*crtbegin*.o(.dtors))
+    KEEP (*(EXCLUDE_FILE (*crtend*.o ) .dtors))
+    KEEP (*(SORT(.dtors.*)))
+    KEEP (*(.dtors))
+  }
+  .jcr            : { KEEP (*(.jcr)) }
+  .got            : { *(.got.plt) *(.got) }
+  /* We want the small data sections together, so single-instruction offsets
+     can access them all, and initialized data all before uninitialized, so
+     we can shorten the on-disk segment size.  */
+  .sdata          :
+  {
+    *(.sdata .sdata.* .gnu.linkonce.s.*)
+  }
+  _edata = .;
+  PROVIDE (edata = .);
+  __bss_start = .;
+  .sbss           :
+  {
+    PROVIDE (__sbss_start = .);
+    PROVIDE (___sbss_start = .);
+    *(.dynsbss)
+    *(.sbss .sbss.* .gnu.linkonce.sb.*)
+    *(.scommon)
+    PROVIDE (__sbss_end = .);
+    PROVIDE (___sbss_end = .);
+  }
+  .bss            :
+  {
+   *(.dynbss)
+   *(.bss .bss.* .gnu.linkonce.b.*)
+   *(COMMON)
+   /* Align here to ensure that the .bss section occupies space up to
+      _end.  Align after .bss to ensure correct alignment even if the
+      .bss section disappears because there are no input sections.  */
+   . = ALIGN(64 / 8);
+  }
+  . = ALIGN(64 / 8);
+  _end = .;
+  PROVIDE (end = .);
+  /* Stabs debugging sections.  */
+  .stab          0 : { *(.stab) }
+  .stabstr       0 : { *(.stabstr) }
+  .stab.excl     0 : { *(.stab.excl) }
+  .stab.exclstr  0 : { *(.stab.exclstr) }
+  .stab.index    0 : { *(.stab.index) }
+  .stab.indexstr 0 : { *(.stab.indexstr) }
+  .comment       0 : { *(.comment) }
+  /* DWARF debug sections.
+     Symbols in the DWARF debugging sections are relative to the beginning
+     of the section so we begin them at 0.  */
+  /* DWARF 1 */
+  .debug          0 : { *(.debug) }
+  .line           0 : { *(.line) }
+  /* GNU DWARF 1 extensions */
+  .debug_srcinfo  0 : { *(.debug_srcinfo) }
+  .debug_sfnames  0 : { *(.debug_sfnames) }
+  /* DWARF 1.1 and DWARF 2 */
+  .debug_aranges  0 : { *(.debug_aranges) }
+  .debug_pubnames 0 : { *(.debug_pubnames) }
+  /* DWARF 2 */
+  .debug_info     0 : { *(.debug_info .gnu.linkonce.wi.*) }
+  .debug_abbrev   0 : { *(.debug_abbrev) }
+  .debug_line     0 : { *(.debug_line) }
+  .debug_frame    0 : { *(.debug_frame) }
+  .debug_str      0 : { *(.debug_str) }
+  .debug_loc      0 : { *(.debug_loc) }
+  .debug_macinfo  0 : { *(.debug_macinfo) }
+  /* SGI/MIPS DWARF 2 extensions */
+  .debug_weaknames 0 : { *(.debug_weaknames) }
+  .debug_funcnames 0 : { *(.debug_funcnames) }
+  .debug_typenames 0 : { *(.debug_typenames) }
+  .debug_varnames  0 : { *(.debug_varnames) }
+  /DISCARD/ : { *(.note.GNU-stack) }
+}
+
+
diff -ruN empty/main.c kuli/main.c
--- empty/main.c	1970-01-01 01:00:00.000000000 +0100
+++ kuli/main.c	2008-06-05 14:10:45.000000000 +0200
@@ -0,0 +1,74 @@
+/*
+ * kuli main component
+ * Copyright IBM Corp. 2008
+ * Authors: Carsten Otte <cotte@de.ibm.com>
+ *          Christian Borntraeger <cborntra@de.ibm.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License(GPL)
+ */
+
+#include <errno.h>
+#include <signal.h>
+#include <unistd.h>
+
+#include <kuli.h>
+#include <vcpu.h>
+
+
+/*
+ * SIGINT handler: request an orderly shutdown of all virtual cpus.
+ * NOTE(review): report_it()/stop_all_cpus() run in signal context --
+ * assumed async-signal-safe here; confirm their implementations.
+ */
+static void shutdown_req(int signal)
+{
+	if (signal != SIGINT)
+		report_it ("received illegal signal");
+
+	glo_stopcpus = 1;
+	stop_all_cpus();
+}
+
+/*
+ * SIGUSR1 handler: stop the guest so main() writes a memory dump
+ * afterwards; defaults the dump name when -sysdump was not given.
+ */
+static void dump_req(int signal)
+{
+	if (signal != SIGUSR1)
+		report_it ("received illegal signal");
+
+	if (!glo_sysdump)
+		glo_sysdump = "kuli.dump";
+	glo_stopcpus = 1;
+	stop_all_cpus();
+}
+
+/*
+ * Set up the VM, cpus and devices, load the guest image and start
+ * cpu 0 at the IPL address.  Always returns 0.
+ */
+static int do_ipl(void)
+{
+	uint64_t address;
+
+	log ("preparing for IPL");	/* typo "perparing" fixed */
+	kvm_create_vm();
+	init_cpus();
+	init_devices();
+
+	address = load_ipl();
+	/* %lX instead of %zX: address is uint64_t (unsigned long on
+	 * s390x), not a size_t -- mismatched specifiers are UB */
+	log("IPL from address %lX", address);
+	launch_cpu_ipl(address);
+
+	return 0;
+}
+
+/*
+ * main(): parse options, boot the guest, then wait for the cpus to
+ * stop and optionally write a memory dump.
+ */
+int main(int argc, char* argv[])
+{
+	parse_options(argc, argv);
+	init_logging();
+	/* creates the VM, loads the guest and starts the cpus */
+	do_ipl();
+	/* NOTE(review): handlers are installed after the cpus already
+	 * run -- a SIGINT during do_ipl() still kills the process. */
+	signal(SIGINT, shutdown_req);
+	signal(SIGUSR1, dump_req);
+	wait_for_cpusdown();
+	if (glo_sysdump) {
+		screen("Taking a dump...");
+		/* a second ^C while dumping should terminate normally */
+		signal(SIGINT, SIG_DFL);
+		create_dumpfile();
+		screen("FIXME SLEEPING 10 SECONDS TO ALLOW RETRIEVAL OF DBF");
+		sleep(10);
+	}
+	close_logging();
+	return 0;
+}
diff -ruN empty/Makefile kuli/Makefile
--- empty/Makefile	1970-01-01 01:00:00.000000000 +0100
+++ kuli/Makefile	2008-06-05 14:10:45.000000000 +0200
@@ -0,0 +1,61 @@
+# main makefile
+# Copyright IBM Corp. 2007,2008
+# Authors: Carsten Otte <cotte@de.ibm.com>
+#          Christian Borntraeger <cborntra@de.ibm.com>
+# This file is licensed under the terms of the GNU General Public License(GPL)
+
+# quiet compile rule: print a short status line instead of the command
+%.o : %.c ; echo "	Compiling	" $<; $(CC) $(CFLAGS) -c $<
+
+CC	:= $(CROSS)gcc
+INCLUDE := -I$(PWD)/include -I$(KERNELDIR)/include
+CFLAGS	:= -Wall -Wno-missing-field-initializers -Wno-nonnull -W -D_GNU_SOURCE -m64 -fpic -ggdb $(INCLUDE)
+EXELDFLAGS := -Tkuli.ld -m64 -fpic -lpthread
+LDFLAGS := -m64
+ASFLAGS  := -Wall -m64
+GCCVERSION = `gcc -dumpversion`
+# NOTE(review): distro-specific gcc library path (RedHat layout by
+# default, SuSE variant commented out) -- breaks on other layouts
+GCCLIB = /usr/lib/gcc/s390x-redhat-linux/$(GCCVERSION)
+#GCCLIB = /usr/lib64/gcc/s390x-suse-linux/$(GCCVERSION)
+SUBDIRS = init core drivers
+
+MAKEFLAGS+=--quiet
+
+export CC CFLAGS LDFLAGS ASFLAGS
+
+all: kuli
+
+# hand-rolled link step: the custom linker script (kuli.ld, which
+# links the binary high to keep guest memory addresses free) requires
+# -nostartfiles plus the explicit crt start/end objects
+kuli: subdirs main.o
+	echo "	Linking		" $@
+	$(CC) $(EXELDFLAGS) -nostartfiles -o kuli /usr/lib64/crt1.o /usr/lib64/crti.o $(GCCLIB)/crtbeginS.o main.o init/*.o core/*.o drivers/*.o $(GCCLIB)/crtendS.o /usr/lib64/crtn.o
+
+.PHONY: clean all subdirs mrproper tags TAGS $(SUBDIRS)
+
+subdirs: $(SUBDIRS)
+
+$(SUBDIRS):
+	$(MAKE) -C $@ all
+
+clean:
+	for dir in $(SUBDIRS); do $(MAKE) -C $$dir clean ; done
+	rm -f main.o kuli kuli.map
+
+TAGS: *.c init/*.c core/*.c drivers/*.c include/*.h 
+	etags $^
+
+tags: *.c init/*.c core/*.c drivers/*.c include/*.h
+	ctags $^
+
+# NOTE(review): "xargs -e" is a deprecated GNU extension (logical EOF
+# string); modern xargs may reject it -- consider plain "xargs rm -f"
+mrproper: clean
+	find . -name "*~" |xargs -e rm -f
+	find . -name "#*#" |xargs -e rm -f
+	find . -name ".#*" |xargs -e rm -f
+	find . -name "*.orig" |xargs -e rm -f
+	find . -name "*.rej" |xargs -e rm -f	
+	find . -name "*.s" |xargs -e rm -f	
+	find . -name "*.i" |xargs -e rm -f	
+	rm -f TAGS tags
+
+install: kuli
+	install kuli /usr/bin
+
+uninstall:
+	rm -rf /usr/bin/kuli




^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2008-06-12 19:56 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-06-06 15:54 [RFC] kvm-s390: userspace snapshot Carsten Otte
2008-06-10  5:55 ` Oliver Paukstadt
2008-06-11 14:35   ` Christian Borntraeger
2008-06-11 20:53     ` Oliver Paukstadt
2008-06-11 22:14       ` Christian Borntraeger
2008-06-12  5:39         ` Oliver Paukstadt
2008-06-12 14:14           ` Christian Borntraeger
2008-06-12 19:56             ` Oliver Paukstadt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox