* [RFC] kvm-s390: userspace snapshot
@ 2008-06-06 15:54 Carsten Otte
2008-06-10 5:55 ` Oliver Paukstadt
0 siblings, 1 reply; 8+ messages in thread
From: Carsten Otte @ 2008-06-06 15:54 UTC (permalink / raw)
To: kvm
Cc: Avi Kivity, aliguori, jblunck, ihno, rvdheij, rusty,
oliver.paukstadt, Olaf Schnapper
This patch is a full snapshot of "kuli", our current userspace for kvm.
It is <3000 lines of code, and it contains a bootloader as well as
virtio backends for console, network, and block. Its command line syntax
is the same as the common kvm userspace, but not all options are supported.
See --help for details.
Our next step will be to get rid of the guest phys == user virt address
mapping, so that we can have the guest memory anywhere on a megabyte
boundary in userland. After that, we'd like to integrate this into the
common kvm userspace. It is not intended for customer production use but
is a good starting point to explore what kvm can look like on s390.
Have fun reading
---
diff -ruN empty/core/cpu.c kuli/core/cpu.c
--- empty/core/cpu.c 1970-01-01 01:00:00.000000000 +0100
+++ kuli/core/cpu.c 2008-06-05 13:54:32.000000000 +0200
@@ -0,0 +1,392 @@
+/*
+ * guest cpu related functions
+ * Copyright IBM Corp. 2007,2008
+ * Author: Carsten Otte <cotte@de.ibm.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License(GPL)
+ */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <signal.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <linux/kvm.h>
+#include <unistd.h>
+
+#include <kuli.h>
+#include <vcpu.h>
+
+/* sigp order codes recognized by handle_sigp() */
+#define SIGP_RESTART 0x06
+#define SIGP_STORE_STATUS_ADDR 0x0e
+#define SIGP_SET_ARCH 0x12
+
+/* thread handle per cpu; allocated in init_cpus(), joined in wait_for_cpusdown() */
+static pthread_t *cputhreads[64];
+/* bit n is set by run_cpu() of cpu n after its setup and cleared when it exits */
+static unsigned long cpu_threads_map;
+/* bit n is set by run_cpu() around the call to __run_cpu() for cpu n */
+static unsigned long cpu_running_map;
+/* protects both maps; the condition is signalled on every map update */
+static pthread_mutex_t cpu_map_lock = PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t cpu_map_upd_cond = PTHREAD_COND_INITIALIZER;
+/* cpu state, indexed by cpu number; filled in by init_cpus() */
+static struct vcpu *vcpus[64];
+/* number of cpus; init_cpus() clamps it to the range 1..64 */
+unsigned int glo_numcpu;
+/* set to 1 to make the cpu loops in this file terminate */
+int glo_stopcpus;
+
+/*
+ * take the lock of @cpu
+ */
+static void get_cpu(struct vcpu *cpu)
+{
+	pthread_mutex_lock(&cpu->cpu_lock);
+}
+
+/*
+ * drop the lock taken by get_cpu() or get_cpuno()
+ */
+static void put_cpu(struct vcpu *cpu)
+{
+	pthread_mutex_unlock(&cpu->cpu_lock);
+}
+
+/*
+ * look up cpu number @i and return it with its lock held,
+ * the caller releases the lock again with put_cpu()
+ */
+static struct vcpu *get_cpuno(int i)
+{
+	struct vcpu *locked = vcpus[i];
+
+	get_cpu(locked);
+	return locked;
+}
+
+/*
+ * mark a CPU in map
+ *
+ * @cpuno: cpu number, valid values are 0..glo_numcpu-1 (and at most 63)
+ * @map: bitmap to update; threads in __cpumap_wait() are signalled afterwards
+ */
+static void cpumap_set(unsigned int cpuno, unsigned long *map)
+{
+	/* glo_numcpu itself is already out of range, hence >= */
+	if (cpuno > 63 || cpuno >= glo_numcpu) {
+		report_it("trying to set invalid cpu");
+		/* in case report_it() returns: leave the map untouched */
+		return;
+	}
+	pthread_mutex_lock(&cpu_map_lock);
+	*map |= 1UL << cpuno;
+	pthread_cond_signal(&cpu_map_upd_cond);
+	pthread_mutex_unlock(&cpu_map_lock);
+}
+
+/*
+ * clear a CPU in map
+ *
+ * @cpuno: cpu number, valid values are 0..glo_numcpu-1 (and at most 63)
+ * @map: bitmap to update; threads in __cpumap_wait() are signalled afterwards
+ */
+static void cpumap_clear(unsigned int cpuno, unsigned long *map)
+{
+	/* glo_numcpu itself is already out of range, hence >= */
+	if (cpuno > 63 || cpuno >= glo_numcpu) {
+		report_it("trying to clear invalid cpu");
+		/* in case report_it() returns: leave the map untouched */
+		return;
+	}
+	pthread_mutex_lock(&cpu_map_lock);
+	*map &= ~(1UL << cpuno);
+	pthread_cond_signal(&cpu_map_upd_cond);
+	pthread_mutex_unlock(&cpu_map_lock);
+}
+
+/* conditions __cpumap_wait() knows about */
+enum cpumap_event {
+	CPUMAP_EV_ZERO = 0,	/* no bit set */
+	CPUMAP_EV_NONZERO = 1,	/* at least one bit set */
+	CPUMAP_EV_VALUE = 2,	/* map equals a specific value */
+};
+
+/*
+ * block until @map satisfies the condition selected by @event;
+ * @val is only looked at for CPUMAP_EV_VALUE. An unknown @event
+ * is treated as already satisfied.
+ */
+static void __cpumap_wait(unsigned long *map, int event, unsigned long val)
+{
+	int satisfied;
+
+	pthread_mutex_lock(&cpu_map_lock);
+	for (;;) {
+		if (event == CPUMAP_EV_ZERO)
+			satisfied = (*map == 0);
+		else if (event == CPUMAP_EV_NONZERO)
+			satisfied = (*map != 0);
+		else if (event == CPUMAP_EV_VALUE)
+			satisfied = (*map == val);
+		else
+			satisfied = 1;
+
+		if (satisfied)
+			break;
+		/* the map lock is dropped while sleeping and re-taken on wakeup */
+		pthread_cond_wait(&cpu_map_upd_cond, &cpu_map_lock);
+	}
+	pthread_mutex_unlock(&cpu_map_lock);
+}
+
+/* wait until no bit is set in @map */
+static void cpumap_wait_zero(unsigned long *map)
+{
+	__cpumap_wait(map, CPUMAP_EV_ZERO, 0);
+}
+
+/* wait until at least one bit is set in @map */
+static void cpumap_wait_one(unsigned long *map)
+{
+	__cpumap_wait(map, CPUMAP_EV_NONZERO, 0);
+}
+
+/* wait until @map is exactly @val */
+static void cpumap_wait_val(unsigned long *map, unsigned long val)
+{
+	__cpumap_wait(map, CPUMAP_EV_VALUE, val);
+}
+
+/*
+ * interpretive execution loop
+ *
+ * Re-enters the guest through kvm_run() and dispatches on the exit reason
+ * until a handler returns non-zero or glo_stopcpus is set. Returns the
+ * last handler result (0 when stopped through glo_stopcpus).
+ */
+static int __run_cpu(struct vcpu *cpu)
+{
+	int rc;
+
+	for (rc = 0; rc == 0 && !glo_stopcpus; ) {
+		kvm_run(cpu);
+		if (cpu->run->exit_reason == KVM_EXIT_S390_SIEIC)
+			rc = handle_intercept(cpu);
+		else if (cpu->run->exit_reason == KVM_EXIT_S390_RESET)
+			rc = handle_reset(cpu);
+		else
+			report_it("unsupported exit reason %d",
+				  cpu->run->exit_reason);
+	}
+	return rc;
+}
+
+/*
+ * cpu thread
+ *
+ * Thread body for one virtual cpu, @arg is its struct vcpu. Creates and
+ * resets the kvm vcpu, maps the kvm_run area, announces itself in
+ * cpu_threads_map and then acts on cpu->cpu_status until glo_stopcpus is
+ * set or waiting on cpu_sleeping fails. Returns @arg.
+ */
+static void *run_cpu(void *arg)
+{
+	struct vcpu *cpu = (struct vcpu *)arg;
+	sigset_t sigs;
+	int rc;
+	/* zeroed so that no uninitialized stack content reaches the kernel */
+	struct kvm_s390_interrupt kvmint = { 0 };
+
+	get_cpu(cpu);
+
+	kvm_create_vcpu(cpu);
+	kvm_s390_initial_reset(cpu);
+
+	cpu->cpu_status = CPU_STATUS_STOPPED;
+	cpu->run = (struct kvm_run *) mmap(NULL, 2*getpagesize(),
+			PROT_READ|PROT_WRITE, MAP_SHARED, cpu->cpufd, 0);
+	if (cpu->run == MAP_FAILED) {
+		screen("failed to mmap cpu data for virtual cpu %d - rc %d",
+		       cpu->cpuno, errno);
+		exit(1);
+	}
+	put_cpu(cpu);
+
+	/*
+	 * block signals: SIGINT, SIGUSR1
+	 * pthread_sigmask() is the per-thread interface; the effect of
+	 * sigprocmask() is unspecified in a multithreaded process.
+	 */
+	sigemptyset(&sigs);
+	sigaddset(&sigs, SIGINT);
+	sigaddset(&sigs, SIGUSR1);
+	pthread_sigmask(SIG_BLOCK, &sigs, NULL);
+
+	get_cpu(cpu);
+	rc = 0;
+	cpumap_set(cpu->cpuno, &cpu_threads_map);
+	while (!rc && !glo_stopcpus) {
+		switch (cpu->cpu_status) {
+		case CPU_STATUS_RESTART:
+			kvmint.type = KVM_S390_RESTART;
+			kvm_s390_interrupt(cpu, &kvmint);
+			cpu->cpu_status = CPU_STATUS_RUNNING;
+			/* fall through */
+		case CPU_STATUS_RUNNING:
+			/* run work, without holding the cpu lock */
+			cpumap_set(cpu->cpuno, &cpu_running_map);
+			put_cpu(cpu);
+			log("cpu %d: activated, running work...", cpu->cpuno);
+			rc = __run_cpu(cpu);
+			if (rc == CPU_STATUS_STOPPED)
+				rc = 0;
+			get_cpu(cpu);
+			cpumap_clear(cpu->cpuno, &cpu_running_map);
+			cpu->cpu_status = CPU_STATUS_STOPPED;
+			/*
+			 * fall through
+			 * NOTE(review): rc from __run_cpu() is overwritten by
+			 * the wait below, so any non-zero result only parks
+			 * the cpu - confirm that this is intended.
+			 */
+		case CPU_STATUS_STOPPED:
+			/* sleep until somebody changes cpu_status and signals us */
+			rc = pthread_cond_wait(&cpu->cpu_sleeping,
+					       &cpu->cpu_lock);
+			break;
+		default:
+			report_it("illegal cpu status %d", cpu->cpu_status);
+		}
+	}
+	kvm_s390_store_status(cpu, KVM_S390_STORE_STATUS_PREFIXED);
+	cpumap_clear(cpu->cpuno, &cpu_threads_map);
+	put_cpu(cpu);
+	return (void *)cpu;
+}
+
+/*
+ * after everything is set up proper, this function
+ * launches the ipl cpu and initiates IPL
+ *
+ * @address: value stored in the addr field of the initial PSW
+ */
+void launch_cpu_ipl(uint64_t address)
+{
+ psw_t ipl_psw;
+
+ /* cpu 0 is the ipl cpu; get_cpuno() returns it with its lock held */
+ struct vcpu *cpu = get_cpuno(0);
+ log("starting guest (ipl)");
+ ipl_psw.mask = 0x0000000180000000UL;
+ ipl_psw.addr = address;
+ kvm_s390_set_initial_psw(cpu, ipl_psw);
+ /* run_cpu() sleeps on cpu_sleeping while stopped: set the new state, then wake it */
+ cpu->cpu_status = CPU_STATUS_RUNNING;
+ pthread_cond_signal(&cpu->cpu_sleeping);
+ put_cpu(cpu);
+}
+
+/*
+ * wait until all cpus are down. note that you gonna wait
+ * forever if you did not initiate cpus becoming down
+ *
+ * Once no cpu is running anymore, glo_stopcpus is set, all cpu threads
+ * are woken up and joined, and their thread handles are released.
+ */
+void wait_for_cpusdown(void)
+{
+	unsigned int i;
+	struct vcpu *cpu;
+
+	/* wait for cpus to start up, and shut down again */
+	cpumap_wait_one(&cpu_running_map);
+	cpumap_wait_zero(&cpu_running_map);
+	glo_stopcpus = 1;
+
+	/* wake every thread sleeping in run_cpu() so it sees glo_stopcpus */
+	for (i = 0; i < glo_numcpu; i++) {
+		cpu = get_cpuno(i);
+		pthread_cond_signal(&cpu->cpu_sleeping);
+		put_cpu(cpu);
+	}
+
+	for (i = 0; i < glo_numcpu; i++) {
+		if (cputhreads[i]) {
+			pthread_join(*cputhreads[i], NULL);
+			/* the handle was malloc'ed in init_cpus() */
+			free(cputhreads[i]);
+			cputhreads[i] = NULL;
+		}
+	}
+}
+
+/*
+ * store the status of cpu @cpuno and send it a stop interrupt
+ */
+static void __stop_cpu(unsigned int cpuno)
+{
+	/* all other fields zeroed, nothing uninitialized reaches the kernel */
+	struct kvm_s390_interrupt kvmint = { .type = KVM_S390_SIGP_STOP };
+	struct vcpu *cpu;
+
+	if (cpuno >= glo_numcpu) {
+		report_it("called for nonexistent cpu");
+		/* in case report_it() returns: do not index vcpus[] out of range */
+		return;
+	}
+	cpu = get_cpuno(cpuno);
+	kvm_s390_store_status(cpu, KVM_S390_STORE_STATUS_PREFIXED);
+	kvm_s390_interrupt(cpu, &kvmint);
+	put_cpu(cpu);
+}
+
+/*
+ * request all cpus to stop: raise glo_stopcpus, send each cpu a stop
+ * interrupt and wake its thread in case it sleeps on cpu_sleeping
+ */
+void stop_all_cpus(void)
+{
+	unsigned int cpuno;
+
+	glo_stopcpus = 1;
+	for (cpuno = 0; cpuno < glo_numcpu; cpuno++) {
+		struct vcpu *cpu;
+
+		__stop_cpu(cpuno);
+		cpu = get_cpuno(cpuno);
+		pthread_cond_signal(&cpu->cpu_sleeping);
+		put_cpu(cpu);
+	}
+}
+
+/*
+ * init_cpus initializes all cpus, called on ipl
+ *
+ * Clamps glo_numcpu to 1..64, allocates one struct vcpu and one thread per
+ * cpu and returns once every thread has registered in cpu_threads_map.
+ * Allocation or thread creation failures terminate the program.
+ */
+void init_cpus(void)
+{
+	int rc;
+	struct vcpu *this_pu;
+	unsigned int this_id;
+
+	if (glo_numcpu > 64)
+		glo_numcpu = 64;
+
+	if (glo_numcpu < 1)
+		glo_numcpu = 1;
+
+	for (this_id = 0; this_id < glo_numcpu; this_id++) {
+		this_pu = malloc(sizeof(*this_pu));
+		cputhreads[this_id] = malloc(sizeof(pthread_t));
+		if (!this_pu || !cputhreads[this_id]) {
+			screen("failed to allocate memory for virtual cpu %u",
+			       this_id);
+			exit(1);
+		}
+		/* set cpu number */
+		this_pu->cpuno = this_id;
+		/* add this pu to global cpus array */
+		vcpus[this_id] = this_pu;
+		/* init lock */
+		pthread_mutex_init(&this_pu->cpu_lock, NULL);
+		/* init cpu idle condition */
+		pthread_cond_init(&this_pu->cpu_sleeping, NULL);
+
+		/*
+		 * pthread_create() returns the error number itself (positive),
+		 * it neither returns a negative value nor sets errno
+		 */
+		do {
+			rc = pthread_create(cputhreads[this_id], NULL,
+					    run_cpu, vcpus[this_id]);
+		} while (rc == EAGAIN);
+		if (rc) {
+			/* without the thread the wait below would never finish */
+			screen("failed to create thread for virtual cpu %u - rc %d",
+			       this_id, rc);
+			exit(1);
+		}
+	}
+	/* 1ul << 64 is undefined, so the full map is handled separately */
+	if (glo_numcpu == 64)
+		cpumap_wait_val(&cpu_threads_map, -1ul);
+	else
+		cpumap_wait_val(&cpu_threads_map, (1ul<<glo_numcpu)-1);
+}
+
+/*
+ * flag cpu @cpuno for restart and wake its thread; run_cpu() delivers
+ * the restart interrupt when it sees CPU_STATUS_RESTART.
+ * @cpuno is not range checked here (handle_sigp() checks before calling).
+ */
+void cpu_restart(unsigned int cpuno)
+{
+ struct vcpu *cpu = get_cpuno(cpuno);
+
+ cpu->cpu_status = CPU_STATUS_RESTART;
+ pthread_cond_signal(&cpu->cpu_sleeping);
+ put_cpu(cpu);
+}
+
+/*
+ * compute base + displacement from the intercepted instruction:
+ * the top four bits of ipb select the base register, bits 16-27 hold
+ * the displacement. Base register number 0 contributes the value 0.
+ */
+static uint64_t decode_base_displacement(struct vcpu *cpu,
+					 struct kvm_regs *regs)
+{
+	uint64_t base = cpu->run->s390_sieic.ipb >> 28;
+	uint64_t disp = (cpu->run->s390_sieic.ipb & 0x0fff0000) >> 16;
+
+	/* register 0 is special: it means "no base register" */
+	if (base != 0)
+		base = regs->gprs[base];
+	return base + disp;
+}
+
+/*
+ * handle an intercepted SIGP instruction
+ *
+ * The high nibble of the low ipa byte selects the register (pair) holding
+ * the parameter, the low nibble the register holding the target cpu
+ * address; the order code is the base/displacement operand.
+ * Returns 0 after setting the condition code, or CPU_STATUS_PANIC for
+ * orders that are not supported.
+ */
+int handle_sigp(struct vcpu *cpu)
+{
+	struct kvm_regs regs;
+	uint64_t *status;
+	uint32_t parameter;
+	uint16_t cpu_addr;
+	uint8_t order_code;
+	uint8_t ipa1 = cpu->run->s390_sieic.ipa & 0x00ff;
+	unsigned int r1 = (ipa1 & 0xf0) >> 4;
+	struct vcpu *target;
+
+	kvm_get_regs(cpu, &regs);
+	status = &regs.gprs[r1];
+	/* odd register: parameter is in it; even register: in the next one */
+	parameter = r1 % 2 ? *status : regs.gprs[r1 + 1];
+	cpu_addr = regs.gprs[ipa1 & 0x0f];
+	order_code = decode_base_displacement(cpu, &regs);
+
+	switch (order_code) {
+	case SIGP_RESTART:
+		if (cpu_addr >= glo_numcpu)
+			goto no_cpu;
+		cpu_restart(cpu_addr);
+		setcc(cpu, 0);
+		return 0;
+	case SIGP_STORE_STATUS_ADDR:
+		if (cpu_addr >= glo_numcpu)
+			goto no_cpu;
+		parameter = parameter & 0x7ffffe00;
+		target = get_cpuno(cpu_addr);
+		kvm_s390_store_status(target, parameter);
+		put_cpu(target);
+		setcc(cpu, 0);
+		return 0;
+	case SIGP_SET_ARCH:
+		log("cpu %d: received SIGP_SET_ARCH, parameter %d",
+		    cpu->cpuno, parameter & 0xff);
+		return CPU_STATUS_PANIC;
+	default:
+		log("cpu %d: unsupported sigp order code %d",
+		    cpu->cpuno, order_code);
+		return CPU_STATUS_PANIC;
+	}
+no_cpu:
+	/* addressed cpu does not exist */
+	setcc(cpu, 3);
+	return 0;
+}
+
diff -ruN empty/core/instruction.c kuli/core/instruction.c
--- empty/core/instruction.c 1970-01-01 01:00:00.000000000 +0100
+++ kuli/core/instruction.c 2008-06-05 14:10:45.000000000 +0200
@@ -0,0 +1,63 @@
+/*
+ * instruction interceptions
+ * Copyright IBM Corp. 2007,2008
+ * Author: Carsten Otte <cotte@de.ibm.com>
+ * Christian Borntraeger <cborntra@de.ibm.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License(GPL)
+ */
+
+#include <string.h>
+
+#include <kuli.h>
+#include <vcpu.h>
+#include <virtio.h>
+
+/* highest diagnose code that has a slot in diag_handlers[] */
+#define DIAG_MAX 0x500
+
+/* handlers for privileged instructions, indexed by the low byte of ipa (see handle_priv) */
+static const intercept_handler_t priv_handlers[256] = {
+ [PRIV_SCLP_CALL] = sclp_service_call,
+};
+
+/* handlers for diagnose calls, indexed by diagnose code (see handle_diag) */
+static intercept_handler_t diag_handlers[DIAG_MAX+1] = {
+ [DIAG_KVM_HYPERCALL] = handle_kvm_hypercall,
+};
+
+/*
+ * dispatch an intercepted privileged instruction via priv_handlers[],
+ * indexed by the low byte of ipa. Instructions without a handler are
+ * answered with program check 1.
+ */
+static int handle_priv(struct vcpu *cpu)
+{
+	uint8_t ipa1 = cpu->run->s390_sieic.ipa & 0x00ff;
+
+	if (priv_handlers[ipa1])
+		return priv_handlers[ipa1](cpu);
+	/* print the address like the sibling handlers do, not the PSW mask */
+	log("cpu %d: unknown privileged instruction b2%02x at addr %lx, "
+	    "sending prog 1", cpu->cpuno, ipa1, cpu->run->s390_sieic.addr);
+	return enter_pgmcheck(cpu, 0x0001);
+}
+
+/*
+ * dispatch an intercepted diagnose via diag_handlers[]; the code is taken
+ * from bits 16-27 of ipb. Codes above DIAG_MAX or without a handler are
+ * answered with program check 1.
+ */
+static int handle_diag(struct vcpu *cpu)
+{
+	int code = (cpu->run->s390_sieic.ipb & 0x0fff0000) >> 16;
+	intercept_handler_t handler = NULL;
+
+	if (code <= DIAG_MAX)
+		handler = diag_handlers[code];
+	if (handler)
+		return handler(cpu);
+
+	log("cpu %d: unknown diagnose %x at addr %lx, sending prog 1",
+	    cpu->cpuno, code, cpu->run->s390_sieic.addr);
+	return enter_pgmcheck(cpu, 0x0001);
+}
+
+/* first level dispatch table, indexed by the high byte of ipa */
+static const intercept_handler_t instruction_handlers[256] = {
+	[OPCODE_MAJOR_PRIV] = handle_priv,
+	[OPCODE_MAJOR_DIAG] = handle_diag,
+	[OPCODE_MAJOR_SIGP] = handle_sigp,
+};
+
+/*
+ * entry point for instruction intercepts: hand the instruction to the
+ * handler registered for its high ipa byte, or send program check 1 if
+ * there is none
+ */
+int handle_instruction(struct vcpu *cpu)
+{
+	unsigned int ipa0 = (cpu->run->s390_sieic.ipa & 0xff00) >> 8;
+	intercept_handler_t handler = instruction_handlers[ipa0];
+
+	if (!handler) {
+		log("cpu %d: unknown instruction %x at addr %lx, sending prog 1",
+		    cpu->cpuno, cpu->run->s390_sieic.ipa,
+		    cpu->run->s390_sieic.addr);
+		return enter_pgmcheck(cpu, 0x0001);
+	}
+	return handler(cpu);
+}
diff -ruN empty/core/intercept.c kuli/core/intercept.c
--- empty/core/intercept.c 1970-01-01 01:00:00.000000000 +0100
+++ kuli/core/intercept.c 2008-06-03 17:22:49.000000000 +0200
@@ -0,0 +1,91 @@
+/*
+ * sie intercept handling
+ * Copyright IBM Corp. 2007,2008
+ * Author: Carsten Otte <cotte@de.ibm.com>
+ * Christian Borntraeger <cborntra@de.ibm.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License(GPL)
+ */
+
+#include <linux/errno.h>
+#include <linux/kvm.h>
+#include <sys/time.h>
+
+#include <kuli.h>
+#include <vcpu.h>
+
+/*
+ * fallback for intercept codes without a real handler:
+ * log the code and return CPU_STATUS_PANIC
+ */
+static int handle_should_not_happen(struct vcpu *cpu)
+{
+ log("PANIC:cpu %d caught unexpected intercept. Magic number: 0x%x",
+ cpu->cpuno, cpu->run->s390_sieic.icptcode);
+ return CPU_STATUS_PANIC;
+}
+
+/*
+ * wait PSW intercept: only a disabled wait is expected here, the cpu is
+ * then reported as stopped
+ */
+static int handle_waitpsw(struct vcpu *cpu)
+{
+ /* any of these PSW mask bits set is treated as an enabled wait */
+ if (cpu->run->s390_sieic.mask & 0x4300000000000000ul)
+ report_it("received enabled wait in userland\n");
+
+ log("cpu %d: entered disabled wait PSW at %lx",
+ cpu->cpuno, cpu->run->s390_sieic.addr);
+ return CPU_STATUS_STOPPED;
+}
+
+/* intercept code 0x24: nothing to do, continue running the guest */
+static int handle_software_intercept(struct vcpu *cpu)
+{
+ return 0;
+}
+
+/* intercept code 0x28: log the address and report the cpu as stopped */
+static int handle_cpu_stop(struct vcpu *cpu)
+{
+ log("cpu %d: stopped at %lx", cpu->cpuno, cpu->run->s390_sieic.addr);
+ return CPU_STATUS_STOPPED;
+}
+
+/* intercept code 0x40: answer every I/O instruction with condition code 3 */
+static int handle_io_instruction(struct vcpu *cpu)
+{
+ setcc(cpu, 3);
+ return 0;
+}
+
+/*
+ * intercept handlers, indexed by icptcode >> 2; handle_intercept() only
+ * uses codes that are a multiple of 4 and not larger than 0x48
+ */
+static const intercept_handler_t intercept_funcs[] = {
+ handle_should_not_happen, /* 0x00 */
+ handle_instruction, /* 0x04 */
+ handle_should_not_happen, /* 0x08 */
+ handle_should_not_happen, /* 0x0C */
+ handle_should_not_happen, /* 0x10 */
+ handle_should_not_happen, /* 0x14 */
+ handle_should_not_happen, /* 0x18 */
+ handle_waitpsw, /* 0x1C */
+ handle_should_not_happen, /* 0x20 */
+ handle_software_intercept, /* 0x24 */
+ handle_cpu_stop, /* 0x28 */
+ handle_should_not_happen, /* 0x2C */
+ handle_should_not_happen, /* 0x30 */
+ handle_should_not_happen, /* 0x34 */
+ handle_should_not_happen, /* 0x38 */
+ handle_should_not_happen, /* 0x3C */
+ handle_io_instruction, /* 0x40 */
+ handle_should_not_happen, /* 0x44 */
+ handle_should_not_happen /* 0x48 */
+};
+
+/*
+ * dispatch a SIE intercept to its handler in intercept_funcs[];
+ * codes that are not a multiple of 4 or lie beyond the table end up in
+ * handle_should_not_happen()
+ */
+int handle_intercept(struct vcpu *cpu)
+{
+	unsigned int slot;
+
+	if (cpu->run->s390_sieic.icptcode & 3)
+		return handle_should_not_happen(cpu);
+	if (cpu->run->s390_sieic.icptcode > 0x48)
+		return handle_should_not_happen(cpu);
+
+	slot = cpu->run->s390_sieic.icptcode >> 2;
+	return intercept_funcs[slot](cpu);
+}
+
+/*
+ * inject program interrupt @code into @cpu; always returns 0
+ */
+int enter_pgmcheck(struct vcpu *cpu, uint16_t code)
+{
+	/* remaining fields are zeroed instead of carrying stack garbage */
+	struct kvm_s390_interrupt kvmint = {
+		.type = KVM_S390_PROGRAM_INT,
+		.parm = code,
+	};
+
+	kvm_s390_interrupt(cpu, &kvmint);
+	log("cpu: %d: sending program check %x",
+	    cpu->cpuno, code);
+	return 0;
+}
diff -ruN empty/core/Makefile kuli/core/Makefile
--- empty/core/Makefile 1970-01-01 01:00:00.000000000 +0100
+++ kuli/core/Makefile 2008-06-05 14:10:45.000000000 +0200
@@ -0,0 +1,12 @@
+# core makefile
+# Copyright IBM Corp. 2007,2008
+# Author: Carsten Otte <cotte@de.ibm.com>
+# This file is licensed under the terms of the GNU General Public License(GPL)
+
+%.o : %.c ; echo " Compiling " $<; $(CC) $(CFLAGS) -c $<
+
+OBJS := cpu.o instruction.o intercept.o reset.o sclp.o
+
+all: $(OBJS)
+clean:
+	rm -f *.o
+
+# neither target creates a file of its name (same as drivers/Makefile)
+.PHONY: clean all
diff -ruN empty/core/reset.c kuli/core/reset.c
--- empty/core/reset.c 1970-01-01 01:00:00.000000000 +0100
+++ kuli/core/reset.c 2008-06-04 10:07:54.000000000 +0200
@@ -0,0 +1,86 @@
+/*
+ * reboot/reset related functions
+ * Copyright (C) IBM Corp. 2007,2008
+ * Author: Christian Borntraeger <cborntra@de.ibm.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License(GPL)
+ */
+
+#include <kuli.h>
+#include <linux/ptrace.h>
+#include <linux/kvm.h>
+
+#include <string.h>
+#include <list.h>
+#include <vcpu.h>
+
+static struct list reset_handlers = {&reset_handlers, &reset_handlers};
+
+/*
+ * Erases the guest memory: glo_memsize bytes starting at glo_origin
+ */
+static void reset_mem(void)
+{
+	/* memset() takes a byte count, not the end address of the area */
+	memset((void *) glo_origin, 0, glo_memsize);
+}
+
+/*
+ * Calls all reset callbacks in list order and stops at the first one
+ * that fails. Returns that callback's result, or 0 if all succeeded.
+ */
+static int reset_callbacks(void)
+{
+	struct reset_call_register *reset;
+	int rc = 0;
+
+	list_iterate(reset, &reset_handlers, head) {
+		rc = reset->handler();
+		if (rc != 0)
+			return rc;
+	}
+	return rc;
+}
+
+/*
+ * handle a KVM_EXIT_S390_RESET exit
+ *
+ * Processes each flag set in s390_reset_flags (subsystem reset, memory
+ * clear, cpu initial reset, ipl) and clears it afterwards; any flag left
+ * over is reported as unsupported. Always returns 0.
+ */
+int handle_reset(struct vcpu *cpu)
+{
+	psw_t ipl_psw;
+	struct kvm_regs regs;
+
+	if (cpu->run->s390_reset_flags & KVM_S390_RESET_SUBSYSTEM) {
+		reset_callbacks();
+		cpu->run->s390_reset_flags &= ~KVM_S390_RESET_SUBSYSTEM;
+	}
+
+	if (cpu->run->s390_reset_flags & KVM_S390_RESET_CLEAR) {
+		reset_mem();
+		cpu->run->s390_reset_flags &= ~KVM_S390_RESET_CLEAR;
+	}
+
+	if (cpu->run->s390_reset_flags & KVM_S390_RESET_CPU_INIT) {
+		kvm_s390_initial_reset(cpu);
+		cpu->run->s390_reset_flags &= ~KVM_S390_RESET_CPU_INIT;
+		/* clear the whole structure, it is handed to the kernel as is */
+		memset(&regs, 0, sizeof(regs));
+		kvm_set_regs(cpu, &regs);
+	}
+
+	if (cpu->run->s390_reset_flags & KVM_S390_RESET_IPL) {
+		ipl_psw.mask = 0x0000000180000000UL;
+		ipl_psw.addr = load_ipl();
+		kvm_s390_set_initial_psw(cpu, ipl_psw);
+		cpu->run->s390_reset_flags &= ~KVM_S390_RESET_IPL;
+	}
+
+	/* every flag handled above has been cleared by now */
+	if (cpu->run->s390_reset_flags)
+		report_it("usupported reset flag %lx, exit",
+			  cpu->run->s390_reset_flags);
+
+	return 0;
+}
+
+/*
+ * registers a callback for reset
+ *
+ * @reset is linked into reset_handlers through its head member, so it has
+ * to stay valid for as long as it is on the list; reset_callbacks() invokes
+ * reset->handler().
+ */
+void register_reset_handler(struct reset_call_register *reset)
+{
+ list_add(&reset->head, &reset_handlers);
+}
+
diff -ruN empty/core/sclp.c kuli/core/sclp.c
--- empty/core/sclp.c 1970-01-01 01:00:00.000000000 +0100
+++ kuli/core/sclp.c 2008-06-05 14:10:45.000000000 +0200
@@ -0,0 +1,68 @@
+/*
+ * service call instruction interceptions
+ * Copyright IBM Corp. 2007,2008
+ * Author: Carsten Otte <cotte@de.ibm.com>
+ * Christian Borntraeger <borntraeger@de.ibm.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License(GPL)
+ */
+
+#include <linux/kvm.h>
+
+#include <kuli.h>
+#include <vcpu.h>
+
+/*
+ * layout of the control block in guest memory that scp_read_info() fills
+ * in; packed, so the compiler adds no padding between the members
+ */
+struct sccb_scp_read_info {
+ uint16_t length;
+ uint8_t function_code;
+ uint8_t control_mask[3];
+ uint16_t response_code; /* set to 0x10 by scp_read_info() */
+ uint16_t mem_code; /* set to glo_memsize >> 20 by scp_read_info() */
+ uint8_t increment; /* set to 1 by scp_read_info() */
+} __attribute__((packed));
+
+/*
+ * answer a read-info service call: fill in the memory size information
+ * in the control block at guest address @_sccb, set condition code 0 and
+ * raise a service interrupt carrying the block address. Always returns 0.
+ */
+int scp_read_info(struct vcpu *cpu, uint32_t _sccb)
+{
+	struct sccb_scp_read_info *sccb = (void *)(_sccb + glo_origin);
+	/* remaining fields are zeroed instead of carrying stack garbage */
+	struct kvm_s390_interrupt kvmint = {
+		.type = KVM_S390_INT_SERVICE,
+		.parm = _sccb & ~3,
+	};
+
+	sccb->mem_code = glo_memsize>>20;
+	sccb->increment = 1;
+	sccb->response_code = 0x10;
+	setcc(cpu, 0);
+
+	/* no target cpu is passed for this interrupt */
+	kvm_s390_interrupt(NULL, &kvmint);
+	return 0;
+}
+
+/*
+ * handle an intercepted service call instruction
+ *
+ * The two register numbers come from the upper half of ipb: the low
+ * nibble selects the register with the control block address, the next
+ * nibble the register with the command code. Unknown commands and
+ * invalid addresses are answered with condition code 3. Always returns 0.
+ */
+int sclp_service_call(struct vcpu *cpu)
+{
+	struct kvm_regs regs;
+	uint64_t sccb;
+	uint64_t code;
+	uint16_t ipbh0 = (cpu->run->s390_sieic.ipb & 0xffff0000) >> 16;
+
+	kvm_get_regs(cpu, &regs);
+	sccb = regs.gprs[ipbh0 & 0xf];
+	code = regs.gprs[(ipbh0 & 0xf0) >> 4];
+
+	// FIXME: check_region(cpu, addr, addr + length - 1);
+	/* address must be 8 byte aligned and below 2GB */
+	if (sccb & ~0x7ffffff8ul) {
+		log("cpu %d: invalid sccb address 0x%lx", cpu->cpuno, sccb);
+		goto out;
+	}
+	switch(code) {
+	case 0x00020001:
+	case 0x00120001:
+		return scp_read_info(cpu, sccb);
+	default:
+		log("cpu %d: unknown sclp service call 0x%lx, sccb 0x%lx,"
+		    "addr 0x%lx", cpu->cpuno, code, sccb,
+		    cpu->run->s390_sieic.addr);
+	}
+out:
+	setcc(cpu, 3);
+	return 0;
+}
diff -ruN empty/drivers/devices.c kuli/drivers/devices.c
--- empty/drivers/devices.c 1970-01-01 01:00:00.000000000 +0100
+++ kuli/drivers/devices.c 2008-06-04 13:29:17.000000000 +0200
@@ -0,0 +1,11 @@
+#include <kuli.h>
+
+/*
+ * set up the virtio core first, then the console, rng, block and
+ * network devices, in that order
+ */
+void init_devices(void)
+{
+ virtio_init();
+ virtio_console_init();
+ virtio_rng_init();
+ virtio_block_init();
+ virtio_net_init();
+}
+
diff -ruN empty/drivers/Makefile kuli/drivers/Makefile
--- empty/drivers/Makefile 1970-01-01 01:00:00.000000000 +0100
+++ kuli/drivers/Makefile 2008-06-04 13:29:17.000000000 +0200
@@ -0,0 +1,14 @@
+# drivers makefile
+# Copyright IBM Corp. 2007,2008
+# Author: Carsten Otte <cotte@de.ibm.com>
+# This file is licensed under the terms of the GNU General Public License(GPL)
+
+%.o : %.c ; echo " Compiling " $<; $(CC) $(CFLAGS) -c $<
+
+OBJS := devices.o virtio.o virtio_net.o virtio_blk.o virtio_rng.o virtio_console.o
+
+all: $(OBJS)
+clean:
+ rm -f *.o
+
+.PHONY: clean all
diff -ruN empty/drivers/virtio_blk.c kuli/drivers/virtio_blk.c
--- empty/drivers/virtio_blk.c 1970-01-01 01:00:00.000000000 +0100
+++ kuli/drivers/virtio_blk.c 2008-06-04 10:07:54.000000000 +0200
@@ -0,0 +1,228 @@
+/*
+ * virtio, based on Rusty Russel's descriptor based approach
+ * Copyright IBM Corp. 2007,2008
+ * Authors: Martin Peschke <mp3@de.ibm.com>
+ * Christian Borntraeger <borntraeger@de.ibm.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License(GPL)
+ */
+
+#include <fcntl.h>
+#include <unistd.h>
+#include <signal.h>
+#include <pthread.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <kuli.h>
+#include <list.h>
+#include <vcpu.h>
+
+#include <virtio.h>
+#include <linux/virtio_ring.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_blk.h>
+
+static struct list vblk_list = EMPTY_LIST(vblk_list);
+
+/* This hangs off device->priv. One instance per block device on vblk_list. */
+struct vblk_info {
+ struct list head;
+
+ /* backing file name (set in virtio_block_add) and its descriptor (opened in virtio_blk_setup) */
+ char *filename;
+ int fd;
+
+ /* The size of the file. */
+ off64_t len;
+
+ /* service thread, plus the condition/mutex pair used to wake it up */
+ pthread_t thread;
+ pthread_cond_t cond;
+ pthread_mutex_t mutex;
+};
+
+
+/*
+ * Service one request from the block device's virtqueue.
+ *
+ * The first buffer of a request is the virtio_blk_outhdr, the last buffer
+ * receives the status byte. Depending on the request type the buffers in
+ * between are written to or filled from the backing file.
+ * Returns 1 if a request was handled, 0 if the queue was empty. Malformed
+ * requests and seek failures terminate the program.
+ */
+static int service_io(struct device *dev)
+{
+	struct vblk_info *vblk = dev->priv;
+	unsigned int head, out_num, in_num, wlen;
+	int ret;
+	__u8 *in;
+	struct virtio_blk_outhdr *out;
+	struct iovec iov[dev->vq->vring.num];
+	off64_t off;
+
+	/* See if there's a request waiting. If not, nothing to do. */
+	head = get_vq_desc(dev->vq, iov, &out_num, &in_num);
+	if (head == dev->vq->vring.num)
+		return 0;
+
+	if (out_num == 0 || in_num == 0)
+		errx(1, "Bad virtblk cmd %u out=%u in=%u",
+		     head, out_num, in_num);
+
+	out = convert(&iov[0], struct virtio_blk_outhdr);
+	in = convert(&iov[out_num+in_num-1], __u8);
+	off = out->sector * 512;
+
+	if (out->type & VIRTIO_BLK_T_BARRIER)
+		fdatasync(vblk->fd);
+
+	if (out->type & VIRTIO_BLK_T_SCSI_CMD) {
+		fprintf(stderr, "Scsi commands unsupported\n");
+		*in = VIRTIO_BLK_S_UNSUPP;
+		wlen = sizeof(*in);
+	} else if (out->type & VIRTIO_BLK_T_OUT) {
+		/* Write */
+
+		if (lseek64(vblk->fd, off, SEEK_SET) != off)
+			err(1, "Bad seek to sector %llu",
+			    (unsigned long long) out->sector);
+
+		/* skip the header in iov[0] */
+		ret = writev(vblk->fd, iov+1, out_num-1);
+
+		if (ret > 0 && off + ret > vblk->len) {
+			/* undo the growth of the file before giving up */
+			ftruncate64(vblk->fd, vblk->len);
+			errx(1, "Write past end %llu+%u",
+			     (unsigned long long)off, ret);
+		}
+		wlen = sizeof(*in);
+		*in = (ret >= 0 ? VIRTIO_BLK_S_OK : VIRTIO_BLK_S_IOERR);
+	} else {
+		/* Read */
+		/* same format and cast as the write path, independent of the width of long */
+		if (lseek64(vblk->fd, off, SEEK_SET) != off)
+			err(1, "Bad seek to sector %llu",
+			    (unsigned long long) out->sector);
+
+		/* the last in buffer is the status byte, not data */
+		ret = readv(vblk->fd, iov+1, in_num-1);
+		if (ret >= 0) {
+			wlen = sizeof(*in) + ret;
+			*in = VIRTIO_BLK_S_OK;
+		} else {
+			wlen = sizeof(*in);
+			*in = VIRTIO_BLK_S_IOERR;
+		}
+	}
+
+	add_used(dev->vq, head, wlen);
+	return 1;
+}
+
+/*
+ * This is the thread which actually services the I/O.
+ *
+ * @_dev is the struct device of the block device. The thread handles
+ * requests until none is left, then sleeps on vblk->cond until
+ * handle_virtblk_output() signals new work. The loop ends once
+ * glo_stopcpus is set.
+ */
+static void *blk_io_thread(void *_dev)
+{
+	struct device *dev = _dev;
+	struct vblk_info *vblk = dev->priv;
+	sigset_t sigs;
+
+	/*
+	 * block signals: SIGINT, SIGUSR1
+	 * pthread_sigmask() is the per-thread interface; the effect of
+	 * sigprocmask() is unspecified in a multithreaded process.
+	 */
+	sigemptyset(&sigs);
+	sigaddset(&sigs, SIGINT);
+	sigaddset(&sigs, SIGUSR1);
+	pthread_sigmask(SIG_BLOCK, &sigs, NULL);
+
+	while (!glo_stopcpus) {
+		while (service_io(dev)) {
+			/* It did some work, so trigger the irq. */
+			trigger_irq(dev->vq);
+		}
+		pthread_testcancel();
+		pthread_mutex_lock(&vblk->mutex);
+		/* See if there was a request coming in, otherwise sleep */
+		if (dev->vq->vring.avail->idx == dev->vq->last_avail_idx)
+			pthread_cond_wait(&vblk->cond, &vblk->mutex);
+		pthread_mutex_unlock(&vblk->mutex);
+		pthread_testcancel();
+	}
+	pthread_exit(NULL);
+	return NULL;
+}
+
+/*
+ * When the Guest submits some I/O, we just need to wake the I/O thread.
+ * The signal is sent with vblk->mutex held, the same mutex blk_io_thread()
+ * holds while it checks for pending requests before going to sleep.
+ */
+static void handle_virtblk_output(struct virtqueue *vq)
+{
+ struct vblk_info *vblk = vq->dev->priv;
+ int ret;
+
+ pthread_mutex_lock(&vblk->mutex);
+ ret = pthread_cond_signal(&vblk->cond);
+ pthread_mutex_unlock(&vblk->mutex);
+ if (ret)
+ report_it("could not signal I/O thread, error: %d", ret);
+}
+
+
+/* activate callback of the block device: nothing to do, reports success */
+static int blk_handle_activate(struct device *dev)
+{
+ return 0;
+}
+
+/*
+ * Create the virtio block device for @vblk: set up device and virtqueue,
+ * open the backing file (read-only as fallback), publish capacity,
+ * segment limit and block size in the device config and start the
+ * service thread. Any failure terminates the program.
+ */
+static void virtio_blk_setup(struct vblk_info *vblk)
+{
+	struct device *dev;
+	/* zeroed: fields we do not fill in must not expose stack content */
+	struct virtio_blk_config conf = { 0 };
+	struct stat stat;
+
+	dev = new_device(VIRTIO_ID_BLOCK);
+	dev->activate = blk_handle_activate;
+
+	add_virtqueue(dev, VIRTQUEUE_NUM, handle_virtblk_output);
+
+	dev->priv = vblk;
+
+	vblk->fd = open(vblk->filename, O_RDWR|O_LARGEFILE);
+	if (vblk->fd == -1) {
+		log("trying to open %s readonly", vblk->filename);
+		vblk->fd = open(vblk->filename, O_RDONLY | O_LARGEFILE);
+		add_feature(dev, VIRTIO_BLK_F_RO);
+	}
+	if (vblk->fd == -1)
+		err(1, "Failed to open block device %s", vblk->filename);
+
+	vblk->len = lseek64(vblk->fd, 0, SEEK_END);
+	if (vblk->len == (off64_t) -1)
+		err(1, "Failed to determine size of %s", vblk->filename);
+
+	if (fstat(vblk->fd, &stat))
+		err(1, "Failed to stat %s", vblk->filename);
+	add_feature(dev, VIRTIO_BLK_F_BARRIER);
+	conf.capacity = vblk->len / 512;
+	add_feature(dev, VIRTIO_BLK_F_SEG_MAX);
+	add_feature(dev, VIRTIO_BLK_F_BLK_SIZE);
+	/* a request also needs one descriptor each for header and status */
+	conf.seg_max = VIRTQUEUE_NUM - 2;
+	conf.blk_size = stat.st_blksize;
+
+	set_config(dev, sizeof(conf), &conf);
+
+	pthread_cond_init(&vblk->cond, NULL);
+	pthread_mutex_init(&vblk->mutex, NULL);
+	if (pthread_create(&vblk->thread, NULL, blk_io_thread, dev))
+		err(1, "Creating clone for blk");
+}
+
+
+
+/*
+ * Add new device by name
+ *
+ * @parm has the form "file=<name>" (name limited to 254 characters); the
+ * device is queued on vblk_list and set up later by virtio_block_init().
+ * A malformed @parm or a failed allocation terminates the program.
+ */
+void virtio_block_add(char *parm)
+{
+	struct vblk_info *vblk;
+
+	char filename[255];
+
+	if (sscanf(parm, " file = %254s ", filename) != 1) {
+		printf("Error in %s. Use -drive file=<xxx>\n", parm);
+		exit(1);
+	}
+
+	vblk = malloc(sizeof(*vblk));
+	if (!vblk)
+		err(1, "Failed to allocate block device %s", filename);
+	vblk->filename = malloc(strlen(filename) + 1);
+	if (!vblk->filename)
+		err(1, "Failed to allocate block device %s", filename);
+	strcpy(vblk->filename, filename);
+	list_add_end(&vblk->head, &vblk_list);
+
+}
+
+/* initialize all devices that were queued on vblk_list by virtio_block_add() */
+void virtio_block_init(void)
+{
+ struct vblk_info *vblk;
+
+ list_iterate(vblk, &vblk_list, head)
+ virtio_blk_setup(vblk);
+}
+
diff -ruN empty/drivers/virtio.c kuli/drivers/virtio.c
--- empty/drivers/virtio.c 1970-01-01 01:00:00.000000000 +0100
+++ kuli/drivers/virtio.c 2008-06-04 13:29:17.000000000 +0200
@@ -0,0 +1,400 @@
+/*
+ * virtio, based on Rusty Russel's descriptor based approach
+ * Copyright IBM Corp. 2007, 2008
+ * Authors: Martin Peschke <mp3@de.ibm.com>
+ * Christian Borntraeger <borntraeger@de.ibm.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License(GPL)
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <limits.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+
+#include <linux/virtio_ring.h>
+#include <linux/virtio_config.h>
+#include <linux/kvm.h>
+#include <asm/kvm_virtio.h>
+
+#include <virtio.h>
+
+/* The list of Guest devices, based on command line arguments. */
+static struct list devices;
+/* start of the page holding the device descriptors (see new_dev_desc) */
+static __u8 *descriptors;
+
+/* end of the memory handed out so far; get_pages() maps new pages here and advances it */
+static unsigned long guest_limit;
+/* NOTE(review): not referenced in the part of the file visible here */
+static unsigned long guest_max;
+
+/* convert a guest physical address into a pointer by adding glo_origin */
+static inline void *from_guest_phys(unsigned long addr)
+{
+ return (void *) glo_origin + addr;
+}
+
+/* inverse of from_guest_phys(): offset of @addr relative to glo_origin */
+static inline unsigned long to_guest_phys(const void *addr)
+{
+ return (addr - (void *) glo_origin);
+}
+
+/*
+ * The device virtqueue descriptors are followed by feature bitmasks.
+ * Returns the address right behind the num_vq kvm_vqconfig entries that
+ * follow the device descriptor.
+ */
+static __u8 *device_features(struct device *dev)
+{
+ return (__u8 *)(dev->desc + 1)
+ + dev->desc->num_vq * sizeof(struct kvm_vqconfig);
+}
+
+/*
+ * The config space starts behind the feature area, which takes
+ * 2 * feature_len bytes.
+ */
+static __u8 *device_configspace(const struct device *dev)
+{
+ return (void *)(dev->desc + 1)
+ + dev->desc->num_vq * sizeof(struct kvm_vqconfig)
+ + dev->desc->feature_len * 2;
+}
+
+/*
+ * Return the index of the descriptor chained behind descriptor @i, or
+ * vq->vring.num if @i is the last one of its chain. A next index outside
+ * of the ring terminates the program.
+ */
+static unsigned next_desc(struct virtqueue *vq, unsigned int i)
+{
+	unsigned int successor;
+
+	/* no NEXT flag: end of chain */
+	if ((vq->vring.desc[i].flags & VRING_DESC_F_NEXT) == 0)
+		return vq->vring.num;
+
+	successor = vq->vring.desc[i].next;
+	/* Make sure compiler knows to grab that: we don't want it changing! */
+	wmb();
+
+	if (successor >= vq->vring.num)
+		errx(1, "Desc next is %u", successor);
+
+	return successor;
+}
+
+/* This looks in the virtqueue for the first available buffer, and converts
+ * it to an iovec for convenient access. Since descriptors consist of some
+ * number of output then some number of input descriptors, it's actually two
+ * iovecs, but we pack them into one and note how many of each there were.
+ *
+ * @iov must have room for vq->vring.num entries.
+ *
+ * This function returns the descriptor number found, or vq->vring.num (which
+ * is never a valid descriptor number) if none was found. Inconsistent ring
+ * contents terminate the program. */
+unsigned get_vq_desc(struct virtqueue *vq,
+		     struct iovec iov[],
+		     unsigned int *out_num, unsigned int *in_num)
+{
+	unsigned int i, head;
+
+	if ((__u16)(vq->vring.avail->idx - vq->last_avail_idx) > vq->vring.num)
+		errx(1, "Guest moved used index from %u to %u",
+		     vq->last_avail_idx, vq->vring.avail->idx);
+
+	if (vq->vring.avail->idx == vq->last_avail_idx)
+		return vq->vring.num;
+
+	head = vq->vring.avail->ring[vq->last_avail_idx++ % vq->vring.num];
+
+	if (head >= vq->vring.num)
+		errx(1, "Guest says index %u is available", head);
+
+	*out_num = *in_num = 0;
+
+	i = head;
+	do {
+		/* More descriptors than the ring has implies a descriptor
+		 * loop. Check before storing, so that iov[] is never
+		 * written beyond its vq->vring.num entries. */
+		if (*out_num + *in_num >= vq->vring.num)
+			errx(1, "Looped descriptor");
+
+		if (vq->vring.desc[i].addr > guest_limit ||
+		    vq->vring.desc[i].addr + vq->vring.desc[i].len
+		    > guest_limit) {
+			errx(1, "invalid descriptor %d addr: %lX len: %X",
+			     i, vq->vring.desc[i].addr,
+			     vq->vring.desc[i].len);
+		}
+		iov[*out_num + *in_num].iov_len = vq->vring.desc[i].len;
+		iov[*out_num + *in_num].iov_base =
+			from_guest_phys(vq->vring.desc[i].addr);
+
+		if (vq->vring.desc[i].flags & VRING_DESC_F_WRITE)
+			(*in_num)++;
+		else {
+			if (*in_num)
+				errx(1, "Descriptor has out after in");
+			(*out_num)++;
+		}
+	} while ((i = next_desc(vq, i)) != vq->vring.num);
+
+	return head;
+}
+
+/*
+ * Put the descriptor chain starting at @head into the used ring, @len
+ * being the number of bytes written to it. The used index is advanced
+ * only after the entry itself has been written.
+ */
+void add_used(struct virtqueue *vq, unsigned int head, int len)
+{
+	unsigned int slot = vq->vring.used->idx % vq->vring.num;
+
+	vq->vring.used->ring[slot].id = head;
+	vq->vring.used->ring[slot].len = len;
+	wmb();
+	vq->vring.used->idx++;
+}
+
+/* This actually sends the interrupt for this virtqueue, unless the guest
+ * asked for no interrupts through the avail ring flags. */
+void trigger_irq(struct virtqueue *vq)
+{
+	struct kvm_s390_interrupt kvmint;
+
+	if (!(vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) {
+		kvmint.type = KVM_S390_INT_VIRTIO;
+		kvmint.parm = 0;
+		/* the token tells the guest which virtqueue is meant */
+		kvmint.parm64 = vq->config->token;
+
+		kvm_s390_interrupt(NULL, &kvmint);
+	}
+}
+
+/* Convenience wrapper: add_used() followed by trigger_irq(). */
+void add_used_and_trigger(struct virtqueue *vq,
+			  unsigned int head, int len)
+{
+	add_used(vq, head, len);
+	trigger_irq(vq);
+}
+
+/* Get some more pages for a device: maps @num fresh pages at the current
+ * guest_limit and moves guest_limit behind them. Returns the start of the
+ * new area, or NULL if the mapping failed. */
+void *get_pages(unsigned int num)
+{
+	void *base = (void *) guest_limit;
+	void *mapped;
+
+	mapped = mmap(base, getpagesize()*num,
+		      PROT_READ | PROT_WRITE | PROT_EXEC,
+		      MAP_FIXED | MAP_SHARED | MAP_ANONYMOUS,
+		      -1, 0);
+	if (mapped == MAP_FAILED) {
+		log("Failed to allocate memory");
+		return NULL;
+	}
+
+	log("map guest memory at %p (pfn: %lu)", base,
+	    ((unsigned long) base) / getpagesize());
+	guest_limit += num * getpagesize();
+	return base;
+}
+
+/*
+ * Bring @dev back to its initial state: call its deactivate callback (if
+ * any), clear the status byte, clear the second half of the feature area
+ * and wipe all of its virtqueues.
+ */
+static void __reset_device(struct device *dev)
+{
+ struct virtqueue *vq;
+
+ if (dev->deactivate)
+ dev->deactivate(dev);
+ dev->desc->status = 0;
+
+ /* presumably the half holding the features acknowledged by the guest - TODO confirm */
+ memset(device_features(dev) + dev->desc->feature_len, 0,
+ dev->desc->feature_len);
+
+ for (vq = dev->vq; vq; vq = vq->next) {
+ /* vring.desc is the start of the ring memory, so this clears the whole ring */
+ memset(vq->vring.desc, 0,
+ vring_size(vq->vring.num, getpagesize()));
+ vq->last_avail_idx = 0;
+ }
+}
+
+/*
+ * Reset the device whose descriptor is located at guest address @addr;
+ * nothing happens if no device matches.
+ */
+static void virtio_reset_device(unsigned long addr)
+{
+ struct device *dev;
+
+ list_iterate(dev, &devices, head) {
+ if (from_guest_phys(addr) == dev->desc) {
+ __reset_device(dev);
+ return;
+ }
+ }
+}
+
+/* This is the generic routine we call when the Guest notifies us about
+ * @addr (the original comment names LHCALL_NOTIFY, presumably inherited
+ * from lguest). If @addr is the address of a virtqueue, its output
+ * handler is called; otherwise @addr is treated as a string to print. */
+static void virtio_handle_notify(unsigned long addr)
+{
+ struct device *dev;
+ struct virtqueue *vq;
+
+ list_iterate(dev, &devices, head) {
+ for (vq = dev->vq; vq; vq = vq->next) {
+ if (vq->config->address != addr)
+ continue;
+
+ /* status 0: the device has not been set up by the guest yet */
+ if (dev->desc->status == 0) {
+ warnx("%p gave early output", dev->desc);
+ return;
+ }
+
+ if (vq->handle_output)
+ vq->handle_output(vq);
+ return;
+ }
+ }
+ /* Early console write is done using notify on a nul-terminated string
+ * in Guest memory. */
+ if (addr >= guest_limit)
+ errx(1, "Bad NOTIFY %#lx", addr);
+
+ /* strnlen keeps the read below guest_limit even without a terminator */
+ write(STDOUT_FILENO, from_guest_phys(addr),
+ strnlen(from_guest_phys(addr), guest_limit - addr));
+
+}
+
+/*
+ * Called when the guest reports a status change for the descriptor at addr.
+ * If that descriptor belongs to a device and its status has
+ * VIRTIO_CONFIG_S_DRIVER set, the device's activate hook is run and its
+ * result returned; otherwise ENODEV is returned.
+ */
+static int virtio_handle_set_status(unsigned long addr)
+{
+	struct device *d;
+
+	list_iterate(d, &devices, head) {
+		if (from_guest_phys(addr) != d->desc)
+			continue;
+		if (!(d->desc->status & VIRTIO_CONFIG_S_DRIVER))
+			continue;
+		return d->activate(d);
+	}
+	return ENODEV;
+}
+
+/*
+ * Carve a new "struct kvm_device_desc" of the given type out of the
+ * descriptor page and return a pointer to it.
+ *
+ * The first descriptor goes to the start of the page; later ones are placed
+ * right behind the config space of devices.next, which is the device added
+ * most recently because new_device() inserts at the list head.  Running past
+ * the end of the page is fatal.
+ */
+struct kvm_device_desc *new_dev_desc(__u16 type)
+{
+	struct kvm_device_desc tmpl = { .type = type };
+	void *slot = descriptors;
+
+	if (!list_is_empty(&devices)) {
+		struct device *last;
+
+		last = list_get(devices.next, struct device, head);
+		slot = device_configspace(last) + last->desc->config_len;
+	}
+
+	if (slot + sizeof(tmpl) > (void *) descriptors + getpagesize())
+		errx(1, "Too many devices");
+
+	return memcpy(slot, &tmpl, sizeof(tmpl));
+}
+
+/*
+ * Each device descriptor is followed by the description of its virtqueues.
+ * This appends one virtqueue with num_descs ring entries to dev.
+ *
+ * The ring itself lives in freshly mapped pages from get_pages(); its
+ * kvm_vqconfig entry is written directly behind the already existing ones,
+ * which is why no feature bits or config space may have been added yet
+ * (asserted below).  handle_output is run when the guest notifies this
+ * queue; if it is NULL the used ring is flagged VRING_USED_F_NO_NOTIFY.
+ *
+ * Allocation failures are fatal.
+ */
+void add_virtqueue(struct device *dev, unsigned int num_descs,
+		   void (*handle_output)(struct virtqueue *me))
+{
+	unsigned int pages;
+	struct virtqueue **i, *vq;
+	void *p;
+
+	vq = malloc(sizeof(*vq));
+	if (!vq)
+		err(1, "Allocating virtqueue");
+
+	pages = (vring_size(num_descs, getpagesize()) + getpagesize() - 1)
+		/ getpagesize();
+	p = get_pages(pages);
+	/* get_pages() reports failure with NULL; do not build a ring there. */
+	if (!p)
+		errx(1, "No memory for virtqueue ring");
+
+	vq->next = NULL;
+	vq->last_avail_idx = 0;
+	vq->dev = dev;
+
+	vring_init(&vq->vring, num_descs, p, getpagesize());
+
+	assert(dev->desc->config_len == 0 && dev->desc->feature_len == 0);
+
+	vq->config = (struct kvm_vqconfig *) device_configspace(dev);
+	vq->config->num = num_descs;
+	vq->config->address = to_guest_phys(p);
+
+	dev->desc->num_vq++;
+
+	/* Append to the end of the device's singly linked queue list. */
+	for (i = &dev->vq; *i; i = &(*i)->next);
+	*i = vq;
+
+	vq->handle_output = handle_output;
+
+	if (!handle_output)
+		vq->vring.used->flags = VRING_USED_F_NO_NOTIFY;
+}
+
+/*
+ * Set feature bit number bit in the device's feature bytes, which start
+ * right behind the descriptor and its num_vq kvm_vqconfig entries.
+ * feature_len is grown when the bit lies beyond the current length; that is
+ * only allowed while no config space has been set (asserted).
+ */
+void add_feature(struct device *dev, unsigned bit)
+{
+	const unsigned byte = bit / CHAR_BIT;
+	__u8 *features;
+
+	if (byte >= dev->desc->feature_len) {
+		assert(dev->desc->config_len == 0);
+		dev->desc->feature_len = byte + 1;
+	}
+
+	features = (__u8 *)(dev->desc + 1)
+		+ dev->desc->num_vq * sizeof(struct kvm_vqconfig);
+
+	features[byte] |= (1 << (bit % CHAR_BIT));
+}
+
+/*
+ * Copy len bytes of configuration into the device's config space and record
+ * the length in its descriptor.  Exceeding the descriptor page is fatal.
+ * This only works for the last device, but that's OK because that's how we
+ * use it.
+ */
+void set_config(struct device *dev, unsigned len, const void *conf)
+{
+	if (device_configspace(dev) + len <= descriptors + getpagesize()) {
+		memcpy(device_configspace(dev), conf, len);
+		dev->desc->config_len = len;
+		return;
+	}
+	errx(1, "Too many devices");
+}
+
+/*
+ * Create a new device of the given type: allocate the bookkeeping
+ * structure, obtain a descriptor via new_dev_desc() and put the device at
+ * the head of the global device list.
+ *
+ * The structure is zero-initialised so that priv and the activate/deactivate
+ * hooks start out NULL; __reset_device() only tests deactivate for NULL, and
+ * not every driver sets it.  Allocation failure is fatal.
+ */
+struct device *new_device(__u16 type)
+{
+	struct device *dev = calloc(1, sizeof(*dev));
+
+	if (!dev)
+		err(1, "Allocating device");
+
+	/* Must happen before list_add(): new_dev_desc() places the new
+	 * descriptor behind the device currently at the list head. */
+	dev->desc = new_dev_desc(type);
+	dev->vq = NULL;
+
+	list_add(&dev->head, &devices);
+	return dev;
+}
+
+/*
+ * Entry point for the kvm hypercall.
+ *
+ * Guest register 1 selects the function and register 2 carries the guest
+ * address argument (offset by glo_origin before use).  Register 2 is set to
+ * 0 for the three known functions and to -EINVAL for anything else, then
+ * the registers are written back.  Always returns 0.
+ */
+int handle_kvm_hypercall(struct vcpu *cpu)
+{
+	unsigned long mem;
+	struct kvm_regs regs;
+
+	kvm_get_regs(cpu, &regs);
+	mem = regs.gprs[2] + glo_origin;
+	switch (regs.gprs[1]) {
+	case KVM_S390_VIRTIO_RESET:
+		virtio_reset_device(mem);
+		regs.gprs[2] = 0;
+		break;
+	case KVM_S390_VIRTIO_NOTIFY:
+		virtio_handle_notify(mem);
+		regs.gprs[2] = 0;
+		break;
+	case KVM_S390_VIRTIO_SET_STATUS:
+		/* The handler's return value is not passed on to the guest. */
+		virtio_handle_set_status(mem);
+		regs.gprs[2] = 0;
+		break;
+	default:
+		regs.gprs[2] = -EINVAL;
+		break;
+	}
+	kvm_set_regs(cpu, &regs);
+	return 0;
+}
+
+/* Reset callback: resets every registered virtio device.  Always returns 0. */
+static int virtio_reset_handler(void)
+{
+	struct device *cur;
+
+	list_iterate(cur, &devices, head) {
+		__reset_device(cur);
+	}
+	return 0;
+}
+
+/* Registration record handed to register_reset_handler() by virtio_init(). */
+static struct reset_call_register virtio_reset_register = {
+	.handler = virtio_reset_handler,
+};
+
+/*
+ * Set up the virtio core: device pages start directly above guest memory
+ * (guest_limit) and may extend DEVICE_PAGES pages further (guest_max).  The
+ * first device page becomes the descriptor page.  Also registers the virtio
+ * reset handler.  Failing to map the descriptor page is fatal.
+ */
+void virtio_init(void)
+{
+	guest_limit = glo_memsize;
+	guest_max = glo_memsize + DEVICE_PAGES*getpagesize();
+	list_init(&devices);
+	descriptors = get_pages(1);
+	if (!descriptors)
+		errx(1, "No memory for device descriptor page");
+	register_reset_handler(&virtio_reset_register);
+}
diff -ruN empty/drivers/virtio_console.c kuli/drivers/virtio_console.c
--- empty/drivers/virtio_console.c 1970-01-01 01:00:00.000000000 +0100
+++ kuli/drivers/virtio_console.c 2008-06-04 13:29:17.000000000 +0200
@@ -0,0 +1,176 @@
+/*
+ * Copyright IBM Corp. 2008
+ * Authors: Christian Borntraeger <borntraeger@de.ibm.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License(GPL)
+ */
+#include <err.h>
+#include <stdlib.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <string.h>
+#include <termios.h>
+
+#include <kuli.h>
+#include <virtio.h>
+#include <linux/virtio_ring.h>
+#include <linux/virtio_console.h>
+
+/* State of the single virtio console. */
+struct vcon_info
+{
+ /* descriptor of the host terminal (opened in virtio_console_init) */
+ int fd;
+
+ /* service thread */
+ /* thread is also used as a "thread already started" flag */
+ pthread_t thread;
+ /* cond/mutex are used by handle_to_guest() to wake the service thread */
+ pthread_cond_t cond;
+ pthread_mutex_t mutex;
+
+};
+
+/* The one console instance, allocated by virtio_console_init(). */
+static struct vcon_info *vcon;
+
+/*
+ * Output queue handler: write every buffer chain the guest has queued to the
+ * console descriptor and hand it back with the writev() result as length.
+ * A chain that contains input buffers is fatal.
+ */
+static void handle_from_guest(struct virtqueue *vq)
+{
+	struct iovec iov[vq->vring.num];
+	unsigned int head, out, in;
+
+	/* Keep getting output buffers from the Guest until we run out. */
+	for (;;) {
+		int written;
+
+		head = get_vq_desc(vq, iov, &out, &in);
+		if (head == vq->vring.num)
+			break;
+		if (in)
+			errx(1, "Input buffers in output queue?");
+		written = writev(vcon->fd, iov, out);
+		add_used_and_trigger(vq, head, written);
+	}
+}
+
+/*
+ * One round of console input: block in poll() until the console descriptor
+ * is readable, take the next buffer chain from the device's first virtqueue,
+ * fill it with readv() and hand it back to the guest.
+ *
+ * Returns 1 if a buffer was filled, 0 if the guest had no buffer queued.
+ * Output buffers in this queue and read errors other than EINTR are fatal.
+ */
+static int vcon_work(struct device *dev)
+{
+	unsigned int head, in_num, out_num;
+	int len;
+	struct iovec iov[dev->vq->vring.num];
+	struct pollfd pfd;
+
+	/* wait until input arrives */
+	pfd.fd = vcon->fd;
+	pfd.events = POLLIN;
+	pfd.revents = 0;
+	poll(&pfd, 1, -1);
+
+	head = get_vq_desc(dev->vq, iov, &out_num, &in_num);
+	if (head == dev->vq->vring.num)
+		return 0;
+	else if (out_num)
+		errx(1, "Output buffers in console input queue?");
+
+retry:
+	len = readv(vcon->fd, iov, in_num);
+	if (len < 0) {
+		if (errno==EINTR)
+			goto retry;
+		err(1, "reading console");
+	}
+
+	/* Tell the Guest about the new input. */
+	add_used_and_trigger(dev->vq, head, len);
+
+	/* All good. */
+	return 1;
+}
+
+
+/*
+ * This is the thread which actually services the I/O.
+ *
+ * It sleeps on vcon->cond while the guest has queued nothing new (avail
+ * index equals last_avail_idx) and otherwise calls vcon_work() until that
+ * reports no more buffers.  The loop ends once glo_stopcpus is set.
+ */
+static void *vcon_io_thread(void *_dev)
+{
+	struct device *dev = _dev;
+
+	while (!glo_stopcpus) {
+		pthread_mutex_lock(&vcon->mutex);
+		/* Condition variables may wake up spuriously, so re-check the
+		 * predicate after every wakeup; a stop request still gets
+		 * through. */
+		while (!glo_stopcpus &&
+		       dev->vq->vring.avail->idx == dev->vq->last_avail_idx)
+			pthread_cond_wait(&vcon->cond, &vcon->mutex);
+		pthread_mutex_unlock(&vcon->mutex);
+		while (vcon_work(dev))
+			;
+		pthread_testcancel();
+	}
+	pthread_exit(NULL);
+	return NULL;
+}
+
+
+/*
+ * Notify handler of the console input queue: wake the I/O thread by
+ * signalling vcon->cond under vcon->mutex.  A failing signal is reported via
+ * report_it().
+ */
+static void handle_to_guest(struct virtqueue *vq)
+{
+	int rc;
+
+	(void) vq;	/* the queue itself is not needed here */
+
+	pthread_mutex_lock(&vcon->mutex);
+	rc = pthread_cond_signal(&vcon->cond);
+	pthread_mutex_unlock(&vcon->mutex);
+
+	if (rc)
+		report_it("could not signal I/O thread, error: %d", rc);
+}
+
+/*
+ * Activate hook of the console: start the I/O thread unless vcon->thread
+ * already records one.  Always returns 0; failure to create the thread is
+ * fatal.
+ */
+static int console_handle_activate(struct device *dev)
+{
+	int rc;
+
+	if (vcon->thread)
+		return 0;
+
+	rc = pthread_create(&vcon->thread, NULL, vcon_io_thread, dev);
+	if (rc) {
+		/* pthread_create() returns the error number instead of
+		 * setting errno, which is what err() prints. */
+		errno = rc;
+		err(1, "Creating clone for console failed");
+	}
+	return 0;
+}
+
+/* Terminal settings as found by console_init() before it changed them. */
+static struct termios oldterm;
+
+/* atexit() hook installed by console_init(): put the saved settings back. */
+static void restore_oldterm(void)
+{
+	tcsetattr(vcon->fd, TCSANOW, &oldterm);
+}
+
+
+/*
+ * Switch the console terminal to the mode the guest expects and arrange for
+ * the old settings to be restored at exit.  Does nothing if the current
+ * settings cannot be read.
+ */
+static void console_init(void)
+{
+	struct termios term;
+
+	if (tcgetattr(vcon->fd, &oldterm) != 0)
+		return;
+
+	term = oldterm;
+	/*
+	 * ISIG:   disable signals like ^C
+	 * ICANON: non-canonical mode (don't wait for end of line etc.)
+	 * ECHO:   disable echo, it is done in the guest
+	 */
+	term.c_lflag &= ~(ISIG | ICANON | ECHO);
+	tcsetattr(vcon->fd, TCSANOW, &term);
+
+	/* restore old console on exit */
+	atexit(restore_oldterm);
+}
+
+
+/*
+ * Create the virtio console device: open /dev/tty, register a device with
+ * two virtqueues (first: input to the guest, second: output from the
+ * guest), adjust the terminal mode and prepare the synchronisation objects
+ * used by the I/O thread.  Any failure is fatal.
+ */
+void virtio_console_init(void)
+{
+	struct device *dev;
+
+	/* Zeroed on purpose: console_handle_activate() tests vcon->thread to
+	 * find out whether the I/O thread has been started. */
+	vcon = calloc(1, sizeof(*vcon));
+	if (!vcon)
+		err(1, "Error allocation console");
+
+	vcon->fd = open("/dev/tty", O_RDWR);
+	/* open() reports failure with -1; 0 is a valid descriptor. */
+	if (vcon->fd < 0)
+		err(1, "Failed to open console device /dev/tty");
+	dev = new_device(VIRTIO_ID_CONSOLE);
+	dev->priv = vcon;
+	dev->activate = console_handle_activate;
+
+	add_virtqueue(dev, VIRTQUEUE_NUM, handle_to_guest);
+	add_virtqueue(dev, VIRTQUEUE_NUM, handle_from_guest);
+
+	console_init();
+
+	/* prepare for I/O thread */
+	pthread_cond_init(&vcon->cond, NULL);
+	pthread_mutex_init(&vcon->mutex, NULL);
+}
+
diff -ruN empty/drivers/virtio_net.c kuli/drivers/virtio_net.c
--- empty/drivers/virtio_net.c 1970-01-01 01:00:00.000000000 +0100
+++ kuli/drivers/virtio_net.c 2008-06-03 14:25:17.000000000 +0200
@@ -0,0 +1,235 @@
+/*
+ * virtio, based on Rusty Russel's descriptor based approach
+ * Copyright IBM Corp. 2007,2008
+ * Authors: Christian Borntraeger <borntraeger@de.ibm.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License(GPL)
+ */
+
+#include <string.h>
+#include <pthread.h>
+#include <errno.h>
+#include <err.h>
+#include <fcntl.h>
+#include <netinet/in.h>
+#include <net/if.h>
+#include <linux/sockios.h>
+#include <unistd.h>
+#include <linux/if_tun.h>
+#include <signal.h>
+#include <sys/ioctl.h>
+
+#include <kuli.h>
+#include <list.h>
+#include <virtio.h>
+
+#include <linux/virtio_ring.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_net.h>
+
+/* All network devices requested via virtio_net_add(), in request order. */
+static struct list vnet_list = EMPTY_LIST(vnet_list);
+
+/* Per-device state of one virtio network device. */
+struct vnet_info
+{
+ /* link in vnet_list */
+ struct list head;
+
+ /* name of the host tap interface (heap copy made by virtio_net_add) */
+ char *ifname;
+ /* descriptor of /dev/net/tun, opened by create_tap_device() */
+ int fd;
+
+ /* service thread */
+ /* thread doubles as a "thread is running" flag (0 = not running) */
+ pthread_t thread;
+ /* cond/mutex are used by handle_from_host() to wake the service thread */
+ pthread_cond_t cond;
+ pthread_mutex_t mutex;
+};
+
+/*
+ * Transmit queue handler: for every buffer chain queued by the guest, check
+ * that the first element is a struct virtio_net_hdr, write the remaining
+ * elements to the tap descriptor (retrying on EINTR) and hand the chain back
+ * with the writev() result as length.  Input buffers in this queue are
+ * fatal.
+ */
+static void handle_net_output(struct virtqueue *vq)
+{
+	struct vnet_info *vnet = vq->dev->priv;
+	struct iovec iov[vq->vring.num];
+	unsigned int head, out, in;
+	int len;
+
+	for (;;) {
+		head = get_vq_desc(vq, iov, &out, &in);
+		if (head == vq->vring.num)
+			break;
+		if (in)
+			errx(1, "Input buffers in output queue?");
+		/* Only validates the header element; its content is unused. */
+		(void)convert(&iov[0], struct virtio_net_hdr);
+		do {
+			len = writev(vnet->fd, iov+1, out-1);
+		} while (len == -1 && errno == EINTR);
+		add_used_and_trigger(vq, head, len);
+	}
+}
+
+
+
+/*
+ * Handle one packet coming in from the tun device to our Guest.
+ *
+ * Takes the next buffer chain from the device's first virtqueue, fills in a
+ * virtio_net_hdr (no flags, no GSO) in the first element, reads the packet
+ * into the remaining elements and hands the chain back.
+ *
+ * Returns 1 after delivering a packet, 0 if the guest had no buffer queued.
+ */
+static int work_tun(struct device *dev)
+{
+	struct vnet_info *vnet = dev->priv;
+	struct iovec iov[dev->vq->vring.num];
+	struct virtio_net_hdr *hdr;
+	unsigned int head, in_num, out_num;
+	int len;
+
+	head = get_vq_desc(dev->vq, iov, &out_num, &in_num);
+	if (head == dev->vq->vring.num)
+		return 0;
+	if (out_num)
+		errx(1, "Output buffers in network recv queue?");
+
+	hdr = convert(&iov[0], struct virtio_net_hdr);
+	hdr->flags = 0;
+	hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE;
+
+	/* Read until data arrives; only EINTR is retried. */
+	for (;;) {
+		len = readv(vnet->fd, iov+1, in_num-1);
+		if (len > 0)
+			break;
+		if (errno != EINTR)
+			err(1, "reading network");
+	}
+
+	add_used_and_trigger(dev->vq, head, sizeof(*hdr) + len);
+
+	/* All good. */
+	return 1;
+}
+
+/*
+ * This is the thread which actually services the I/O.
+ *
+ * SIGINT and SIGUSR1 are blocked in this thread.  The thread sleeps on the
+ * device's condition variable while the guest has queued no new receive
+ * buffers and otherwise runs work_tun() until that reports no more buffers.
+ * The loop ends once glo_stopcpus is set.
+ */
+static void *net_io_thread(void *_dev)
+{
+	struct device *dev = _dev;
+	sigset_t sigs;
+	struct vnet_info *vnet = dev->priv;
+
+	/* block signals: SIGINT, SIGUSR1.  pthread_sigmask() is the call
+	 * defined for threads; sigprocmask() is unspecified in a
+	 * multi-threaded process. */
+	sigemptyset(&sigs);
+	sigaddset(&sigs, SIGINT);
+	sigaddset(&sigs, SIGUSR1);
+	pthread_sigmask(SIG_BLOCK, &sigs, NULL);
+
+	while (!glo_stopcpus) {
+		pthread_mutex_lock(&vnet->mutex);
+		/* Condition variables may wake up spuriously, so re-check the
+		 * predicate after every wakeup; a stop request still gets
+		 * through. */
+		while (!glo_stopcpus &&
+		       dev->vq->vring.avail->idx == dev->vq->last_avail_idx)
+			pthread_cond_wait(&vnet->cond, &vnet->mutex);
+		pthread_mutex_unlock(&vnet->mutex);
+		while (work_tun(dev))
+			;
+		pthread_testcancel();
+	}
+	pthread_exit(NULL);
+	return NULL;
+}
+
+/*
+ * Deactivate hook: stop the device's I/O thread, if one is running, and
+ * re-create the condition variable and mutex for the next activation.
+ * Always returns 0.
+ */
+static int net_handle_deactivate(struct device *dev)
+{
+	struct vnet_info *vnet = dev->priv;
+
+	/* Resets also reach devices that were never activated; there is no
+	 * thread to cancel or join then (0 means "not running", as in
+	 * net_handle_activate()). */
+	if (!vnet->thread)
+		return 0;
+
+	pthread_cancel(vnet->thread);
+	pthread_join(vnet->thread, NULL);
+	vnet->thread = 0;
+	/* NOTE(review): presumably re-initialised because the cancelled thread
+	 * may have ended while holding the mutex inside pthread_cond_wait(). */
+	pthread_cond_init(&vnet->cond, NULL);
+	pthread_mutex_init(&vnet->mutex, NULL);
+	return 0;
+}
+
+/*
+ * Activate hook: start the device's I/O thread unless one is already
+ * recorded in vnet->thread.  Always returns 0; failure to create the thread
+ * is fatal.
+ */
+static int net_handle_activate(struct device *dev)
+{
+	struct vnet_info *vnet = dev->priv;
+	int rc;
+
+	if (vnet->thread)
+		return 0;
+
+	rc = pthread_create(&vnet->thread, NULL, net_io_thread, dev);
+	if (rc) {
+		/* pthread_create() returns the error number instead of
+		 * setting errno, which is what err() prints. */
+		errno = rc;
+		err(1, "Creating clone for net");
+	}
+	return 0;
+}
+
+/*
+ * Notify handler of the receive queue: wake the device's I/O thread by
+ * signalling its condition variable under its mutex.  A failing signal is
+ * reported via report_it().
+ */
+static void handle_from_host(struct virtqueue *vq)
+{
+	struct vnet_info *net = vq->dev->priv;
+	int rc;
+
+	pthread_mutex_lock(&net->mutex);
+	rc = pthread_cond_signal(&net->cond);
+	pthread_mutex_unlock(&net->mutex);
+
+	if (rc)
+		report_it("could not signal I/O thread, error: %d", rc);
+}
+
+/*
+ * tries to get a tap device via /dev/net/tun
+ *
+ * Opens the tun control device and attaches it to the tap interface named
+ * vnet->ifname (IFF_TAP | IFF_NO_PI).  The descriptor is stored in
+ * vnet->fd.  A name that does not fit into ifr_name, a failing open() or a
+ * failing TUNSETIFF is fatal; the result of TUNSETNOCSUM is not checked.
+ */
+static void create_tap_device(struct vnet_info *vnet)
+{
+	struct ifreq ifr;
+
+	/* ifr_name holds IFNAMSIZ bytes including the terminating nul. */
+	if (strlen(vnet->ifname) >= IFNAMSIZ)
+		errx(1, "Interface name %s too long", vnet->ifname);
+
+	vnet->fd = open("/dev/net/tun", O_RDWR);
+	/* open() reports failure with -1; 0 is a valid descriptor. */
+	if (vnet->fd < 0)
+		err(1, "Failed to open tun device %s", vnet->ifname);
+	memset(&ifr, 0, sizeof(ifr));
+	ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
+	strcpy(ifr.ifr_name, vnet->ifname);
+	if (ioctl(vnet->fd, TUNSETIFF, &ifr) != 0)
+		err(1, "configuring /dev/net/tun");
+	ioctl(vnet->fd, TUNSETNOCSUM, 1);
+}
+
+/*
+ * Creates a virtual network device based on an open file descriptor.
+ *
+ * Registers a VIRTIO_ID_NET device with two virtqueues (first: receive,
+ * notified via handle_from_host; second: transmit, handled by
+ * handle_net_output), attaches vnet as private data and installs the
+ * activate/deactivate hooks.
+ */
+static void setup_net_common(struct vnet_info *vnet)
+{
+	struct device *netdev = new_device(VIRTIO_ID_NET);
+
+	netdev->priv = vnet;
+	netdev->activate = net_handle_activate;
+	netdev->deactivate = net_handle_deactivate;
+
+	pthread_cond_init(&vnet->cond, NULL);
+	pthread_mutex_init(&vnet->mutex, NULL);
+
+	add_virtqueue(netdev, VIRTQUEUE_NUM, handle_from_host);
+	add_virtqueue(netdev, VIRTQUEUE_NUM, handle_net_output);
+}
+
+/* Bring up one requested network device: log it, open its tap interface
+ * and register the virtio device. */
+static void virtio_net_setup(struct vnet_info *vnet)
+{
+	log("Adding networking device to %s", vnet->ifname);
+
+	create_tap_device(vnet);
+	setup_net_common(vnet);
+}
+
+/*
+ * Add new device by name.
+ *
+ * parm is expected in the form "tap,ifname=<name>".  On success a new
+ * vnet_info carrying a copy of the interface name is appended to vnet_list;
+ * the device itself is created later by virtio_net_init().  A malformed
+ * parameter or an allocation failure terminates the program.
+ */
+void virtio_net_add(char *parm)
+{
+	struct vnet_info *vnet;
+
+	char ifname[255];
+
+	if (sscanf(parm," tap,ifname = %254s ", ifname) != 1) {
+		printf("Error in %s. Use -net tap,ifname=tap<xx>\n", parm);
+		exit(1);
+	}
+
+	/* Zeroed on purpose: vnet->thread == 0 means "no I/O thread". */
+	vnet = calloc(1, sizeof(*vnet));
+	if (!vnet)
+		err(1, "Allocating network device");
+	vnet->ifname = malloc(strlen(ifname) + 1);
+	if (!vnet->ifname)
+		err(1, "Allocating network device");
+	strcpy(vnet->ifname, ifname);
+	list_add_end(&vnet->head, &vnet_list);
+
+}
+
+/* Set up every network device collected by virtio_net_add(). */
+void virtio_net_init(void)
+{
+	struct vnet_info *cur;
+
+	list_iterate(cur, &vnet_list, head) {
+		virtio_net_setup(cur);
+	}
+}
+
diff -ruN empty/drivers/virtio_rng.c kuli/drivers/virtio_rng.c
--- empty/drivers/virtio_rng.c 1970-01-01 01:00:00.000000000 +0100
+++ kuli/drivers/virtio_rng.c 2008-05-19 11:17:04.000000000 +0200
@@ -0,0 +1,76 @@
+/*
+ * virtio, based on Rusty Russel's descriptor based approach
+ * Copyright IBM Corp. 2008
+ * Authors: Christian Borntraeger <borntraeger@de.ibm.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License(GPL)
+ */
+#include <err.h>
+#include <stdlib.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <string.h>
+
+#include <kuli.h>
+#include <virtio.h>
+#include <linux/virtio_ring.h>
+#include <linux/virtio_rng.h>
+
+/* Private data of the virtio random number device. */
+struct vrng_info
+{
+ /* descriptor of /dev/urandom, opened by virtio_rng_init() */
+ int fd;
+};
+
+/*
+ * Notify handler of the rng queue: take one buffer chain from the guest,
+ * fill it from the random device and hand it back with the number of bytes
+ * read.  Output buffers in this queue are fatal.
+ */
+static void handle_rng_input(struct virtqueue *vq)
+{
+	int len;
+	unsigned int head, in_num, out_num;
+	struct iovec iov[vq->vring.num];
+	struct vrng_info *vrng = vq->dev->priv;
+
+	/* First we need a buffer from the Guests's virtqueue. */
+	head = get_vq_desc(vq, iov, &out_num, &in_num);
+
+	/* Nothing queued: we are called again on the next notify. */
+	if (head == vq->vring.num)
+		return;
+
+	if (out_num)
+		errx(1, "Output buffers in rng?");
+
+	len = readv(vrng->fd, iov, in_num);
+	if (len < 0) {
+		/* Never report -1 as a length: the guest would take it for a
+		 * huge amount of valid data.  Return the buffer empty. */
+		warn("reading random device");
+		len = 0;
+	}
+	/* Tell the Guest about the new input. */
+	add_used_and_trigger(vq, head, len);
+}
+
+/* Activate hook of the rng device: nothing to do, always returns 0. */
+static int rng_handle_activate(struct device *dev)
+{
+	(void) dev;	/* unused */
+	return 0;
+}
+
+
+/*
+ * Create the virtio random number device backed by /dev/urandom.
+ * Allocation or open failures are fatal.
+ */
+void virtio_rng_init(void)
+{
+	struct device *dev;
+	struct vrng_info *vrng;
+
+	vrng = malloc(sizeof(*vrng));
+	if (!vrng)
+		err(1, "Allocating random device");
+
+	vrng->fd = open("/dev/urandom", O_RDONLY);
+	/* open() reports failure with -1; 0 is a valid descriptor. */
+	if (vrng->fd < 0)
+		err(1, "Failed to open random device /dev/urandom");
+	dev = new_device(VIRTIO_ID_RNG);
+	dev->priv = vrng;
+	dev->activate = rng_handle_activate;
+
+	/* The device has one virtqueue, where the Guest places inbufs. */
+	add_virtqueue(dev, VIRTQUEUE_NUM, handle_rng_input);
+}
+
diff -ruN empty/include/kuli.h kuli/include/kuli.h
--- empty/include/kuli.h 1970-01-01 01:00:00.000000000 +0100
+++ kuli/include/kuli.h 2008-06-05 14:10:45.000000000 +0200
@@ -0,0 +1,108 @@
+/*
+ * kuli core function definitions
+ * Copyright IBM Corp. 2007,2008
+ * Author(s): Carsten Otte <cotte@de.ibm.com>
+ * Christian Borntraeger <borntraeger@de.ibm.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License(GPL)
+ */
+
+#ifndef __KULI_H
+#define __KULI_H
+
+#include <linux/kvm.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include <vcpu.h>
+#include <list.h>
+
+
+/* Callback type taking the affected vcpu. */
+typedef int (*intercept_handler_t)(struct vcpu *cpu);
+/* Reset callback type, see struct reset_call_register.  NOTE(review): the
+ * empty parameter list leaves the arguments unspecified in C; "(void)" may
+ * have been intended. */
+typedef int (*reset_handler_t)();
+
+/* Offset added to guest addresses before the host dereferences them;
+ * currently zero. */
+#define glo_origin 0ul
+
+/* message macros */
+#define screen(f,a...) printf("%s: " f "\n" , __FUNCTION__ , ## a)
+#define log(f,a...) { \
+ fprintf(glo_logfile, "%s: " f "\n" , __FUNCTION__ , ## a); \
+}
+#define report_it(f,a...) {fprintf(stderr, "internal error>" f "< in function %s\nplease report to cotte@de.ibm.com\n", ## a, __FUNCTION__); exit(1);}
+
+/* init/bootloader.c */
+unsigned long load_ipl(void);
+
+/* init/dump.c */
+extern unsigned long glo_memsize;
+void create_dumpfile(void);
+
+/* init/kvm.c */
+extern int glo_kvm_vm_fd;
+void kvm_create_vm();
+void kvm_run(struct vcpu *cpu);
+void kvm_create_vcpu(struct vcpu* cpu);
+void kvm_s390_initial_reset(struct vcpu *cpu);
+void kvm_s390_interrupt(struct vcpu *cpu, struct kvm_s390_interrupt *kvmint);
+void kvm_s390_set_initial_psw(struct vcpu *cpu, psw_t psw);
+void kvm_s390_store_status(struct vcpu *cpu, unsigned long addr);
+void kvm_get_regs(struct vcpu *cpu, struct kvm_regs *regs);
+void kvm_set_regs(struct vcpu *cpu, struct kvm_regs *regs);
+
+/* init/message.c */
+extern FILE* glo_logfile;
+extern pthread_mutex_t glo_logfile_lock;
+void init_logging();
+void close_logging();
+
+/* init/options.c */
+extern char* glo_kernel;
+extern char* glo_bootfrom;
+extern char* glo_initrd;
+extern char* glo_parmfile;
+extern char* glo_append;
+extern char* glo_sysdump;
+void parse_options(int argc, char *argv[]);
+
+/* core/cpu.c */
+extern unsigned int glo_numcpu;
+extern int glo_stopcpus;
+
+/* core/diag.c */
+#define DIAG_KVM_HYPERCALL 0x500 /* kvm virtio hypercall */
+
+/* core/instruction.c */
+#define OPCODE_MAJOR_DIAG 0x83
+#define OPCODE_MAJOR_SIGP 0xae
+#define OPCODE_MAJOR_PRIV 0xb2
+#define PRIV_SCLP_CALL 0x20
+void init_instruction();
+int handle_instruction(struct vcpu *cpu);
+
+/* core/intercept.c */
+int enter_pgmcheck(struct vcpu *cpu, uint16_t code);
+int handle_intercept(struct vcpu *cpu);
+
+/* core/reset.c */
+/* One reset callback registration, passed to register_reset_handler(). */
+struct reset_call_register {
+ /* list linkage */
+ struct list head;
+ /* the callback itself */
+ reset_handler_t handler;
+};
+void register_reset_handler(struct reset_call_register *);
+int handle_reset(struct vcpu *cpu);
+
+/* core/sclp.c */
+int sclp_service_call(struct vcpu *cpu);
+
+/* devices */
+void init_devices(void);
+void virtio_init(void);
+void virtio_net_init(void);
+void virtio_block_init(void);
+void virtio_rng_init(void);
+void virtio_console_init(void);
+void virtio_net_add(char *parm);
+void virtio_block_add(char *parm);
+
+#endif
diff -ruN empty/include/list.h kuli/include/list.h
--- empty/include/list.h 1970-01-01 01:00:00.000000000 +0100
+++ kuli/include/list.h 2008-05-13 14:34:38.000000000 +0200
@@ -0,0 +1,70 @@
+/*
+ * kuli list implementation header file
+ * Copyright IBM Corp. 2007,2008
+ * Author: Carsten Otte <cotte@de.ibm.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License(GPL)
+ */
+
+#ifndef __LIST_H
+#define __LIST_H
+/* Node of a circular doubly linked list.  The same type serves as list head
+ * and, embedded in another structure, as list member. */
+struct list {
+ struct list *next, *prev;
+};
+
+/* Static initialiser for an empty list head: both links point to itself. */
+#define EMPTY_LIST(list) { &(list), &(list) }
+
+/* Insert entry directly after head, i.e. at the front of the list. */
+static inline void list_add(struct list *entry, struct list *head)
+{
+	struct list *first = head->next;
+
+	entry->next = first;
+	first->prev = entry;
+	head->next = entry;
+	entry->prev = head;
+}
+
+/* Insert entry directly before head, i.e. at the end of the list. */
+static inline void list_add_end(struct list *entry, struct list *head)
+{
+	struct list *last = head->prev;
+
+	entry->prev = last;
+	last->next = entry;
+	head->prev = entry;
+	entry->next = head;
+}
+
+/* Unlink entry from its list and leave it pointing at itself. */
+static inline void list_del(struct list *entry)
+{
+	struct list *after = entry->next;
+	struct list *before = entry->prev;
+
+	after->prev = before;
+	before->next = after;
+	entry->next = entry;
+	entry->prev = entry;
+}
+
+/* Return 1 if both links of head point back at head, 0 otherwise. */
+static inline int list_is_empty(struct list *head)
+{
+	return (head->next == head) && (head->prev == head);
+}
+
+/* Turn head into an empty list: both links point at head itself. */
+static inline void list_init(struct list *head)
+{
+	head->prev = head;
+	head->next = head;
+}
+
+/* Convert a pointer to the struct list embedded as "member" in a "type"
+ * back into a pointer to the enclosing "type" by subtracting the member's
+ * offset. */
+#define list_get(entry, type, member) \
+ ((type *)((char *)(entry)-(unsigned long)(&((type *)0)->member)))
+
+/* Loop i over all containers linked into the list at head via "member".
+ * The current element must not be unlinked inside the loop body, because
+ * its next pointer is read afterwards. */
+#define list_iterate(i, head, member) \
+ for (i = list_get((head)->next, typeof(*i), member); \
+ &i->member != (head); \
+ i = list_get(i->member.next, typeof(*i), member))
+
+/* Like list_iterate, but the successor is fetched into n before the body
+ * runs, so the body may unlink the current element. */
+#define list_iterate_safe(i, head, member, n) \
+ for (i = list_get((head)->next, typeof(*i), member), \
+ n = list_get(i->member.next, typeof(*i), member);\
+ &i->member != (head); \
+ i = n, \
+ n = list_get(n->member.next, typeof(*n), member))
+
+#endif
diff -ruN empty/include/vcpu.h kuli/include/vcpu.h
--- empty/include/vcpu.h 1970-01-01 01:00:00.000000000 +0100
+++ kuli/include/vcpu.h 2008-06-03 14:31:35.000000000 +0200
@@ -0,0 +1,45 @@
+/*
+ * guest cpu header file
+ * Copyright IBM Corp. 2007,2008
+ * Author: Carsten Otte <cotte@de.ibm.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License(GPL)
+ */
+
+#ifndef __VCPU_H
+#define __VCPU_H
+
+#include <pthread.h>
+#include <stdint.h>
+
+#include <linux/kvm.h>
+#include <asm/ptrace.h>
+
+/* Values for vcpu.cpu_status. */
+#define CPU_STATUS_RUNNING 0
+#define CPU_STATUS_STOPPED 1
+#define CPU_STATUS_PANIC 2
+#define CPU_STATUS_RESTART 3
+
+/*
+ * Per-cpu state of one guest cpu.
+ * NOTE(review): the "(local)" / "(lock)" tags presumably mean "only used by
+ * the cpu's own thread" and "protected by cpu_lock" - confirm against cpu.c.
+ */
+struct vcpu {
+ int cpufd; /* vcpu file descriptor (local) */
+ struct kvm_run* run; /* kvm running cpu structure (local) */
+ unsigned int cpuno; /* this cpu number (local) */
+ pthread_mutex_t cpu_lock; /* lock */
+ int cpu_status; /* cpu status (lock) */
+ pthread_cond_t cpu_sleeping;/* sleeping condition for cpu (lock) */
+};
+
+/*
+ * Replace the two-bit field at bit positions 44-45 of run->s390_sieic.mask
+ * with the low two bits of cc (presumably the condition code of the guest
+ * PSW mask - TODO confirm).
+ */
+static inline void setcc(struct vcpu *cpu, unsigned long cc)
+{
+	cpu->run->s390_sieic.mask =
+		(cpu->run->s390_sieic.mask & ~(3ul << 44)) | ((cc & 3) << 44);
+}
+
+int handle_sigp(struct vcpu *cpu);
+void wait_for_cpusdown();
+void launch_cpu_ipl(uint64_t address);
+void init_cpus();
+void cpu_restart(unsigned int cpunum);
+void stop_all_cpus(void);
+int cpu_store_status_address(int cpuno, uint64_t address);
+#endif
diff -ruN empty/include/virtio.h kuli/include/virtio.h
--- empty/include/virtio.h 1970-01-01 01:00:00.000000000 +0100
+++ kuli/include/virtio.h 2008-06-03 16:16:56.000000000 +0200
@@ -0,0 +1,93 @@
+/*
+ * virtio header file, based on lguests descriptor based approach
+ * Copyright IBM Corp. 2007,2008
+ * Author: Christian Borntraeger <borntraeger@de.ibm.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License(GPL)
+ */
+
+#ifndef __VIRTIO_H
+#define __VIRTIO_H
+
+#include <err.h>
+#include <string.h>
+#include <asm/kvm_virtio.h>
+#include <linux/virtio_ring.h>
+#include <sys/uio.h>
+#include <kuli.h>
+#include <list.h>
+
+/* We can have up to 256 pages for devices. */
+#define DEVICE_PAGES 256
+/* This will occupy 2 pages: it must be a power of 2. */
+#define VIRTQUEUE_NUM 128
+
+/* The device structure describes a single device. */
+struct device
+{
+ /* Link in the global device list. */
+ struct list head;
+
+ /* This device's descriptor, as mapped into the Guest. */
+ struct kvm_device_desc *desc;
+
+ /* Any queues attached to this device (singly linked via virtqueue.next) */
+ struct virtqueue *vq;
+
+ /* Device-specific data. */
+ void *priv;
+
+ /* Callbacks for activation/deactivation/reset */
+ /* deactivate is optional: the reset code only calls it when non-NULL. */
+ int (*deactivate)(struct device *device);
+ /* activate is called without a NULL check when the guest sets the
+  * driver status bit. */
+ int (*activate)(struct device *device);
+};
+
+/* The virtqueue structure describes a queue attached to a device. */
+struct virtqueue
+{
+ /* Next queue of the same device, NULL for the last one. */
+ struct virtqueue *next;
+
+ /* Which device owns me. */
+ struct device *dev;
+
+ /* The configuration for this queue. */
+ struct kvm_vqconfig *config;
+
+ /* The actual ring of buffers. */
+ struct vring vring;
+
+ /* Last available index we saw. */
+ __u16 last_avail_idx;
+
+ /* The routine to call when the Guest pings us (may be NULL). */
+ void (*handle_output)(struct virtqueue *me);
+};
+
+/* Write barrier.  As written this is an empty asm with a "memory" clobber,
+ * i.e. it only keeps the compiler from reordering memory accesses; no
+ * barrier instruction is emitted.  NOTE(review): the expansion already ends
+ * in ';', so "wmb();" yields an extra empty statement. */
+#define wmb() asm volatile("" ::: "memory");
+
+/*
+ * Check that the iovec element describes exactly size bytes at an address
+ * aligned to align and return its base pointer.  Any mismatch is fatal;
+ * name is only used in the error message.
+ */
+static inline void *_convert(struct iovec *iov, size_t size, size_t align,
+			     const char *name)
+{
+	void *base = iov->iov_base;
+
+	if (iov->iov_len != size)
+		errx(1, "Bad iovec size %zu for %s", iov->iov_len, name);
+	if (((unsigned long) base % align) != 0)
+		errx(1, "Bad alignment %p for %s", base, name);
+	return base;
+}
+/* Typed front end: convert(iov, struct foo) yields a checked struct foo *. */
+#define convert(iov, type) \
+	((type *)_convert((iov), sizeof(type), __alignof__(type), #type))
+
+unsigned get_vq_desc(struct virtqueue *vq, struct iovec iov[],
+ unsigned int *out_num, unsigned int *in_num);
+void add_used(struct virtqueue *vq, unsigned int head, int len);
+void trigger_irq(struct virtqueue *vq);
+void add_used_and_trigger(struct virtqueue *vq, unsigned int head, int len);
+void *get_pages(unsigned int num);
+struct kvm_device_desc *new_dev_desc(__u16 type);
+void add_feature(struct device *dev, unsigned bit);
+void set_config(struct device *dev, unsigned len, const void *conf);
+void add_virtqueue(struct device *dev, unsigned int num_descs,
+ void (*handle_output)(struct virtqueue *me));
+struct device *new_device(__u16 type);
+int handle_kvm_hypercall(struct vcpu *cpu);
+#endif
+
diff -ruN empty/init/bootloader.c kuli/init/bootloader.c
--- empty/init/bootloader.c 1970-01-01 01:00:00.000000000 +0100
+++ kuli/init/bootloader.c 2008-06-03 16:12:34.000000000 +0200
@@ -0,0 +1,289 @@
+#define _LARGEFILE64_SOURCE
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <kuli.h>
+
+/*
+ * Guest addresses used when booting from files (-kernel/-initrd/...):
+ * where the kernel image, the kernel parameter string, the ramdisk and the
+ * parameter file are placed, and where the ramdisk's start address and size
+ * are stored for the kernel.
+ */
+#define KERN_IMAGE_START 0x010000UL
+#define KERN_PARM_AREA 0x010480UL
+#define INITRD_START 0x800000UL
+#define INITRD_PARM_START 0x010408UL
+#define INITRD_PARM_SIZE 0x010410UL
+#define PARMFILE_START 0x001000UL
+
+/* Descriptor of the boot disk image while load_from_disk() is running;
+ * used by read_from(). */
+static int fd;
+
+/*
+ * On-disk pointer to a run of blocks.  As used by the parser in this file,
+ * the data starts at byte offset blockno * size and is size * (blockct + 1)
+ * bytes long; blockno == 0 marks an unused entry.
+ */
+struct scsi_blockptr {
+ uint64_t blockno;
+ uint16_t size;
+ uint16_t blockct;
+ uint8_t reserved[4];
+} __attribute__ ((packed));
+
+/* Values of component_entry.component_type.  Note that, as written, this
+ * declares a variable named component_entry_type of an anonymous enum type
+ * rather than an enum tag. */
+enum {
+ component_execute = 0x01,
+ component_load = 0x02
+} component_entry_type;
+
+/*
+ * One entry of a program's component list.  For component_load entries,
+ * data points to a segment table and address.load_address is the guest
+ * address to load to; the component_execute entry ends the list and carries
+ * the start address.
+ */
+struct component_entry {
+ struct scsi_blockptr data;
+ uint8_t pad[7];
+ uint8_t component_type;
+ union {
+ uint64_t load_address;
+ uint64_t load_psw;
+ } address;
+} __attribute((packed));
+
+/* Values of component_header.type; only component_header_ipl is accepted by
+ * parse_program().  As above, this declares a variable rather than a tag. */
+enum {
+ component_header_ipl = 0x00,
+ component_header_dump = 0x01
+} component_header_type;
+
+/* Header preceding a program's component entries; magic must be "zIPL". */
+struct component_header {
+ uint8_t magic[4];
+ uint8_t type;
+ uint8_t reserved[27];
+} __attribute((packed));
+
+/* Size of the program table block and the number of block pointers that
+ * fit into it.  The expression is parenthesised so that it expands safely
+ * inside larger expressions. */
+#define PROGRAM_TABLE_BLOCK_SIZE 512
+#define PROGRAM_TABLE_ENTRIES (PROGRAM_TABLE_BLOCK_SIZE / \
+			       sizeof(struct scsi_blockptr))
+
+/* The program table: one block full of block pointers.  Entry 0 holds the
+ * magic, entry 1 the default program (see parse_program_table()). */
+struct program_table {
+ struct scsi_blockptr entries[PROGRAM_TABLE_ENTRIES];
+} __attribute__ ((packed));
+
+/* Boot record at offset 0 of the disk: magic "zIPL" plus a pointer to the
+ * program table. */
+struct mbr {
+ char magic[4];
+ uint32_t version_id;
+ uint8_t reserved[8];
+ struct scsi_blockptr blockptr;
+} __attribute__ ((packed));
+
+/*
+ * Read size bytes at absolute position offset of the boot disk (the
+ * file-scope fd) into buffer.
+ *
+ * Returns the result of read(), or -1 if seeking failed.
+ */
+static int read_from(void *buffer, off_t offset, size_t size)
+{
+	/* Compare in the offset type: an int would truncate positions beyond
+	 * 2GB and could even turn a valid position into a false -1. */
+	if (lseek64(fd, offset, SEEK_SET) == (off64_t) -1)
+		return -1;
+	return read(fd, buffer, size);
+}
+
+/* Terminate with exit status 2 unless the four bytes at tmp are "zIPL". */
+static void check_magic(void *tmp)
+{
+	static const char magic[] = "zIPL";
+
+	if (memcmp(tmp, magic, 4) == 0)
+		return;
+
+	printf("Wrong magic\n");
+	exit(2);
+}
+
+/* FIXME?: zipl treats virtio as scsi, so it uses 512 byte for sector size */
+#define PHY_BLOCK_SIZE 512
+#define max_entries (PHY_BLOCK_SIZE / sizeof(struct scsi_blockptr))
+
+/*
+ * Load the data described by one segment table block into guest memory.
+ *
+ * Each used entry is read to *address, which is advanced by the number of
+ * bytes read.  Returns 0 when an unused entry (blockno == 0) ends the
+ * table; otherwise returns the blockno stored in the last entry, which the
+ * caller uses as the next segment table block.
+ *
+ * Entries that would be loaded outside guest memory, and short reads,
+ * terminate the program.
+ */
+static uint64_t parse_segment_elements(struct scsi_blockptr *bprs,
+					uint64_t *address)
+{
+	unsigned d;
+	int len;
+
+	for (d = 0; d < max_entries - 1; d++) {
+		uint64_t want;
+
+		if (*address > glo_memsize) {
+			screen("bootmap points to illegal addresses");
+			exit(1);
+		}
+		if (bprs[d].blockno == 0)
+			return 0;
+
+		/* 64-bit arithmetic: size * (blockct + 1) can exceed INT_MAX. */
+		want = (uint64_t) bprs[d].size * (bprs[d].blockct + 1);
+		/* The whole chunk, not just its start, must lie inside guest
+		 * memory. */
+		if (want > glo_memsize - *address) {
+			screen("bootmap points to illegal addresses");
+			exit(1);
+		}
+		len = read_from((void *)(*address + glo_origin),
+				bprs[d].blockno * bprs[d].size,
+				want);
+		if (len < 0 || (uint64_t) len != want)
+			report_it("Read was not completed");
+		*address += len;
+	}
+	return bprs[max_entries - 1].blockno;
+
+}
+
+/*
+ * Load one component: walk the chain of segment table blocks starting at
+ * blockno and load everything they describe to consecutive guest addresses
+ * starting at address.  A segment table block that cannot be read
+ * completely terminates the program.
+ */
+static void parse_segment_table(uint64_t blockno, uint64_t address)
+{
+	struct scsi_blockptr bprs[max_entries + 1];
+
+	do {
+		/* parse_segment_elements() looks at entries 0..max_entries-1,
+		 * so at least that much must really have been read. */
+		if (read_from(bprs, blockno * 512, sizeof(bprs)) <
+		    (int) (max_entries * sizeof(bprs[0])))
+			report_it("Read was not completed");
+		blockno = parse_segment_elements(bprs, &address);
+	} while (blockno);
+}
+
+/*
+ * Load the program blockptr points to: verify its header, load every
+ * component_load entry via parse_segment_table() and return the address
+ * stored in the terminating component_execute entry.  Anything but an IPL
+ * header, or a list not ending in component_execute, terminates the program.
+ */
+static uint64_t parse_program(struct scsi_blockptr *blockptr)
+{
+	struct component_header header;
+	struct component_entry entry;
+	uint64_t offset = blockptr->blockno * 512;
+
+	read_from(&header, offset, sizeof(header));
+	check_magic(&header.magic);
+
+	if (header.type == component_header_ipl) {
+		screen("found IPL record\n");
+	} else if (header.type == component_header_dump) {
+		screen("found dump header");
+		exit(1);
+	} else {
+		screen("Unknown header");
+		exit(1);
+	}
+
+	/* The component entries follow the header back to back. */
+	for (offset += sizeof(header); ; offset += sizeof(entry)) {
+		read_from(&entry, offset, sizeof(entry));
+		if (entry.component_type != component_load)
+			break;
+		parse_segment_table(entry.data.blockno,
+				    entry.address.load_address);
+	}
+
+	if (entry.component_type != component_execute) {
+		screen("No ipl address found\n");
+		exit(1);
+	}
+	return entry.address.load_address;
+}
+
+/*
+ * Read the program table at block blockno, report how many entries it has
+ * and load the default program.  Returns the start address delivered by
+ * parse_program().
+ */
+static uint64_t parse_program_table(uint64_t blockno)
+{
+	struct program_table ptb;
+	unsigned d = 1;
+
+	/* entry 0, holds the magic, entry 1 the default */
+	read_from(&ptb, blockno * 512, sizeof(ptb));
+	check_magic(&ptb.entries[0]);
+
+	/* Find the first unused entry. */
+	while (d < PROGRAM_TABLE_ENTRIES && ptb.entries[d].blockno != 0)
+		d++;
+	screen("Found %d entries in the boot table, starting default", d - 2);
+
+	/* FIXME: menu and config file for choosing boot entry */
+	/* entry 1 is default */
+	return parse_program(&ptb.entries[1]);
+}
+
+/* Read the boot record at offset 0 of the disk, verify its magic and
+ * continue with the program table it points to. */
+static uint64_t parse_mbr(void)
+{
+	struct mbr bootrec;
+
+	log("Reading MBR of disk");
+	read_from(&bootrec, 0, sizeof(bootrec));
+	check_magic(&bootrec.magic);
+
+	return parse_program_table(bootrec.blockptr.blockno);
+}
+
+/*
+ * looks at the program tables written by the boot loader to load
+ * everything which is specified in the bootmap
+ *
+ * Returns the start address masked to 31 bits.  A disk image that cannot
+ * be opened is reported via report_it().
+ */
+static unsigned long load_from_disk(char *filename)
+{
+	uint64_t address;
+
+	fd = open(filename, O_RDONLY);
+	/* Without this check the parser would run on uninitialised buffers
+	 * and fail with a misleading "Wrong magic". */
+	if (fd < 0)
+		report_it("cannot open file %s", filename);
+	address = parse_mbr();
+	close(fd);
+	return address & 0x7fffffff;
+}
+
+/*
+ * Copy the file name, starting at file_offset, to the host address
+ * location in chunks of up to 4096 bytes.  Returns the number of bytes
+ * loaded; open, seek and read errors are reported via report_it().
+ *
+ * The local descriptor deliberately has the same name as the file-scope fd
+ * and hides it inside this function.
+ */
+static unsigned long load_file(char* name, unsigned long location,
+			       off_t file_offset)
+{
+	int fd = open(name, O_RDONLY);
+	ssize_t got;
+	unsigned long total = 0;
+
+	if (fd<0)
+		report_it("cannot open file %s", name);
+
+	if (lseek(fd, file_offset, SEEK_SET) != file_offset)
+		report_it("file error %s", name);
+
+	for (;;) {
+		got = read(fd, (void*)(location + total), 4096);
+		if (got <= 0)
+			break;
+		total += got;
+	}
+	if (got < 0)
+		report_it("read error %s", name);
+
+	close(fd);
+	return total;
+}
+
+/*
+ * load_ipl loads everything from files
+ *
+ * Places the kernel image at KERN_IMAGE_START, optionally a ramdisk (its
+ * start and size are stored at INITRD_PARM_START / INITRD_PARM_SIZE) and
+ * the kernel parameters, taken either from the parameter file or from
+ * glo_append, at KERN_PARM_AREA.  Returns the fixed start address 0x10000.
+ */
+static unsigned long load_from_files(void)
+{
+	unsigned long length;
+	/* Host pointers into guest memory, offset like all other accesses. */
+	unsigned long *rd_start =
+		(unsigned long *)(INITRD_PARM_START + glo_origin);
+	unsigned long *rd_size =
+		(unsigned long *)(INITRD_PARM_SIZE + glo_origin);
+	char *parm_area = (char *)(KERN_PARM_AREA + glo_origin);
+	unsigned long kernel_size;
+
+	kernel_size = load_file(glo_kernel, KERN_IMAGE_START + glo_origin,
+				KERN_IMAGE_START);
+	if (glo_initrd) {
+		*rd_start = INITRD_START;
+		/* increase initrd address if kernel + 1M would overlap */
+		while (kernel_size + 0x100000 > *rd_start)
+			*rd_start += 0x100000;
+		log("loading ramdisk file %s at %lX", glo_initrd, *rd_start);
+		*rd_size = load_file(glo_initrd, *rd_start + glo_origin, 0);
+	}
+	if (glo_parmfile) {
+		log ("loading kernel parameter file %s",glo_parmfile);
+		length = load_file(glo_parmfile, PARMFILE_START + glo_origin,
+				   0);
+		strncpy(parm_area, (char *)(PARMFILE_START + glo_origin),
+			length);
+		/* strncpy() does not terminate the copy when the file has no
+		 * nul byte; without this, whatever the kernel image holds
+		 * behind the copied bytes would become part of the
+		 * parameter string. */
+		parm_area[length] = '\0';
+	} else {
+		if (!glo_append)
+			report_it ("we should have kernel parameters at this " \
+				   "point");
+		log ("using kernel parameters %s",glo_append);
+		strcpy(parm_area, glo_append);
+	}
+	return 0x10000;
+}
+
+/*
+ * depending on the command line this loads kernel, parameters, initrd
+ * from file or disk. The disk must be prepared with the zipl boot loader
+ * program.
+ * Exactly one of -kernel and -bootfrom must have been given.
+ * Returns the IPL address.
+ */
+unsigned long load_ipl(void)
+{
+	const int from_file = (glo_kernel != NULL);
+	const int from_disk = (glo_bootfrom != NULL);
+
+	/* Neither or both sources given. */
+	if (from_file == from_disk) {
+		screen("either -kernel or -bootfrom required, both together"
+		       " are not allowed");
+		exit(1);
+	}
+
+	if (!from_file) {
+		log("Booting from disk \"%s\"", glo_bootfrom);
+		return load_from_disk(glo_bootfrom) & 0x7fffffff;
+	}
+
+	log("Booting from kernel image file \"%s\"", glo_kernel);
+	return load_from_files() & 0x7fffffff;
+}
diff -ruN empty/init/dump.c kuli/init/dump.c
--- empty/init/dump.c 1970-01-01 01:00:00.000000000 +0100
+++ kuli/init/dump.c 2008-06-02 15:56:04.000000000 +0200
@@ -0,0 +1,82 @@
+/*
+ * memory image related functions
+ * Copyright IBM Corp. 2007
+ * Author: Carsten Otte <cotte@de.ibm.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License(GPL)
+ */
+
+#include <linux/kvm.h>
+#include <errno.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <string.h>
+
+#include <kuli.h>
+
+unsigned long glo_memsize;
+
+/*
+ * get_clock returns the current clock value. The s390 "stck" instruction
+ * stores it into the local variable clk, which is handed to the asm both
+ * as memory output and by address. The condition code is clobbered.
+ */
+static inline unsigned long long get_clock (void)
+{
+ unsigned long long clk;
+
+ __asm__("stck 0(%1)" : "=m" (clk) : "a" (&clk) : "cc");
+ return clk;
+}
+
+/*
+ * write_loop writes exactly size bytes starting at data to fd. It
+ * continues after short writes and retries writes that were interrupted
+ * by a signal. Any other write error is fatal: a message is printed and
+ * kuli exits.
+ */
+static inline void write_loop(void *data, size_t size, int fd)
+{
+	const char *cursor = data;
+	size_t remaining = size;
+	ssize_t rc;
+
+	while (remaining > 0) {
+		rc = write(fd, cursor, remaining);
+		if (rc < 0) {
+			/* interrupted before any byte was written: try again */
+			if (errno == EINTR)
+				continue;
+			perror ("cannot write to guest dump file");
+			exit (1);
+		}
+		cursor += rc;
+		remaining -= (size_t)rc;
+	}
+}
+
+/*
+ * create_dumpheader writes an lcrash compatible dump header to fd.
+ * This way we can actually debug suspended machines.
+ * The header is an array of 512 unsigned long words (0x1000 bytes with
+ * 64-bit unsigned long, matching the header_size field); words that are
+ * not listed in the initializer are zero.
+ */
+static void create_dumpheader(int fd)
+{
+	unsigned long header[512]={
+		0xa8190173618f23fdUL,	// dump magic
+		0x0000000300001000UL,	// version/header_size
+		0x0000000400001000UL,	// level/page size
+		glo_memsize,		// 3: mem_size
+		0,			// 4: mem_start
+		glo_memsize,		// 5: mem_end
+		(glo_memsize>>12)<<32,	// 6: num_pages in first 4 byte
+		get_clock(),		// 7: dump time
+		0,			// 8: cpuid
+		0x0000000200000000UL,	// arch=s390x,volume 0
+		0x0000000200000000UL,	// arch=s390x,volume 0
+		glo_memsize,		// 11:real mem size
+	};
+
+	/*
+	 * Write exactly the header array. Using getpagesize() as length would
+	 * read beyond the array on a host whose page size exceeds its size.
+	 */
+	write_loop (header, sizeof(header), fd);
+}
+
+/*
+ * create_dumpfile writes a dump of the guest to the file named by
+ * glo_sysdump: first the dump header, then glo_memsize bytes of guest
+ * memory starting at glo_origin. The file is created or truncated.
+ * If the file cannot be created the problem is logged and no dump is
+ * taken.
+ */
+void create_dumpfile(void)
+{
+	int fd;
+	char *name = glo_sysdump;
+
+	fd = open (name, O_CREAT|O_TRUNC|O_RDWR, S_IRWXU);
+	if (fd < 0) {
+		log ("unable to create dump file %s, errno %d", name, errno);
+		return;
+	}
+	create_dumpheader (fd);
+	/* glo_origin is the address of guest memory, pass it as a pointer */
+	write_loop((void *)glo_origin, glo_memsize, fd);
+	/* a failing close can indicate that the dump did not reach the disk */
+	if (close(fd) < 0)
+		log ("error closing dump file %s, errno %d", name, errno);
+}
diff -ruN empty/init/kvm.c kuli/init/kvm.c
--- empty/init/kvm.c 1970-01-01 01:00:00.000000000 +0100
+++ kuli/init/kvm.c 2008-06-02 15:56:04.000000000 +0200
@@ -0,0 +1,128 @@
+/*
+ * kvm user interface related callbacks
+ * Copyright IBM Corp. 2007
+ * Author: Carsten Otte <cotte@de.ibm.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License(GPL)
+ */
+
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <linux/kvm.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include <kuli.h>
+
+int glo_kvm_vm_fd;
+
+/*
+ * Create the virtual machine: open /dev/kvm, create a VM whose file
+ * descriptor is kept in glo_kvm_vm_fd, map glo_memsize bytes of anonymous
+ * memory at the fixed address glo_origin and register that memory as
+ * slot 0 at guest physical address 0.
+ * Every failure is reported on the screen and terminates kuli.
+ */
+void kvm_create_vm() {
+	struct kvm_userspace_memory_region region;
+	void *guest_mem;
+	int kvm_fd;
+	int vm_fd;
+
+	kvm_fd = open("/dev/kvm", O_RDWR);
+	if (kvm_fd < 0) {
+		screen ("failed to open /dev/kvm in read+write mode");
+		exit(1);
+	}
+
+	vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, NULL);
+	if (vm_fd < 0) {
+		screen ("KVM_CREATE_VM ioctl failed with rc %d", vm_fd);
+		close(kvm_fd);
+		exit(1);
+	}
+	glo_kvm_vm_fd = vm_fd;
+	/* the device node is only needed to create the VM */
+	close(kvm_fd);
+
+	/* map guest memory */
+	guest_mem = mmap((void *) glo_origin, glo_memsize,
+			 PROT_READ | PROT_WRITE | PROT_EXEC,
+			 MAP_FIXED | MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+	if (guest_mem == MAP_FAILED) {
+		screen ("cannot map guest memory");
+		exit (1);
+	}
+
+	region = (struct kvm_userspace_memory_region) {
+		.slot = 0,
+		.flags = 0,
+		.guest_phys_addr = 0,
+		.memory_size = glo_memsize,
+		.userspace_addr = glo_origin,
+	};
+	if (ioctl(glo_kvm_vm_fd, KVM_SET_USER_MEMORY_REGION, &region) < 0) {
+		screen("KVM_SET_USER_MEMORY_REGION failed");
+		exit (1);
+	}
+}
+
+/*
+ * Run the guest cpu with the KVM_RUN ioctl. A run that was interrupted
+ * by a signal is restarted unless glo_stopcpus is set. Any non-zero
+ * result that ends the loop is reported.
+ */
+void kvm_run(struct vcpu *cpu)
+{
+	int rc;
+
+	do {
+		rc = ioctl(cpu->cpufd, KVM_RUN, NULL);
+	} while (rc == -1 && errno == EINTR && !glo_stopcpus);
+
+	if (rc != 0)
+		report_it("sie: rc %d errno is %d", rc, errno);
+}
+
+/*
+ * Create the virtual cpu numbered cpu->cpuno in the VM and store its
+ * file descriptor in cpu->cpufd. Failure terminates kuli.
+ */
+void kvm_create_vcpu(struct vcpu* cpu)
+{
+	int fd = ioctl(glo_kvm_vm_fd, KVM_CREATE_VCPU, cpu->cpuno);
+
+	if (fd >= 0) {
+		cpu->cpufd = fd;
+		return;
+	}
+	screen("KVM_CREATE_VCPU failed for virtual cpu %d with rc %d",
+	       cpu->cpuno, fd);
+	exit(1);
+}
+
+/*
+ * Issue the KVM_S390_INITIAL_RESET ioctl on cpu and report a failure.
+ * Note that the value printed as "rc" in the message is errno.
+ */
+void kvm_s390_initial_reset(struct vcpu *cpu)
+{
+	int rc = ioctl(cpu->cpufd, KVM_S390_INITIAL_RESET, NULL);
+
+	if (rc >= 0)
+		return;
+	report_it("KVM_S390_INITIAL_RESET failed for cpu %d with rc %d",
+		  cpu->cpuno, errno);
+}
+
+/*
+ * Inject the interrupt described by kvmint with the KVM_S390_INTERRUPT
+ * ioctl. With a cpu the ioctl is issued on that cpu's file descriptor,
+ * with cpu == NULL on the VM file descriptor. A failure is reported.
+ */
+void kvm_s390_interrupt(struct vcpu *cpu, struct kvm_s390_interrupt *kvmint)
+{
+	int target_fd = cpu ? cpu->cpufd : glo_kvm_vm_fd;
+
+	if (ioctl(target_fd, KVM_S390_INTERRUPT, kvmint) < 0)
+		report_it("cannot inject interrupt");
+}
+
+/*
+ * Hand psw to the KVM_S390_SET_INITIAL_PSW ioctl of cpu. A non-zero
+ * ioctl result is reported.
+ */
+void kvm_s390_set_initial_psw(struct vcpu *cpu, psw_t psw)
+{
+	int rc = ioctl(cpu->cpufd, KVM_S390_SET_INITIAL_PSW, &psw);
+
+	if (rc != 0)
+		report_it ("cannot set ipl psw");
+}
+
+/*
+ * Issue the KVM_S390_STORE_STATUS ioctl on cpu with addr as argument.
+ * A non-zero ioctl result is reported together with errno.
+ */
+void kvm_s390_store_status(struct vcpu *cpu, unsigned long addr)
+{
+	int rc = ioctl(cpu->cpufd, KVM_S390_STORE_STATUS, addr);
+
+	if (rc != 0)
+		report_it("cannot inject store status request rc %d", errno);
+}
+
+/*
+ * Fetch the registers of cpu into regs with the KVM_GET_REGS ioctl.
+ * A non-zero ioctl result is reported.
+ */
+void kvm_get_regs(struct vcpu *cpu, struct kvm_regs *regs)
+{
+	int rc = ioctl(cpu->cpufd, KVM_GET_REGS, regs);
+
+	if (rc != 0)
+		report_it("call to KVM_GET_REGS failed\n");
+}
+
+/*
+ * Pass regs to the KVM_SET_REGS ioctl of cpu. A non-zero ioctl result
+ * is reported together with errno.
+ */
+void kvm_set_regs(struct vcpu *cpu, struct kvm_regs *regs)
+{
+	int rc = ioctl(cpu->cpufd, KVM_SET_REGS, regs);
+
+	if (rc != 0)
+		report_it("ioctl SET_REGS failed with %d", errno);
+}
diff -ruN empty/init/Makefile kuli/init/Makefile
--- empty/init/Makefile 1970-01-01 01:00:00.000000000 +0100
+++ kuli/init/Makefile 2008-06-02 12:32:27.000000000 +0200
@@ -0,0 +1,12 @@
+# init makefile
+# Copyright IBM Corp. 2007
+# Author: Carsten Otte <cotte@de.ibm.com>
+# This file is licensed under the terms of the GNU General Public License(GPL)
+
+# print a short progress line, then compile the C source into its object
+%.o : %.c ; echo " Compiling " $<; $(CC) $(CFLAGS) -c $<
+
+OBJS := dump.o kvm.o message.o options.o bootloader.o
+
+# all and clean name actions, not files, so never treat them as up to date
+.PHONY: all clean
+
+all: $(OBJS)
+clean:
+	rm -f *.o
diff -ruN empty/init/message.c kuli/init/message.c
--- empty/init/message.c 1970-01-01 01:00:00.000000000 +0100
+++ kuli/init/message.c 2008-06-02 12:32:27.000000000 +0200
@@ -0,0 +1,32 @@
+/*
+ * message logging
+ * Copyright IBM Corp. 2007
+ * Author: Carsten Otte <cotte@de.ibm.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License(GPL)
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <pthread.h>
+
+#include <kuli.h>
+
+FILE* glo_logfile;
+
+/*
+ * Open the log file "kuli.log" for writing (an existing file is
+ * truncated) and publish it in glo_logfile. kuli exits when the file
+ * cannot be opened.
+ */
+void init_logging(void)
+{
+	glo_logfile = fopen ("kuli.log", "w");
+	if (glo_logfile != NULL) {
+		log ("KULI logging initialized");
+		return;
+	}
+	perror ("cannot open log file for writing");
+	exit (1);
+}
+
+/*
+ * close_logging closes the log file referenced by glo_logfile and resets
+ * glo_logfile to NULL afterwards.
+ */
+void close_logging(void)
+{
+ fclose (glo_logfile);
+ glo_logfile = NULL;
+}
diff -ruN empty/init/options.c kuli/init/options.c
--- empty/init/options.c 1970-01-01 01:00:00.000000000 +0100
+++ kuli/init/options.c 2008-06-02 11:29:08.000000000 +0200
@@ -0,0 +1,121 @@
+/*
+ * kuli command line options
+ * Copyright IBM Corp. 2007,2008
+ * Author: Carsten Otte <cotte@de.ibm.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License(GPL)
+ */
+
+#include <unistd.h>
+#include <getopt.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <kuli.h>
+
+char* glo_kernel;
+char* glo_bootfrom;
+char* glo_initrd;
+char* glo_parmfile;
+char* glo_append;
+char* glo_sysdump;
+
+/*
+ * Long command line options understood by kuli. Each entry maps the
+ * option name to the character that parse_options() switches on; every
+ * option except "help" takes an argument.
+ */
+static struct option long_options[] = {
+	{"append",   required_argument, NULL, 'a'},
+	{"bootfrom", required_argument, NULL, 'b'},
+	{"smp",      required_argument, NULL, 'c'},
+	{"drive",    required_argument, NULL, 'd'},
+	{"help",     no_argument,       NULL, 'h'},
+	{"initrd",   required_argument, NULL, 'i'},
+	{"kernel",   required_argument, NULL, 'k'},
+	{"megs",     required_argument, NULL, 'm'},
+	{"net",      required_argument, NULL, 'n'},
+	{"parmfile", required_argument, NULL, 'p'},
+	{"sysdump",  required_argument, NULL, 's'},
+	{NULL, 0, NULL, 0}	/* end of table */
+};
+
+/*
+ * help_and_exit prints the usage message, one screen() call per line,
+ * and terminates kuli with exit status 1. It does not return.
+ */
+static void help_and_exit(void) {
+ screen ("usage: kuli <OPTIONS>");
+ screen ("where <OPTIONS> are:");
+ screen ("-a or -append to set the kernel parameters");
+ screen ("-b or -bootfrom to set the disk image for booting");
+ screen ("-c or -smp to set the amount of cpus");
+ screen ("-d or -drive to set the disk images to be used");
+ screen (" Example: -drive file=/dev/mapper/guestroot");
+ screen ("-h or -help to see this message");
+ screen ("-i or -initrd to set the initrd image");
+ screen ("-k or -kernel to set the kernel image");
+ screen ("-m or -megs to set the guest memory in megs");
+ screen ("-n or -net to set the network connection");
+ screen (" Example: -net tap,ifname=tap2");
+ screen ("-p or -parmfile to set the kernel parmfile");
+ screen ("-s or -sysdump to set the system dump image");
+ exit(1); /* also used for -help, so asking for help exits with status 1 */
+}
+
+/*
+ * Return a freshly allocated copy of optarg for a string option.
+ * c is the option character and string the value the option currently
+ * has: a non-NULL string means the option was already given, which ends
+ * kuli with the usage message. Running out of memory is fatal as well.
+ */
+static char* __fill_string(int c, char *string, char* optarg) {
+	char *copy;
+
+	if (string) {
+		screen ("Option -%c presented twice", c);
+		help_and_exit();
+	}
+	copy = strdup(optarg);
+	if (!copy) {
+		/* never hand a NULL option value to the rest of kuli */
+		screen ("out of memory while storing option -%c", c);
+		exit(1);
+	}
+	return copy;
+}
+
+/*
+ * Parse the decimal number given as argument arg of option c.
+ * Characters following the digits are ignored, but an argument that does
+ * not start with a number ends kuli with the usage message instead of
+ * silently counting as 0.
+ */
+static unsigned long parse_number(int c, const char *arg)
+{
+	char *end;
+	unsigned long value = strtoul(arg, &end, 10);
+
+	if (end == arg) {
+		screen ("Option -%c needs a numeric argument, got: %s", c, arg);
+		help_and_exit();
+	}
+	return value;
+}
+
+/*
+ * parse_options evaluates the command line and stores the result in the
+ * glo_* variables; -net and -drive arguments are handed to the virtio
+ * network and block backends. Guest memory defaults to 256 megabytes.
+ * Unknown options, invalid numbers and leftover non-option arguments
+ * end kuli with the usage message.
+ */
+void parse_options(int argc, char *argv[])
+{
+	unsigned long megs;
+	int c;
+
+	glo_memsize=256<<20;
+
+	while (1) {
+		int this_option_optind = optind ? optind : 1;
+		int option_index = 0;
+
+		c = getopt_long_only(argc, argv, "hc:s:", long_options, &option_index);
+
+		if (c == -1) // no more getopt processable options?
+			break;
+		switch (c) {
+		case 'h':
+			help_and_exit();
+			break; /* not reached, help_and_exit() exits */
+		case 'k':
+			glo_kernel = __fill_string(c, glo_kernel, optarg);
+			break;
+		case 'b':
+			glo_bootfrom = __fill_string(c, glo_bootfrom, optarg);
+			break;
+		case 'i':
+			glo_initrd = __fill_string(c, glo_initrd, optarg);
+			break;
+		case 'p':
+			glo_parmfile = __fill_string(c, glo_parmfile, optarg);
+			break;
+		case 'a':
+			glo_append = __fill_string(c, glo_append, optarg);
+			break;
+		case 's':
+			glo_sysdump = __fill_string(c, glo_sysdump, optarg);
+			break;
+		case 'n':
+			virtio_net_add(optarg);
+			break;
+		case 'd':
+			virtio_block_add(optarg);
+			break;
+		case 'm':
+			megs = parse_number(c, optarg);
+			/* refuse sizes whose byte count does not fit */
+			if (megs > (~0UL >> 20)) {
+				screen ("Memory size too large: %s", optarg);
+				help_and_exit();
+			}
+			glo_memsize = megs << 20;
+			break;
+		case 'c':
+			glo_numcpu = parse_number(c, optarg);
+			break;
+		default:
+			screen ("Option not understood: %s",argv[this_option_optind]);
+			help_and_exit();
+		}
+	}
+	/* anything left over is not an option kuli knows about */
+	if (optind != argc)
+		help_and_exit();
+}
diff -ruN empty/kuli.ld kuli/kuli.ld
--- empty/kuli.ld 1970-01-01 01:00:00.000000000 +0100
+++ kuli/kuli.ld 2008-06-02 11:23:33.000000000 +0200
@@ -0,0 +1,207 @@
+/* Script for -z combreloc: combine and sort reloc sections */
+OUTPUT_FORMAT("elf64-s390", "elf64-s390",
+ "elf64-s390")
+OUTPUT_ARCH(s390:64-bit)
+ENTRY(_start)
+SEARCH_DIR("/usr/s390x-linux/lib"); SEARCH_DIR("/usr/local/lib64"); SEARCH_DIR("/lib64"); SEARCH_DIR("/usr/lib64"); SEARCH_DIR("/usr/lib"); SEARCH_DIR("/usr/local/lib"); SEARCH_DIR("/lib");
+/* Do we need any of these for elf?
+ __DYNAMIC = 0; */
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ PROVIDE (__executable_start = 0x1ff00000000); . = 0x1ff00000000 + SIZEOF_HEADERS;
+ .interp : { *(.interp) }
+ .hash : { *(.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+ .rel.dyn :
+ {
+ *(.rel.init)
+ *(.rel.text .rel.text.* .rel.gnu.linkonce.t.*)
+ *(.rel.fini)
+ *(.rel.rodata .rel.rodata.* .rel.gnu.linkonce.r.*)
+ *(.rel.data .rel.data.* .rel.gnu.linkonce.d.*)
+ *(.rel.tdata .rel.tdata.* .rel.gnu.linkonce.td.*)
+ *(.rel.tbss .rel.tbss.* .rel.gnu.linkonce.tb.*)
+ *(.rel.ctors)
+ *(.rel.dtors)
+ *(.rel.got)
+ *(.rel.sdata .rel.sdata.* .rel.gnu.linkonce.s.*)
+ *(.rel.sbss .rel.sbss.* .rel.gnu.linkonce.sb.*)
+ *(.rel.sdata2 .rel.sdata2.* .rel.gnu.linkonce.s2.*)
+ *(.rel.sbss2 .rel.sbss2.* .rel.gnu.linkonce.sb2.*)
+ *(.rel.bss .rel.bss.* .rel.gnu.linkonce.b.*)
+ }
+ .rela.dyn :
+ {
+ *(.rela.init)
+ *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*)
+ *(.rela.fini)
+ *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*)
+ *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*)
+ *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*)
+ *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*)
+ *(.rela.ctors)
+ *(.rela.dtors)
+ *(.rela.got)
+ *(.rela.sdata .rela.sdata.* .rela.gnu.linkonce.s.*)
+ *(.rela.sbss .rela.sbss.* .rela.gnu.linkonce.sb.*)
+ *(.rela.sdata2 .rela.sdata2.* .rela.gnu.linkonce.s2.*)
+ *(.rela.sbss2 .rela.sbss2.* .rela.gnu.linkonce.sb2.*)
+ *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*)
+ }
+ .rel.plt : { *(.rel.plt) }
+ .rela.plt : { *(.rela.plt) }
+ .init :
+ {
+ KEEP (*(.init))
+ } =0x07070707
+ .plt : { *(.plt) }
+ .text :
+ {
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ } =0x07070707
+ .fini :
+ {
+ KEEP (*(.fini))
+ } =0x07070707
+ PROVIDE (__etext = .);
+ PROVIDE (_etext = .);
+ PROVIDE (etext = .);
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+ .sdata2 : { *(.sdata2 .sdata2.* .gnu.linkonce.s2.*) }
+ .sbss2 : { *(.sbss2 .sbss2.* .gnu.linkonce.sb2.*) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) }
+ /* Adjust the address for the data segment. We want to adjust up to
+ the same address within the page on the next page up. */
+ . = ALIGN(0x1000) + (. & (0x1000 - 1));
+ /* Ensure the __preinit_array_start label is properly aligned. We
+ could instead move the label definition inside the section, but
+ the linker would then create the section even if it turns out to
+ be empty, which isn't pretty. */
+ . = ALIGN(64 / 8);
+ PROVIDE (__preinit_array_start = .);
+ .preinit_array : { *(.preinit_array) }
+ PROVIDE (__preinit_array_end = .);
+ PROVIDE (__init_array_start = .);
+ .init_array : { *(.init_array) }
+ PROVIDE (__init_array_end = .);
+ PROVIDE (__fini_array_start = .);
+ .fini_array : { *(.fini_array) }
+ PROVIDE (__fini_array_end = .);
+ .data :
+ {
+ *(.data .data.* .gnu.linkonce.d.*)
+ SORT(CONSTRUCTORS)
+ }
+ .data1 : { *(.data1) }
+ .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
+ .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
+ .eh_frame : { KEEP (*(.eh_frame)) }
+ .gcc_except_table : { *(.gcc_except_table) }
+ .dynamic : { *(.dynamic) }
+ .ctors :
+ {
+ /* gcc uses crtbegin.o to find the start of
+ the constructors, so we make sure it is
+ first. Because this is a wildcard, it
+ doesn't matter if the user does not
+ actually link against crtbegin.o; the
+ linker won't look for a file to match a
+ wildcard. The wildcard also means that it
+ doesn't matter which directory crtbegin.o
+ is in. */
+ KEEP (*crtbegin*.o(.ctors))
+ /* We don't want to include the .ctor section from
+ the crtend.o file until after the sorted ctors.
+ The .ctor section from the crtend file contains the
+ end of ctors marker and it must be last */
+ KEEP (*(EXCLUDE_FILE (*crtend*.o ) .ctors))
+ KEEP (*(SORT(.ctors.*)))
+ KEEP (*(.ctors))
+ }
+ .dtors :
+ {
+ KEEP (*crtbegin*.o(.dtors))
+ KEEP (*(EXCLUDE_FILE (*crtend*.o ) .dtors))
+ KEEP (*(SORT(.dtors.*)))
+ KEEP (*(.dtors))
+ }
+ .jcr : { KEEP (*(.jcr)) }
+ .got : { *(.got.plt) *(.got) }
+ /* We want the small data sections together, so single-instruction offsets
+ can access them all, and initialized data all before uninitialized, so
+ we can shorten the on-disk segment size. */
+ .sdata :
+ {
+ *(.sdata .sdata.* .gnu.linkonce.s.*)
+ }
+ _edata = .;
+ PROVIDE (edata = .);
+ __bss_start = .;
+ .sbss :
+ {
+ PROVIDE (__sbss_start = .);
+ PROVIDE (___sbss_start = .);
+ *(.dynsbss)
+ *(.sbss .sbss.* .gnu.linkonce.sb.*)
+ *(.scommon)
+ PROVIDE (__sbss_end = .);
+ PROVIDE (___sbss_end = .);
+ }
+ .bss :
+ {
+ *(.dynbss)
+ *(.bss .bss.* .gnu.linkonce.b.*)
+ *(COMMON)
+ /* Align here to ensure that the .bss section occupies space up to
+ _end. Align after .bss to ensure correct alignment even if the
+ .bss section disappears because there are no input sections. */
+ . = ALIGN(64 / 8);
+ }
+ . = ALIGN(64 / 8);
+ _end = .;
+ PROVIDE (end = .);
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /DISCARD/ : { *(.note.GNU-stack) }
+}
+
+
diff -ruN empty/main.c kuli/main.c
--- empty/main.c 1970-01-01 01:00:00.000000000 +0100
+++ kuli/main.c 2008-06-05 14:10:45.000000000 +0200
@@ -0,0 +1,74 @@
+/*
+ * kuli main component
+ * Copyright IBM Corp. 2008
+ * Authors: Carsten Otte <cotte@de.ibm.com>
+ * Christian Borntraeger <cborntra@de.ibm.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License(GPL)
+ */
+
+#include <errno.h>
+#include <signal.h>
+#include <unistd.h>
+
+#include <kuli.h>
+#include <vcpu.h>
+
+
+/*
+ * Signal handler for SIGINT: request a shutdown by setting glo_stopcpus
+ * and stopping all cpus. Being called for any other signal is reported
+ * first.
+ */
+static void shutdown_req(int signal)
+{
+	switch (signal) {
+	case SIGINT:
+		break;
+	default:
+		report_it ("received illegal signal");
+	}
+
+	glo_stopcpus = 1;
+	stop_all_cpus();
+}
+
+/*
+ * Signal handler for SIGUSR1: request a system dump. When no dump file
+ * was named on the command line "kuli.dump" is used. The cpus are
+ * stopped the same way as for a shutdown. Being called for any other
+ * signal is reported first.
+ */
+static void dump_req(int signal)
+{
+	switch (signal) {
+	case SIGUSR1:
+		break;
+	default:
+		report_it ("received illegal signal");
+	}
+
+	if (glo_sysdump == NULL)
+		glo_sysdump = "kuli.dump";
+	glo_stopcpus = 1;
+	stop_all_cpus();
+}
+
+/*
+ * do_ipl performs the initial program load: it creates the VM, sets up
+ * cpus and devices, loads the boot image and starts the guest at the
+ * IPL address returned by load_ipl().
+ * Always returns 0.
+ */
+static int do_ipl(void)
+{
+	uint64_t address;
+
+	log ("perparing for IPL");
+	kvm_create_vm();
+	init_cpus();
+	init_devices();
+
+	address = load_ipl();
+	/* %zX is the size_t conversion; print the 64-bit address as long */
+	log("IPL from address %lX", (unsigned long)address);
+	launch_cpu_ipl(address);
+
+	return 0;
+}
+
+/*
+ * main(): parse the command line, start logging and IPL the guest, then
+ * install the SIGINT (shutdown) and SIGUSR1 (dump) handlers and wait
+ * until the cpus are down. If a dump file is set at that point, a dump
+ * of the guest is written before kuli ends.
+ */
+int main(int argc, char* argv[])
+{
+	parse_options(argc, argv);
+	init_logging();
+	do_ipl();
+
+	signal(SIGINT, shutdown_req);
+	signal(SIGUSR1, dump_req);
+	wait_for_cpusdown();
+
+	if (glo_sysdump == NULL) {
+		close_logging();
+		return 0;
+	}
+
+	screen("Taking a dump...");
+	/* from here on SIGINT has its default action again */
+	signal(SIGINT, SIG_DFL);
+	create_dumpfile();
+	screen("FIXME SLEEPING 10 SECONDS TO ALLOW RETRIEVAL OF DBF");
+	sleep(10);
+
+	close_logging();
+	return 0;
+}
diff -ruN empty/Makefile kuli/Makefile
--- empty/Makefile 1970-01-01 01:00:00.000000000 +0100
+++ kuli/Makefile 2008-06-05 14:10:45.000000000 +0200
@@ -0,0 +1,61 @@
+# main makefile
+# Copyright IBM Corp. 2007,2008
+# Authors: Carsten Otte <cotte@de.ibm.com>
+# Christian Borntraeger <cborntra@de.ibm.com>
+# This file is licensed under the terms of the GNU General Public License(GPL)
+
+# print a short progress line, then compile the C source into its object
+%.o : %.c ; echo " Compiling " $<; $(CC) $(CFLAGS) -c $<
+
+CC := $(CROSS)gcc
+INCLUDE := -I$(PWD)/include -I$(KERNELDIR)/include
+CFLAGS := -Wall -Wno-missing-field-initializers -Wno-nonnull -W -D_GNU_SOURCE -m64 -fpic -ggdb $(INCLUDE)
+EXELDFLAGS := -Tkuli.ld -m64 -fpic -lpthread
+LDFLAGS := -m64
+ASFLAGS := -Wall -m64
+# ask the compiler that is actually used (honours CROSS) for its version,
+# so that the crtbeginS.o/crtendS.o of that compiler are linked
+GCCVERSION = `$(CC) -dumpversion`
+GCCLIB = /usr/lib/gcc/s390x-redhat-linux/$(GCCVERSION)
+#GCCLIB = /usr/lib64/gcc/s390x-suse-linux/$(GCCVERSION)
+SUBDIRS = init core drivers
+
+MAKEFLAGS+=--quiet
+
+export CC CFLAGS LDFLAGS ASFLAGS
+
+all: kuli
+
+# kuli is linked with the custom linker script kuli.ld, hence the start
+# files are listed by hand
+kuli: subdirs main.o
+	echo " Linking " $@
+	$(CC) $(EXELDFLAGS) -nostartfiles -o kuli /usr/lib64/crt1.o /usr/lib64/crti.o $(GCCLIB)/crtbeginS.o main.o init/*.o core/*.o drivers/*.o $(GCCLIB)/crtendS.o /usr/lib64/crtn.o
+
+# install and uninstall name actions as well, not files
+.PHONY: clean all subdirs mrproper tags TAGS install uninstall $(SUBDIRS)
+
+subdirs: $(SUBDIRS)
+
+$(SUBDIRS):
+	$(MAKE) -C $@ all
+
+clean:
+	for dir in $(SUBDIRS); do $(MAKE) -C $$dir clean ; done
+	rm -f main.o kuli kuli.map
+
+TAGS: *.c init/*.c core/*.c drivers/*.c include/*.h
+	etags $^
+
+tags: *.c init/*.c core/*.c drivers/*.c include/*.h
+	ctags $^
+
+# clean plus removal of editor backups, patch leftovers and compiler
+# intermediate files
+mrproper: clean
+	find . -name "*~" |xargs -e rm -f
+	find . -name "#*#" |xargs -e rm -f
+	find . -name ".#*" |xargs -e rm -f
+	find . -name "*.orig" |xargs -e rm -f
+	find . -name "*.rej" |xargs -e rm -f
+	find . -name "*.s" |xargs -e rm -f
+	find . -name "*.i" |xargs -e rm -f
+	rm -f TAGS tags
+
+install: kuli
+	install kuli /usr/bin
+
+uninstall:
+	rm -rf /usr/bin/kuli
^ permalink raw reply [flat|nested] 8+ messages in thread* Re: [RFC] kvm-s390: userspace snapshot
2008-06-06 15:54 [RFC] kvm-s390: userspace snapshot Carsten Otte
@ 2008-06-10 5:55 ` Oliver Paukstadt
2008-06-11 14:35 ` Christian Borntraeger
0 siblings, 1 reply; 8+ messages in thread
From: Oliver Paukstadt @ 2008-06-10 5:55 UTC (permalink / raw)
To: Carsten Otte
Cc: kvm, Avi Kivity, aliguori, jblunck, ihno, rvdheij, rusty,
Christian Borntraeger
On Fri, 2008-06-06 at 17:54 +0200, Carsten Otte wrote:
> This patch is a full snapshot of "kuli", our current userspace for
> kvm.
> It is <3000 lines of code, and it contains a bootloader as well as
> virtio backeds for console, network, and block. It's command line
> syntax
> is same as the common kvm userspace, but not all options are
> supported.
> See --help for details.
> Our next step will be to get rid of the guest phys == user virt
> address
> mapping, so that we can have the guest memory anywhere on a megabyte
> boundary in userland. After that, we'd like to integrate this into the
> common kvm userspace, it is not intended for customer production use
> but
> is a good starting point to explore what kvm can look alike on s390.
>
> Have fun reading
I started playing around with the stuff.
I was able to get a running host kernel based on yesterday's kvm.git +
Christian's VIRTIO_BLK_F_BLK_SIZE patch + Frank's latest qeth fixes.
I decided to use the same kernel/initrd for my first guest system and
expected the kernel to run at least through initial setup code, but
there was an unexpected intercept:
init_logging: KULI logging initialized
do_ipl: perparing for IPL
get_pages: map guest memory at 0x10000000 (pfn: 65536)
get_pages: map guest memory at 0x10001000 (pfn: 65537)
get_pages: map guest memory at 0x10003000 (pfn: 65539)
get_pages: map guest memory at 0x10005000 (pfn: 65541)
load_ipl: Booting from kernel image file
"image-2.6.26-rc5kvm-20080609-01433-gdf4245d-dirty"
load_from_files: loading ramdisk file
initrd-2.6.26-rc5kvm-20080609-01433-gdf4245d-dirty at 800000
load_from_files: loading kernel parameter file parmfile
do_ipl: IPL from address 10000
launch_cpu_ipl: starting guest (ipl)
run_cpu: cpu 0: activated, running work...
handle_should_not_happen: PANIC:cpu 0 caught unexpected intercept. Magic
number: 0x20
Did I miss anything, like setting up virtual consoles, addressing mode
or is there another kernel option for guests I did not see?
Documentation is a little short, any hint appreciated ;-)
I did this test running the host system as guest in z/VM 5.3 on z900, is
this "supported"?
Regards,
Oliver
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [RFC] kvm-s390: userspace snapshot
2008-06-10 5:55 ` Oliver Paukstadt
@ 2008-06-11 14:35 ` Christian Borntraeger
2008-06-11 20:53 ` Oliver Paukstadt
0 siblings, 1 reply; 8+ messages in thread
From: Christian Borntraeger @ 2008-06-11 14:35 UTC (permalink / raw)
To: Oliver Paukstadt
Cc: Carsten Otte, kvm, Avi Kivity, aliguori, jblunck, ihno, rvdheij,
rusty
Am Dienstag, 10. Juni 2008 schrieb Oliver Paukstadt:
> Did I miss anything, like setting up virtual consoles, addressing mode
> or is there another kernel option for guests I did not see?
It is not related to the problem you are seeing, but you will need
virtio_console kernel support before you get any console output.
I am currently at the KVM forum, but I plan to continue the following patch
set:
http://www.gossamer-threads.com/lists/linux/kernel/929047
This patch set should work nevertheless.
> Documentation is a little short, any hint appreciated ;-)
> I did this test running the host system as guest in z/VM 5.3 on z900, is
> this "supported"?
Hmm, I have no z900 to test on; currently we prototype on a z9. It used to
work under z/VM - I have to retest.
Can you run kuli with the -sysdump parameter and check
in /sys/kernel/debug/s390dbf/kvm-*/sprintf for
"unhandled validity intercept code %d"
and tell us about %d
while the dump is taken?
Christian
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [RFC] kvm-s390: userspace snapshot
2008-06-11 14:35 ` Christian Borntraeger
@ 2008-06-11 20:53 ` Oliver Paukstadt
2008-06-11 22:14 ` Christian Borntraeger
0 siblings, 1 reply; 8+ messages in thread
From: Oliver Paukstadt @ 2008-06-11 20:53 UTC (permalink / raw)
To: Christian Borntraeger; +Cc: Carsten Otte, kvm
On Wed, 2008-06-11 at 16:35 +0200, Christian Borntraeger wrote:
> Can you run kuli with the -sysdump parameter and check
> in /sys/kernel/debug/s390dbf/kvm-*/sprintf for
>
> "unhandled validity intercept code %d"
> and tell us about %d
> while the dump is taken?
00 01213216145:212946 3 - 03 000003e0004e9a36 vm created
00 01213216145:213593 3 - 00 000003e0004e90d8 create cpu 0 at
000000001ca46400, sie block at 000000001faf9000
00 01213216145:237824 2 - 00 000003e0004e9dcc
00[0000000180000000-0000000000010000]: unhandled validity intercept
code 61
Regards,
Oliver Paukstadt
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [RFC] kvm-s390: userspace snapshot
2008-06-11 20:53 ` Oliver Paukstadt
@ 2008-06-11 22:14 ` Christian Borntraeger
2008-06-12 5:39 ` Oliver Paukstadt
0 siblings, 1 reply; 8+ messages in thread
From: Christian Borntraeger @ 2008-06-11 22:14 UTC (permalink / raw)
To: Oliver Paukstadt; +Cc: Carsten Otte, kvm
Am Mittwoch, 11. Juni 2008 schrieb Oliver Paukstadt:
> 00 01213216145:212946 3 - 03 000003e0004e9a36 vm created
> 00 01213216145:213593 3 - 00 000003e0004e90d8 create cpu 0 at
> 000000001ca46400, sie block at 000000001faf9000
> 00 01213216145:237824 2 - 00 000003e0004e9dcc
> 00[0000000180000000-0000000000010000]: unhandled validity intercept
> code 61
Ok, I got an idea.
Does that patch fix the handle_should_not_happen PANIC?
---
arch/s390/kvm/kvm-s390.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
Index: kvm/arch/s390/kvm/kvm-s390.c
===================================================================
--- kvm.orig/arch/s390/kvm/kvm-s390.c
+++ kvm/arch/s390/kvm/kvm-s390.c
@@ -250,10 +250,13 @@ static void kvm_s390_vcpu_initial_reset(
vcpu->arch.sie_block->gbea = 1;
}
+/* some random value for testing */
+#define VIRTIODESCSPACE (100ul*1024ul*1024ul)
+
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH);
- vcpu->arch.sie_block->gmslm = 0xffffffffffUL;
+ vcpu->arch.sie_block->gmslm = vcpu->kvm->arch.guest_memsize + VIRTIODESCSPACE -1;
vcpu->arch.sie_block->gmsor = vcpu->kvm->arch.guest_origin;
vcpu->arch.sie_block->ecb = 2;
vcpu->arch.sie_block->eca = 0xC1002001U;
^ permalink raw reply [flat|nested] 8+ messages in thread* Re: [RFC] kvm-s390: userspace snapshot
2008-06-11 22:14 ` Christian Borntraeger
@ 2008-06-12 5:39 ` Oliver Paukstadt
2008-06-12 14:14 ` Christian Borntraeger
0 siblings, 1 reply; 8+ messages in thread
From: Oliver Paukstadt @ 2008-06-12 5:39 UTC (permalink / raw)
To: Christian Borntraeger; +Cc: Carsten Otte, kvm, Martin Schwidefsky
On Thu, 2008-06-12 at 00:14 +0200, Christian Borntraeger wrote:
> Ok, I got an idea.
> Does that patch fix the handle_should_not_happen PANIC?
>
Patch does not fit, because my code contains
vcpu->arch.sie_block->gmsor = 0x000000000000;
so I changed this before I applied the patch.
The console patch you mentioned was applied too.
Now I am able to get the kernel running a little further:
[...]
PID hash table entries: 256 (order: 8, 2048 bytes)
console [hvc0] enabled
sclp vt220 tty driver: could not register vt220 - sclp_register returned
-5
list_del corruption. prev->next should be 00000000003d72a8, but was
0000000000000000
------------[ cut here ]------------
kernel BUG at lib/list_debug.c:67!
illegal operation: 0001 [#1] SMP
Modules linked in:
CPU: 0 Not tainted 2.6.26-rc5-guest-20080609-01433-gdf4245d-dirty #2
Process swapper (pid: 0, task: 00000000003ada00, ksp: 00000000003e8000)
Krnl PSW : 0400000180000000 0000000000198c64 (list_del+0x50/0xb8)
R:0 T:1 IO:0 EX:0 Key:0 M:0 W:0 P:0 AS:0 CC:0 PM:0 EA:3
Krnl GPRS: 0000000000000479 00000000003b2c28 0000000000000058
0000000000000001
0000000000041246 0000000000000000 00000000003e8594
0000000000414000
00000000003a2000 0000000000005000 00000000003d72a8
0000000000454418
00000000003d72a8 00000000002bd820 0000000000198c60
00000000003e7dd8
Krnl Code: 0000000000198c54: e34040000004 lg %r4,0(%r4)
0000000000198c5a: c0e5fff542cf brasl %r14,411f8
0000000000198c60: a7f40001 brc 15,198c62
>0000000000198c64: e310c0000004 lg %r1,0(%r12)
0000000000198c6a: b904003c lgr %r3,%r12
0000000000198c6e: c020000be1b9 larl %r2,314fe0
0000000000198c74: e3c010080020 cg %r12,8(%r1)
0000000000198c7a: a784000a brc 8,198c8e
Call Trace:
([<0000000000198c60>] list_del+0x4c/0xb8)
[<00000000001e9a72>] sclp_unregister+0x3a/0x5c
[<0000000000401410>] __sclp_vt220_cleanup+0x98/0xb4
[<0000000000401594>] __sclp_vt220_init+0x168/0x17c
[<00000000004016e8>] sclp_vt220_con_init+0x3c/0x60
[<00000000003fddd0>] console_init+0x48/0x60
[<00000000003e8bd0>] start_kernel+0x37c/0x4c4
[<0000000000012020>] _ehead+0x20/0x80
Last Breaking-Event-Address:
[<0000000000000000>] 0x0
<4>---[ end trace 31fd0ba7d8756001 ]---
Kernel panic - not syncing: Attempted to kill the idle task!
Looks like the sclp_vt220 stuff has a problem unregistering an
uninitialized, nonexistent console.
kuli.log reports some unknown diags and instructions, but I guess this
is the runtime feature detection:
launch_cpu_ipl: starting guest (ipl)
run_cpu: cpu 0: activated, running work...
handle_diag: cpu 0: unknown diagnose 9c at addr 3f136c, sending prog 1
enter_pgmcheck: cpu: 0: sending program check 1
handle_diag: cpu 0: unknown diagnose 260 at addr 3f1048, sending prog 1
enter_pgmcheck: cpu: 0: sending program check 1
handle_priv: cpu 0: unknown privileged instruction b216 at addr
400200180000000, sending prog 1
enter_pgmcheck: cpu: 0: sending program check 1
sclp_service_call: cpu 0: unknown sclp service call 0x780005, sccb
0x452000,addr 0x1e8780
sclp_service_call: cpu 0: unknown sclp service call 0x780005, sccb
0x452000,addr 0x1e8780
handle_waitpsw: cpu 0: entered disabled wait PSW at 1f312
Regards,
Oliver Paukstadt
^ permalink raw reply [flat|nested] 8+ messages in thread* Re: [RFC] kvm-s390: userspace snapshot
2008-06-12 5:39 ` Oliver Paukstadt
@ 2008-06-12 14:14 ` Christian Borntraeger
2008-06-12 19:56 ` Oliver Paukstadt
0 siblings, 1 reply; 8+ messages in thread
From: Christian Borntraeger @ 2008-06-12 14:14 UTC (permalink / raw)
To: Oliver Paukstadt; +Cc: Carsten Otte, kvm, Martin Schwidefsky
Am Donnerstag, 12. Juni 2008 schrieb Oliver Paukstadt:
> On Thu, 2008-06-12 at 00:14 +0200, Christian Borntraeger wrote:
>
> > Ok, I got an idea.
> > Does that patch fix the handle_should_not_happen PANIC?
> >
> Patch does not fit, because my code contains
> vcpu->arch.sie_block->gmsor = 0x000000000000;
> so I changed this before I applied the patch.
> The console patch you mentioned was applied too.
>
> Now I am able to get the kernel running a little further:
good. I will make this patch proper and send it to Avi.
> PID hash table entries: 256 (order: 8, 2048 bytes)
> console [hvc0] enabled
> sclp vt220 tty driver: could not register vt220 - sclp_register returned
> -5
> list_del corruption. prev->next should be 00000000003d72a8, but was
Yes, Carsten ran into that as well when we changed from vt220 to
virtio_console. Looks like the vt220 driver doesn't like it when there is no
sclp available.
A fix is upstream in Linus git since yesterday:
commit 7b439d25300dc59bba76b53eb344bb9e5a1133f2
Author: Carsten Otte <cotte@de.ibm.com>
Date: Tue Jun 10 10:03:22 2008 +0200
[S390] vt220 console, initialize list head before use
[...]
diff --git a/drivers/s390/char/sclp_vt220.c b/drivers/s390/char/sclp_vt220.c
index 62576af..3e577f6 100644
--- a/drivers/s390/char/sclp_vt220.c
+++ b/drivers/s390/char/sclp_vt220.c
@@ -773,6 +773,7 @@ sclp_vt220_con_init(void)
{
int rc;
+ INIT_LIST_HEAD(&sclp_vt220_register.list);
if (!CONSOLE_IS_SCLP)
return 0;
rc = __sclp_vt220_init();
^ permalink raw reply related [flat|nested] 8+ messages in thread* Re: [RFC] kvm-s390: userspace snapshot
2008-06-12 14:14 ` Christian Borntraeger
@ 2008-06-12 19:56 ` Oliver Paukstadt
0 siblings, 0 replies; 8+ messages in thread
From: Oliver Paukstadt @ 2008-06-12 19:56 UTC (permalink / raw)
To: Christian Borntraeger; +Cc: Carsten Otte, kvm
On Thu, 2008-06-12 at 16:14 +0200, Christian Borntraeger wrote:
> Am Donnerstag, 12. Juni 2008 schrieb Oliver Paukstadt:
> > PID hash table entries: 256 (order: 8, 2048 bytes)
> > console [hvc0] enabled
> > sclp vt220 tty driver: could not register vt220 - sclp_register returned
> > -5
> > list_del corruption. prev->next should be 00000000003d72a8, but was
>
> Yes, Carsten ran into that as well, when we changed from vt220 to
> virtio_console. Looks like the vt220 driver doesnt like it, when there is no
> sclp available.
>
Thanks, looks like my new toy is running ;-)
Great job!
VM00 Name: KVMguest
VM00 Control Program: KVM/Linux
VM00 Adjustment: 1000
VM00 CPUs Total: 1
VM00 CPUs Configured: 1
VM00 CPUs Standby: 0
VM00 CPUs Reserved: 0
VM01 Name: KVM01
VM01 Control Program: z/VM 5.3.0
VM01 Adjustment: 285
VM01 CPUs Total: 4
VM01 CPUs Configured: 4
VM01 CPUs Standby: 0
VM01 CPUs Reserved: 0
^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2008-06-12 19:56 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-06-06 15:54 [RFC] kvm-s390: userspace snapshot Carsten Otte
2008-06-10 5:55 ` Oliver Paukstadt
2008-06-11 14:35 ` Christian Borntraeger
2008-06-11 20:53 ` Oliver Paukstadt
2008-06-11 22:14 ` Christian Borntraeger
2008-06-12 5:39 ` Oliver Paukstadt
2008-06-12 14:14 ` Christian Borntraeger
2008-06-12 19:56 ` Oliver Paukstadt
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox