[PATCH 1/8] kvm tools: Add initial SPAPR PPC64 architecture support

kvm.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* [PATCH 1/8] kvm tools: Add initial SPAPR PPC64 architecture support
       [not found] <cover.1323143103.git.matt@ozlabs.org>
@ 2011-12-06  4:05 ` Matt Evans
  2011-12-06 18:03   ` Scott Wood
  2011-12-06  4:06 ` [PATCH 2/8] kvm tools: Generate SPAPR PPC64 guest device tree Matt Evans
                   ` (6 subsequent siblings)
  7 siblings, 1 reply; 14+ messages in thread
From: Matt Evans @ 2011-12-06  4:05 UTC (permalink / raw)
  To: kvm, kvm-ppc

This patch adds a new arch directory, powerpc, basic file structure, register
setup and where necessary stubs out arch-specific functions (e.g. interrupts,
runloop exits) that later patches will provide.  The target is an
SPAPR-compliant PPC64 machine (i.e. pSeries); there is no support for PPC32 or
'bare metal' PPC64 guests as yet.  Subsequent patches implement the hcalls and
RTAS required to boot SPAPR pSeries kernels.

Memory is mapped from hugetlbfs (as that is currently required by upstream PPC64
HV-mode KVM).  The allocation of a contiguous RMA region is yet to be
implemented; this is only necessary on processors that don't support VRMA,
e.g. <= P6.  Work is therefore needed to get this going on pre-P7 CPUs.

Processor state is set up as a guest kernel would expect (both primary and
secondaries), and SMP is fully supported.

Finally, support is added for simply loading flat binary kernels (plus initrd).
(bzImages are not used on PPC, and this series does not add zImage support or an
ELF loader.)  The intention is to later support loading firmware such as SLOF.

Signed-off-by: Matt Evans <matt@ozlabs.org>
---
 tools/kvm/Makefile                           |   10 +
 tools/kvm/kvm.c                              |    3 +
 tools/kvm/powerpc/include/kvm/barrier.h      |    6 +
 tools/kvm/powerpc/include/kvm/kvm-arch.h     |   70 ++++++++
 tools/kvm/powerpc/include/kvm/kvm-cpu-arch.h |   46 +++++
 tools/kvm/powerpc/ioport.c                   |   18 ++
 tools/kvm/powerpc/irq.c                      |   40 +++++
 tools/kvm/powerpc/kvm-cpu.c                  |  232 ++++++++++++++++++++++++++
 tools/kvm/powerpc/kvm.c                      |  231 +++++++++++++++++++++++++
 9 files changed, 656 insertions(+), 0 deletions(-)
 create mode 100644 tools/kvm/powerpc/include/kvm/barrier.h
 create mode 100644 tools/kvm/powerpc/include/kvm/kvm-arch.h
 create mode 100644 tools/kvm/powerpc/include/kvm/kvm-cpu-arch.h
 create mode 100644 tools/kvm/powerpc/ioport.c
 create mode 100644 tools/kvm/powerpc/irq.c
 create mode 100644 tools/kvm/powerpc/kvm-cpu.c
 create mode 100644 tools/kvm/powerpc/kvm.c

diff --git a/tools/kvm/Makefile b/tools/kvm/Makefile
index 57dc521..58815a2 100644
--- a/tools/kvm/Makefile
+++ b/tools/kvm/Makefile
@@ -121,6 +121,16 @@ ifeq ($(ARCH),x86)
 	OTHEROBJS	+= x86/bios/bios-rom.o
 	ARCH_INCLUDE := x86/include
 endif
+# POWER/ppc:  Actually only support ppc64 currently.
+# CONFIG_PPC64 is the symbol kvm.c tests before listing KVM_EXIT_PAPR_HCALL,
+# so it has to be defined here as well as CONFIG_PPC.
+ifeq ($(uname_M), ppc64)
+	DEFINES += -DCONFIG_PPC
+	DEFINES += -DCONFIG_PPC64
+	OBJS	+= powerpc/ioport.o
+	OBJS	+= powerpc/irq.o
+	OBJS	+= powerpc/kvm.o
+	OBJS	+= powerpc/kvm-cpu.o
+	ARCH_INCLUDE := powerpc/include
+	CFLAGS += -m64
+endif
 
 ###
 
diff --git a/tools/kvm/kvm.c b/tools/kvm/kvm.c
index 503ceae..d716ede 100644
--- a/tools/kvm/kvm.c
+++ b/tools/kvm/kvm.c
@@ -49,6 +49,9 @@ const char *kvm_exit_reasons[] = {
 	DEFINE_KVM_EXIT_REASON(KVM_EXIT_DCR),
 	DEFINE_KVM_EXIT_REASON(KVM_EXIT_NMI),
 	DEFINE_KVM_EXIT_REASON(KVM_EXIT_INTERNAL_ERROR),
+#ifdef CONFIG_PPC64
+	DEFINE_KVM_EXIT_REASON(KVM_EXIT_PAPR_HCALL),
+#endif
 };
 
 extern struct kvm *kvm;
diff --git a/tools/kvm/powerpc/include/kvm/barrier.h b/tools/kvm/powerpc/include/kvm/barrier.h
new file mode 100644
index 0000000..bc7d179
--- /dev/null
+++ b/tools/kvm/powerpc/include/kvm/barrier.h
@@ -0,0 +1,6 @@
+#ifndef _KVM_BARRIER_H_
+#define _KVM_BARRIER_H_
+
+#include <asm/system.h>
+
+#endif /* _KVM_BARRIER_H_ */
diff --git a/tools/kvm/powerpc/include/kvm/kvm-arch.h b/tools/kvm/powerpc/include/kvm/kvm-arch.h
new file mode 100644
index 0000000..722d01c
--- /dev/null
+++ b/tools/kvm/powerpc/include/kvm/kvm-arch.h
@@ -0,0 +1,70 @@
+/*
+ * PPC64 architecture-specific definitions
+ *
+ * Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#ifndef KVM__KVM_ARCH_H
+#define KVM__KVM_ARCH_H
+
+#include <stdbool.h>
+#include <linux/types.h>
+#include <time.h>
+
+#define KVM_NR_CPUS			(255)
+
+/* MMIO lives after RAM, but it'd be nice if it didn't constantly move.
+ * Choose a suitably high address, e.g. 63T...  This limits RAM size.
+ */
+#define PPC_MMIO_START			0x3F0000000000UL
+#define PPC_MMIO_SIZE			0x010000000000UL
+
+#define KERNEL_LOAD_ADDR        	0x0000000000000000
+#define KERNEL_START_ADDR       	0x0000000000000000
+#define KERNEL_SECONDARY_START_ADDR     0x0000000000000060
+#define INITRD_LOAD_ADDR        	0x0000000002800000
+
+#define FDT_MAX_SIZE            	0x10000
+#define RTAS_MAX_SIZE           	0x10000
+
+#define TIMEBASE_FREQ           	512000000ULL
+
+#define KVM_MMIO_START			PPC_MMIO_START
+
+/* This is the address that pci_get_io_space_block() starts allocating
+ * from.  Note that this is a PCI bus address.
+ */
+#define KVM_PCI_MMIO_AREA		0x1000000
+
+/*
+ * Per-VM state for a PPC64 guest.
+ *
+ * NOTE(review): the "_gra" suffix presumably means "guest real address";
+ * the values stored there are offsets from the start of guest RAM.
+ */
+struct kvm {
+	int			sys_fd;		/* For system ioctls(), i.e. /dev/kvm */
+	int			vm_fd;		/* For VM ioctls() */
+	timer_t			timerid;	/* Posix timer for interrupts */
+
+	int			nrcpus;		/* Number of cpus to run */
+
+	u32			mem_slots;	/* for KVM_SET_USER_MEMORY_REGION */
+
+	u64			ram_size;	/* Guest RAM size in bytes */
+	void			*ram_start;	/* Host mapping of guest RAM */
+
+	bool			nmi_disabled;
+
+	bool			single_step;
+
+	const char		*vmlinux;
+	struct disk_image       **disks;
+	int                     nr_disks;
+	unsigned long		rtas_gra;	/* RTAS area: RTAS_MAX_SIZE below fdt_gra */
+	unsigned long		rtas_size;
+	unsigned long		fdt_gra;	/* Device tree: top FDT_MAX_SIZE bytes of RAM */
+	unsigned long		initrd_gra;	/* Set by load_flat_binary() when an initrd is loaded */
+	unsigned long		initrd_size;	/* 0 when no initrd was supplied */
+	const char		*name;
+};
+
+#endif /* KVM__KVM_ARCH_H */
diff --git a/tools/kvm/powerpc/include/kvm/kvm-cpu-arch.h b/tools/kvm/powerpc/include/kvm/kvm-cpu-arch.h
new file mode 100644
index 0000000..dbabc57
--- /dev/null
+++ b/tools/kvm/powerpc/include/kvm/kvm-cpu-arch.h
@@ -0,0 +1,46 @@
+/*
+ * PPC64 cpu-specific definitions
+ *
+ * Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#ifndef KVM__KVM_CPU_ARCH_H
+#define KVM__KVM_CPU_ARCH_H
+
+/* Architecture-specific kvm_cpu definitions. */
+
+#include <linux/kvm.h>	/* for struct kvm_regs */
+
+#include <pthread.h>
+
+struct kvm;
+
+struct kvm_cpu {
+	pthread_t		thread;		/* VCPU thread */
+
+	unsigned long		cpu_id;
+
+	struct kvm		*kvm;		/* parent KVM */
+	int			vcpu_fd;	/* For VCPU ioctls() */
+	struct kvm_run		*kvm_run;
+
+	struct kvm_regs		regs;
+	struct kvm_sregs	sregs;
+	struct kvm_fpu		fpu;
+
+	u8			is_running;
+	u8			paused;
+
+	/* Although PPC KVM doesn't yet support coalesced MMIO, generic code
+	 * needs this in our kvm_cpu:
+	 */
+	struct kvm_coalesced_mmio_ring  *ring;
+};
+
+void kvm_cpu__irq(struct kvm_cpu *vcpu, int pin, int level);
+
+#endif /* KVM__KVM_CPU_ARCH_H */
diff --git a/tools/kvm/powerpc/ioport.c b/tools/kvm/powerpc/ioport.c
new file mode 100644
index 0000000..a8e4dc3
--- /dev/null
+++ b/tools/kvm/powerpc/ioport.c
@@ -0,0 +1,18 @@
+/*
+ * PPC64 ioport platform setup.  There isn't any! :-)
+ *
+ * Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include "kvm/ioport.h"
+
+#include <stdlib.h>
+
+void ioport__setup_arch(void)
+{
+	/* PPC has no legacy ioports to set up */
+}
diff --git a/tools/kvm/powerpc/irq.c b/tools/kvm/powerpc/irq.c
new file mode 100644
index 0000000..46aa64f
--- /dev/null
+++ b/tools/kvm/powerpc/irq.c
@@ -0,0 +1,40 @@
+/*
+ * PPC64 IRQ routines
+ *
+ * Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include "kvm/irq.h"
+#include "kvm/kvm.h"
+#include "kvm/util.h"
+
+#include <linux/types.h>
+#include <linux/rbtree.h>
+#include <linux/list.h>
+#include <linux/kvm.h>
+#include <sys/ioctl.h>
+
+#include <stddef.h>
+#include <stdlib.h>
+
+/*
+ * Stub: only traces the request on stderr.  The values behind @num, @pin and
+ * @line are read for the trace but never written.  Always returns 0.
+ */
+int irq__register_device(u32 dev, u8 *num, u8 *pin, u8 *line)
+{
+	/* dev is unsigned (u32), hence %u; the u8 values promote to int. */
+	fprintf(stderr, "irq__register_device(%u, [%d], [%d], [%d])\n",
+		dev, *num, *pin, *line);
+	return 0;
+}
+
+/* Stub: only traces the call on stderr; @kvm is not touched. */
+void irq__init(struct kvm *kvm)
+{
+	/* Use a literal format string; a bare identifier as format trips
+	 * -Wformat-security and would misbehave if it ever contained '%'.
+	 */
+	fprintf(stderr, "%s\n", __func__);
+}
+
+/*
+ * Not implemented here: calls die() with this function's name as the message.
+ * The trailing return only satisfies the int return type.
+ */
+int irq__add_msix_route(struct kvm *kvm, struct msi_msg *msg)
+{
+	/* die() takes a printf-style format, so pass the name as an argument. */
+	die("%s", __func__);
+	return 0;
+}
diff --git a/tools/kvm/powerpc/kvm-cpu.c b/tools/kvm/powerpc/kvm-cpu.c
new file mode 100644
index 0000000..79422ff
--- /dev/null
+++ b/tools/kvm/powerpc/kvm-cpu.c
@@ -0,0 +1,232 @@
+/*
+ * PPC64 processor support
+ *
+ * Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include "kvm/kvm-cpu.h"
+
+#include "kvm/symbol.h"
+#include "kvm/util.h"
+#include "kvm/kvm.h"
+
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <stdio.h>
+
+static int debug_fd;
+
+#define MSR_SF		(1UL<<63)
+#define MSR_HV		(1UL<<60)
+#define MSR_VEC		(1UL<<25)
+#define MSR_VSX		(1UL<<23)
+#define MSR_POW		(1UL<<18)
+#define MSR_EE		(1UL<<15)
+#define MSR_PR		(1UL<<14)
+#define MSR_FP		(1UL<<13)
+#define MSR_ME		(1UL<<12)
+#define MSR_FE0		(1UL<<11)
+#define MSR_SE		(1UL<<10)
+#define MSR_BE		(1UL<<9)
+#define MSR_FE1		(1UL<<8)
+#define MSR_IR		(1UL<<5)
+#define MSR_DR		(1UL<<4)
+#define MSR_PMM		(1UL<<2)
+#define MSR_RI		(1UL<<1)
+#define MSR_LE		(1UL<<0)
+
+
+/* Select the file descriptor that the register/stack dumps below write to. */
+void kvm_cpu__set_debug_fd(int fd)
+{
+	debug_fd = fd;
+}
+
+/* Return the descriptor last given to kvm_cpu__set_debug_fd() (0 if never set). */
+int kvm_cpu__get_debug_fd(void)
+{
+	return debug_fd;
+}
+
+/*
+ * Allocate a zero-filled vcpu structure and link it to its parent @kvm.
+ * Returns NULL if the allocation fails.
+ */
+static struct kvm_cpu *kvm_cpu__new(struct kvm *kvm)
+{
+	struct kvm_cpu *cpu = calloc(1, sizeof(*cpu));
+
+	if (cpu)
+		cpu->kvm = kvm;
+
+	return cpu;
+}
+
+/*
+ * Free the vcpu structure.  Only the structure itself is released here; the
+ * vcpu fd and the kvm_run mapping set up by kvm_cpu__init() are not touched.
+ */
+void kvm_cpu__delete(struct kvm_cpu *vcpu)
+{
+	free(vcpu);
+}
+
+/*
+ * Create and initialise vcpu @cpu_id of @kvm.
+ *
+ * Creates the vcpu in the kernel, maps its shared kvm_run area, enables the
+ * PAPR capability on it and marks it as running.
+ *
+ * Returns the new vcpu, or NULL if the structure could not be allocated.
+ * Any failing ioctl or mmap terminates the program via die()/die_perror().
+ */
+struct kvm_cpu *kvm_cpu__init(struct kvm *kvm, unsigned long cpu_id)
+{
+	struct kvm_cpu *vcpu;
+	int mmap_size;
+	struct kvm_enable_cap papr_cap = { .cap = KVM_CAP_PPC_PAPR };
+
+	vcpu		= kvm_cpu__new(kvm);
+	if (!vcpu)
+		return NULL;
+
+	vcpu->cpu_id	= cpu_id;
+
+	vcpu->vcpu_fd = ioctl(vcpu->kvm->vm_fd, KVM_CREATE_VCPU, cpu_id);
+	if (vcpu->vcpu_fd < 0)
+		die_perror("KVM_CREATE_VCPU ioctl");
+
+	mmap_size = ioctl(vcpu->kvm->sys_fd, KVM_GET_VCPU_MMAP_SIZE, 0);
+	if (mmap_size < 0)
+		die_perror("KVM_GET_VCPU_MMAP_SIZE ioctl");
+
+	vcpu->kvm_run = mmap(NULL, mmap_size, PROT_RW, MAP_SHARED, vcpu->vcpu_fd, 0);
+	if (vcpu->kvm_run == MAP_FAILED)
+		die("unable to mmap vcpu fd");
+
+	/* Without PAPR mode the guest cannot run as an SPAPR machine, so a
+	 * failure here must not be ignored.
+	 */
+	if (ioctl(vcpu->vcpu_fd, KVM_ENABLE_CAP, &papr_cap) < 0)
+		die_perror("KVM_ENABLE_CAP(KVM_CAP_PPC_PAPR) ioctl");
+
+	/* We start all CPUs, directing non-primary threads into the kernel's
+	 * secondary start point.  When we come to support SLOF, we will start
+	 * only one and SLOF will RTAS call us to ask for others to be
+	 * started.
+	 */
+	vcpu->is_running = true;
+
+	return vcpu;
+}
+
+static void kvm_cpu__setup_fpu(struct kvm_cpu *vcpu)
+{
+	/* Don't have to do anything, there's no expected FPU state. */
+}
+
+/*
+ * Program the initial register state and push it to KVM with KVM_SET_REGS.
+ *
+ * vcpu 0 enters at KERNEL_START_ADDR with the FDT address in r3 and r5
+ * cleared; every other vcpu enters at KERNEL_SECONDARY_START_ADDR with its
+ * cpu_id in r3.  Dies if the ioctl fails.
+ */
+static void kvm_cpu__setup_regs(struct kvm_cpu *vcpu)
+{
+	struct kvm_regs *regs = &vcpu->regs;
+	bool is_secondary = (vcpu->cpu_id != 0);
+
+	/* 64bit, non-HV, ME */
+	regs->msr = MSR_SF | MSR_ME;
+
+	if (is_secondary) {
+		regs->pc = KERNEL_SECONDARY_START_ADDR;
+		regs->gpr[3] = vcpu->cpu_id;
+	} else {
+		regs->pc = KERNEL_START_ADDR;
+		regs->gpr[3] = vcpu->kvm->fdt_gra;
+		regs->gpr[5] = 0;
+	}
+
+	if (ioctl(vcpu->vcpu_fd, KVM_SET_REGS, regs) < 0)
+		die_perror("KVM_SET_REGS failed");
+}
+
+static void kvm_cpu__setup_sregs(struct kvm_cpu *vcpu)
+{
+	/* There's actually no sregs setup required on PPC64/SPAPR. */
+}
+
+/**
+ * kvm_cpu__reset_vcpu - reset virtual CPU to a known state
+ */
+void kvm_cpu__reset_vcpu(struct kvm_cpu *vcpu)
+{
+	kvm_cpu__setup_regs(vcpu);
+	kvm_cpu__setup_sregs(vcpu);
+	kvm_cpu__setup_fpu(vcpu);
+}
+
+/* kvm_cpu__irq - set KVM's IRQ flag on this vcpu */
+void kvm_cpu__irq(struct kvm_cpu *vcpu, int pin, int level)
+{
+}
+
+/*
+ * Arch-specific handling of a KVM exit.
+ *
+ * No exit reason is handled yet: the switch has only a default case, so every
+ * value of run->exit_reason yields false.
+ */
+bool kvm_cpu__handle_exit(struct kvm_cpu *vcpu)
+{
+	bool ret = true;
+	struct kvm_run *run = vcpu->kvm_run;
+	switch(run->exit_reason) {
+	default:
+		ret = false;
+	}
+	return ret;
+}
+
+#define CONDSTR_BIT(m, b) (((m) & MSR_##b) ? #b" " : "")
+
+/*
+ * Dump the vcpu's register state to the debug fd.
+ *
+ * Fetches fresh copies with KVM_GET_REGS and KVM_GET_SREGS (the cached
+ * vcpu->regs is not used), then prints NIP/MSR with decoded MSR bits, the
+ * special-purpose registers, the 32 GPRs four per line, and the 32 SLB
+ * entries.  Dies, reporting errno, if either ioctl fails.
+ */
+void kvm_cpu__show_registers(struct kvm_cpu *vcpu)
+{
+	struct kvm_regs regs;
+	struct kvm_sregs sregs;
+	int r;
+
+	if (ioctl(vcpu->vcpu_fd, KVM_GET_REGS, &regs) < 0)
+		die_perror("KVM_GET_REGS failed");
+	if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &sregs) < 0)
+		die_perror("KVM_GET_SREGS failed");
+
+	dprintf(debug_fd, "\n Registers:\n");
+	dprintf(debug_fd, " NIP:   %016lx  MSR:   %016lx ( %s%s%s%s%s%s%s%s%s%s%s%s)\n",
+		regs.pc, regs.msr,
+		CONDSTR_BIT(regs.msr, SF),
+		CONDSTR_BIT(regs.msr, HV), /* ! */
+		CONDSTR_BIT(regs.msr, VEC),
+		CONDSTR_BIT(regs.msr, VSX),
+		CONDSTR_BIT(regs.msr, EE),
+		CONDSTR_BIT(regs.msr, PR),
+		CONDSTR_BIT(regs.msr, FP),
+		CONDSTR_BIT(regs.msr, ME),
+		CONDSTR_BIT(regs.msr, IR),
+		CONDSTR_BIT(regs.msr, DR),
+		CONDSTR_BIT(regs.msr, RI),
+		CONDSTR_BIT(regs.msr, LE));
+	dprintf(debug_fd, " CTR:   %016lx  LR:    %016lx  CR:   %08lx\n",
+		regs.ctr, regs.lr, regs.cr);
+	dprintf(debug_fd, " SRR0:  %016lx  SRR1:  %016lx  XER:  %016lx\n",
+		regs.srr0, regs.srr1, regs.xer);
+	dprintf(debug_fd, " SPRG0: %016lx  SPRG1: %016lx\n", regs.sprg0, regs.sprg1);
+	dprintf(debug_fd, " SPRG2: %016lx  SPRG3: %016lx\n", regs.sprg2, regs.sprg3);
+	dprintf(debug_fd, " SPRG4: %016lx  SPRG5: %016lx\n", regs.sprg4, regs.sprg5);
+	dprintf(debug_fd, " SPRG6: %016lx  SPRG7: %016lx\n", regs.sprg6, regs.sprg7);
+	dprintf(debug_fd, " GPRs:\n ");
+	for (r = 0; r < 32; r++) {
+		dprintf(debug_fd, "%016lx  ", regs.gpr[r]);
+		/* Line break after every fourth register. */
+		if ((r & 3) == 3)
+			dprintf(debug_fd, "\n ");
+	}
+	dprintf(debug_fd, "\n");
+
+	for (r = 0; r < 32; r++) {
+		dprintf(debug_fd, " SLB%02d  %016lx %016lx\n", r,
+			sregs.u.s.ppc64.slb[r].slbe, sregs.u.s.ppc64.slb[r].slbv);
+	}
+	dprintf(debug_fd, "----------\n");
+}
+
+void kvm_cpu__show_code(struct kvm_cpu *vcpu)
+{
+	if (ioctl(vcpu->vcpu_fd, KVM_GET_REGS, &vcpu->regs) < 0)
+		die("KVM_GET_REGS failed");
+
+	/* To do: dump some code...! */
+
+	dprintf(debug_fd, "\n Stack:\n");
+	dprintf(debug_fd,   " ------\n");
+	/* Only works in real mode: */
+	kvm__dump_mem(vcpu->kvm, vcpu->regs.gpr[1], 32);
+}
+
+void kvm_cpu__show_page_tables(struct kvm_cpu *vcpu)
+{
+	/* Does nothing yet */
+}
diff --git a/tools/kvm/powerpc/kvm.c b/tools/kvm/powerpc/kvm.c
new file mode 100644
index 0000000..036bfc0
--- /dev/null
+++ b/tools/kvm/powerpc/kvm.c
@@ -0,0 +1,231 @@
+/*
+ * PPC64 (SPAPR) platform support
+ *
+ * Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include "kvm/kvm.h"
+#include "kvm/util.h"
+
+#include <linux/kvm.h>
+
+#include <sys/types.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/statfs.h>
+#include <stdbool.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <asm/unistd.h>
+#include <errno.h>
+
+#include <libfdt.h>
+
+#define HUGETLBFS_PATH "/var/lib/hugetlbfs/global/pagesize-16MB/"
+
+static char kern_cmdline[2048];
+
+struct kvm_ext kvm_req_ext[] = {
+	{ 0, 0 }
+};
+
+bool kvm__arch_cpu_supports_vm(void)
+{
+	return true;
+}
+
+int kvm__recommended_cpus(struct kvm *kvm)
+{
+	/* FIXME, this is obviously fictional; does KVM/POWER have a 'get
+	 * reasonable number of CPUs' IOCTL like x86?  If not, perhaps suggest
+	 * number of present cores. */
+	return 4;
+}
+
+int kvm__max_cpus(struct kvm *kvm)
+{
+	return KVM_NR_CPUS;
+}
+
+/*
+ * Register all of guest RAM with KVM as one region starting at guest
+ * physical address 0, backed by kvm->ram_start.  Dies if RAM would reach
+ * the MMIO window at PPC_MMIO_START.
+ */
+void kvm__init_ram(struct kvm *kvm)
+{
+	u64	phys_start, phys_size;
+	void	*host_mem;
+
+	phys_start = 0;
+	phys_size  = kvm->ram_size;
+	host_mem   = kvm->ram_start;
+
+	/* We put MMIO at PPC_MMIO_START, high up.  Make sure that
+	 * this doesn't crash into the end of RAM -- practically, this is
+	 * so high (63TB!) that this won't happen.
+	 */
+	if (phys_size >= PPC_MMIO_START)
+		die("Too much memory (%llu, what a nice problem): overlaps MMIO!\n",
+		    (unsigned long long)phys_size);
+
+	kvm__register_mem(kvm, phys_start, phys_size, host_mem);
+}
+
+#define HUGETLBFS_MAGIC       0x958458f6
+
+/*
+ * Map guest RAM from a temporary file on hugetlbfs.
+ *
+ * @path: hugetlbfs mount point, or NULL to use HUGETLBFS_PATH.
+ *
+ * Verifies that the path is on hugetlbfs and that its block size is non-zero
+ * and no larger than kvm->ram_size, creates an unlinked temporary file there,
+ * sizes it to kvm->ram_size and maps it privately.  On success
+ * kvm->ram_start points at the mapping; every failure terminates via die().
+ */
+static void kvm__map_ram(struct kvm *kvm, const char *path)
+{
+	char mpath[PATH_MAX];
+	int fd;
+	int r;
+	struct statfs sfs;
+	const char *htlbfs_path = path;
+
+	if (!path) {
+		htlbfs_path = HUGETLBFS_PATH;
+		pr_info("Using default %s for memory", htlbfs_path);
+	}
+
+	do {
+		/* QEMU seems to work around this returning EINTR...  Let's do
+		 * that too. */
+		r = statfs(htlbfs_path, &sfs);
+	} while (r && errno == EINTR);
+
+	if (r)
+		die("Can't stat %s\n", htlbfs_path);
+
+	if (sfs.f_type != HUGETLBFS_MAGIC) {
+		die("%s is not hugetlbfs!\n", htlbfs_path);
+	}
+
+	/* A truncated template would lose its XXXXXX suffix; reject it here
+	 * with a clear message instead of letting mkstemp() fail obscurely.
+	 */
+	r = snprintf(mpath, PATH_MAX, "%s/kvmtoolXXXXXX", htlbfs_path);
+	if (r < 0 || r >= PATH_MAX)
+		die("hugetlbfs path %s is too long\n", htlbfs_path);
+
+	if (sfs.f_bsize == 0 || (unsigned long)sfs.f_bsize > kvm->ram_size) {
+		die("Can't use hugetlbfs pagesize %ld for mem size %llu\n",
+		    (long)sfs.f_bsize, (unsigned long long)kvm->ram_size);
+	}
+	fd = mkstemp(mpath);
+
+	if (fd < 0)
+		die("Can't open %s for hugetlbfs map\n", mpath);
+
+	/* The name is only needed to create the file; the fd keeps it alive. */
+	unlink(mpath);
+
+	if (ftruncate(fd, kvm->ram_size) < 0)
+		die_perror("Can't size hugetlbfs file for guest RAM");
+
+	/* What other flags?  Is it required that we prealloc? */
+	kvm->ram_start = mmap(NULL, kvm->ram_size, PROT_RW, MAP_PRIVATE, fd, 0);
+
+	if (kvm->ram_start == MAP_FAILED)
+		die("Couldn't map %llu bytes for RAM (%d)\n",
+		    (unsigned long long)kvm->ram_size, errno);
+
+	/* The mapping stays valid after close; don't leak the descriptor. */
+	close(fd);
+}
+
+void kvm__arch_set_cmdline(char *cmdline, bool video)
+{
+	/* We don't need anything unusual in here. */
+}
+
+/*
+ * Architecture-specific KVM init.
+ *
+ * Records @ram_size, maps guest RAM from hugetlbfs (@hugetlbfs_path may be
+ * NULL for the default), and places the FDT in the top FDT_MAX_SIZE bytes of
+ * RAM with the RTAS area RTAS_MAX_SIZE below it.  @kvm_dev and @name are not
+ * used here.
+ *
+ * NOTE(review): nothing checks that ram_size is at least
+ * FDT_MAX_SIZE + RTAS_MAX_SIZE, so the subtractions below would wrap for a
+ * tiny guest -- confirm callers enforce a minimum.
+ */
+void kvm__arch_init(struct kvm *kvm, const char *kvm_dev, const char *hugetlbfs_path, u64 ram_size, const char *name)
+{
+	int cap_ppc_rma;
+	kvm->ram_size		= ram_size;
+
+	kvm__map_ram(kvm, hugetlbfs_path);
+
+	/* FDT goes at top of memory, RTAS just below */
+	kvm->fdt_gra = kvm->ram_size - FDT_MAX_SIZE;
+	kvm->rtas_gra = kvm->fdt_gra - RTAS_MAX_SIZE;
+	/* Return value ignored: the merge hint is treated as best-effort. */
+	madvise(kvm->ram_start, kvm->ram_size, MADV_MERGEABLE);
+
+	/* A result of 2 is taken to mean the host requires a contiguous RMA,
+	 * which this tool cannot provide yet.
+	 */
+	cap_ppc_rma = ioctl(kvm->sys_fd, KVM_CHECK_EXTENSION, KVM_CAP_PPC_RMA);
+	if (cap_ppc_rma == 2)
+		die("Need contiguous RMA allocation on this hardware, which is not yet supported.");
+}
+
+void kvm__irq_line(struct kvm *kvm, int irq, int level)
+{
+	fprintf(stderr, "irq_line(%d, %d)\n", irq, level);
+}
+
+void kvm__irq_trigger(struct kvm *kvm, int irq)
+{
+	kvm__irq_line(kvm, irq, 1);
+	kvm__irq_line(kvm, irq, 0);
+}
+
+/*
+ * Read @fd to EOF into host memory starting at @dst, never writing at or
+ * beyond @limit.  Dies on a read error, or if the data does not fit (@what
+ * names the image in the message).  Returns one past the last byte written.
+ */
+static void *load_image(int fd, void *dst, void *limit, const char *what)
+{
+	void *p = dst;
+
+	for (;;) {
+		size_t room;
+		ssize_t nr;
+
+		/* No room left and EOF not seen yet: refuse rather than
+		 * overrun.  (An image that exactly fills the space is also
+		 * rejected, as before.)
+		 */
+		if (p >= limit)
+			die("%s too big to contain in guest RAM.\n", what);
+
+		room = limit - p;
+		nr = read(fd, p, room < 65536 ? room : 65536);
+		if (nr < 0) {
+			if (errno == EINTR)
+				continue;
+			die_perror("read");
+		}
+		if (nr == 0)
+			break;
+
+		p += nr;
+	}
+
+	return p;
+}
+
+/*
+ * Load a flat binary kernel, and optionally an initrd, into guest RAM.
+ *
+ * @fd_kernel:      open fd of the kernel image, loaded at KERNEL_LOAD_ADDR.
+ * @fd_initrd:      open fd of the initrd, or -1 for none; loaded at the
+ *                  fixed address INITRD_LOAD_ADDR.
+ * @kernel_cmdline: command line, copied (truncated if necessary) into
+ *                  kern_cmdline.
+ *
+ * Sets kvm->initrd_gra and kvm->initrd_size (size 0 without an initrd).
+ * Returns true; every failure terminates via die()/die_perror().
+ *
+ * NOTE(review): images are bounded by the end of RAM only.  The FDT and RTAS
+ * areas sit at the top of RAM, so a very large initrd could still overlap
+ * them -- consider bounding by kvm->rtas_gra instead.
+ */
+int load_flat_binary(struct kvm *kvm, int fd_kernel, int fd_initrd, const char *kernel_cmdline)
+{
+	void *ram_end = kvm->ram_start + kvm->ram_size;
+	void *k_start;
+	void *k_end;
+
+	if (lseek(fd_kernel, 0, SEEK_SET) < 0)
+		die_perror("lseek");
+
+	k_start = guest_flat_to_host(kvm, KERNEL_LOAD_ADDR);
+	k_end = load_image(fd_kernel, k_start, ram_end, "kernel");
+
+	pr_info("Loaded kernel to 0x%x (%ld bytes)", KERNEL_LOAD_ADDR, k_end-k_start);
+
+	if (fd_initrd != -1) {
+		void *i_start;
+		void *i_end;
+
+		if (lseek(fd_initrd, 0, SEEK_SET) < 0)
+			die_perror("lseek");
+
+		if (k_end-k_start > INITRD_LOAD_ADDR - KERNEL_LOAD_ADDR)
+			die("Kernel overlaps initrd!");
+
+		/* The initrd goes at a fixed guest address, not directly
+		 * after the kernel.
+		 */
+		i_start = guest_flat_to_host(kvm, INITRD_LOAD_ADDR);
+		i_end = load_image(fd_initrd, i_start, ram_end, "initrd");
+
+		pr_info("Loaded initrd to 0x%x (%ld bytes)", INITRD_LOAD_ADDR, i_end-i_start);
+		kvm->initrd_gra = INITRD_LOAD_ADDR;
+		kvm->initrd_size = i_end-i_start;
+	} else {
+		kvm->initrd_size = 0;
+	}
+	/* strncpy() does not terminate on truncation; do it explicitly. */
+	strncpy(kern_cmdline, kernel_cmdline, sizeof(kern_cmdline));
+	kern_cmdline[sizeof(kern_cmdline) - 1] = '\0';
+
+	return true;
+}
+
+bool load_bzimage(struct kvm *kvm, int fd_kernel,
+		  int fd_initrd, const char *kernel_cmdline, u16 vidmode)
+{
+	/* We don't support bzImages. */
+	return false;
+}
+
+static void setup_fdt(struct kvm *kvm)
+{
+
+}
+
+/**
+ * kvm__arch_setup_firmware
+ */
+void kvm__arch_setup_firmware(struct kvm *kvm)
+{
+	/* Load RTAS */
+
+	/* Load SLOF */
+
+	/* Init FDT */
+	setup_fdt(kvm);
+}

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH 1/8] kvm tools: Add initial SPAPR PPC64 architecture support
  2011-12-06  4:05 ` [PATCH 1/8] kvm tools: Add initial SPAPR PPC64 architecture support Matt Evans
@ 2011-12-06 18:03   ` Scott Wood
  2011-12-06 18:33     ` Pekka Enberg
  2011-12-07  7:35     ` Matt Evans
  0 siblings, 2 replies; 14+ messages in thread
From: Scott Wood @ 2011-12-06 18:03 UTC (permalink / raw)
  To: Matt Evans; +Cc: kvm, kvm-ppc

On 12/05/2011 10:05 PM, Matt Evans wrote:
> This patch adds a new arch directory, powerpc, basic file structure, register
> setup and where necessary stubs out arch-specific functions (e.g. interrupts,
> runloop exits) that later patches will provide.  The target is an
> SPAPR-compliant PPC64 machine (i.e. pSeries); there is no support for PPC32 or
> 'bare metal' PPC64 guests as yet.  Subsequent patches implement the hcalls and
> RTAS required to boot SPAPR pSeries kernels.

You just sent out 28 patches removing "everything is x86"
dependencies -- may I suggest that the PPC code be structured so that
there isn't an "everything on PPC (or even PPC64) is SPAPR" assumption,
even if SPAPR is initially the only sub-arch present?

E.g. this is SPAPR-specific despite being in generically-named
tools/kvm/powerpc/kvm-cpu.c:

> +static void kvm_cpu__setup_regs(struct kvm_cpu *vcpu)
> +{
> +	struct kvm_regs *r = &vcpu->regs;
> +
> +	if (vcpu->cpu_id == 0) {
> +		r->pc = KERNEL_START_ADDR;
> +		r->gpr[3] = vcpu->kvm->fdt_gra;
> +		r->gpr[5] = 0;
> +	} else {
> +		r->pc = KERNEL_SECONDARY_START_ADDR;
> +		r->gpr[3] = vcpu->cpu_id;
> +	}
> +	r->msr = 0x8000000000001000UL; /* 64bit, non-HV, ME */
> +
> +	if (ioctl(vcpu->vcpu_fd, KVM_SET_REGS, &vcpu->regs) < 0)
> +		die_perror("KVM_SET_REGS failed");
> +}

> diff --git a/tools/kvm/powerpc/include/kvm/kvm-arch.h b/tools/kvm/powerpc/include/kvm/kvm-arch.h
> new file mode 100644
> index 0000000..722d01c
> --- /dev/null
> +++ b/tools/kvm/powerpc/include/kvm/kvm-arch.h
> @@ -0,0 +1,70 @@
> +/*
> + * PPC64 architecture-specific definitions
> + *
> + * Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms of the GNU General Public License version 2 as published
> + * by the Free Software Foundation.
> + */
> +
> +#ifndef KVM__KVM_ARCH_H
> +#define KVM__KVM_ARCH_H
[snip]
> +void ioport__setup_arch(void)
[snip]
> +int irq__register_device(u32 dev, u8 *num, u8 *pin, u8 *line)

I'm seeing a lot of double-underscores -- is this common style in KVM
tool?  It's reserved for use by the compiler and system library.  It's
common in the kernel (though not used like this for namespace
prefixes), but there's no system library involved there.

> diff --git a/tools/kvm/powerpc/kvm-cpu.c b/tools/kvm/powerpc/kvm-cpu.c
> new file mode 100644
> index 0000000..79422ff
> --- /dev/null
> +++ b/tools/kvm/powerpc/kvm-cpu.c
[snip]
> +#define MSR_SF		(1UL<<63)
> +#define MSR_HV		(1UL<<60)
> +#define MSR_VEC		(1UL<<25)
> +#define MSR_VSX		(1UL<<23)
> +#define MSR_POW		(1UL<<18)
> +#define MSR_EE		(1UL<<15)
> +#define MSR_PR		(1UL<<14)
> +#define MSR_FP		(1UL<<13)
> +#define MSR_ME		(1UL<<12)
> +#define MSR_FE0		(1UL<<11)
> +#define MSR_SE		(1UL<<10)
> +#define MSR_BE		(1UL<<9)
> +#define MSR_FE1		(1UL<<8)
> +#define MSR_IR		(1UL<<5)
> +#define MSR_DR		(1UL<<4)
> +#define MSR_PMM		(1UL<<2)
> +#define MSR_RI		(1UL<<1)
> +#define MSR_LE		(1UL<<0)

Shouldn't these go in a header?

> +#define HUGETLBFS_MAGIC       0x958458f6

#include <linux/magic.h>

?

-Scott

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 1/8] kvm tools: Add initial SPAPR PPC64 architecture support
  2011-12-06 18:03   ` Scott Wood
@ 2011-12-06 18:33     ` Pekka Enberg
  2011-12-06 18:54       ` Scott Wood
  2011-12-07  7:35     ` Matt Evans
  1 sibling, 1 reply; 14+ messages in thread
From: Pekka Enberg @ 2011-12-06 18:33 UTC (permalink / raw)
  To: Scott Wood
  Cc: Matt Evans, kvm, kvm-ppc, Ingo Molnar, Asias He, Sasha Levin,
	Cyrill Gorcunov

On Tue, Dec 6, 2011 at 8:03 PM, Scott Wood <scottwood@freescale.com> wrote:
> I'm seeing a lot of double-underscores -- is this common style in KVM
> tool?  It's reserved for use by the compiler and system library.  It's
> common in the kernel (though not used like this for namespace
> prefixes), but there's no system library involved there.

Yes, they are KVM tool coding style which we took from perf. Double
underscore _prefixes_ are reserved in userspace but there's no reason
we can't use them in identifiers like we do.

                         Pekka

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 1/8] kvm tools: Add initial SPAPR PPC64 architecture support
  2011-12-06 18:33     ` Pekka Enberg
@ 2011-12-06 18:54       ` Scott Wood
  0 siblings, 0 replies; 14+ messages in thread
From: Scott Wood @ 2011-12-06 18:54 UTC (permalink / raw)
  To: Pekka Enberg
  Cc: Matt Evans, kvm, kvm-ppc, Ingo Molnar, Asias He, Sasha Levin,
	Cyrill Gorcunov

On 12/06/2011 12:33 PM, Pekka Enberg wrote:
> On Tue, Dec 6, 2011 at 8:03 PM, Scott Wood <scottwood@freescale.com> wrote:
>> I'm seeing a lot of double-underscores -- is this common style in KVM
>> tool?  It's reserved for use by the compiler and system library.  It's
>> common in the kernel (though not used like this for namespace
>> prefixes), but there's no system library involved there.
> 
> Yes, they are KVM tool coding style which we took from perf. Double
> underscore _prefixes_ are reserved in userspace but there's no reason
> we can't use them in identifiers like we do.

OK, it looks like it's just C++ that also reserves non-leading double
underscores -- sorry about that.

-Scott

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 1/8] kvm tools: Add initial SPAPR PPC64 architecture support
  2011-12-06 18:03   ` Scott Wood
  2011-12-06 18:33     ` Pekka Enberg
@ 2011-12-07  7:35     ` Matt Evans
  2011-12-07 18:31       ` Scott Wood
  1 sibling, 1 reply; 14+ messages in thread
From: Matt Evans @ 2011-12-07  7:35 UTC (permalink / raw)
  To: Scott Wood; +Cc: kvm, kvm-ppc

Hi Scott,

On 07/12/11 05:03, Scott Wood wrote:
> On 12/05/2011 10:05 PM, Matt Evans wrote:
>> This patch adds a new arch directory, powerpc, basic file structure, register
>> setup and where necessary stubs out arch-specific functions (e.g. interrupts,
>> runloop exits) that later patches will provide.  The target is an
>> SPAPR-compliant PPC64 machine (i.e. pSeries); there is no support for PPC32 or
>> 'bare metal' PPC64 guests as yet.  Subsequent patches implement the hcalls and
>> RTAS required to boot SPAPR pSeries kernels.
> 
> You just sent out 28 patches removing "everything is x86"
> dependencies -- may I suggest that the PPC code be structured so that
> there isn't an "everything on PPC (or even PPC64) is SPAPR" assumption,
> even if SPAPR is initially the only sub-arch present?

I had anticipated this comment (though not the "28 patches" remark, easy now).
It is a fair comment, but you hit the nail on the head with your other mail
(regarding configuring in i8042, presumably to emulate crappy dev boards)
asking whether kvmtool has a config system.  It does not.

Since we currently lack any kind of build-time configuration (or any fancy
run-time -M <machine> a la QEMU) it's a bit hard to cater for multiple
platforms.  I'm aware that the PPC patches are painfully PPC64-with-SPAPR and I
don't present them as perfect, but I really think we need to attack the
configuration stuff before bifurcating.  Is this something you'd like to see to?

(Your comments on the #defines and magic accepted & fixed, thank you.)


Cheers,


Matt



> 
> E.g. this is SPAPR-specific despite being in generically-named
> tools/kvm/powerpc/kvm-cpu.c:
> 
>> +static void kvm_cpu__setup_regs(struct kvm_cpu *vcpu)
>> +{
>> +	struct kvm_regs *r = &vcpu->regs;
>> +
>> +	if (vcpu->cpu_id == 0) {
>> +		r->pc = KERNEL_START_ADDR;
>> +		r->gpr[3] = vcpu->kvm->fdt_gra;
>> +		r->gpr[5] = 0;
>> +	} else {
>> +		r->pc = KERNEL_SECONDARY_START_ADDR;
>> +		r->gpr[3] = vcpu->cpu_id;
>> +	}
>> +	r->msr = 0x8000000000001000UL; /* 64bit, non-HV, ME */
>> +
>> +	if (ioctl(vcpu->vcpu_fd, KVM_SET_REGS, &vcpu->regs) < 0)
>> +		die_perror("KVM_SET_REGS failed");
>> +}
> 
>> diff --git a/tools/kvm/powerpc/include/kvm/kvm-arch.h b/tools/kvm/powerpc/include/kvm/kvm-arch.h
>> new file mode 100644
>> index 0000000..722d01c
>> --- /dev/null
>> +++ b/tools/kvm/powerpc/include/kvm/kvm-arch.h
>> @@ -0,0 +1,70 @@
>> +/*
>> + * PPC64 architecture-specific definitions
>> + *
>> + * Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation.
>> + *
>> + * This program is free software; you can redistribute it and/or modify it
>> + * under the terms of the GNU General Public License version 2 as published
>> + * by the Free Software Foundation.
>> + */
>> +
>> +#ifndef KVM__KVM_ARCH_H
>> +#define KVM__KVM_ARCH_H
> [snip]
>> +void ioport__setup_arch(void)
> [snip]
>> +int irq__register_device(u32 dev, u8 *num, u8 *pin, u8 *line)
> 
> I'm seeing a lot of double-underscores -- is this common style in KVM
> tool?  It's reserved for use by the compiler and system library.  It's
> common in the kernel (though not used like this for namespace
> prefixes), but there's no system library involved there.
> 
>> diff --git a/tools/kvm/powerpc/kvm-cpu.c b/tools/kvm/powerpc/kvm-cpu.c
>> new file mode 100644
>> index 0000000..79422ff
>> --- /dev/null
>> +++ b/tools/kvm/powerpc/kvm-cpu.c
> [snip]
>> +#define MSR_SF		(1UL<<63)
>> +#define MSR_HV		(1UL<<60)
>> +#define MSR_VEC		(1UL<<25)
>> +#define MSR_VSX		(1UL<<23)
>> +#define MSR_POW		(1UL<<18)
>> +#define MSR_EE		(1UL<<15)
>> +#define MSR_PR		(1UL<<14)
>> +#define MSR_FP		(1UL<<13)
>> +#define MSR_ME		(1UL<<12)
>> +#define MSR_FE0		(1UL<<11)
>> +#define MSR_SE		(1UL<<10)
>> +#define MSR_BE		(1UL<<9)
>> +#define MSR_FE1		(1UL<<8)
>> +#define MSR_IR		(1UL<<5)
>> +#define MSR_DR		(1UL<<4)
>> +#define MSR_PMM		(1UL<<2)
>> +#define MSR_RI		(1UL<<1)
>> +#define MSR_LE		(1UL<<0)
> 
> Shouldn't these go in a header?
> 
>> +#define HUGETLBFS_MAGIC       0x958458f6
> 
> #include <linux/magic.h>
> 
> ?
> 
> -Scott

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 1/8] kvm tools: Add initial SPAPR PPC64 architecture support
  2011-12-07  7:35     ` Matt Evans
@ 2011-12-07 18:31       ` Scott Wood
  2011-12-08  2:57         ` Matt Evans
  0 siblings, 1 reply; 14+ messages in thread
From: Scott Wood @ 2011-12-07 18:31 UTC (permalink / raw)
  To: Matt Evans; +Cc: kvm, kvm-ppc

On 12/07/2011 01:35 AM, Matt Evans wrote:
> Hi Scott,
> 
> On 07/12/11 05:03, Scott Wood wrote:
>> On 12/05/2011 10:05 PM, Matt Evans wrote:
>>> This patch adds a new arch directory, powerpc, basic file structure, register
>>> setup and where necessary stubs out arch-specific functions (e.g. interrupts,
>>> runloop exits) that later patches will provide.  The target is an
>>> SPAPR-compliant PPC64 machine (i.e. pSeries); there is no support for PPC32 or
>>> 'bare metal' PPC64 guests as yet.  Subsequent patches implement the hcalls and
>>> RTAS required to boot SPAPR pSeries kernels.
>>
>> You just sent out 28 patches removing "everything is x86"
>> dependencies -- may I suggest that the PPC code be structured so that
>> there isn't an "everything on PPC (or even PPC64) is SPAPR" assumption,
>> even if SPAPR is initially the only sub-arch present?
> 
> I had anticipated this comment (though not the "28 patches" remark, easy now).

I was just using that to illustrate how it's easier to handle earlier
than later -- no offense intended. :-)

> It is a fair comment, but you hit the nail on the head with your other mail
> (regarding configuring in i8042, presumably to emulate crappy dev boards)
> asking whether kvmtool has a config system.  It does not.
> 
> Since we currently lack any kind of build-time configuration (or any fancy
> run-time -M <machine> a la QEMU) it's a bit hard to cater for multiple
> platforms.  I'm aware that the PPC patches are painfully PPC64-with-SPAPR and I
> don't present them as perfect, but I really think we need to attack the
> configuration stuff before bifurcating.  Is this something you'd like to see to?

Just putting all SPAPR stuff in SPAPR-named files (or at least
SPAPR-named functions), and likewise for book3s stuff, etc. would be an
improvement.  I see that you did this for some things, but not all.  Try
to make it obvious where the target-specific branching would take place,
even if the actual branching mechanism is currently just a FIXME comment.

-Scott

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 1/8] kvm tools: Add initial SPAPR PPC64 architecture support
  2011-12-07 18:31       ` Scott Wood
@ 2011-12-08  2:57         ` Matt Evans
  0 siblings, 0 replies; 14+ messages in thread
From: Matt Evans @ 2011-12-08  2:57 UTC (permalink / raw)
  To: Scott Wood; +Cc: kvm, kvm-ppc

On 08/12/11 05:31, Scott Wood wrote:
> On 12/07/2011 01:35 AM, Matt Evans wrote:
>> Hi Scott,
>>
>> On 07/12/11 05:03, Scott Wood wrote:
>>> On 12/05/2011 10:05 PM, Matt Evans wrote:
>>>> This patch adds a new arch directory, powerpc, basic file structure, register
>>>> setup and where necessary stubs out arch-specific functions (e.g. interrupts,
>>>> runloop exits) that later patches will provide.  The target is an
>>>> SPAPR-compliant PPC64 machine (i.e. pSeries); there is no support for PPC32 or
>>>> 'bare metal' PPC64 guests as yet.  Subsequent patches implement the hcalls and
>>>> RTAS required to boot SPAPR pSeries kernels.
>>>
>>> You just sent out 28 patches removing "everything is x86"
>>> dependencies -- may I suggest that the PPC code be structured so that
>>> there isn't an "everything on PPC (or even PPC64) is SPAPR" assumption,
>>> even if SPAPR is initially the only sub-arch present?
>>
>> I had anticipated this comment (though not the "28 patches" remark, easy now).
> 
> I was just using that to illustrate how it's easier to handle earlier
> than later -- no offense intended. :-)
> 
>> It is a fair comment, but you hit the nail on the head with your other mail
>> (regarding configuring in i8042, presumably to emulate crappy dev boards)
>> asking whether kvmtool has a config system.  It does not.
>>
>> Since we currently lack any kind of build-time configuration (or any fancy
>> run-time -M <machine> a la QEMU) it's a bit hard to cater for multiple
>> platforms.  I'm aware that the PPC patches are painfully PPC64-with-SPAPR and I
>> don't present them as perfect, but I really think we need to attack the
>> configuration stuff before bifurcating.  Is this something you'd like to see to?
> 
> Just putting all SPAPR stuff in SPAPR-named files (or at least
> SPAPR-named functions), and likewise for book3s stuff, etc. would be an
> improvement.  I see that you did this for some things, but not all.  Try
> to make it obvious where the target-specific branching would take place,
> even if the actual branching mechanism is currently just a FIXME comment.

No worries, that's a good suggestion-- I'll have a spin through the PPC stuff and
see if there's anything worth splitting, or at least point out everywhere I can find
with an appropriate comment.

Thanks,


Matt

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 2/8] kvm tools: Generate SPAPR PPC64 guest device tree
       [not found] <cover.1323143103.git.matt@ozlabs.org>
  2011-12-06  4:05 ` [PATCH 1/8] kvm tools: Add initial SPAPR PPC64 architecture support Matt Evans
@ 2011-12-06  4:06 ` Matt Evans
  2011-12-06  4:06 ` [PATCH 3/8] kvm tools: Add SPAPR PPC64 hcall & rtascall structure Matt Evans
                   ` (5 subsequent siblings)
  7 siblings, 0 replies; 14+ messages in thread
From: Matt Evans @ 2011-12-06  4:06 UTC (permalink / raw)
  To: kvm, kvm-ppc

The generated DT is the bare minimum structure required for SPAPR (on which
subsequent patches for VIO, XICS, PCI etc. will build); root node, cpus, memory.

Some aspects are currently hardwired for simplicity, for example advertised
page sizes, HPT size, SLB size, VMX/DFP, etc.  Future support of a variety
of POWER CPUs should acquire this info from the host and encode appropriately.

This requires a 64-bit libfdt.

Signed-off-by: Matt Evans <matt@ozlabs.org>
---
 tools/kvm/Makefile      |    3 +-
 tools/kvm/powerpc/kvm.c |  141 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 143 insertions(+), 1 deletions(-)

diff --git a/tools/kvm/Makefile b/tools/kvm/Makefile
index 58815a2..dc18959 100644
--- a/tools/kvm/Makefile
+++ b/tools/kvm/Makefile
@@ -129,7 +129,8 @@ ifeq ($(uname_M), ppc64)
 	OBJS	+= powerpc/kvm.o
 	OBJS	+= powerpc/kvm-cpu.o
 	ARCH_INCLUDE := powerpc/include
-	CFLAGS += -m64
+	CFLAGS 	+= -m64
+	LIBS 	+= -lfdt
 endif
 
 ###
diff --git a/tools/kvm/powerpc/kvm.c b/tools/kvm/powerpc/kvm.c
index 036bfc0..d792bee 100644
--- a/tools/kvm/powerpc/kvm.c
+++ b/tools/kvm/powerpc/kvm.c
@@ -3,6 +3,9 @@
  *
  * Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation.
  *
+ * Portions of FDT setup borrowed from QEMU, copyright 2010 David Gibson, IBM
+ * Corporation.
+ *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 as published
  * by the Free Software Foundation.
@@ -28,8 +31,11 @@
 #include <asm/unistd.h>
 #include <errno.h>
 
+#include <linux/byteorder.h>
 #include <libfdt.h>
 
+#define HPT_ORDER 24
+
 #define HUGETLBFS_PATH "/var/lib/hugetlbfs/global/pagesize-16MB/"
 
 static char kern_cmdline[2048];
@@ -212,9 +218,144 @@ bool load_bzimage(struct kvm *kvm, int fd_kernel,
 	return false;
 }
 
+#define SMT_THREADS 4
+
+#define _FDT(exp)							\
+	do {								\
+		int ret = (exp);					\
+		if (ret < 0) {						\
+			die("Error creating device tree: %s: %s\n",	\
+			    #exp, fdt_strerror(ret));			\
+		}							\
+	} while (0)
+
+static uint32_t mfpvr(void)
+{
+	uint32_t r;
+	asm volatile ("mfpvr %0" : "=r"(r));
+	return r;
+}
+
 static void setup_fdt(struct kvm *kvm)
 {
+	uint64_t 	mem_reg_property[] = { 0, cpu_to_be64(kvm->ram_size) };
+	int 		smp_cpus = kvm->nrcpus;
+	uint32_t 	interrupt_server_ranges_prop[] = {0, cpu_to_be32(smp_cpus)};
+	char 		hypertas_prop_kvm[] = "hcall-pft\0hcall-term\0hcall-dabr\0hcall-interrupt"
+		"\0hcall-tce\0hcall-vio\0hcall-splpar\0hcall-bulk";
+	int 		i, j;
+	char 		cpu_name[30];
+	u8		staging_fdt[FDT_MAX_SIZE];
+	uint32_t      	pvr = mfpvr();
+
+	/* Generate an appropriate DT at kvm->fdt_gra */
+	void *fdt_dest = guest_flat_to_host(kvm, kvm->fdt_gra);
+	void *fdt = staging_fdt;
+
+	_FDT(fdt_create(fdt, FDT_MAX_SIZE));
+	_FDT(fdt_finish_reservemap(fdt));
+
+	_FDT(fdt_begin_node(fdt, ""));
+
+	_FDT(fdt_property_string(fdt, "device_type", "chrp"));
+	_FDT(fdt_property_string(fdt, "model", "IBM pSeries (emulated by kvmtool)"));
+	_FDT(fdt_property_cell(fdt, "#address-cells", 0x2));
+	_FDT(fdt_property_cell(fdt, "#size-cells", 0x2));
+
+	/* /chosen */
+	_FDT(fdt_begin_node(fdt, "chosen"));
+	/* cmdline */
+	_FDT(fdt_property_string(fdt, "bootargs", kern_cmdline));
+	/* Initrd */
+	if (kvm->initrd_size != 0) {
+		uint32_t ird_st_prop = cpu_to_be32(kvm->initrd_gra);
+		uint32_t ird_end_prop = cpu_to_be32(kvm->initrd_gra +
+						    kvm->initrd_size);
+		_FDT(fdt_property(fdt, "linux,initrd-start",
+				   &ird_st_prop, sizeof(ird_st_prop)));
+		_FDT(fdt_property(fdt, "linux,initrd-end",
+				   &ird_end_prop, sizeof(ird_end_prop)));
+	}
+
+	/* Memory: We don't alloc. a separate RMA yet.  If we ever need to
+	 * (CAP_PPC_RMA == 2) then have one memory node for 0->RMAsize, and
+	 * another RMAsize->endOfMem.
+	 */
+	_FDT(fdt_begin_node(fdt, "memory@0"));
+	_FDT(fdt_property_string(fdt, "device_type", "memory"));
+	_FDT(fdt_property(fdt, "reg", mem_reg_property, sizeof(mem_reg_property)));
+	_FDT(fdt_end_node(fdt));
+
+	/* CPUs */
+	_FDT(fdt_begin_node(fdt, "cpus"));
+	_FDT(fdt_property_cell(fdt, "#address-cells", 0x1));
+	_FDT(fdt_property_cell(fdt, "#size-cells", 0x0));
+
+	for (i = 0; i < smp_cpus; i += SMT_THREADS) {
+		/* These page and segment sizes are a basic minimum set.  Really,
+		 * we should be fancier and work out what the host supports then
+		 * encode this here.
+		 */
+		int32_t page_sizes_prop[] = {0xc, 0x0, 0x1, 0xc, 0x0,
+					     0x18, 0x100, 0x1, 0x18, 0x0};
+		int32_t seg_sizes_prop[] = {0x1c, 0x28, 0xffffffff, 0xffffffff};
+		int32_t pft_size_prop[] = { 0, HPT_ORDER };
+		uint32_t servers_prop[SMT_THREADS];
+		uint32_t gservers_prop[SMT_THREADS * 2];
+		int threads = (smp_cpus - i) >= SMT_THREADS ? SMT_THREADS :
+			smp_cpus - i;
+
+		sprintf(cpu_name, "PowerPC,POWER7@%d", i);
+		_FDT(fdt_begin_node(fdt, cpu_name));
+		_FDT(fdt_property_string(fdt, "name", "PowerPC,POWER7"));
+		_FDT(fdt_property_string(fdt, "device_type", "cpu"));
+
+		_FDT(fdt_property_cell(fdt, "reg", i));
+		_FDT(fdt_property_cell(fdt, "cpu-version", pvr));
+		_FDT(fdt_property_cell(fdt, "dcache-block-size", 0x00000080));
+		_FDT(fdt_property_cell(fdt, "icache-block-size", 0x00000080));
+		_FDT(fdt_property_cell(fdt, "timebase-frequency", 512000000));
+		_FDT(fdt_property_cell(fdt, "clock-frequency", 0xddbab200));
+
+		/* SLB size is hardwired as we currently assume POWERn */
+		_FDT(fdt_property_cell(fdt, "ibm,slb-size", 32));
+		/* HPT size is also hardwired; KVM currently fixes it at 16MB
+		 * but the moment that changes we'll need to read it out of the
+		 * kernel.
+		 */
+		_FDT(fdt_property(fdt, "ibm,pft-size", pft_size_prop, sizeof(pft_size_prop)));
+
+		_FDT(fdt_property_string(fdt, "status", "okay"));
+		_FDT(fdt_property(fdt, "64-bit", NULL, 0));
+		/* A server for each thread in this core */
+		for (j = 0; j < SMT_THREADS; j++) {
+			servers_prop[j] = cpu_to_be32(i+j);
+			/* Hack borrowed from QEMU, direct the group queues back to cpu 0 */
+			gservers_prop[j*2] = cpu_to_be32(i+j);
+			gservers_prop[j*2 + 1] = 0;
+		}
+		_FDT(fdt_property(fdt, "ibm,ppc-interrupt-server#s",
+				   servers_prop, threads * sizeof(uint32_t)));
+		_FDT(fdt_property(fdt, "ibm,ppc-interrupt-gserver#s",
+				   gservers_prop, threads * 2 * sizeof(uint32_t)));
+		_FDT(fdt_property(fdt, "ibm,segment-page-sizes",
+				   page_sizes_prop, sizeof(page_sizes_prop)));
+		_FDT(fdt_property(fdt, "ibm,processor-segment-sizes",
+				  seg_sizes_prop, sizeof(seg_sizes_prop)));
+		/* And VMX / DFP */
+		_FDT(fdt_property_cell(fdt, "ibm,vmx", 0x2));
+		_FDT(fdt_property_cell(fdt, "ibm,dfp", 0x1));
+		_FDT(fdt_end_node(fdt));
+	}
+	_FDT(fdt_end_node(fdt));
+
+	/* Finalise: */
+	_FDT(fdt_end_node(fdt)); /* Root node */
+	_FDT(fdt_finish(fdt));
 
+	_FDT(fdt_open_into(fdt, fdt_dest, FDT_MAX_SIZE));
+	_FDT(fdt_add_mem_rsv(fdt_dest, kvm->rtas_gra, kvm->rtas_size));
+	_FDT(fdt_pack(fdt_dest));
 }
 
 /**

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 3/8] kvm tools: Add SPAPR PPC64 hcall & rtascall structure
       [not found] <cover.1323143103.git.matt@ozlabs.org>
  2011-12-06  4:05 ` [PATCH 1/8] kvm tools: Add initial SPAPR PPC64 architecture support Matt Evans
  2011-12-06  4:06 ` [PATCH 2/8] kvm tools: Generate SPAPR PPC64 guest device tree Matt Evans
@ 2011-12-06  4:06 ` Matt Evans
  2011-12-06  4:06 ` [PATCH 4/8] kvm tools: Add SPAPR PPC64 HV console Matt Evans
                   ` (4 subsequent siblings)
  7 siblings, 0 replies; 14+ messages in thread
From: Matt Evans @ 2011-12-06  4:06 UTC (permalink / raw)
  To: kvm, kvm-ppc

This patch adds the basic structure for HV calls, their registration and some of
the simpler calls.  A similar layout for RTAS calls is also added, again with
some of the simpler RTAS calls used by the guest.  The SPAPR RTAS stub is
generated inline.  Also, nodes for RTAS are added to the device tree.

Signed-off-by: Matt Evans <matt@ozlabs.org>
---
 tools/kvm/Makefile              |    2 +
 tools/kvm/powerpc/kvm-cpu.c     |    5 +
 tools/kvm/powerpc/kvm.c         |   39 +++++-
 tools/kvm/powerpc/spapr.h       |  308 +++++++++++++++++++++++++++++++++++++++
 tools/kvm/powerpc/spapr_hcall.c |  151 +++++++++++++++++++
 tools/kvm/powerpc/spapr_rtas.c  |  226 ++++++++++++++++++++++++++++
 6 files changed, 730 insertions(+), 1 deletions(-)
 create mode 100644 tools/kvm/powerpc/spapr.h
 create mode 100644 tools/kvm/powerpc/spapr_hcall.c
 create mode 100644 tools/kvm/powerpc/spapr_rtas.c

diff --git a/tools/kvm/Makefile b/tools/kvm/Makefile
index dc18959..0f24104 100644
--- a/tools/kvm/Makefile
+++ b/tools/kvm/Makefile
@@ -128,6 +128,8 @@ ifeq ($(uname_M), ppc64)
 	OBJS	+= powerpc/irq.o
 	OBJS	+= powerpc/kvm.o
 	OBJS	+= powerpc/kvm-cpu.o
+	OBJS	+= powerpc/spapr_hcall.o
+	OBJS	+= powerpc/spapr_rtas.o
 	ARCH_INCLUDE := powerpc/include
 	CFLAGS 	+= -m64
 	LIBS 	+= -lfdt
diff --git a/tools/kvm/powerpc/kvm-cpu.c b/tools/kvm/powerpc/kvm-cpu.c
index 79422ff..71c648e 100644
--- a/tools/kvm/powerpc/kvm-cpu.c
+++ b/tools/kvm/powerpc/kvm-cpu.c
@@ -14,6 +14,8 @@
 #include "kvm/util.h"
 #include "kvm/kvm.h"
 
+#include "spapr.h"
+
 #include <sys/ioctl.h>
 #include <sys/mman.h>
 #include <signal.h>
@@ -156,6 +158,9 @@ bool kvm_cpu__handle_exit(struct kvm_cpu *vcpu)
 	bool ret = true;
 	struct kvm_run *run = vcpu->kvm_run;
 	switch(run->exit_reason) {
+	case KVM_EXIT_PAPR_HCALL:
+		run->papr_hcall.ret = spapr_hypercall(vcpu, run->papr_hcall.nr, run->papr_hcall.args);
+		break;
 	default:
 		ret = false;
 	}
diff --git a/tools/kvm/powerpc/kvm.c b/tools/kvm/powerpc/kvm.c
index d792bee..2f0a921 100644
--- a/tools/kvm/powerpc/kvm.c
+++ b/tools/kvm/powerpc/kvm.c
@@ -14,6 +14,8 @@
 #include "kvm/kvm.h"
 #include "kvm/util.h"
 
+#include "spapr.h"
+
 #include <linux/kvm.h>
 
 #include <sys/types.h>
@@ -153,6 +155,10 @@ void kvm__arch_init(struct kvm *kvm, const char *kvm_dev, const char *hugetlbfs_
 	cap_ppc_rma = ioctl(kvm->sys_fd, KVM_CHECK_EXTENSION, KVM_CAP_PPC_RMA);
 	if (cap_ppc_rma == 2)
 		die("Need contiguous RMA allocation on this hardware, which is not yet supported.");
+
+	/* Do these before FDT setup, IRQ setup, etc. */
+	hypercall_init();
+	register_core_rtas();
 }
 
 void kvm__irq_line(struct kvm *kvm, int irq, int level)
@@ -262,6 +268,20 @@ static void setup_fdt(struct kvm *kvm)
 	_FDT(fdt_property_cell(fdt, "#address-cells", 0x2));
 	_FDT(fdt_property_cell(fdt, "#size-cells", 0x2));
 
+	/* RTAS */
+	_FDT(fdt_begin_node(fdt, "rtas"));
+	/* This is what the kernel uses to switch 'We're an LPAR'! */
+        _FDT(fdt_property(fdt, "ibm,hypertas-functions", hypertas_prop_kvm,
+                           sizeof(hypertas_prop_kvm)));
+	_FDT(fdt_property_cell(fdt, "linux,rtas-base", kvm->rtas_gra));
+	_FDT(fdt_property_cell(fdt, "linux,rtas-entry", kvm->rtas_gra));
+	_FDT(fdt_property_cell(fdt, "rtas-size", kvm->rtas_size));
+	/* Now add properties for all RTAS tokens: */
+	if (spapr_rtas_fdt_setup(kvm, fdt))
+		die("Couldn't create RTAS FDT properties\n");
+
+	_FDT(fdt_end_node(fdt));
+
 	/* /chosen */
 	_FDT(fdt_begin_node(fdt, "chosen"));
 	/* cmdline */
@@ -363,7 +383,24 @@ static void setup_fdt(struct kvm *kvm)
  */
 void kvm__arch_setup_firmware(struct kvm *kvm)
 {
-	/* Load RTAS */
+	/* Set up RTAS stub.  All it is is a single hypercall:
+	   0:   7c 64 1b 78     mr      r4,r3
+	   4:   3c 60 00 00     lis     r3,0
+	   8:   60 63 f0 00     ori     r3,r3,61440
+	   c:   44 00 00 22     sc      1
+	  10:   4e 80 00 20     blr
+	*/
+	uint32_t *rtas = guest_flat_to_host(kvm, kvm->rtas_gra);
+
+	rtas[0] = 0x7c641b78;
+	rtas[1] = 0x3c600000;
+	rtas[2] = 0x6063f000;
+	rtas[3] = 0x44000022;
+	rtas[4] = 0x4e800020;
+	kvm->rtas_size = 20;
+
+	pr_info("Set up %ld bytes of RTAS at 0x%lx\n",
+		kvm->rtas_size, kvm->rtas_gra);
 
 	/* Load SLOF */
 
diff --git a/tools/kvm/powerpc/spapr.h b/tools/kvm/powerpc/spapr.h
new file mode 100644
index 0000000..4e5d7bd
--- /dev/null
+++ b/tools/kvm/powerpc/spapr.h
@@ -0,0 +1,308 @@
+/*
+ * SPAPR definitions and declarations
+ *
+ * Borrowed heavily from QEMU's spapr.h,
+ * Copyright (c) 2010 David Gibson, IBM Corporation.
+ *
+ * Modifications by Matt Evans <matt@ozlabs.org>, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#if !defined(__HW_SPAPR_H__)
+#define __HW_SPAPR_H__
+
+#include <inttypes.h>
+#include "kvm/kvm.h"
+#include "kvm/kvm-cpu.h"
+
+typedef unsigned long target_ulong;
+typedef uintptr_t target_phys_addr_t;
+
+
+#define H_SUCCESS         0
+#define H_BUSY            1        /* Hardware busy -- retry later */
+#define H_CLOSED          2        /* Resource closed */
+#define H_NOT_AVAILABLE   3
+#define H_CONSTRAINED     4        /* Resource request constrained to max allowed */
+#define H_PARTIAL         5
+#define H_IN_PROGRESS     14       /* Kind of like busy */
+#define H_PAGE_REGISTERED 15
+#define H_PARTIAL_STORE   16
+#define H_PENDING         17       /* returned from H_POLL_PENDING */
+#define H_CONTINUE        18       /* Returned from H_Join on success */
+#define H_LONG_BUSY_START_RANGE         9900  /* Start of long busy range */
+#define H_LONG_BUSY_ORDER_1_MSEC        9900  /* Long busy, hint that 1msec \
+                                                 is a good time to retry */
+#define H_LONG_BUSY_ORDER_10_MSEC       9901  /* Long busy, hint that 10msec \
+                                                 is a good time to retry */
+#define H_LONG_BUSY_ORDER_100_MSEC      9902  /* Long busy, hint that 100msec \
+                                                 is a good time to retry */
+#define H_LONG_BUSY_ORDER_1_SEC         9903  /* Long busy, hint that 1sec \
+                                                 is a good time to retry */
+#define H_LONG_BUSY_ORDER_10_SEC        9904  /* Long busy, hint that 10sec \
+                                                 is a good time to retry */
+#define H_LONG_BUSY_ORDER_100_SEC       9905  /* Long busy, hint that 100sec \
+                                                 is a good time to retry */
+#define H_LONG_BUSY_END_RANGE           9905  /* End of long busy range */
+#define H_HARDWARE        -1       /* Hardware error */
+#define H_FUNCTION        -2       /* Function not supported */
+#define H_PRIVILEGE       -3       /* Caller not privileged */
+#define H_PARAMETER       -4       /* Parameter invalid, out-of-range or conflicting */
+#define H_BAD_MODE        -5       /* Illegal msr value */
+#define H_PTEG_FULL       -6       /* PTEG is full */
+#define H_NOT_FOUND       -7       /* PTE was not found */
+#define H_RESERVED_DABR   -8       /* DABR address is reserved by the hypervisor on this processor */
+#define H_NO_MEM          -9
+#define H_AUTHORITY       -10
+#define H_PERMISSION      -11
+#define H_DROPPED         -12
+#define H_SOURCE_PARM     -13
+#define H_DEST_PARM       -14
+#define H_REMOTE_PARM     -15
+#define H_RESOURCE        -16
+#define H_ADAPTER_PARM    -17
+#define H_RH_PARM         -18
+#define H_RCQ_PARM        -19
+#define H_SCQ_PARM        -20
+#define H_EQ_PARM         -21
+#define H_RT_PARM         -22
+#define H_ST_PARM         -23
+#define H_SIGT_PARM       -24
+#define H_TOKEN_PARM      -25
+#define H_MLENGTH_PARM    -27
+#define H_MEM_PARM        -28
+#define H_MEM_ACCESS_PARM -29
+#define H_ATTR_PARM       -30
+#define H_PORT_PARM       -31
+#define H_MCG_PARM        -32
+#define H_VL_PARM         -33
+#define H_TSIZE_PARM      -34
+#define H_TRACE_PARM      -35
+
+#define H_MASK_PARM       -37
+#define H_MCG_FULL        -38
+#define H_ALIAS_EXIST     -39
+#define H_P_COUNTER       -40
+#define H_TABLE_FULL      -41
+#define H_ALT_TABLE       -42
+#define H_MR_CONDITION    -43
+#define H_NOT_ENOUGH_RESOURCES -44
+#define H_R_STATE         -45
+#define H_RESCINDEND      -46
+#define H_MULTI_THREADS_ACTIVE -9005
+
+
+/* Long Busy is a condition that can be returned by the firmware
+ * when a call cannot be completed now, but the identical call
+ * should be retried later.  This prevents calls blocking in the
+ * firmware for long periods of time.  Annoyingly the firmware can return
+ * a range of return codes, hinting at how long we should wait before
+ * retrying.  If you don't care for the hint, the macro below is a good
+ * way to check for the long_busy return codes (note: x is evaluated twice).
+ */
+#define H_IS_LONG_BUSY(x)  (((x) >= H_LONG_BUSY_START_RANGE) \
+                            && ((x) <= H_LONG_BUSY_END_RANGE))
+
+/* Flags */
+#define H_LARGE_PAGE      (1ULL<<(63-16))
+#define H_EXACT           (1ULL<<(63-24))       /* Use exact PTE or return H_PTEG_FULL */
+#define H_R_XLATE         (1ULL<<(63-25))       /* include a valid logical page num in the pte if the valid bit is set */
+#define H_READ_4          (1ULL<<(63-26))       /* Return 4 PTEs */
+#define H_PAGE_STATE_CHANGE (1ULL<<(63-28))
+#define H_PAGE_UNUSED     ((1ULL<<(63-29)) | (1ULL<<(63-30)))
+#define H_PAGE_SET_UNUSED (H_PAGE_STATE_CHANGE | H_PAGE_UNUSED)
+#define H_PAGE_SET_LOANED (H_PAGE_SET_UNUSED | (1ULL<<(63-31)))
+#define H_PAGE_SET_ACTIVE H_PAGE_STATE_CHANGE
+#define H_AVPN            (1ULL<<(63-32))       /* An avpn is provided as a sanity test */
+#define H_ANDCOND         (1ULL<<(63-33))
+#define H_ICACHE_INVALIDATE (1ULL<<(63-40))     /* icbi, etc.  (ignored for IO pages) */
+#define H_ICACHE_SYNCHRONIZE (1ULL<<(63-41))    /* dcbst, icbi, etc (ignored for IO pages) */
+#define H_ZERO_PAGE       (1ULL<<(63-48))       /* zero the page before mapping (ignored for IO pages) */
+#define H_COPY_PAGE       (1ULL<<(63-49))
+#define H_N               (1ULL<<(63-61))
+#define H_PP1             (1ULL<<(63-62))
+#define H_PP2             (1ULL<<(63-63))
+
+/* VASI States */
+#define H_VASI_INVALID    0
+#define H_VASI_ENABLED    1
+#define H_VASI_ABORTED    2
+#define H_VASI_SUSPENDING 3
+#define H_VASI_SUSPENDED  4
+#define H_VASI_RESUMED    5
+#define H_VASI_COMPLETED  6
+
+/* DABRX flags */
+#define H_DABRX_HYPERVISOR (1ULL<<(63-61))
+#define H_DABRX_KERNEL     (1ULL<<(63-62))
+#define H_DABRX_USER       (1ULL<<(63-63))
+
+/* Each control block has to be on a 4K boundary */
+#define H_CB_ALIGNMENT     4096
+
+/* pSeries hypervisor opcodes */
+#define H_REMOVE                0x04
+#define H_ENTER                 0x08
+#define H_READ                  0x0c
+#define H_CLEAR_MOD             0x10
+#define H_CLEAR_REF             0x14
+#define H_PROTECT               0x18
+#define H_GET_TCE               0x1c
+#define H_PUT_TCE               0x20
+#define H_SET_SPRG0             0x24
+#define H_SET_DABR              0x28
+#define H_PAGE_INIT             0x2c
+#define H_SET_ASR               0x30
+#define H_ASR_ON                0x34
+#define H_ASR_OFF               0x38
+#define H_LOGICAL_CI_LOAD       0x3c
+#define H_LOGICAL_CI_STORE      0x40
+#define H_LOGICAL_CACHE_LOAD    0x44
+#define H_LOGICAL_CACHE_STORE   0x48
+#define H_LOGICAL_ICBI          0x4c
+#define H_LOGICAL_DCBF          0x50
+#define H_GET_TERM_CHAR         0x54
+#define H_PUT_TERM_CHAR         0x58
+#define H_REAL_TO_LOGICAL       0x5c
+#define H_HYPERVISOR_DATA       0x60
+#define H_EOI                   0x64
+#define H_CPPR                  0x68
+#define H_IPI                   0x6c
+#define H_IPOLL                 0x70
+#define H_XIRR                  0x74
+#define H_PERFMON               0x7c
+#define H_MIGRATE_DMA           0x78
+#define H_REGISTER_VPA          0xDC
+#define H_CEDE                  0xE0
+#define H_CONFER                0xE4
+#define H_PROD                  0xE8
+#define H_GET_PPP               0xEC
+#define H_SET_PPP               0xF0
+#define H_PURR                  0xF4
+#define H_PIC                   0xF8
+#define H_REG_CRQ               0xFC
+#define H_FREE_CRQ              0x100
+#define H_VIO_SIGNAL            0x104
+#define H_SEND_CRQ              0x108
+#define H_COPY_RDMA             0x110
+#define H_REGISTER_LOGICAL_LAN  0x114
+#define H_FREE_LOGICAL_LAN      0x118
+#define H_ADD_LOGICAL_LAN_BUFFER 0x11C
+#define H_SEND_LOGICAL_LAN      0x120
+#define H_BULK_REMOVE           0x124
+#define H_MULTICAST_CTRL        0x130
+#define H_SET_XDABR             0x134
+#define H_STUFF_TCE             0x138
+#define H_PUT_TCE_INDIRECT      0x13C
+#define H_CHANGE_LOGICAL_LAN_MAC 0x14C
+#define H_VTERM_PARTNER_INFO    0x150
+#define H_REGISTER_VTERM        0x154
+#define H_FREE_VTERM            0x158
+#define H_RESET_EVENTS          0x15C
+#define H_ALLOC_RESOURCE        0x160
+#define H_FREE_RESOURCE         0x164
+#define H_MODIFY_QP             0x168
+#define H_QUERY_QP              0x16C
+#define H_REREGISTER_PMR        0x170
+#define H_REGISTER_SMR          0x174
+#define H_QUERY_MR              0x178
+#define H_QUERY_MW              0x17C
+#define H_QUERY_HCA             0x180
+#define H_QUERY_PORT            0x184
+#define H_MODIFY_PORT           0x188
+#define H_DEFINE_AQP1           0x18C
+#define H_GET_TRACE_BUFFER      0x190
+#define H_DEFINE_AQP0           0x194
+#define H_RESIZE_MR             0x198
+#define H_ATTACH_MCQP           0x19C
+#define H_DETACH_MCQP           0x1A0
+#define H_CREATE_RPT            0x1A4
+#define H_REMOVE_RPT            0x1A8
+#define H_REGISTER_RPAGES       0x1AC
+#define H_DISABLE_AND_GETC      0x1B0
+#define H_ERROR_DATA            0x1B4
+#define H_GET_HCA_INFO          0x1B8
+#define H_GET_PERF_COUNT        0x1BC
+#define H_MANAGE_TRACE          0x1C0
+#define H_FREE_LOGICAL_LAN_BUFFER 0x1D4
+#define H_QUERY_INT_STATE       0x1E4
+#define H_POLL_PENDING          0x1D8
+#define H_ILLAN_ATTRIBUTES      0x244
+#define H_MODIFY_HEA_QP         0x250
+#define H_QUERY_HEA_QP          0x254
+#define H_QUERY_HEA             0x258
+#define H_QUERY_HEA_PORT        0x25C
+#define H_MODIFY_HEA_PORT       0x260
+#define H_REG_BCMC              0x264
+#define H_DEREG_BCMC            0x268
+#define H_REGISTER_HEA_RPAGES   0x26C
+#define H_DISABLE_AND_GET_HEA   0x270
+#define H_GET_HEA_INFO          0x274
+#define H_ALLOC_HEA_RESOURCE    0x278
+#define H_ADD_CONN              0x284
+#define H_DEL_CONN              0x288
+#define H_JOIN                  0x298
+#define H_VASI_STATE            0x2A4
+#define H_ENABLE_CRQ            0x2B0
+#define H_GET_EM_PARMS          0x2B8
+#define H_SET_MPP               0x2D0
+#define H_GET_MPP               0x2D4
+#define MAX_HCALL_OPCODE        H_GET_MPP
+
+/* The hcalls above are standardized in PAPR and implemented by pHyp
+ * as well.
+ *
+ * We also need some hcalls which are specific to qemu / KVM-on-POWER.
+ * So far we just need one for H_RTAS, but in future we'll need more
+ * for extensions like virtio.  We put those into the 0xf000-0xfffc
+ * range which is reserved by PAPR for "platform-specific" hcalls.
+ */
+#define KVMPPC_HCALL_BASE       0xf000
+#define KVMPPC_H_RTAS           (KVMPPC_HCALL_BASE + 0x0)
+#define KVMPPC_HCALL_MAX        KVMPPC_H_RTAS
+
+#define DEBUG_SPAPR_HCALLS
+
+#ifdef DEBUG_SPAPR_HCALLS
+#define hcall_dprintf(fmt, ...) \
+    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
+#else
+#define hcall_dprintf(fmt, ...) \
+    do { } while (0)
+#endif
+
+typedef target_ulong (*spapr_hcall_fn)(struct kvm_cpu *vcpu,
+				       target_ulong opcode,
+                                       target_ulong *args);
+
+void hypercall_init(void);
+void register_core_rtas(void);
+
+void spapr_register_hypercall(target_ulong opcode, spapr_hcall_fn fn);
+target_ulong spapr_hypercall(struct kvm_cpu *vcpu, target_ulong opcode,
+                             target_ulong *args);
+
+int spapr_rtas_fdt_setup(struct kvm *kvm, void *fdt);
+
+static inline uint32_t rtas_ld(struct kvm *kvm, target_ulong phys, int n)
+{
+	return *((uint32_t *)guest_flat_to_host(kvm, phys + 4*n));
+}
+
+static inline void rtas_st(struct kvm *kvm, target_ulong phys, int n, uint32_t val)
+{
+	*((uint32_t *)guest_flat_to_host(kvm, phys + 4*n)) = val;
+}
+
+typedef void (*spapr_rtas_fn)(struct kvm_cpu *vcpu, uint32_t token,
+                              uint32_t nargs, target_ulong args,
+                              uint32_t nret, target_ulong rets);
+void spapr_rtas_register(const char *name, spapr_rtas_fn fn);
+target_ulong spapr_rtas_call(struct kvm_cpu *vcpu,
+                             uint32_t token, uint32_t nargs, target_ulong args,
+                             uint32_t nret, target_ulong rets);
+
+#endif /* !defined (__HW_SPAPR_H__) */
diff --git a/tools/kvm/powerpc/spapr_hcall.c b/tools/kvm/powerpc/spapr_hcall.c
new file mode 100644
index 0000000..825efb9
--- /dev/null
+++ b/tools/kvm/powerpc/spapr_hcall.c
@@ -0,0 +1,151 @@
+/*
+ * SPAPR hypercalls
+ *
+ * Borrowed heavily from QEMU's spapr_hcall.c,
+ * Copyright (c) 2010 David Gibson, IBM Corporation.
+ *
+ * Copyright (c) 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include "spapr.h"
+#include "kvm/util.h"
+#include "kvm/kvm.h"
+#include "kvm/kvm-cpu.h"
+
+#include <stdio.h>
+#include <assert.h>
+
+
+static target_ulong h_set_dabr(struct kvm_cpu *vcpu, target_ulong opcode, target_ulong *args)
+{
+	pr_warning("Implement %s!\n", __PRETTY_FUNCTION__);
+	return H_HARDWARE;
+}
+
+static target_ulong h_register_vpa(struct kvm_cpu *vcpu, target_ulong opcode, target_ulong *args)
+{
+	pr_warning("Implement %s!\n", __PRETTY_FUNCTION__);
+	return H_SUCCESS;
+}
+
+static target_ulong h_cede(struct kvm_cpu *vcpu, target_ulong opcode, target_ulong *args)
+{
+	pr_warning("Implement %s!\n", __PRETTY_FUNCTION__);
+	return H_SUCCESS;
+}
+
+static target_ulong h_rtas(struct kvm_cpu *vcpu, target_ulong opcode, target_ulong *args)
+{
+	target_ulong rtas_r3 = args[0];
+	/* Pointer read from phys mem; I hereby state these can't be MMIO...
+	 * so just referencing memory directly.
+	 */
+	uint32_t token, nargs, nret;
+
+	token = rtas_ld(vcpu->kvm, rtas_r3, 0);
+	nargs = rtas_ld(vcpu->kvm, rtas_r3, 1);
+	nret  = rtas_ld(vcpu->kvm, rtas_r3, 2);
+
+	return spapr_rtas_call(vcpu, token, nargs, rtas_r3 + 12,
+			       nret, rtas_r3 + 12 + 4*nargs);
+}
+
+static target_ulong h_logical_load(struct kvm_cpu *vcpu, target_ulong opcode, target_ulong *args)
+{
+	/* Not implemented: SLOF will require these, though kernel doesn't.  Name is passed as data, not as the format. */
+	die("%s", __PRETTY_FUNCTION__);
+	return H_PARAMETER;
+}
+
+static target_ulong h_logical_store(struct kvm_cpu *vcpu, target_ulong opcode, target_ulong *args)
+{
+	/* Not implemented: SLOF will require these, though kernel doesn't.  Name is passed as data, not as the format. */
+	die("%s", __PRETTY_FUNCTION__);
+	return H_PARAMETER;
+}
+
+static target_ulong h_logical_icbi(struct kvm_cpu *vcpu, target_ulong opcode, target_ulong *args)
+{
+	/* Nothing to do on emulation, KVM will trap this in the kernel */
+	return H_SUCCESS;
+}
+
+static target_ulong h_logical_dcbf(struct kvm_cpu *vcpu, target_ulong opcode, target_ulong *args)
+{
+	/* Nothing to do on emulation, KVM will trap this in the kernel */
+	return H_SUCCESS;
+}
+
+static spapr_hcall_fn papr_hypercall_table[(MAX_HCALL_OPCODE / 4) + 1];
+static spapr_hcall_fn kvmppc_hypercall_table[KVMPPC_HCALL_MAX - KVMPPC_HCALL_BASE + 1];
+
+void spapr_register_hypercall(target_ulong opcode, spapr_hcall_fn fn)
+{
+	spapr_hcall_fn *slot;
+
+	if (opcode <= MAX_HCALL_OPCODE) {
+		assert((opcode & 0x3) == 0);
+
+		slot = &papr_hypercall_table[opcode / 4];
+	} else {
+		assert((opcode >= KVMPPC_HCALL_BASE) && (opcode <= KVMPPC_HCALL_MAX));
+
+
+		slot = &kvmppc_hypercall_table[opcode - KVMPPC_HCALL_BASE];
+	}
+
+	assert(!(*slot) || (fn == *slot));
+	*slot = fn;
+}
+
+target_ulong spapr_hypercall(struct kvm_cpu *vcpu, target_ulong opcode,
+			     target_ulong *args)
+{
+	if ((opcode <= MAX_HCALL_OPCODE)
+	    && ((opcode & 0x3) == 0)) {
+		spapr_hcall_fn fn = papr_hypercall_table[opcode / 4];
+
+		if (fn) {
+			return fn(vcpu, opcode, args);
+		}
+	} else if ((opcode >= KVMPPC_HCALL_BASE) &&
+		   (opcode <= KVMPPC_HCALL_MAX)) {
+		spapr_hcall_fn fn = kvmppc_hypercall_table[opcode - KVMPPC_HCALL_BASE];
+
+		if (fn) {
+			return fn(vcpu, opcode, args);
+		}
+	}
+
+	hcall_dprintf("Unimplemented hcall 0x%lx\n", opcode);
+	return H_FUNCTION;
+}
+
+void hypercall_init(void)
+{
+	/* hcall-dabr */
+	spapr_register_hypercall(H_SET_DABR, h_set_dabr);
+
+	/* hcall-splpar */
+	spapr_register_hypercall(H_REGISTER_VPA, h_register_vpa);
+	spapr_register_hypercall(H_CEDE, h_cede);
+
+	/* "debugger" hcalls (also used by SLOF). Note: We do -not- differentiate
+	 * here between the "CI" and the "CACHE" variants, they will use whatever
+	 * mapping attributes qemu is using. When using KVM, the kernel will
+	 * enforce the attributes more strongly
+	 */
+	spapr_register_hypercall(H_LOGICAL_CI_LOAD, h_logical_load);
+	spapr_register_hypercall(H_LOGICAL_CI_STORE, h_logical_store);
+	spapr_register_hypercall(H_LOGICAL_CACHE_LOAD, h_logical_load);
+	spapr_register_hypercall(H_LOGICAL_CACHE_STORE, h_logical_store);
+	spapr_register_hypercall(H_LOGICAL_ICBI, h_logical_icbi);
+	spapr_register_hypercall(H_LOGICAL_DCBF, h_logical_dcbf);
+
+	/* KVM-PPC specific hcalls */
+	spapr_register_hypercall(KVMPPC_H_RTAS, h_rtas);
+}
diff --git a/tools/kvm/powerpc/spapr_rtas.c b/tools/kvm/powerpc/spapr_rtas.c
new file mode 100644
index 0000000..4e9c2e3
--- /dev/null
+++ b/tools/kvm/powerpc/spapr_rtas.c
@@ -0,0 +1,226 @@
+/*
+ * SPAPR base RTAS calls
+ *
+ * Borrowed heavily from QEMU's spapr_rtas.c
+ * Copyright (c) 2010-2011 David Gibson, IBM Corporation.
+ *
+ * Modifications copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include "kvm/kvm.h"
+#include "kvm/kvm-cpu.h"
+#include "kvm/util.h"
+#include "kvm/term.h"
+
+#include "spapr.h"
+
+#include <stdio.h>
+#include <assert.h>
+#include <libfdt.h>
+
+#define TOKEN_BASE      0x2000
+#define TOKEN_MAX       0x100
+
+#define RTAS_CONSOLE
+
+static void rtas_display_character(struct kvm_cpu *vcpu,
+                                   uint32_t token, uint32_t nargs,
+                                   target_ulong args,
+                                   uint32_t nret, target_ulong rets)
+{
+	char c = rtas_ld(vcpu->kvm, args, 0);
+	term_putc(CONSOLE_HV, &c, 1, 0);
+        rtas_st(vcpu->kvm, rets, 0, 0);
+}
+
+#ifdef RTAS_CONSOLE
+static void rtas_put_term_char(struct kvm_cpu *vcpu,
+			       uint32_t token, uint32_t nargs,
+			       target_ulong args,
+			       uint32_t nret, target_ulong rets)
+{
+	char c = rtas_ld(vcpu->kvm, args, 0);
+	term_putc(CONSOLE_HV, &c, 1, 0);
+        rtas_st(vcpu->kvm, rets, 0, 0);
+}
+
+static void rtas_get_term_char(struct kvm_cpu *vcpu,
+			       uint32_t token, uint32_t nargs,
+			       target_ulong args,
+			       uint32_t nret, target_ulong rets)
+{
+	int c;
+	if (term_readable(CONSOLE_HV, 0) && (c = term_getc(CONSOLE_HV, 0)) >= 0) {
+		rtas_st(vcpu->kvm, rets, 0, 0);
+		rtas_st(vcpu->kvm, rets, 1, c);
+	} else {
+		rtas_st(vcpu->kvm, rets, 0, -2);
+	}
+}
+#endif
+
+static void rtas_get_time_of_day(struct kvm_cpu *vcpu,
+                                 uint32_t token, uint32_t nargs,
+                                 target_ulong args,
+                                 uint32_t nret, target_ulong rets)
+{
+	struct tm tm;
+	time_t tnow;
+
+	if (nret != 8) {
+		rtas_st(vcpu->kvm, rets, 0, -3);
+		return;
+	}
+
+	tnow = time(NULL);
+	/* Guest time is currently not offset in any way. */
+	gmtime_r(&tnow, &tm);
+
+	rtas_st(vcpu->kvm, rets, 0, 0); /* Success */
+	rtas_st(vcpu->kvm, rets, 1, tm.tm_year + 1900);
+	rtas_st(vcpu->kvm, rets, 2, tm.tm_mon + 1);
+	rtas_st(vcpu->kvm, rets, 3, tm.tm_mday);
+	rtas_st(vcpu->kvm, rets, 4, tm.tm_hour);
+	rtas_st(vcpu->kvm, rets, 5, tm.tm_min);
+	rtas_st(vcpu->kvm, rets, 6, tm.tm_sec);
+	rtas_st(vcpu->kvm, rets, 7, 0);
+}
+
+/* RTAS set-time-of-day stub: the request is ignored, but a status word is still returned. */
+static void rtas_set_time_of_day(struct kvm_cpu *vcpu, uint32_t token, uint32_t nargs,
+				 target_ulong args, uint32_t nret, target_ulong rets)
+{
+	pr_warning("%s called; TOD set ignored.\n", __FUNCTION__);
+	rtas_st(vcpu->kvm, rets, 0, 0); /* status 0, as the other handlers store on success */
+}
+
+static void rtas_power_off(struct kvm_cpu *vcpu,
+                           uint32_t token, uint32_t nargs, target_ulong args,
+                           uint32_t nret, target_ulong rets)
+{
+	if (nargs != 2 || nret != 1) {
+		rtas_st(vcpu->kvm, rets, 0, -3);
+		return;
+	}
+	kvm_cpu__reboot();
+}
+
+static void rtas_query_cpu_stopped_state(struct kvm_cpu *vcpu,
+                                         uint32_t token, uint32_t nargs,
+                                         target_ulong args,
+                                         uint32_t nret, target_ulong rets)
+{
+	/* The call takes exactly one argument and produces two results; anything else gets -3. */
+	if (nargs != 1 || nret != 2) {
+		rtas_st(vcpu->kvm, rets, 0, -3);
+		return;
+	}
+
+	/*
+	 * The CPU id could be read with rtas_ld(vcpu->kvm, args, 0), but it is not
+	 * needed: we do start all CPUs, so just return true for any id.
+	 */
+	rtas_st(vcpu->kvm, rets, 0, 0);
+	rtas_st(vcpu->kvm, rets, 1, 2);
+}
+
+static void rtas_start_cpu(struct kvm_cpu *vcpu,
+                           uint32_t token, uint32_t nargs,
+                           target_ulong args,
+                           uint32_t nret, target_ulong rets)
+{
+	die("%s", __FUNCTION__); /* not implemented; the name is passed as data, not as the format */
+}
+
+static struct rtas_call {
+	const char *name;
+	spapr_rtas_fn fn;
+} rtas_table[TOKEN_MAX];
+
+static struct rtas_call *rtas_next = rtas_table; /* next free slot; advanced by spapr_rtas_register() */
+
+target_ulong spapr_rtas_call(struct kvm_cpu *vcpu,
+                             uint32_t token, uint32_t nargs, target_ulong args,
+                             uint32_t nret, target_ulong rets)
+{
+	if ((token >= TOKEN_BASE)
+	    && ((token - TOKEN_BASE) < TOKEN_MAX)) {
+		struct rtas_call *call = rtas_table + (token - TOKEN_BASE);
+
+		if (call->fn) {
+			call->fn(vcpu, token, nargs, args, nret, rets);
+			return H_SUCCESS;
+		}
+	}
+
+	/* HACK: Some Linux early debug code uses RTAS display-character,
+	 * but assumes the token value is 0xa (which it is on some real
+	 * machines) without looking it up in the device tree.  This
+	 * special case makes this work */
+	if (token == 0xa) {
+		rtas_display_character(vcpu, 0xa, nargs, args, nret, rets);
+		return H_SUCCESS;
+	}
+
+	hcall_dprintf("Unknown RTAS token 0x%x\n", token);
+	rtas_st(vcpu->kvm, rets, 0, -3);
+	return H_PARAMETER;
+}
+
+void spapr_rtas_register(const char *name, spapr_rtas_fn fn)
+{
+	assert(rtas_next < (rtas_table + TOKEN_MAX));
+
+	rtas_next->name = name;
+	rtas_next->fn = fn;
+
+	rtas_next++;
+}
+
+/* This is called from the context of an open /rtas node, in order to add
+ * properties for the rtas call tokens.
+ */
+int spapr_rtas_fdt_setup(struct kvm *kvm, void *fdt)
+{
+	int ret;
+	int i;
+
+	for (i = 0; i < TOKEN_MAX; i++) {
+		struct rtas_call *call = &rtas_table[i];
+
+		if (!call->fn) {
+			continue;
+		}
+
+		/* HACK borrowed from QEMU */
+		ret = fdt_property_cell(fdt, call->name, i + TOKEN_BASE);
+
+		if (ret < 0) {
+			pr_warning("Couldn't add rtas token for %s: %s\n",
+				   call->name, fdt_strerror(ret));
+			return ret;
+		}
+
+	}
+	return 0;
+}
+
+void register_core_rtas(void)
+{
+	spapr_rtas_register("display-character", rtas_display_character);
+	spapr_rtas_register("get-time-of-day", rtas_get_time_of_day);
+	spapr_rtas_register("set-time-of-day", rtas_set_time_of_day);
+	spapr_rtas_register("power-off", rtas_power_off);
+	spapr_rtas_register("query-cpu-stopped-state",
+			    rtas_query_cpu_stopped_state);
+	spapr_rtas_register("start-cpu", rtas_start_cpu);
+#ifdef RTAS_CONSOLE
+	/* These are unused: We do console I/O via hcalls, not rtas. */
+	spapr_rtas_register("put-term-char", rtas_put_term_char);
+	spapr_rtas_register("get-term-char", rtas_get_term_char);
+#endif
+}

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 4/8] kvm tools: Add SPAPR PPC64 HV console
       [not found] <cover.1323143103.git.matt@ozlabs.org>
                   ` (2 preceding siblings ...)
  2011-12-06  4:06 ` [PATCH 3/8] kvm tools: Add SPAPR PPC64 hcall & rtascall structure Matt Evans
@ 2011-12-06  4:06 ` Matt Evans
  2011-12-06  4:06 ` [PATCH 5/8] kvm tools: Add PPC64 XICS interrupt controller support Matt Evans
                   ` (3 subsequent siblings)
  7 siblings, 0 replies; 14+ messages in thread
From: Matt Evans @ 2011-12-06  4:06 UTC (permalink / raw)
  To: kvm, kvm-ppc

This adds the console code, plus VIO HV terminal nodes are added to
the device tree so the guest kernel will pick it up.

Signed-off-by: Matt Evans <matt@ozlabs.org>
---
 tools/kvm/Makefile               |    1 +
 tools/kvm/powerpc/kvm.c          |   31 ++++++++++++
 tools/kvm/powerpc/spapr_hvcons.c |  101 ++++++++++++++++++++++++++++++++++++++
 tools/kvm/powerpc/spapr_hvcons.h |   19 +++++++
 4 files changed, 152 insertions(+), 0 deletions(-)
 create mode 100644 tools/kvm/powerpc/spapr_hvcons.c
 create mode 100644 tools/kvm/powerpc/spapr_hvcons.h

diff --git a/tools/kvm/Makefile b/tools/kvm/Makefile
index 0f24104..76cce3a 100644
--- a/tools/kvm/Makefile
+++ b/tools/kvm/Makefile
@@ -130,6 +130,7 @@ ifeq ($(uname_M), ppc64)
 	OBJS	+= powerpc/kvm-cpu.o
 	OBJS	+= powerpc/spapr_hcall.o
 	OBJS	+= powerpc/spapr_rtas.o
+	OBJS	+= powerpc/spapr_hvcons.o
 	ARCH_INCLUDE := powerpc/include
 	CFLAGS 	+= -m64
 	LIBS 	+= -lfdt
diff --git a/tools/kvm/powerpc/kvm.c b/tools/kvm/powerpc/kvm.c
index 2f0a921..8614538 100644
--- a/tools/kvm/powerpc/kvm.c
+++ b/tools/kvm/powerpc/kvm.c
@@ -15,6 +15,7 @@
 #include "kvm/util.h"
 
 #include "spapr.h"
+#include "spapr_hvcons.h"
 
 #include <linux/kvm.h>
 
@@ -159,6 +160,8 @@ void kvm__arch_init(struct kvm *kvm, const char *kvm_dev, const char *hugetlbfs_
 	/* Do these before FDT setup, IRQ setup, etc. */
 	hypercall_init();
 	register_core_rtas();
+	/* Now that hypercalls are initialised, register a couple for the console: */
+	spapr_hvcons_init();
 }
 
 void kvm__irq_line(struct kvm *kvm, int irq, int level)
@@ -172,6 +175,11 @@ void kvm__irq_trigger(struct kvm *kvm, int irq)
 	kvm__irq_line(kvm, irq, 0);
 }
 
+void kvm__arch_periodic_poll(struct kvm *kvm)
+{
+	spapr_hvcons_poll(kvm);
+}
+
 int load_flat_binary(struct kvm *kvm, int fd_kernel, int fd_initrd, const char *kernel_cmdline)
 {
 	void *p;
@@ -297,6 +305,13 @@ static void setup_fdt(struct kvm *kvm)
 				   &ird_end_prop, sizeof(ird_end_prop)));
 	}
 
+	/* stdout-path: This is assuming we're using the HV console.  Also, the
+	 * address is hardwired until we do a VIO bus.
+	 */
+	_FDT(fdt_property_string(fdt, "linux,stdout-path",
+				 "/vdevice/vty@30000000"));
+	_FDT(fdt_end_node(fdt));
+
 	/* Memory: We don't alloc. a separate RMA yet.  If we ever need to
 	 * (CAP_PPC_RMA == 2) then have one memory node for 0->RMAsize, and
 	 * another RMAsize->endOfMem.
@@ -369,6 +384,22 @@ static void setup_fdt(struct kvm *kvm)
 	}
 	_FDT(fdt_end_node(fdt));
 
+	/* VIO: See comment in linux,stdout-path; we don't yet represent a VIO
+	 * bus/address allocation so addresses are hardwired here.
+	 */
+	_FDT(fdt_begin_node(fdt, "vdevice"));
+	_FDT(fdt_property_cell(fdt, "#address-cells", 0x1));
+	_FDT(fdt_property_cell(fdt, "#size-cells", 0x0));
+	_FDT(fdt_property_string(fdt, "device_type", "vdevice"));
+	_FDT(fdt_property_string(fdt, "compatible", "IBM,vdevice"));
+	_FDT(fdt_begin_node(fdt, "vty@30000000"));
+	_FDT(fdt_property_string(fdt, "name", "vty"));
+	_FDT(fdt_property_string(fdt, "device_type", "serial"));
+	_FDT(fdt_property_string(fdt, "compatible", "hvterm1"));
+	_FDT(fdt_property_cell(fdt, "reg", 0x30000000));
+	_FDT(fdt_end_node(fdt));
+	_FDT(fdt_end_node(fdt));
+
 	/* Finalise: */
 	_FDT(fdt_end_node(fdt)); /* Root node */
 	_FDT(fdt_finish(fdt));
diff --git a/tools/kvm/powerpc/spapr_hvcons.c b/tools/kvm/powerpc/spapr_hvcons.c
new file mode 100644
index 0000000..97902ac
--- /dev/null
+++ b/tools/kvm/powerpc/spapr_hvcons.c
@@ -0,0 +1,101 @@
+/*
+ * SPAPR HV console
+ *
+ * Borrowed lightly from QEMU's spapr_vty.c, Copyright (c) 2010 David Gibson,
+ * IBM Corporation.
+ *
+ * Copyright (c) 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include "kvm/term.h"
+#include "kvm/kvm.h"
+#include "kvm/kvm-cpu.h"
+#include "kvm/util.h"
+#include "spapr.h"
+#include "spapr_hvcons.h"
+
+#include <stdio.h>
+#include <sys/uio.h>
+#include <errno.h>
+
+#include <linux/byteorder.h>
+
+union hv_chario {
+	struct {
+		uint64_t char0_7;
+		uint64_t char8_15;
+	} a;
+	uint8_t buf[16];
+};
+
+static unsigned long h_put_term_char(struct kvm_cpu *vcpu, unsigned long opcode, unsigned long *args)
+{
+	/* To do: Read register from args[0], and check it. */
+	unsigned long len = args[1];
+	union hv_chario data;
+	struct iovec iov;
+
+	if (len > 16) {
+		return H_PARAMETER;
+	}
+	data.a.char0_7 = cpu_to_be64(args[2]);
+	data.a.char8_15 = cpu_to_be64(args[3]);
+
+	iov.iov_base = data.buf;
+	iov.iov_len = len;
+	do {
+		int ret;
+
+		ret = term_putc_iov(CONSOLE_HV, &iov, 1, 0);
+		if (ret < 0) {
+			die("term_putc_iov error %d!\n", errno);
+		}
+		iov.iov_base += ret;
+		iov.iov_len -= ret;
+	} while (iov.iov_len > 0);
+
+	return H_SUCCESS;
+}
+
+
+static unsigned long h_get_term_char(struct kvm_cpu *vcpu, unsigned long opcode, unsigned long *args)
+{
+	/* To do: Read register from args[0], and check it.  Results are returned in args[0..2]. */
+	unsigned long *len = args + 0;
+	unsigned long *char0_7 = args + 1;
+	unsigned long *char8_15 = args + 2;
+	union hv_chario data = { .buf = { 0 } }; /* zeroed: unfilled bytes must not carry host stack data */
+	struct iovec iov;
+
+	if (term_readable(CONSOLE_HV, 0)) {
+		iov.iov_base = data.buf;
+		iov.iov_len = 16;
+
+		*len = term_getc_iov(CONSOLE_HV, &iov, 1, 0);
+		*char0_7 = be64_to_cpu(data.a.char0_7);
+		*char8_15 = be64_to_cpu(data.a.char8_15);
+	} else {
+		*len = 0;
+	}
+
+	return H_SUCCESS;
+}
+
+void spapr_hvcons_poll(struct kvm *kvm)
+{
+	if (term_readable(CONSOLE_HV, 0)) {
+		/* We can inject an IRQ to guest here if we want.
+		 * The guest will happily poll, though, so not required.
+		 */
+	}
+}
+
+void spapr_hvcons_init(void)
+{
+	spapr_register_hypercall(H_PUT_TERM_CHAR, h_put_term_char);
+	spapr_register_hypercall(H_GET_TERM_CHAR, h_get_term_char);
+}
diff --git a/tools/kvm/powerpc/spapr_hvcons.h b/tools/kvm/powerpc/spapr_hvcons.h
new file mode 100644
index 0000000..d3e4414
--- /dev/null
+++ b/tools/kvm/powerpc/spapr_hvcons.h
@@ -0,0 +1,19 @@
+/*
+ * SPAPR HV console
+ *
+ * Copyright (c) 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#ifndef spapr_hvcons_H
+#define spapr_hvcons_H
+
+#include "kvm/kvm.h"
+
+void spapr_hvcons_init(void);
+void spapr_hvcons_poll(struct kvm *kvm);
+
+#endif

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 5/8] kvm tools: Add PPC64 XICS interrupt controller support
       [not found] <cover.1323143103.git.matt@ozlabs.org>
                   ` (3 preceding siblings ...)
  2011-12-06  4:06 ` [PATCH 4/8] kvm tools: Add SPAPR PPC64 HV console Matt Evans
@ 2011-12-06  4:06 ` Matt Evans
  2011-12-06  4:06 ` [PATCH 6/8] kvm tools: Add PPC64 PCI Host Bridge Matt Evans
                   ` (2 subsequent siblings)
  7 siblings, 0 replies; 14+ messages in thread
From: Matt Evans @ 2011-12-06  4:06 UTC (permalink / raw)
  To: kvm, kvm-ppc

This patch adds XICS emulation code (heavily borrowed from QEMU), and wires
this into kvm_cpu__irq() to fire a CPU IRQ via KVM.  A device tree entry is
also added.  IPIs work, xics_alloc_irqnum() is added to allocate an external
IRQ (which will later be used by the PHB PCI code) and finally, kvm__irq_line()
can be called to raise an IRQ on XICS.

Signed-off-by: Matt Evans <matt@ozlabs.org>
---
 tools/kvm/Makefile                           |    1 +
 tools/kvm/powerpc/include/kvm/kvm-arch.h     |    1 +
 tools/kvm/powerpc/include/kvm/kvm-cpu-arch.h |    2 +
 tools/kvm/powerpc/irq.c                      |   11 +-
 tools/kvm/powerpc/kvm-cpu.c                  |   10 +
 tools/kvm/powerpc/kvm.c                      |   25 +-
 tools/kvm/powerpc/xics.c                     |  529 ++++++++++++++++++++++++++
 tools/kvm/powerpc/xics.h                     |   23 ++
 8 files changed, 596 insertions(+), 6 deletions(-)
 create mode 100644 tools/kvm/powerpc/xics.c
 create mode 100644 tools/kvm/powerpc/xics.h

diff --git a/tools/kvm/Makefile b/tools/kvm/Makefile
index 76cce3a..6ffffc8 100644
--- a/tools/kvm/Makefile
+++ b/tools/kvm/Makefile
@@ -131,6 +131,7 @@ ifeq ($(uname_M), ppc64)
 	OBJS	+= powerpc/spapr_hcall.o
 	OBJS	+= powerpc/spapr_rtas.o
 	OBJS	+= powerpc/spapr_hvcons.o
+	OBJS	+= powerpc/xics.o
 	ARCH_INCLUDE := powerpc/include
 	CFLAGS 	+= -m64
 	LIBS 	+= -lfdt
diff --git a/tools/kvm/powerpc/include/kvm/kvm-arch.h b/tools/kvm/powerpc/include/kvm/kvm-arch.h
index 722d01c..ae811e9 100644
--- a/tools/kvm/powerpc/include/kvm/kvm-arch.h
+++ b/tools/kvm/powerpc/include/kvm/kvm-arch.h
@@ -65,6 +65,7 @@ struct kvm {
 	unsigned long		initrd_gra;
 	unsigned long		initrd_size;
 	const char		*name;
+	struct icp_state	*icp;
 };
 
 #endif /* KVM__KVM_ARCH_H */
diff --git a/tools/kvm/powerpc/include/kvm/kvm-cpu-arch.h b/tools/kvm/powerpc/include/kvm/kvm-cpu-arch.h
index dbabc57..551307e 100644
--- a/tools/kvm/powerpc/include/kvm/kvm-cpu-arch.h
+++ b/tools/kvm/powerpc/include/kvm/kvm-cpu-arch.h
@@ -17,6 +17,8 @@
 
 #include <pthread.h>
 
+#define POWER7_EXT_IRQ	0
+
 struct kvm;
 
 struct kvm_cpu {
diff --git a/tools/kvm/powerpc/irq.c b/tools/kvm/powerpc/irq.c
index 46aa64f..80c972a 100644
--- a/tools/kvm/powerpc/irq.c
+++ b/tools/kvm/powerpc/irq.c
@@ -21,6 +21,10 @@
 #include <stddef.h>
 #include <stdlib.h>
 
+#include "xics.h"
+
+#define XICS_IRQS               1024
+
 int irq__register_device(u32 dev, u8 *num, u8 *pin, u8 *line)
 {
 	fprintf(stderr, "irq__register_device(%d, [%d], [%d], [%d]\n",
@@ -30,7 +34,12 @@ int irq__register_device(u32 dev, u8 *num, u8 *pin, u8 *line)
 
 void irq__init(struct kvm *kvm)
 {
-	fprintf(stderr, __func__);
+	/* kvm->nrcpus is now valid; for /now/, pass
+	 * this to xics_system_init(), which assumes servers
+	 * are numbered 0..nrcpus.  This may not really be true,
+	 * but it is OK currently.
+	 */
+	kvm->icp = xics_system_init(XICS_IRQS, kvm->nrcpus);
 }
 
 int irq__add_msix_route(struct kvm *kvm, struct msi_msg *msg)
diff --git a/tools/kvm/powerpc/kvm-cpu.c b/tools/kvm/powerpc/kvm-cpu.c
index 71c648e..63cd106 100644
--- a/tools/kvm/powerpc/kvm-cpu.c
+++ b/tools/kvm/powerpc/kvm-cpu.c
@@ -15,6 +15,7 @@
 #include "kvm/kvm.h"
 
 #include "spapr.h"
+#include "xics.h"
 
 #include <sys/ioctl.h>
 #include <sys/mman.h>
@@ -107,6 +108,9 @@ struct kvm_cpu *kvm_cpu__init(struct kvm *kvm, unsigned long cpu_id)
 	 */
 	vcpu->is_running = true;
 
+	/* Register with IRQ controller */
+	xics_cpu_register(vcpu);
+
 	return vcpu;
 }
 
@@ -151,6 +155,12 @@ void kvm_cpu__reset_vcpu(struct kvm_cpu *vcpu)
 /* kvm_cpu__irq - set KVM's IRQ flag on this vcpu */
 void kvm_cpu__irq(struct kvm_cpu *vcpu, int pin, int level)
 {
+	unsigned int virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
+
+	if (pin != POWER7_EXT_IRQ)
+		return;
+	if (ioctl(vcpu->vcpu_fd, KVM_INTERRUPT, &virq) < 0)
+		pr_warning("Could not KVM_INTERRUPT.");
 }
 
 bool kvm_cpu__handle_exit(struct kvm_cpu *vcpu)
diff --git a/tools/kvm/powerpc/kvm.c b/tools/kvm/powerpc/kvm.c
index 8614538..bfd7c3a 100644
--- a/tools/kvm/powerpc/kvm.c
+++ b/tools/kvm/powerpc/kvm.c
@@ -41,9 +41,13 @@
 
 #define HUGETLBFS_PATH "/var/lib/hugetlbfs/global/pagesize-16MB/"
 
+#define PHANDLE_XICP		0x00001111
+
 static char kern_cmdline[2048];
 
 struct kvm_ext kvm_req_ext[] = {
+	{ DEFINE_KVM_EXT(KVM_CAP_PPC_UNSET_IRQ) },
+	{ DEFINE_KVM_EXT(KVM_CAP_PPC_IRQ_LEVEL) },
 	{ 0, 0 }
 };
 
@@ -164,11 +168,6 @@ void kvm__arch_init(struct kvm *kvm, const char *kvm_dev, const char *hugetlbfs_
 	spapr_hvcons_init();
 }
 
-void kvm__irq_line(struct kvm *kvm, int irq, int level)
-{
-	fprintf(stderr, "irq_line(%d, %d)\n", irq, level);
-}
-
 void kvm__irq_trigger(struct kvm *kvm, int irq)
 {
 	kvm__irq_line(kvm, irq, 1);
@@ -384,6 +383,22 @@ static void setup_fdt(struct kvm *kvm)
 	}
 	_FDT(fdt_end_node(fdt));
 
+	/* IRQ controller */
+	_FDT(fdt_begin_node(fdt, "interrupt-controller@0"));
+
+	_FDT(fdt_property_string(fdt, "device_type",
+				 "PowerPC-External-Interrupt-Presentation"));
+	_FDT(fdt_property_string(fdt, "compatible", "IBM,ppc-xicp"));
+	_FDT(fdt_property_cell(fdt, "reg", 0));
+	_FDT(fdt_property(fdt, "interrupt-controller", NULL, 0));
+	_FDT(fdt_property(fdt, "ibm,interrupt-server-ranges",
+			   interrupt_server_ranges_prop,
+			   sizeof(interrupt_server_ranges_prop)));
+	_FDT(fdt_property_cell(fdt, "#interrupt-cells", 2));
+	_FDT(fdt_property_cell(fdt, "linux,phandle", PHANDLE_XICP));
+	_FDT(fdt_property_cell(fdt, "phandle", PHANDLE_XICP));
+	_FDT(fdt_end_node(fdt));
+
 	/* VIO: See comment in linux,stdout-path; we don't yet represent a VIO
 	 * bus/address allocation so addresses are hardwired here.
 	 */
diff --git a/tools/kvm/powerpc/xics.c b/tools/kvm/powerpc/xics.c
new file mode 100644
index 0000000..4154ca8
--- /dev/null
+++ b/tools/kvm/powerpc/xics.c
@@ -0,0 +1,529 @@
+/*
+ * PAPR Virtualized Interrupt System, aka ICS/ICP aka xics
+ *
+ * Borrowed heavily from QEMU's xics.c,
+ * Copyright (c) 2010,2011 David Gibson, IBM Corporation.
+ *
+ * Modifications copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include "spapr.h"
+#include "xics.h"
+#include "kvm/util.h"
+
+#include <stdio.h>
+#include <malloc.h>
+
+
+//#define DEBUG_XICS yes
+#ifdef DEBUG_XICS
+#define xics_dprintf(fmt, ...)					\
+	do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
+#else
+#define xics_dprintf(fmt, ...)			\
+	do { } while (0)
+#endif
+
+/*
+ * ICP: Presentation layer
+ */
+
+struct icp_server_state {
+	uint32_t xirr;
+	uint8_t pending_priority;
+	uint8_t mfrr;
+	struct kvm_cpu *cpu;
+};
+
+#define XICS_IRQ_OFFSET 16
+#define XISR_MASK	0x00ffffff
+#define CPPR_MASK	0xff000000
+
+#define XISR(ss)   (((ss)->xirr) & XISR_MASK)
+#define CPPR(ss)   (((ss)->xirr) >> 24)
+
+struct ics_state;
+
+struct icp_state {
+	unsigned long nr_servers;
+	struct icp_server_state *ss;
+	struct ics_state *ics;
+};
+
+static void ics_reject(struct ics_state *ics, int nr);
+static void ics_resend(struct ics_state *ics);
+static void ics_eoi(struct ics_state *ics, int nr);
+
+static inline void cpu_irq_raise(struct kvm_cpu *vcpu)
+{
+	xics_dprintf("INT1[%p]\n", vcpu);
+	kvm_cpu__irq(vcpu, POWER7_EXT_IRQ, 1);
+}
+
+static inline void cpu_irq_lower(struct kvm_cpu *vcpu)
+{
+	xics_dprintf("INT0[%p]\n", vcpu);
+	kvm_cpu__irq(vcpu, POWER7_EXT_IRQ, 0);
+}
+
+static void icp_check_ipi(struct icp_state *icp, int server)
+{
+	struct icp_server_state *ss = icp->ss + server;
+
+	if (XISR(ss) && (ss->pending_priority <= ss->mfrr)) {
+		return;
+	}
+
+	if (XISR(ss)) {
+		ics_reject(icp->ics, XISR(ss));
+	}
+
+	ss->xirr = (ss->xirr & ~XISR_MASK) | XICS_IPI;
+	ss->pending_priority = ss->mfrr;
+	cpu_irq_raise(ss->cpu);
+}
+
+static void icp_resend(struct icp_state *icp, int server)
+{
+	struct icp_server_state *ss = icp->ss + server;
+
+	if (ss->mfrr < CPPR(ss)) {
+		icp_check_ipi(icp, server);
+	}
+	ics_resend(icp->ics);
+}
+
+/* Store a new CPPR for a server.  If it was lowered to or below the pending interrupt's
+ * priority, that interrupt is withdrawn and rejected; otherwise, with none pending, resend. */
+static void icp_set_cppr(struct icp_state *icp, int server, uint8_t cppr)
+{
+	struct icp_server_state *ss = icp->ss + server;
+	uint8_t old_cppr = CPPR(ss);
+	uint32_t old_xisr;
+
+	/* Widen before shifting: as a promoted int, cppr >= 0x80 would shift into the sign bit. */
+	ss->xirr = (ss->xirr & ~CPPR_MASK) | ((uint32_t)cppr << 24);
+
+	if (cppr < old_cppr) {
+		if (XISR(ss) && (cppr <= ss->pending_priority)) {
+			old_xisr = XISR(ss);
+			ss->xirr &= ~XISR_MASK; /* Clear XISR */
+			cpu_irq_lower(ss->cpu);
+			ics_reject(icp->ics, old_xisr);
+		}
+	} else if (!XISR(ss)) {
+		icp_resend(icp, server);
+	}
+}
+
+static void icp_set_mfrr(struct icp_state *icp, int nr, uint8_t mfrr)
+{
+	struct icp_server_state *ss = icp->ss + nr;
+
+	ss->mfrr = mfrr;
+	if (mfrr < CPPR(ss)) {
+		icp_check_ipi(icp, nr);
+	}
+}
+
+static uint32_t icp_accept(struct icp_server_state *ss)
+{
+	uint32_t xirr;
+
+	cpu_irq_lower(ss->cpu);
+	xirr = ss->xirr; /* returned as it was before being replaced below */
+	ss->xirr = (uint32_t)ss->pending_priority << 24; /* cast first: no int shift into the sign bit */
+	return xirr;
+}
+
+static void icp_eoi(struct icp_state *icp, int server, uint32_t xirr)
+{
+	struct icp_server_state *ss = icp->ss + server;
+
+	ics_eoi(icp->ics, xirr & XISR_MASK);
+	/* Send EOI -> ICS */
+	ss->xirr = (ss->xirr & ~CPPR_MASK) | (xirr & CPPR_MASK);
+	if (!XISR(ss)) {
+		icp_resend(icp, server);
+	}
+}
+
+static void icp_irq(struct icp_state *icp, int server, int nr, uint8_t priority)
+{
+	struct icp_server_state *ss = icp->ss + server;
+	xics_dprintf("icp_irq(nr %d, server %d, prio 0x%x)\n", nr, server, priority);
+	if ((priority >= CPPR(ss))
+	    || (XISR(ss) && (ss->pending_priority <= priority))) {
+		xics_dprintf("reject %d, CPPR 0x%x, XISR 0x%x, pprio 0x%x, prio 0x%x\n",
+			     nr, CPPR(ss), XISR(ss), ss->pending_priority, priority);
+		ics_reject(icp->ics, nr);
+	} else {
+		if (XISR(ss)) {
+			xics_dprintf("reject %d, CPPR 0x%x, XISR 0x%x, pprio 0x%x, prio 0x%x\n",
+				     nr, CPPR(ss), XISR(ss), ss->pending_priority, priority);
+			ics_reject(icp->ics, XISR(ss));
+		}
+		ss->xirr = (ss->xirr & ~XISR_MASK) | (nr & XISR_MASK);
+		ss->pending_priority = priority;
+		cpu_irq_raise(ss->cpu);
+	}
+}
+
+/*
+ * ICS: Source layer
+ */
+
+struct ics_irq_state {
+	int server;			/* set by ics_write_xive_msi() */
+	uint8_t priority;		/* 0xff is treated as masked */
+	uint8_t saved_priority;
+	/* int pending:1; */
+	/* int presented:1; */
+	unsigned int rejected:1;	/* unsigned: a plain int 1-bit field may only hold 0 and -1 */
+	unsigned int masked_pending:1;	/* raised while masked; cleared by ics_write_xive_msi() */
+};
+
+struct ics_state {
+	unsigned int nr_irqs;
+	unsigned int offset;
+	struct ics_irq_state *irqs;
+	struct icp_state *icp;
+};
+
+static int ics_valid_irq(struct ics_state *ics, uint32_t nr)
+{
+	return (nr >= ics->offset)
+		&& (nr < (ics->offset + ics->nr_irqs));
+}
+
+static void ics_set_irq_msi(struct ics_state *ics, int srcno, int val)
+{
+	struct ics_irq_state *irq = ics->irqs + srcno;
+
+	if (val) {
+		if (irq->priority == 0xff) {
+			xics_dprintf(" irq pri ff, masked pending\n");
+			irq->masked_pending = 1;
+			/* masked pending */ ;
+		} else	{
+			icp_irq(ics->icp, irq->server, srcno + ics->offset, irq->priority);
+		}
+	}
+}
+
+static void ics_reject_msi(struct ics_state *ics, int nr)
+{
+	struct ics_irq_state *irq = ics->irqs + nr - ics->offset;
+
+	irq->rejected = 1;
+}
+
+static void ics_resend_msi(struct ics_state *ics)
+{
+	unsigned int i;
+
+	for (i = 0; i < ics->nr_irqs; i++) {
+		struct ics_irq_state *irq = ics->irqs + i;
+
+		/* FIXME: filter by server#? */
+		if (irq->rejected) {
+			irq->rejected = 0;
+			if (irq->priority != 0xff) {
+				icp_irq(ics->icp, irq->server, i + ics->offset, irq->priority);
+			}
+		}
+	}
+}
+
+static void ics_write_xive_msi(struct ics_state *ics, int nr, int server,
+			       uint8_t priority)
+{
+	struct ics_irq_state *irq = ics->irqs + nr - ics->offset;
+
+	irq->server = server;
+	irq->priority = priority;
+	xics_dprintf("ics_write_xive_msi(nr %d, server %d, pri 0x%x)\n", nr, server, priority);
+
+	if (!irq->masked_pending || (priority == 0xff)) {
+		return;
+	}
+
+	irq->masked_pending = 0;
+	icp_irq(ics->icp, server, nr, priority);
+}
+
+static void ics_reject(struct ics_state *ics, int nr)
+{
+	ics_reject_msi(ics, nr);
+}
+
+static void ics_resend(struct ics_state *ics)
+{
+	ics_resend_msi(ics);
+}
+
+static void ics_eoi(struct ics_state *ics, int nr)
+{
+}
+
+/*
+ * Exported functions
+ */
+
+static int allocated_irqnum = XICS_IRQ_OFFSET;
+
+/* This is hacky.  The problem boils down to the PCI device code which just
+ * calls kvm__irq_line( .. pcidev->pci_hdr.irq_line ..) at will.  Each PCI
+ * device's IRQ line is allocated by irq__register_device() (which allocates an
+ * IRQ AND allocates a.. PCI device num..).
+ *
+ * In future I'd like to at least mimic some kind of 'upstream IRQ controller'
+ * whereby PCI devices let their PHB know when they want to IRQ, and that
+ * percolates up.
+ *
+ * For now, allocate a REAL xics irq number and (via irq__register_device) push
+ * that into the config space.	8 bits only though!
+ */
+/* Return the next unused IRQ number from the file-level counter and advance
+ * the counter.  Dies if the number handed out would exceed 255.
+ */
+int	xics_alloc_irqnum(void)
+{
+	int irq = allocated_irqnum;
+
+	allocated_irqnum = irq + 1;
+
+	if (irq >= 256)
+		die("Huge numbers of IRQs aren't supported with the daft kvmtool IRQ system.");
+
+	return irq;
+}
+
+/* H_CPPR hypercall handler: passes args[0] to icp_set_cppr() for the calling
+ * CPU.  Always returns H_SUCCESS.
+ */
+static target_ulong h_cppr(struct kvm_cpu *vcpu,
+			   target_ulong opcode, target_ulong *args)
+{
+	struct icp_state *icp = vcpu->kvm->icp;
+	target_ulong new_cppr = args[0];
+
+	xics_dprintf("h_cppr(%lx)\n", new_cppr);
+	icp_set_cppr(icp, vcpu->cpu_id, new_cppr);
+
+	return H_SUCCESS;
+}
+
+/* H_IPI hypercall handler: args[0] is the target server, args[1] the value
+ * passed to icp_set_mfrr().  Returns H_PARAMETER when the server number is
+ * not below icp->nr_servers, H_SUCCESS otherwise.
+ */
+static target_ulong h_ipi(struct kvm_cpu *vcpu,
+			  target_ulong opcode, target_ulong *args)
+{
+	struct icp_state *icp = vcpu->kvm->icp;
+	target_ulong target = args[0];
+	target_ulong value = args[1];
+
+	xics_dprintf("h_ipi(%lx, %lx)\n", target, value);
+
+	if (target < icp->nr_servers) {
+		icp_set_mfrr(icp, target, value);
+		return H_SUCCESS;
+	}
+
+	return H_PARAMETER;
+}
+
+/* H_XIRR hypercall handler: calls icp_accept() on the calling CPU's server
+ * state and hands the resulting XIRR back in args[0].  Always returns
+ * H_SUCCESS.
+ */
+static target_ulong h_xirr(struct kvm_cpu *vcpu,
+			   target_ulong opcode, target_ulong *args)
+{
+	struct icp_state *icp = vcpu->kvm->icp;
+	uint32_t accepted = icp_accept(&icp->ss[vcpu->cpu_id]);
+
+	xics_dprintf("h_xirr() = %x\n", accepted);
+	args[0] = accepted;
+
+	return H_SUCCESS;
+}
+
+/* H_EOI hypercall handler: forwards the XIRR in args[0] to icp_eoi() for the
+ * calling CPU.  Always returns H_SUCCESS.
+ */
+static target_ulong h_eoi(struct kvm_cpu *vcpu,
+			  target_ulong opcode, target_ulong *args)
+{
+	struct icp_state *icp = vcpu->kvm->icp;
+	target_ulong eoi_xirr = args[0];
+
+	xics_dprintf("h_eoi(%lx)\n", eoi_xirr);
+	icp_eoi(icp, vcpu->cpu_id, eoi_xirr);
+
+	return H_SUCCESS;
+}
+
+/* RTAS "ibm,set-xive": args are (irq number, server, priority).
+ *
+ * Stores status -3 in rets[0] when the argument/return counts are not 3/1,
+ * the irq fails ics_valid_irq(), the server is out of range or the priority
+ * exceeds 0xff.  Otherwise updates the source routing and stores 0.
+ */
+static void rtas_set_xive(struct kvm_cpu *vcpu, uint32_t token,
+			  uint32_t nargs, target_ulong args,
+			  uint32_t nret, target_ulong rets)
+{
+	struct kvm *kvm = vcpu->kvm;
+	struct ics_state *ics = kvm->icp->ics;
+	uint32_t nr, server, priority;
+
+	if (nargs != 3 || nret != 1)
+		goto reject;
+
+	nr = rtas_ld(kvm, args, 0);
+	server = rtas_ld(kvm, args, 1);
+	priority = rtas_ld(kvm, args, 2);
+
+	xics_dprintf("rtas_set_xive(%x,%x,%x)\n", nr, server, priority);
+
+	if (!ics_valid_irq(ics, nr))
+		goto reject;
+	if (server >= ics->icp->nr_servers || priority > 0xff)
+		goto reject;
+
+	ics_write_xive_msi(ics, nr, server, priority);
+	rtas_st(kvm, rets, 0, 0); /* Success */
+	return;
+
+reject:
+	rtas_st(kvm, rets, 0, -3);
+}
+
+/* RTAS "ibm,get-xive": args[0] is the irq number.
+ *
+ * Stores status -3 in rets[0] when the argument/return counts are not 1/3 or
+ * the irq fails ics_valid_irq().  Otherwise stores 0 in rets[0], the source's
+ * server in rets[1] and its priority in rets[2].
+ */
+static void rtas_get_xive(struct kvm_cpu *vcpu, uint32_t token,
+			  uint32_t nargs, target_ulong args,
+			  uint32_t nret, target_ulong rets)
+{
+	struct kvm *kvm = vcpu->kvm;
+	struct ics_state *ics = kvm->icp->ics;
+	struct ics_irq_state *src;
+	uint32_t nr;
+
+	if (nargs != 1 || nret != 3)
+		goto reject;
+
+	nr = rtas_ld(kvm, args, 0);
+	if (!ics_valid_irq(ics, nr))
+		goto reject;
+
+	src = &ics->irqs[nr - ics->offset];
+
+	rtas_st(kvm, rets, 0, 0); /* Success */
+	rtas_st(kvm, rets, 1, src->server);
+	rtas_st(kvm, rets, 2, src->priority);
+	return;
+
+reject:
+	rtas_st(kvm, rets, 0, -3);
+}
+
+/* RTAS "ibm,int-off": args[0] is the irq number.
+ *
+ * Only validates its input: stores status -3 in rets[0] when the
+ * argument/return counts are not 1/1 or the irq fails ics_valid_irq(), and 0
+ * otherwise.  The actual masking is compiled out (see the #if 0 block).
+ */
+static void rtas_int_off(struct kvm_cpu *vcpu, uint32_t token,
+			 uint32_t nargs, target_ulong args,
+			 uint32_t nret, target_ulong rets)
+{
+	struct ics_state *ics = vcpu->kvm->icp->ics;
+	uint32_t nr;
+
+	if ((nargs != 1) || (nret != 1)) {
+		rtas_st(vcpu->kvm, rets, 0, -3);
+		return;
+	}
+
+	nr = rtas_ld(vcpu->kvm, args, 0);
+
+	if (!ics_valid_irq(ics, nr)) {
+		rtas_st(vcpu->kvm, rets, 0, -3);
+		return;
+	}
+
+	/* This is a NOP for now, since the described PAPR semantics don't
+	 * seem to gel with what Linux does (ME: borrowed from QEMU)
+	 */
+	/* NOTE(review): the disabled code below refers to 'xics', which is not
+	 * declared in this function (the local is 'ics'); it would need
+	 * adjusting before being enabled.
+	 */
+#if 0
+	struct ics_irq_state *irq = xics->irqs + (nr - xics->offset);
+
+	irq->saved_priority = irq->priority;
+	ics_write_xive_msi(xics, nr, irq->server, 0xff);
+#endif
+
+	rtas_st(vcpu->kvm, rets, 0, 0); /* Success */
+}
+
+/* RTAS "ibm,int-on": args[0] is the irq number.
+ *
+ * Only validates its input: stores status -3 in rets[0] when the
+ * argument/return counts are not 1/1 or the irq fails ics_valid_irq(), and 0
+ * otherwise.  Restoring the saved priority is compiled out (see the #if 0
+ * block).
+ */
+static void rtas_int_on(struct kvm_cpu *vcpu, uint32_t token,
+			uint32_t nargs, target_ulong args,
+			uint32_t nret, target_ulong rets)
+{
+	struct ics_state *ics = vcpu->kvm->icp->ics;
+	uint32_t nr;
+
+	if ((nargs != 1) || (nret != 1)) {
+		rtas_st(vcpu->kvm, rets, 0, -3);
+		return;
+	}
+
+	nr = rtas_ld(vcpu->kvm, args, 0);
+
+	if (!ics_valid_irq(ics, nr)) {
+		rtas_st(vcpu->kvm, rets, 0, -3);
+		return;
+	}
+
+	/* This is a NOP for now, since the described PAPR semantics don't
+	 * seem to gel with what Linux does (ME: borrowed from QEMU)
+	 */
+	/* NOTE(review): the disabled code below refers to 'xics', which is not
+	 * declared in this function (the local is 'ics'); it would need
+	 * adjusting before being enabled.
+	 */
+#if 0
+	struct ics_irq_state *irq = xics->irqs + (nr - xics->offset);
+
+	ics_write_xive_msi(xics, nr, irq->server, irq->saved_priority);
+#endif
+
+	rtas_st(vcpu->kvm, rets, 0, 0); /* Success */
+}
+
+/* Attach 'vcpu' to the ICP server slot indexed by its cpu_id.  Dies if the
+ * cpu_id is not below icp->nr_servers.
+ */
+void xics_cpu_register(struct kvm_cpu *vcpu)
+{
+	struct icp_state *icp = vcpu->kvm->icp;
+
+	if (vcpu->cpu_id >= icp->nr_servers)
+		die("Setting invalid server for cpuid %ld\n", vcpu->cpu_id);
+	else
+		icp->ss[vcpu->cpu_id].cpu = vcpu;
+}
+
+/* Allocate and initialise the ICP (per-server) and ICS (per-source) state,
+ * link the two together, and register the XICS hypercalls and RTAS calls.
+ *
+ * nr_irqs: number of interrupt sources to allocate in the ICS.
+ * nr_cpus: number of CPUs; nr_cpus + 1 server slots are allocated.
+ *
+ * Returns the new ICP state; its ->ics member points at the new ICS.  Dies
+ * if any allocation fails.
+ */
+struct icp_state *xics_system_init(unsigned int nr_irqs, unsigned int nr_cpus)
+{
+	int max_server_num;
+	unsigned int i;
+	struct icp_state *icp;
+	struct ics_state *ics;
+
+	max_server_num = nr_cpus;
+
+	icp = malloc(sizeof(*icp));
+	if (!icp)
+		die("Failed to allocate XICS ICP state");
+
+	icp->nr_servers = max_server_num + 1;
+	icp->ss = malloc(icp->nr_servers * sizeof(*icp->ss));
+	if (!icp->ss)
+		die("Failed to allocate XICS ICP server state");
+
+	for (i = 0; i < icp->nr_servers; i++) {
+		icp->ss[i].xirr = 0;
+		icp->ss[i].pending_priority = 0;
+		icp->ss[i].cpu = NULL;
+		icp->ss[i].mfrr = 0xff;
+	}
+
+	/* icp->ss[env->cpu_index].cpu is set by CPUs calling in to
+	 * xics_cpu_register().
+	 */
+
+	ics = malloc(sizeof(*ics));
+	if (!ics)
+		die("Failed to allocate XICS ICS state");
+
+	ics->nr_irqs = nr_irqs;
+	ics->offset = XICS_IRQ_OFFSET;
+	ics->irqs = malloc(nr_irqs * sizeof(*ics->irqs));
+	if (!ics->irqs)
+		die("Failed to allocate XICS ICS source state");
+
+	icp->ics = ics;
+	ics->icp = icp;
+
+	/* Every source starts routed to server 0 at priority 0xff with
+	 * nothing rejected or pending.
+	 */
+	for (i = 0; i < nr_irqs; i++) {
+		ics->irqs[i].server = 0;
+		ics->irqs[i].priority = 0xff;
+		ics->irqs[i].saved_priority = 0xff;
+		ics->irqs[i].rejected = 0;
+		ics->irqs[i].masked_pending = 0;
+	}
+
+	spapr_register_hypercall(H_CPPR, h_cppr);
+	spapr_register_hypercall(H_IPI, h_ipi);
+	spapr_register_hypercall(H_XIRR, h_xirr);
+	spapr_register_hypercall(H_EOI, h_eoi);
+
+	spapr_rtas_register("ibm,set-xive", rtas_set_xive);
+	spapr_rtas_register("ibm,get-xive", rtas_get_xive);
+	spapr_rtas_register("ibm,int-off", rtas_int_off);
+	spapr_rtas_register("ibm,int-on", rtas_int_on);
+
+	return icp;
+}
+
+/* Set the level of interrupt line 'irq'.  The ICS offset is subtracted from
+ * 'irq' before the request is handed to ics_set_irq_msi().
+ */
+void kvm__irq_line(struct kvm *kvm, int irq, int level)
+{
+	struct ics_state *ics = kvm->icp->ics;
+
+	/* Route me to ICS, which routes to ICP, which eventually
+	 * does a kvm_cpu__irq(vcpu, POWER7_EXT_IRQ, 1)
+	 */
+	xics_dprintf("Raising IRQ %d -> %d\n", irq, level);
+	ics_set_irq_msi(ics, irq - ics->offset, level);
+}
diff --git a/tools/kvm/powerpc/xics.h b/tools/kvm/powerpc/xics.h
new file mode 100644
index 0000000..144915b
--- /dev/null
+++ b/tools/kvm/powerpc/xics.h
@@ -0,0 +1,23 @@
+/*
+ * PAPR Virtualized Interrupt System, aka ICS/ICP aka xics
+ *
+ * Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#ifndef XICS_H
+#define XICS_H
+
+#define XICS_IPI        0x2
+
+struct kvm_cpu;
+struct icp_state;
+
+struct icp_state *xics_system_init(unsigned int nr_irqs, unsigned int nr_cpus);
+void xics_cpu_register(struct kvm_cpu *vcpu);
+int xics_alloc_irqnum(void);
+
+#endif

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 6/8] kvm tools: Add PPC64 PCI Host Bridge
       [not found] <cover.1323143103.git.matt@ozlabs.org>
                   ` (4 preceding siblings ...)
  2011-12-06  4:06 ` [PATCH 5/8] kvm tools: Add PPC64 XICS interrupt controller support Matt Evans
@ 2011-12-06  4:06 ` Matt Evans
  2011-12-06  4:06 ` [PATCH 7/8] kvm tools: Add PPC64 kvm_cpu__emulate_io() Matt Evans
  2011-12-06  4:06 ` [PATCH 8/8] kvm tools: Make virtio-pci's ioeventfd__add_event() fall back gracefully if ioeventfds unavailable Matt Evans
  7 siblings, 0 replies; 14+ messages in thread
From: Matt Evans @ 2011-12-06  4:06 UTC (permalink / raw)
  To: kvm, kvm-ppc

This provides the PCI bridge, definitions for the address layout of the windows
and wires in IRQs.  Once PCI devices are all registered, they are enumerated and
DT nodes generated for each.

Signed-off-by: Matt Evans <matt@ozlabs.org>
---
 tools/kvm/powerpc/include/kvm/kvm-arch.h |    3 +
 tools/kvm/powerpc/irq.c                  |   17 +-
 tools/kvm/powerpc/kvm.c                  |   11 +
 tools/kvm/powerpc/spapr.h                |    8 +
 tools/kvm/powerpc/spapr_pci.c            |  429 ++++++++++++++++++++++++++++++
 tools/kvm/powerpc/spapr_pci.h            |   38 +++
 6 files changed, 504 insertions(+), 2 deletions(-)
 create mode 100644 tools/kvm/powerpc/spapr_pci.c
 create mode 100644 tools/kvm/powerpc/spapr_pci.h

diff --git a/tools/kvm/powerpc/include/kvm/kvm-arch.h b/tools/kvm/powerpc/include/kvm/kvm-arch.h
index ae811e9..ba374f5 100644
--- a/tools/kvm/powerpc/include/kvm/kvm-arch.h
+++ b/tools/kvm/powerpc/include/kvm/kvm-arch.h
@@ -40,6 +40,8 @@
  */
 #define KVM_PCI_MMIO_AREA		0x1000000
 
+struct spapr_phb;
+
 struct kvm {
 	int			sys_fd;		/* For system ioctls(), i.e. /dev/kvm */
 	int			vm_fd;		/* For VM ioctls() */
@@ -66,6 +68,7 @@ struct kvm {
 	unsigned long		initrd_size;
 	const char		*name;
 	struct icp_state	*icp;
+	struct spapr_phb	*phb;
 };
 
 #endif /* KVM__KVM_ARCH_H */
diff --git a/tools/kvm/powerpc/irq.c b/tools/kvm/powerpc/irq.c
index 80c972a..134db8f 100644
--- a/tools/kvm/powerpc/irq.c
+++ b/tools/kvm/powerpc/irq.c
@@ -21,14 +21,27 @@
 #include <stddef.h>
 #include <stdlib.h>
 
+#include "kvm/pci.h"
+
 #include "xics.h"
+#include "spapr_pci.h"
 
 #define XICS_IRQS               1024
 
+static int pci_devs = 0;
+
 int irq__register_device(u32 dev, u8 *num, u8 *pin, u8 *line)
 {
-	fprintf(stderr, "irq__register_device(%d, [%d], [%d], [%d]\n",
-		dev, *num, *pin, *line);
+	if (pci_devs >= PCI_MAX_DEVICES)
+		die("Hit PCI device limit!\n");
+
+	*num = pci_devs++;
+
+	*pin = 1;
+	/* Have I said how nasty I find this?  Line should be dontcare... PHB
+	 * should determine which CPU/XICS IRQ to fire.
+	 */
+	*line = xics_alloc_irqnum();
 	return 0;
 }
 
diff --git a/tools/kvm/powerpc/kvm.c b/tools/kvm/powerpc/kvm.c
index bfd7c3a..353c667 100644
--- a/tools/kvm/powerpc/kvm.c
+++ b/tools/kvm/powerpc/kvm.c
@@ -16,6 +16,7 @@
 
 #include "spapr.h"
 #include "spapr_hvcons.h"
+#include "spapr_pci.h"
 
 #include <linux/kvm.h>
 
@@ -166,6 +167,11 @@ void kvm__arch_init(struct kvm *kvm, const char *kvm_dev, const char *hugetlbfs_
 	register_core_rtas();
 	/* Now that hypercalls are initialised, register a couple for the console: */
 	spapr_hvcons_init();
+	spapr_create_phb(kvm, "pci", SPAPR_PCI_BUID,
+			 SPAPR_PCI_MEM_WIN_ADDR,
+			 SPAPR_PCI_MEM_WIN_SIZE,
+			 SPAPR_PCI_IO_WIN_ADDR,
+			 SPAPR_PCI_IO_WIN_SIZE);
 }
 
 void kvm__irq_trigger(struct kvm *kvm, int irq)
@@ -420,6 +426,11 @@ static void setup_fdt(struct kvm *kvm)
 	_FDT(fdt_finish(fdt));
 
 	_FDT(fdt_open_into(fdt, fdt_dest, FDT_MAX_SIZE));
+
+	/* PCI */
+	if (spapr_populate_pci_devices(kvm, PHANDLE_XICP, fdt_dest))
+		die("Fail populating PCI device nodes");
+
 	_FDT(fdt_add_mem_rsv(fdt_dest, kvm->rtas_gra, kvm->rtas_size));
 	_FDT(fdt_pack(fdt_dest));
 }
diff --git a/tools/kvm/powerpc/spapr.h b/tools/kvm/powerpc/spapr.h
index 4e5d7bd..902496d 100644
--- a/tools/kvm/powerpc/spapr.h
+++ b/tools/kvm/powerpc/spapr.h
@@ -305,4 +305,12 @@ target_ulong spapr_rtas_call(struct kvm_cpu *vcpu,
                              uint32_t token, uint32_t nargs, target_ulong args,
                              uint32_t nret, target_ulong rets);
 
+#define SPAPR_PCI_BUID          0x800000020000001ULL
+#define SPAPR_PCI_MEM_WIN_ADDR  (KVM_MMIO_START + 0xA0000000)
+#define SPAPR_PCI_MEM_WIN_SIZE  0x20000000
+#define SPAPR_PCI_IO_WIN_ADDR   (KVM_MMIO_START + 0x80000000)
+/* This, to me, is odd... 32MB of I/O?  Some PHBs are set up like this.
+ * Anything ever use > 64K? :P */
+#define SPAPR_PCI_IO_WIN_SIZE	0x2000000
+
 #endif /* !defined (__HW_SPAPR_H__) */
diff --git a/tools/kvm/powerpc/spapr_pci.c b/tools/kvm/powerpc/spapr_pci.c
new file mode 100644
index 0000000..233c42c
--- /dev/null
+++ b/tools/kvm/powerpc/spapr_pci.c
@@ -0,0 +1,429 @@
+/*
+ * SPAPR PHB emulation, RTAS interface to PCI config space, device tree nodes
+ * for enumerated devices.
+ *
+ * Borrowed heavily from QEMU's spapr_pci.c,
+ * Copyright (c) 2011 Alexey Kardashevskiy, IBM Corporation.
+ * Copyright (c) 2011 David Gibson, IBM Corporation.
+ *
+ * Modifications copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include "spapr.h"
+#include "spapr_pci.h"
+#include "kvm/util.h"
+#include "kvm/pci.h"
+
+#include <linux/pci_regs.h>
+#include <libfdt.h>
+#include <linux/byteorder.h>
+
+
+//#define DEBUG_PHB yes
+#ifdef DEBUG_PHB
+#define phb_dprintf(fmt, ...)					\
+	do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
+#else
+#define phb_dprintf(fmt, ...)			\
+	do { } while (0)
+#endif
+
+static const uint32_t bars[] = {
+	PCI_BASE_ADDRESS_0, PCI_BASE_ADDRESS_1,
+	PCI_BASE_ADDRESS_2, PCI_BASE_ADDRESS_3,
+	PCI_BASE_ADDRESS_4, PCI_BASE_ADDRESS_5
+	/*, PCI_ROM_ADDRESS*/
+};
+
+#define PCI_NUM_REGIONS		7
+
+static struct spapr_phb phb;
+
+
+/* RTAS "ibm,read-pci-config": args are (config address, BUID high word,
+ * BUID low word, size).
+ *
+ * Stores -1 in rets[0] if the BUID is not this PHB's, no device exists at
+ * the addressed device number, or size > 4.  Otherwise stores 0 in rets[0]
+ * and the value read in rets[1].
+ */
+static void rtas_ibm_read_pci_config(struct kvm_cpu *vcpu,
+				     uint32_t token, uint32_t nargs,
+				     target_ulong args,
+				     uint32_t nret, target_ulong rets)
+{
+	struct kvm *kvm = vcpu->kvm;
+	uint32_t val = 0;
+	uint64_t buid_hi = rtas_ld(kvm, args, 1);
+	uint64_t buid_lo = rtas_ld(kvm, args, 2);
+	uint64_t buid = (buid_hi << 32) | buid_lo;
+	union pci_config_address addr = { .w = rtas_ld(kvm, args, 0) };
+	struct pci_device_header *dev = pci__find_dev(addr.device_number);
+	uint32_t size = rtas_ld(kvm, args, 3);
+
+	if (buid != phb.buid || !dev || (size > 4)) {
+		phb_dprintf("- cfgRd buid 0x%lx cfg addr 0x%x size %d not found\n",
+			    buid, addr.w, size);
+
+		rtas_st(kvm, rets, 0, -1);
+		return;
+	}
+
+	pci__config_rd(kvm, addr, &val, size);
+
+	/* It appears this wants a byteswapped result... */
+	if (size == 4)
+		val = le32_to_cpu(val);
+	else if (size == 2)
+		val = le16_to_cpu(val>>16);
+	else if (size == 1)
+		val = val >> 24;
+
+	phb_dprintf("- cfgRd buid 0x%lx addr 0x%x (/%d): b%d,d%d,f%d,r0x%x, val 0x%x\n",
+		    buid, addr.w, size, addr.bus_number, addr.device_number, addr.function_number,
+		    addr.register_number, val);
+
+	rtas_st(kvm, rets, 0, 0);
+	rtas_st(kvm, rets, 1, val);
+}
+
+/* RTAS "read-pci-config": args are (config address, size).
+ *
+ * Stores -1 in rets[0] if no device exists at the addressed device number or
+ * size > 4.  Otherwise stores 0 in rets[0] and the value read in rets[1].
+ */
+static void rtas_read_pci_config(struct kvm_cpu *vcpu,
+				 uint32_t token, uint32_t nargs,
+				 target_ulong args,
+				 uint32_t nret, target_ulong rets)
+{
+	/* Start from zero, as rtas_ibm_read_pci_config() does: sizes 0 and 3
+	 * pass the check below but are not handled by the switch, and must
+	 * not hand indeterminate bytes back to the guest.
+	 */
+	uint32_t val = 0;
+	union pci_config_address addr = { .w = rtas_ld(vcpu->kvm, args, 0) };
+	struct pci_device_header *dev = pci__find_dev(addr.device_number);
+	uint32_t size = rtas_ld(vcpu->kvm, args, 1);
+
+	if (!dev || (size > 4)) {
+		rtas_st(vcpu->kvm, rets, 0, -1);
+		return;
+	}
+	pci__config_rd(vcpu->kvm, addr, &val, size);
+	switch (size) {
+	case 4:
+		val = le32_to_cpu(val);
+		break;
+	case 2:
+		val = le16_to_cpu(val>>16); /* We're yuck-endian. */
+		break;
+	case 1:
+		val = val >> 24;
+		break;
+	}
+	phb_dprintf("- cfgRd addr 0x%x size %d, val 0x%x\n", addr.w, size, val);
+	rtas_st(vcpu->kvm, rets, 0, 0);
+	rtas_st(vcpu->kvm, rets, 1, val);
+}
+
+/* RTAS "ibm,write-pci-config": args are (config address, BUID high word,
+ * BUID low word, size, value).
+ *
+ * Stores -1 in rets[0] if the BUID is not this PHB's, no device exists at
+ * the addressed device number, or size > 4.  Otherwise performs the write
+ * and stores 0 in rets[0].
+ */
+static void rtas_ibm_write_pci_config(struct kvm_cpu *vcpu,
+				      uint32_t token, uint32_t nargs,
+				      target_ulong args,
+				      uint32_t nret, target_ulong rets)
+{
+	uint64_t buid = ((uint64_t)rtas_ld(vcpu->kvm, args, 1) << 32) | rtas_ld(vcpu->kvm, args, 2);
+	union pci_config_address addr = { .w = rtas_ld(vcpu->kvm, args, 0) };
+	struct pci_device_header *dev = pci__find_dev(addr.device_number);
+	uint32_t size = rtas_ld(vcpu->kvm, args, 3);
+	uint32_t val = rtas_ld(vcpu->kvm, args, 4);
+
+	if (buid != phb.buid || !dev || (size > 4)) {
+		phb_dprintf("- cfgWr buid 0x%lx cfg addr 0x%x/%d error (val 0x%x)\n",
+			    buid, addr.w, size, val);
+
+		rtas_st(vcpu->kvm, rets, 0, -1);
+		return;
+	}
+	phb_dprintf("- cfgWr buid 0x%lx addr 0x%x (/%d): b%d,d%d,f%d,r0x%x, val 0x%x\n",
+		    buid, addr.w, size, addr.bus_number, addr.device_number, addr.function_number,
+		    addr.register_number, val);
+	/* pci__config_wr() is given &val plus a size, so sub-word payloads are
+	 * moved to the most-significant end of val (assumes a big-endian host
+	 * where those are the first bytes in memory -- TODO confirm).  This is
+	 * the inverse of the shifts on the read path.
+	 */
+	switch (size) {
+	case 4:
+		val = le32_to_cpu(val);
+		break;
+	case 2:
+		val = le16_to_cpu(val) << 16;
+		break;
+	case 1:
+		/* Was 'val >> 24', which discarded the byte being written. */
+		val = val << 24;
+		break;
+	}
+	pci__config_wr(vcpu->kvm, addr, &val, size);
+	rtas_st(vcpu->kvm, rets, 0, 0);
+}
+
+/* RTAS "write-pci-config": args are (config address, size, value).
+ *
+ * Stores -1 in rets[0] if no device exists at the addressed device number or
+ * size > 4.  Otherwise performs the write and stores 0 in rets[0].
+ */
+static void rtas_write_pci_config(struct kvm_cpu *vcpu,
+				  uint32_t token, uint32_t nargs,
+				  target_ulong args,
+				  uint32_t nret, target_ulong rets)
+{
+	union pci_config_address addr = { .w = rtas_ld(vcpu->kvm, args, 0) };
+	struct pci_device_header *dev = pci__find_dev(addr.device_number);
+	uint32_t size = rtas_ld(vcpu->kvm, args, 1);
+	uint32_t val = rtas_ld(vcpu->kvm, args, 2);
+
+	if (!dev || (size > 4)) {
+		rtas_st(vcpu->kvm, rets, 0, -1);
+		return;
+	}
+
+	phb_dprintf("- cfgWr addr 0x%x (/%d): b%d,d%d,f%d,r0x%x, val 0x%x\n",
+		    addr.w, size, addr.bus_number, addr.device_number, addr.function_number,
+		    addr.register_number, val);
+	/* pci__config_wr() is given &val plus a size, so sub-word payloads are
+	 * moved to the most-significant end of val (assumes a big-endian host
+	 * where those are the first bytes in memory -- TODO confirm).  This is
+	 * the inverse of the shifts on the read path.
+	 */
+	switch (size) {
+	case 4:
+		val = le32_to_cpu(val);
+		break;
+	case 2:
+		val = le16_to_cpu(val) << 16;
+		break;
+	case 1:
+		/* Was 'val >> 24', which discarded the byte being written. */
+		val = val << 24;
+		break;
+	}
+	pci__config_wr(vcpu->kvm, addr, &val, size);
+	rtas_st(vcpu->kvm, rets, 0, 0);
+}
+
+/* Register the PCI config-space RTAS calls and record the PHB's BUID and
+ * memory/IO window placement in the file-level 'phb', which kvm->phb is then
+ * pointed at.  'busname' is not used.
+ */
+void spapr_create_phb(struct kvm *kvm,
+		      const char *busname, uint64_t buid,
+		      uint64_t mem_win_addr, uint64_t mem_win_size,
+		      uint64_t io_win_addr, uint64_t io_win_size)
+{
+	/* Since kvmtool doesn't really have any concept of buses etc.,
+	 * there's nothing to register here.  Just register RTAS.
+	 */
+	spapr_rtas_register("read-pci-config", rtas_read_pci_config);
+	spapr_rtas_register("write-pci-config", rtas_write_pci_config);
+	spapr_rtas_register("ibm,read-pci-config", rtas_ibm_read_pci_config);
+	spapr_rtas_register("ibm,write-pci-config", rtas_ibm_write_pci_config);
+
+	phb = (struct spapr_phb) {
+		.buid		= buid,
+		.mem_addr	= mem_win_addr,
+		.mem_size	= mem_win_size,
+		.io_addr	= io_win_addr,
+		.io_size	= io_win_size,
+	};
+
+	kvm->phb = &phb;
+}
+
+/* Macros to operate with address in OF binding to PCI.
+ *
+ * b_x(x, p, l) keeps the low 'l' bits of 'x' and places them at bit 'p'.
+ * The arithmetic is done in uint32_t so that fields reaching bit 31 (b_n)
+ * do not left-shift into the sign bit of an int.
+ */
+#define b_x(x, p, l)	((((uint32_t)(x)) & ((1U << (l)) - 1)) << (p))
+#define b_n(x)		b_x((x), 31, 1) /* 0 if relocatable */
+#define b_p(x)		b_x((x), 30, 1) /* 1 if prefetchable */
+#define b_t(x)		b_x((x), 29, 1) /* 1 if the address is aliased */
+#define b_ss(x)		b_x((x), 24, 2) /* the space code */
+#define b_bbbbbbbb(x)	b_x((x), 16, 8) /* bus number */
+#define b_ddddd(x)	b_x((x), 11, 5) /* device number */
+#define b_fff(x)	b_x((x), 8, 3)	/* function number */
+#define b_rrrrrrrr(x)	b_x((x), 0, 8)	/* register number */
+
+#define SS_M64		3
+#define SS_M32		2
+#define SS_IO		1
+#define SS_CONFIG	0
+
+/* Map a BAR value to its space code: SS_IO for an I/O BAR, otherwise SS_M64
+ * if the 64-bit memory type bit is set, else SS_M32.
+ */
+static uint32_t bar_to_ss(unsigned long bar)
+{
+	unsigned long space = bar & PCI_BASE_ADDRESS_SPACE;
+
+	if (space == PCI_BASE_ADDRESS_SPACE_IO)
+		return SS_IO;
+
+	return (bar & PCI_BASE_ADDRESS_MEM_TYPE_64) ? SS_M64 : SS_M32;
+}
+
+/* Strip the flag bits from a BAR value, using the I/O mask for an I/O BAR
+ * and the memory mask otherwise, leaving just the address.
+ */
+static unsigned long bar_to_addr(unsigned long bar)
+{
+	unsigned long space = bar & PCI_BASE_ADDRESS_SPACE;
+
+	return (space == PCI_BASE_ADDRESS_SPACE_IO)
+		? (bar & PCI_BASE_ADDRESS_IO_MASK)
+		: (bar & PCI_BASE_ADDRESS_MEM_MASK);
+}
+
+/* Add the PHB node and one child node per registered PCI device to the
+ * device tree in 'fdt'.
+ *
+ * kvm:          not used by this function.
+ * xics_phandle: phandle written into every interrupt-map entry as the
+ *               interrupt parent.
+ * fdt:          device tree blob to add the nodes to.
+ *
+ * For each device found via pci__find_dev() this writes the identification
+ * properties, "reg" and "assigned-addresses" built from the non-zero BARs,
+ * and "interrupts"; one interrupt-map row per device is collected and written
+ * to the PHB node at the end.
+ *
+ * Returns 0 on success.  Failures are reported through die().
+ */
+int spapr_populate_pci_devices(struct kvm *kvm,
+			       uint32_t xics_phandle,
+			       void *fdt)
+{
+	int bus_off, node_off = 0, devid, fn, i, n, devices;
+	char nodename[256];
+	struct {
+		uint32_t hi;
+		uint64_t addr;
+		uint64_t size;
+	} __attribute__((packed)) reg[PCI_NUM_REGIONS + 1],
+		  assigned_addresses[PCI_NUM_REGIONS];
+	uint32_t bus_range[] = { cpu_to_be32(0), cpu_to_be32(0xff) };
+	struct {
+		uint32_t hi;
+		uint64_t child;
+		uint64_t parent;
+		uint64_t size;
+	} __attribute__((packed)) ranges[] = {
+		{
+			cpu_to_be32(b_ss(1)), cpu_to_be64(0),
+			cpu_to_be64(phb.io_addr),
+			cpu_to_be64(phb.io_size),
+		},
+		{
+			cpu_to_be32(b_ss(2)), cpu_to_be64(0),
+			cpu_to_be64(phb.mem_addr),
+			cpu_to_be64(phb.mem_size),
+		},
+	};
+	uint64_t bus_reg[] = { cpu_to_be64(phb.buid), 0 };
+	uint32_t interrupt_map_mask[] = {
+		cpu_to_be32(b_ddddd(-1)|b_fff(-1)), 0x0, 0x0, 0x0};
+	uint32_t interrupt_map[SPAPR_PCI_NUM_LSI][7];
+
+	/* Start populating the FDT */
+	sprintf(nodename, "pci@%" PRIx64, phb.buid);
+	bus_off = fdt_add_subnode(fdt, 0, nodename);
+	if (bus_off < 0) {
+		die("error making bus subnode, %s\n", fdt_strerror(bus_off));
+		return bus_off;
+	}
+
+#define _FDT(exp)							\
+	do {								\
+		int ret = (exp);					\
+		if (ret < 0) {						\
+			die("Error creating device tree: %s: %s\n",	\
+			    #exp, fdt_strerror(ret));			\
+		}							\
+	} while (0)
+
+	/* Write PHB properties */
+	_FDT(fdt_setprop_string(fdt, bus_off, "device_type", "pci"));
+	_FDT(fdt_setprop_string(fdt, bus_off, "compatible", "IBM,Logical_PHB"));
+	_FDT(fdt_setprop_cell(fdt, bus_off, "#address-cells", 0x3));
+	_FDT(fdt_setprop_cell(fdt, bus_off, "#size-cells", 0x2));
+	_FDT(fdt_setprop_cell(fdt, bus_off, "#interrupt-cells", 0x1));
+	_FDT(fdt_setprop(fdt, bus_off, "used-by-rtas", NULL, 0));
+	_FDT(fdt_setprop(fdt, bus_off, "bus-range", &bus_range, sizeof(bus_range)));
+	_FDT(fdt_setprop(fdt, bus_off, "ranges", &ranges, sizeof(ranges)));
+	_FDT(fdt_setprop(fdt, bus_off, "reg", &bus_reg, sizeof(bus_reg)));
+	_FDT(fdt_setprop(fdt, bus_off, "interrupt-map-mask",
+			 &interrupt_map_mask, sizeof(interrupt_map_mask)));
+
+	/* Populate PCI devices and allocate IRQs */
+	devices = 0;
+
+	for (devid = 0; devid < PCI_MAX_DEVICES; devid++) {
+		uint32_t *irqmap;
+		struct pci_device_header *hdr = pci__find_dev(devid);
+
+		if (!hdr)
+			continue;
+
+		fn = 0; /* kvmtool doesn't yet do multifunction devices */
+
+		sprintf(nodename, "pci@%u,%u", devid, fn);
+
+		/* Allocate interrupt from the map.  The map is indexed by the
+		 * number of devices found so far, so that is what has to stay
+		 * within the array.
+		 */
+		if (devices >= SPAPR_PCI_NUM_LSI) {
+			die("Unexpected behaviour in spapr_populate_pci_devices,"
+			    "too many devices (devid %u)\n", devid);
+		}
+		irqmap = interrupt_map[devices];
+		irqmap[0] = cpu_to_be32(b_ddddd(devid)|b_fff(fn));
+		irqmap[1] = 0;
+		irqmap[2] = 0;
+		irqmap[3] = 0;
+		irqmap[4] = cpu_to_be32(xics_phandle);
+		/* This is nasty; the PCI devs are set up such that their own
+		 * header's irq_line indicates the direct XICS IRQ number to
+		 * use.  There REALLY needs to be a hierarchical system in place
+		 * to 'raise' an IRQ on the bridge which indexes/looks up which
+		 * XICS IRQ to fire.
+		 */
+		irqmap[5] = cpu_to_be32(hdr->irq_line);
+		irqmap[6] = cpu_to_be32(0x8);
+
+		/* Add node to FDT */
+		node_off = fdt_add_subnode(fdt, bus_off, nodename);
+		if (node_off < 0) {
+			/* Report the error of the call that failed, not the
+			 * (successful) bus node offset.
+			 */
+			die("error making node subnode, %s\n", fdt_strerror(node_off));
+			return node_off;
+		}
+
+		_FDT(fdt_setprop_cell(fdt, node_off, "vendor-id",
+				      le16_to_cpu(hdr->vendor_id)));
+		_FDT(fdt_setprop_cell(fdt, node_off, "device-id",
+				      le16_to_cpu(hdr->device_id)));
+		_FDT(fdt_setprop_cell(fdt, node_off, "revision-id",
+				      hdr->revision_id));
+		_FDT(fdt_setprop_cell(fdt, node_off, "class-code",
+				      hdr->class[0] | (hdr->class[1] << 8) | (hdr->class[2] << 16)));
+		_FDT(fdt_setprop_cell(fdt, node_off, "subsystem-id",
+				      le16_to_cpu(hdr->subsys_id)));
+		_FDT(fdt_setprop_cell(fdt, node_off, "subsystem-vendor-id",
+				      le16_to_cpu(hdr->subsys_vendor_id)));
+
+		/* Config space region comes first */
+		reg[0].hi = cpu_to_be32(
+			b_n(0) |
+			b_p(0) |
+			b_t(0) |
+			b_ss(SS_CONFIG) |
+			b_bbbbbbbb(0) |
+			b_ddddd(devid) |
+			b_fff(fn));
+		reg[0].addr = 0;
+		reg[0].size = 0;
+
+		n = 0;
+		/* Six BARs, no ROM supported, addresses are 32bit */
+		for (i = 0; i < 6; ++i) {
+			if (0 == hdr->bar[i]) {
+				continue;
+			}
+
+			reg[n+1].hi = cpu_to_be32(
+				b_n(0) |
+				b_p(0) |
+				b_t(0) |
+				b_ss(bar_to_ss(le32_to_cpu(hdr->bar[i]))) |
+				b_bbbbbbbb(0) |
+				b_ddddd(devid) |
+				b_fff(fn) |
+				b_rrrrrrrr(bars[i]));
+			reg[n+1].addr = 0;
+			reg[n+1].size = cpu_to_be64(hdr->bar_size[i]);
+
+			assigned_addresses[n].hi = cpu_to_be32(
+				b_n(1) |
+				b_p(0) |
+				b_t(0) |
+				b_ss(bar_to_ss(le32_to_cpu(hdr->bar[i]))) |
+				b_bbbbbbbb(0) |
+				b_ddddd(devid) |
+				b_fff(fn) |
+				b_rrrrrrrr(bars[i]));
+
+			/*
+			 * Writing zeroes to assigned_addresses causes the guest kernel to
+			 * reassign BARs
+			 */
+			assigned_addresses[n].addr = cpu_to_be64(bar_to_addr(le32_to_cpu(hdr->bar[i])));
+			assigned_addresses[n].size = reg[n+1].size;
+
+			++n;
+		}
+		/* reg[] holds the config-space entry plus n BAR entries. */
+		_FDT(fdt_setprop(fdt, node_off, "reg", reg, sizeof(reg[0])*(n+1)));
+		_FDT(fdt_setprop(fdt, node_off, "assigned-addresses",
+				 assigned_addresses,
+				 sizeof(assigned_addresses[0])*(n)));
+		_FDT(fdt_setprop_cell(fdt, node_off, "interrupts",
+				      hdr->irq_pin));
+
+		/* We don't set ibm,dma-window property as we don't have an IOMMU. */
+
+		++devices;
+	}
+
+	/* Write interrupt map */
+	_FDT(fdt_setprop(fdt, bus_off, "interrupt-map", &interrupt_map,
+			 devices * sizeof(interrupt_map[0])));
+
+	return 0;
+}
diff --git a/tools/kvm/powerpc/spapr_pci.h b/tools/kvm/powerpc/spapr_pci.h
new file mode 100644
index 0000000..8a71f71
--- /dev/null
+++ b/tools/kvm/powerpc/spapr_pci.h
@@ -0,0 +1,38 @@
+/*
+ * SPAPR PHB definitions
+ *
+ * Modifications by Matt Evans <matt@ozlabs.org>, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#ifndef SPAPR_PCI_H
+#define SPAPR_PCI_H
+
+#include "kvm/kvm.h"
+#include <inttypes.h>
+
+/* With XICS, we can easily accommodate 1 IRQ per PCI device. */
+
+#define SPAPR_PCI_NUM_LSI 256
+
+/* Parameters of an emulated PCI host bridge (PHB), as filled in by
+ * spapr_create_phb().
+ */
+struct spapr_phb {
+	uint64_t buid;		/* Bus unit ID the PHB is identified by */
+	uint64_t mem_addr;	/* Start address of the memory window */
+	uint64_t mem_size;	/* Size of the memory window */
+	uint64_t io_addr;	/* Start address of the I/O window */
+	uint64_t io_size;	/* Size of the I/O window */
+};
+
+void spapr_create_phb(struct kvm *kvm,
+                      const char *busname, uint64_t buid,
+                      uint64_t mem_win_addr, uint64_t mem_win_size,
+                      uint64_t io_win_addr, uint64_t io_win_size);
+
+int spapr_populate_pci_devices(struct kvm *kvm,
+                               uint32_t xics_phandle,
+                               void *fdt);
+
+#endif

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 7/8] kvm tools: Add PPC64 kvm_cpu__emulate_io()
       [not found] <cover.1323143103.git.matt@ozlabs.org>
                   ` (5 preceding siblings ...)
  2011-12-06  4:06 ` [PATCH 6/8] kvm tools: Add PPC64 PCI Host Bridge Matt Evans
@ 2011-12-06  4:06 ` Matt Evans
  2011-12-06  4:06 ` [PATCH 8/8] kvm tools: Make virtio-pci's ioeventfd__add_event() fall back gracefully if ioeventfds unavailable Matt Evans
  7 siblings, 0 replies; 14+ messages in thread
From: Matt Evans @ 2011-12-06  4:06 UTC (permalink / raw)
  To: kvm, kvm-ppc

This is the final piece of the puzzle for PPC SPAPR PCI; this
function splits MMIO accesses into the two PHB windows & directs
things to MMIO/IO emulation as appropriate.

Signed-off-by: Matt Evans <matt@ozlabs.org>
---
 tools/kvm/Makefile          |    1 +
 tools/kvm/powerpc/kvm-cpu.c |   34 ++++++++++++++++++++++++++++++++++
 2 files changed, 35 insertions(+), 0 deletions(-)

diff --git a/tools/kvm/Makefile b/tools/kvm/Makefile
index 6ffffc8..9b875dd 100644
--- a/tools/kvm/Makefile
+++ b/tools/kvm/Makefile
@@ -131,6 +131,7 @@ ifeq ($(uname_M), ppc64)
 	OBJS	+= powerpc/spapr_hcall.o
 	OBJS	+= powerpc/spapr_rtas.o
 	OBJS	+= powerpc/spapr_hvcons.o
+	OBJS	+= powerpc/spapr_pci.o
 	OBJS	+= powerpc/xics.o
 	ARCH_INCLUDE := powerpc/include
 	CFLAGS 	+= -m64
diff --git a/tools/kvm/powerpc/kvm-cpu.c b/tools/kvm/powerpc/kvm-cpu.c
index 63cd106..0cf4dc8 100644
--- a/tools/kvm/powerpc/kvm-cpu.c
+++ b/tools/kvm/powerpc/kvm-cpu.c
@@ -24,6 +24,7 @@
 #include <string.h>
 #include <errno.h>
 #include <stdio.h>
+#include <assert.h>
 
 static int debug_fd;
 
@@ -177,6 +178,39 @@ bool kvm_cpu__handle_exit(struct kvm_cpu *vcpu)
 	return ret;
 }
 
+/* Dispatch an MMIO exit to I/O-port or MMIO emulation depending on which PHB
+ * window the physical address falls in.  The window base is subtracted
+ * before the access is handed on.  Addresses outside both windows are logged
+ * and reported as unhandled (false).
+ */
+bool kvm_cpu__emulate_io(struct kvm_cpu *cpu, struct kvm_run *kvm_run)
+{
+	u64 phys_addr;
+
+	/* We'll never get KVM_EXIT_IO, it's x86-specific.  All IO is MM! :P
+	 * So, look at our windows here & split addresses into I/O or MMIO.
+	 */
+	assert(kvm_run->exit_reason == KVM_EXIT_MMIO);
+
+	phys_addr = cpu->kvm_run->mmio.phys_addr;
+
+	if ((phys_addr >= SPAPR_PCI_IO_WIN_ADDR) &&
+	    (phys_addr < SPAPR_PCI_IO_WIN_ADDR + SPAPR_PCI_IO_WIN_SIZE)) {
+		int direction = cpu->kvm_run->mmio.is_write ?
+				KVM_EXIT_IO_OUT : KVM_EXIT_IO_IN;
+
+		return kvm__emulate_io(cpu->kvm, phys_addr - SPAPR_PCI_IO_WIN_ADDR,
+				       cpu->kvm_run->mmio.data,
+				       direction,
+				       cpu->kvm_run->mmio.len, 1);
+	}
+
+	if ((phys_addr >= SPAPR_PCI_MEM_WIN_ADDR) &&
+	    (phys_addr < SPAPR_PCI_MEM_WIN_ADDR + SPAPR_PCI_MEM_WIN_SIZE)) {
+		return kvm__emulate_mmio(cpu->kvm,
+					 phys_addr - SPAPR_PCI_MEM_WIN_ADDR,
+					 cpu->kvm_run->mmio.data,
+					 cpu->kvm_run->mmio.len,
+					 cpu->kvm_run->mmio.is_write);
+	}
+
+	pr_warning("MMIO %s unknown address %lx (size %d)!\n",
+		   cpu->kvm_run->mmio.is_write ? "write to" : "read from",
+		   phys_addr, cpu->kvm_run->mmio.len);
+	return false;
+}
+
 #define CONDSTR_BIT(m, b) (((m) & MSR_##b) ? #b" " : "")
 
 void kvm_cpu__show_registers(struct kvm_cpu *vcpu)

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 8/8] kvm tools: Make virtio-pci's ioeventfd__add_event() fall back gracefully if ioeventfds unavailable
       [not found] <cover.1323143103.git.matt@ozlabs.org>
                   ` (6 preceding siblings ...)
  2011-12-06  4:06 ` [PATCH 7/8] kvm tools: Add PPC64 kvm_cpu__emulate_io() Matt Evans
@ 2011-12-06  4:06 ` Matt Evans
  7 siblings, 0 replies; 14+ messages in thread
From: Matt Evans @ 2011-12-06  4:06 UTC (permalink / raw)
  To: kvm, kvm-ppc

PPC KVM doesn't yet support ioeventfds, so don't bomb out/die.  virtio-pci is
able to function if it instead uses normal IO port notification.

Signed-off-by: Matt Evans <matt@ozlabs.org>
---
 tools/kvm/include/kvm/ioeventfd.h |    3 ++-
 tools/kvm/ioeventfd.c             |   12 +++++++++---
 tools/kvm/virtio/pci.c            |   11 ++++++++++-
 3 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/tools/kvm/include/kvm/ioeventfd.h b/tools/kvm/include/kvm/ioeventfd.h
index df01750..5e458be 100644
--- a/tools/kvm/include/kvm/ioeventfd.h
+++ b/tools/kvm/include/kvm/ioeventfd.h
@@ -4,6 +4,7 @@
 #include <linux/types.h>
 #include <linux/list.h>
 #include <sys/eventfd.h>
+#include <stdbool.h>
 
 struct kvm;
 
@@ -21,7 +22,7 @@ struct ioevent {
 
 void ioeventfd__init(void);
 void ioeventfd__start(void);
-void ioeventfd__add_event(struct ioevent *ioevent);
+bool ioeventfd__add_event(struct ioevent *ioevent);
 void ioeventfd__del_event(u64 addr, u64 datamatch);
 
 #endif
diff --git a/tools/kvm/ioeventfd.c b/tools/kvm/ioeventfd.c
index 3a240e4..37f9a63 100644
--- a/tools/kvm/ioeventfd.c
+++ b/tools/kvm/ioeventfd.c
@@ -26,7 +26,7 @@ void ioeventfd__init(void)
 		die("Failed creating epoll fd");
 }
 
-void ioeventfd__add_event(struct ioevent *ioevent)
+bool ioeventfd__add_event(struct ioevent *ioevent)
 {
 	struct kvm_ioeventfd kvm_ioevent;
 	struct epoll_event epoll_event;
@@ -48,8 +48,13 @@ void ioeventfd__add_event(struct ioevent *ioevent)
 		.flags			= KVM_IOEVENTFD_FLAG_PIO | KVM_IOEVENTFD_FLAG_DATAMATCH,
 	};
 
-	if (ioctl(ioevent->fn_kvm->vm_fd, KVM_IOEVENTFD, &kvm_ioevent) != 0)
-		die("Failed creating new ioeventfd");
+	if (ioctl(ioevent->fn_kvm->vm_fd, KVM_IOEVENTFD, &kvm_ioevent) != 0) {
+		/* Not all KVM implementations may support KVM_IOEVENTFD,
+		 * so be graceful.
+		 */
+		free(new_ioevent);
+		return false;
+	}
 
 	epoll_event = (struct epoll_event) {
 		.events			= EPOLLIN,
@@ -60,6 +65,7 @@ void ioeventfd__add_event(struct ioevent *ioevent)
 		die("Failed assigning new event to the epoll fd");
 
 	list_add_tail(&new_ioevent->list, &used_ioevents);
+	return true;
 }
 
 void ioeventfd__del_event(u64 addr, u64 datamatch)
diff --git a/tools/kvm/virtio/pci.c b/tools/kvm/virtio/pci.c
index ffa3768..06d3b79 100644
--- a/tools/kvm/virtio/pci.c
+++ b/tools/kvm/virtio/pci.c
@@ -50,7 +50,16 @@ static int virtio_pci__init_ioeventfd(struct kvm *kvm, struct virtio_trans *vtra
 		.fd		= eventfd(0, 0),
 	};
 
-	ioeventfd__add_event(&ioevent);
+	if (!ioeventfd__add_event(&ioevent)) {
+#ifndef CONFIG_PPC
+		/* PPC64 doesn't have kvm ioevents yet, so we expect this to
+		 * fail -- don't need to be verbose about it!  For virtio-pci,
+		 * this is fine.  It catches the IO accesses anyway, so
+		 * still works (but slower).
+		 */
+		pr_warning("Failed creating new ioeventfd");
+#endif
+	}
 
 	if (vtrans->virtio_ops->notify_vq_eventfd)
 		vtrans->virtio_ops->notify_vq_eventfd(kvm, vpci->dev, vq, ioevent.fd);

^ permalink raw reply related	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2011-12-08  2:57 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <cover.1323143103.git.matt@ozlabs.org>
2011-12-06  4:05 ` [PATCH 1/8] kvm tools: Add initial SPAPR PPC64 architecture support Matt Evans
2011-12-06 18:03   ` Scott Wood
2011-12-06 18:33     ` Pekka Enberg
2011-12-06 18:54       ` Scott Wood
2011-12-07  7:35     ` Matt Evans
2011-12-07 18:31       ` Scott Wood
2011-12-08  2:57         ` Matt Evans
2011-12-06  4:06 ` [PATCH 2/8] kvm tools: Generate SPAPR PPC64 guest device tree Matt Evans
2011-12-06  4:06 ` [PATCH 3/8] kvm tools: Add SPAPR PPC64 hcall & rtascall structure Matt Evans
2011-12-06  4:06 ` [PATCH 4/8] kvm tools: Add SPAPR PPC64 HV console Matt Evans
2011-12-06  4:06 ` [PATCH 5/8] kvm tools: Add PPC64 XICS interrupt controller support Matt Evans
2011-12-06  4:06 ` [PATCH 6/8] kvm tools: Add PPC64 PCI Host Bridge Matt Evans
2011-12-06  4:06 ` [PATCH 7/8] kvm tools: Add PPC64 kvm_cpu__emulate_io() Matt Evans
2011-12-06  4:06 ` [PATCH 8/8] kvm tools: Make virtio-pci's ioeventfd__add_event() fall back gracefully if ioeventfds unavailable Matt Evans

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).