* [PATCH V2 1/6] kvm tools: Generate SPAPR PPC64 guest device tree
2011-12-13 7:10 [PATCH V2 0/6] Add initial SPAPR PPC64 architecture support Matt Evans
@ 2011-12-13 7:10 ` Matt Evans
2011-12-14 2:25 ` David Gibson
2011-12-13 7:10 ` [PATCH V2 2/6] kvm tools: Add SPAPR PPC64 hcall & rtascall structure Matt Evans
` (4 subsequent siblings)
5 siblings, 1 reply; 12+ messages in thread
From: Matt Evans @ 2011-12-13 7:10 UTC (permalink / raw)
To: kvm, kvm-ppc; +Cc: penberg, asias.hejun, levinsasha928, gorcunov, david, aik
The generated DT is the bare minimum structure required for SPAPR (on which
subsequent patches for VIO, XICS, PCI etc. will build): root node, cpus, memory.
Some aspects are currently hardwired for simplicity, for example advertised
page sizes, HPT size, SLB size, VMX/DFP, etc. Future support of a variety
of POWER CPUs should acquire this info from the host and encode appropriately.
This requires a 64-bit libfdt.
Signed-off-by: Matt Evans <matt@ozlabs.org>
---
tools/kvm/Makefile | 3 +-
tools/kvm/powerpc/include/kvm/kvm-arch.h | 10 ++
tools/kvm/powerpc/kvm.c | 141 ++++++++++++++++++++++++++++++
3 files changed, 153 insertions(+), 1 deletions(-)
diff --git a/tools/kvm/Makefile b/tools/kvm/Makefile
index 5bb3f08..4ee4805 100644
--- a/tools/kvm/Makefile
+++ b/tools/kvm/Makefile
@@ -132,7 +132,8 @@ ifeq ($(uname_M), ppc64)
OBJS += powerpc/kvm.o
OBJS += powerpc/kvm-cpu.o
ARCH_INCLUDE := powerpc/include
- CFLAGS += -m64
+ CFLAGS += -m64
+ LIBS += -lfdt
endif
###
diff --git a/tools/kvm/powerpc/include/kvm/kvm-arch.h b/tools/kvm/powerpc/include/kvm/kvm-arch.h
index da61774..33a3827 100644
--- a/tools/kvm/powerpc/include/kvm/kvm-arch.h
+++ b/tools/kvm/powerpc/include/kvm/kvm-arch.h
@@ -69,4 +69,14 @@ struct kvm {
const char *name;
};
+/* Helper for the various bits of code that generate FDT nodes */
+#define _FDT(exp) \
+ do { \
+ int ret = (exp); \
+ if (ret < 0) { \
+ die("Error creating device tree: %s: %s\n", \
+ #exp, fdt_strerror(ret)); \
+ } \
+ } while (0)
+
#endif /* KVM__KVM_ARCH_H */
diff --git a/tools/kvm/powerpc/kvm.c b/tools/kvm/powerpc/kvm.c
index f838a8f..95ed1cc 100644
--- a/tools/kvm/powerpc/kvm.c
+++ b/tools/kvm/powerpc/kvm.c
@@ -3,6 +3,9 @@
*
* Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation.
*
+ * Portions of FDT setup borrowed from QEMU, copyright 2010 David Gibson, IBM
+ * Corporation.
+ *
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published
* by the Free Software Foundation.
@@ -29,6 +32,8 @@
#include <linux/byteorder.h>
#include <libfdt.h>
+#define HPT_ORDER 24
+
#define HUGETLBFS_PATH "/var/lib/hugetlbfs/global/pagesize-16MB/"
static char kern_cmdline[2048];
@@ -168,9 +173,145 @@ bool load_bzimage(struct kvm *kvm, int fd_kernel,
return false;
}
+#define SMT_THREADS 4
+
+static uint32_t mfpvr(void)
+{
+ uint32_t r;
+ asm volatile ("mfpvr %0" : "=r"(r));
+ return r;
+}
+
static void setup_fdt(struct kvm *kvm)
{
+ uint64_t mem_reg_property[] = { 0, cpu_to_be64(kvm->ram_size) };
+ int smp_cpus = kvm->nrcpus;
+ char hypertas_prop_kvm[] = "hcall-pft\0hcall-term\0"
+ "hcall-dabr\0hcall-interrupt\0hcall-tce\0hcall-vio\0"
+ "hcall-splpar\0hcall-bulk";
+ int i, j;
+ char cpu_name[30];
+ u8 staging_fdt[FDT_MAX_SIZE];
+ uint32_t pvr = mfpvr();
+
+ /* Generate an appropriate DT at kvm->fdt_gra */
+ void *fdt_dest = guest_flat_to_host(kvm, kvm->fdt_gra);
+ void *fdt = staging_fdt;
+
+ _FDT(fdt_create(fdt, FDT_MAX_SIZE));
+ _FDT(fdt_finish_reservemap(fdt));
+
+ _FDT(fdt_begin_node(fdt, ""));
+
+ _FDT(fdt_property_string(fdt, "device_type", "chrp"));
+ _FDT(fdt_property_string(fdt, "model", "IBM pSeries (kvmtool)"));
+ _FDT(fdt_property_cell(fdt, "#address-cells", 0x2));
+ _FDT(fdt_property_cell(fdt, "#size-cells", 0x2));
+
+ /* /chosen */
+ _FDT(fdt_begin_node(fdt, "chosen"));
+ /* cmdline */
+ _FDT(fdt_property_string(fdt, "bootargs", kern_cmdline));
+ /* Initrd */
+ if (kvm->initrd_size != 0) {
+ uint32_t ird_st_prop = cpu_to_be32(kvm->initrd_gra);
+ uint32_t ird_end_prop = cpu_to_be32(kvm->initrd_gra +
+ kvm->initrd_size);
+ _FDT(fdt_property(fdt, "linux,initrd-start",
+ &ird_st_prop, sizeof(ird_st_prop)));
+ _FDT(fdt_property(fdt, "linux,initrd-end",
+ &ird_end_prop, sizeof(ird_end_prop)));
+ }
+ _FDT(fdt_end_node(fdt));
+
+ /*
+ * Memory: We don't alloc. a separate RMA yet. If we ever need to
+ * (CAP_PPC_RMA == 2) then have one memory node for 0->RMAsize, and
+ * another RMAsize->endOfMem.
+ */
+ _FDT(fdt_begin_node(fdt, "memory@0"));
+ _FDT(fdt_property_string(fdt, "device_type", "memory"));
+ _FDT(fdt_property(fdt, "reg", mem_reg_property,
+ sizeof(mem_reg_property)));
+ _FDT(fdt_end_node(fdt));
+
+ /* CPUs */
+ _FDT(fdt_begin_node(fdt, "cpus"));
+ _FDT(fdt_property_cell(fdt, "#address-cells", 0x1));
+ _FDT(fdt_property_cell(fdt, "#size-cells", 0x0));
+
+ for (i = 0; i < smp_cpus; i += SMT_THREADS) {
+ /*
+ * These page and segment sizes are a basic minimum set.
+ * Really, we should be fancier and work out what the host
+ * supports then encode this here.
+ */
+ int32_t page_sizes_prop[] = {0xc, 0x0, 0x1, 0xc, 0x0,
+ 0x18, 0x100, 0x1, 0x18, 0x0};
+ int32_t seg_sizes_prop[] = {0x1c, 0x28, 0xffffffff, 0xffffffff};
+ int32_t pft_size_prop[] = { 0, HPT_ORDER };
+ uint32_t servers_prop[SMT_THREADS];
+ uint32_t gservers_prop[SMT_THREADS * 2];
+ int threads = (smp_cpus - i) >= SMT_THREADS ? SMT_THREADS :
+ smp_cpus - i;
+
+ sprintf(cpu_name, "PowerPC,POWER7@%d", i);
+ _FDT(fdt_begin_node(fdt, cpu_name));
+ _FDT(fdt_property_string(fdt, "name", "PowerPC,POWER7"));
+ _FDT(fdt_property_string(fdt, "device_type", "cpu"));
+
+ _FDT(fdt_property_cell(fdt, "reg", i));
+ _FDT(fdt_property_cell(fdt, "cpu-version", pvr));
+ _FDT(fdt_property_cell(fdt, "dcache-block-size", 0x00000080));
+ _FDT(fdt_property_cell(fdt, "icache-block-size", 0x00000080));
+ _FDT(fdt_property_cell(fdt, "timebase-frequency", 512000000));
+ _FDT(fdt_property_cell(fdt, "clock-frequency", 0xddbab200));
+
+ /* SLB size is hardwired as we currently assume POWERn */
+ _FDT(fdt_property_cell(fdt, "ibm,slb-size", 32));
+ /*
+ * HPT size is also hardwired; KVM currently fixes it at 16MB
+ * but the moment that changes we'll need to read it out of the
+ * kernel.
+ */
+ _FDT(fdt_property(fdt, "ibm,pft-size", pft_size_prop,
+ sizeof(pft_size_prop)));
+
+ _FDT(fdt_property_string(fdt, "status", "okay"));
+ _FDT(fdt_property(fdt, "64-bit", NULL, 0));
+ /* A server for each thread in this core */
+ for (j = 0; j < SMT_THREADS; j++) {
+ servers_prop[j] = cpu_to_be32(i+j);
+ /*
+ * Hack borrowed from QEMU, direct the group queues back
+ * to cpu 0:
+ */
+ gservers_prop[j*2] = cpu_to_be32(i+j);
+ gservers_prop[j*2 + 1] = 0;
+ }
+ _FDT(fdt_property(fdt, "ibm,ppc-interrupt-server#s",
+ servers_prop, threads * sizeof(uint32_t)));
+ _FDT(fdt_property(fdt, "ibm,ppc-interrupt-gserver#s",
+ gservers_prop,
+ threads * 2 * sizeof(uint32_t)));
+ _FDT(fdt_property(fdt, "ibm,segment-page-sizes",
+ page_sizes_prop, sizeof(page_sizes_prop)));
+ _FDT(fdt_property(fdt, "ibm,processor-segment-sizes",
+ seg_sizes_prop, sizeof(seg_sizes_prop)));
+ /* And VMX / DFP */
+ _FDT(fdt_property_cell(fdt, "ibm,vmx", 0x2));
+ _FDT(fdt_property_cell(fdt, "ibm,dfp", 0x1));
+ _FDT(fdt_end_node(fdt));
+ }
+ _FDT(fdt_end_node(fdt));
+
+ /* Finalise: */
+ _FDT(fdt_end_node(fdt)); /* Root node */
+ _FDT(fdt_finish(fdt));
+ _FDT(fdt_open_into(fdt, fdt_dest, FDT_MAX_SIZE));
+ _FDT(fdt_add_mem_rsv(fdt_dest, kvm->rtas_gra, kvm->rtas_size));
+ _FDT(fdt_pack(fdt_dest));
}
/**
--
1.7.0.4
^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH V2 1/6] kvm tools: Generate SPAPR PPC64 guest device tree
2011-12-13 7:10 ` [PATCH V2 1/6] kvm tools: Generate SPAPR PPC64 guest device tree Matt Evans
@ 2011-12-14 2:25 ` David Gibson
0 siblings, 0 replies; 12+ messages in thread
From: David Gibson @ 2011-12-14 2:25 UTC (permalink / raw)
To: Matt Evans
Cc: kvm, kvm-ppc, penberg, asias.hejun, levinsasha928, gorcunov, aik
On Tue, Dec 13, 2011 at 06:10:45PM +1100, Matt Evans wrote:
> The generated DT is the bare minimum structure required for SPAPR (on which
> subsequent patches for VIO, XICS, PCI etc. will build); root node, cpus, memory.
>
> Some aspects are currently hardwired for simplicity, for example advertised
> page sizes, HPT size, SLB size, VMX/DFP, etc. Future support of a variety
> of POWER CPUs should acquire this info from the host and encode appropriately.
>
> This requires a 64-bit libfdt.
There's already a copy of libfdt embedded in the kernel tree
(scripts/dtc/libfdt), which you should be able to use to build a
64-bit libfdt as you go.
>
> Signed-off-by: Matt Evans <matt@ozlabs.org>
> ---
> tools/kvm/Makefile | 3 +-
> tools/kvm/powerpc/include/kvm/kvm-arch.h | 10 ++
> tools/kvm/powerpc/kvm.c | 141 ++++++++++++++++++++++++++++++
> 3 files changed, 153 insertions(+), 1 deletions(-)
>
> diff --git a/tools/kvm/Makefile b/tools/kvm/Makefile
> index 5bb3f08..4ee4805 100644
> --- a/tools/kvm/Makefile
> +++ b/tools/kvm/Makefile
> @@ -132,7 +132,8 @@ ifeq ($(uname_M), ppc64)
> OBJS += powerpc/kvm.o
> OBJS += powerpc/kvm-cpu.o
> ARCH_INCLUDE := powerpc/include
> - CFLAGS += -m64
> + CFLAGS += -m64
> + LIBS += -lfdt
> endif
>
> ###
> diff --git a/tools/kvm/powerpc/include/kvm/kvm-arch.h b/tools/kvm/powerpc/include/kvm/kvm-arch.h
> index da61774..33a3827 100644
> --- a/tools/kvm/powerpc/include/kvm/kvm-arch.h
> +++ b/tools/kvm/powerpc/include/kvm/kvm-arch.h
> @@ -69,4 +69,14 @@ struct kvm {
> const char *name;
> };
>
> +/* Helper for the various bits of code that generate FDT nodes */
> +#define _FDT(exp) \
> + do { \
> + int ret = (exp); \
> + if (ret < 0) { \
> + die("Error creating device tree: %s: %s\n", \
> + #exp, fdt_strerror(ret)); \
> + } \
> + } while (0)
> +
> #endif /* KVM__KVM_ARCH_H */
> diff --git a/tools/kvm/powerpc/kvm.c b/tools/kvm/powerpc/kvm.c
> index f838a8f..95ed1cc 100644
> --- a/tools/kvm/powerpc/kvm.c
> +++ b/tools/kvm/powerpc/kvm.c
> @@ -3,6 +3,9 @@
> *
> * Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation.
> *
> + * Portions of FDT setup borrowed from QEMU, copyright 2010 David Gibson, IBM
> + * Corporation.
> + *
> * This program is free software; you can redistribute it and/or modify it
> * under the terms of the GNU General Public License version 2 as published
> * by the Free Software Foundation.
> @@ -29,6 +32,8 @@
> #include <linux/byteorder.h>
> #include <libfdt.h>
>
> +#define HPT_ORDER 24
> +
> #define HUGETLBFS_PATH "/var/lib/hugetlbfs/global/pagesize-16MB/"
>
> static char kern_cmdline[2048];
> @@ -168,9 +173,145 @@ bool load_bzimage(struct kvm *kvm, int fd_kernel,
> return false;
> }
>
> +#define SMT_THREADS 4
> +
> +static uint32_t mfpvr(void)
> +{
> + uint32_t r;
> + asm volatile ("mfpvr %0" : "=r"(r));
> + return r;
> +}
> +
> static void setup_fdt(struct kvm *kvm)
> {
> + uint64_t mem_reg_property[] = { 0, cpu_to_be64(kvm->ram_size) };
> + int smp_cpus = kvm->nrcpus;
> + char hypertas_prop_kvm[] = "hcall-pft\0hcall-term\0"
> + "hcall-dabr\0hcall-interrupt\0hcall-tce\0hcall-vio\0"
> + "hcall-splpar\0hcall-bulk";
> + int i, j;
> + char cpu_name[30];
> + u8 staging_fdt[FDT_MAX_SIZE];
> + uint32_t pvr = mfpvr();
> +
> + /* Generate an appropriate DT at kvm->fdt_gra */
> + void *fdt_dest = guest_flat_to_host(kvm, kvm->fdt_gra);
> + void *fdt = staging_fdt;
> +
> + _FDT(fdt_create(fdt, FDT_MAX_SIZE));
> + _FDT(fdt_finish_reservemap(fdt));
> +
> + _FDT(fdt_begin_node(fdt, ""));
> +
> + _FDT(fdt_property_string(fdt, "device_type", "chrp"));
> + _FDT(fdt_property_string(fdt, "model", "IBM pSeries (kvmtool)"));
> + _FDT(fdt_property_cell(fdt, "#address-cells", 0x2));
> + _FDT(fdt_property_cell(fdt, "#size-cells", 0x2));
> +
> + /* /chosen */
> + _FDT(fdt_begin_node(fdt, "chosen"));
> + /* cmdline */
> + _FDT(fdt_property_string(fdt, "bootargs", kern_cmdline));
> + /* Initrd */
> + if (kvm->initrd_size != 0) {
> + uint32_t ird_st_prop = cpu_to_be32(kvm->initrd_gra);
> + uint32_t ird_end_prop = cpu_to_be32(kvm->initrd_gra +
> + kvm->initrd_size);
> + _FDT(fdt_property(fdt, "linux,initrd-start",
> + &ird_st_prop, sizeof(ird_st_prop)));
> + _FDT(fdt_property(fdt, "linux,initrd-end",
> + &ird_end_prop, sizeof(ird_end_prop)));
> + }
> + _FDT(fdt_end_node(fdt));
> +
> + /*
> + * Memory: We don't alloc. a separate RMA yet. If we ever need to
> + * (CAP_PPC_RMA == 2) then have one memory node for 0->RMAsize, and
> + * another RMAsize->endOfMem.
> + */
> + _FDT(fdt_begin_node(fdt, "memory@0"));
> + _FDT(fdt_property_string(fdt, "device_type", "memory"));
> + _FDT(fdt_property(fdt, "reg", mem_reg_property,
> + sizeof(mem_reg_property)));
> + _FDT(fdt_end_node(fdt));
> +
> + /* CPUs */
> + _FDT(fdt_begin_node(fdt, "cpus"));
> + _FDT(fdt_property_cell(fdt, "#address-cells", 0x1));
> + _FDT(fdt_property_cell(fdt, "#size-cells", 0x0));
> +
> + for (i = 0; i < smp_cpus; i += SMT_THREADS) {
> + /*
> + * These page and segment sizes are a basic minimum set.
> + * Really, we should be fancier and work out what the host
> + * supports then encode this here.
> + */
> + int32_t page_sizes_prop[] = {0xc, 0x0, 0x1, 0xc, 0x0,
> + 0x18, 0x100, 0x1, 0x18, 0x0};
> + int32_t seg_sizes_prop[] = {0x1c, 0x28, 0xffffffff,
> 0xffffffff};
So here and in the vmx and dfp settings below, you appear to assume
POWER7, but I haven't spotted any code to check that assumption.
> + int32_t pft_size_prop[] = { 0, HPT_ORDER };
> + uint32_t servers_prop[SMT_THREADS];
> + uint32_t gservers_prop[SMT_THREADS * 2];
> + int threads = (smp_cpus - i) >= SMT_THREADS ? SMT_THREADS :
> + smp_cpus - i;
> +
> + sprintf(cpu_name, "PowerPC,POWER7@%d", i);
> + _FDT(fdt_begin_node(fdt, cpu_name));
> + _FDT(fdt_property_string(fdt, "name", "PowerPC,POWER7"));
> + _FDT(fdt_property_string(fdt, "device_type", "cpu"));
> +
> + _FDT(fdt_property_cell(fdt, "reg", i));
> + _FDT(fdt_property_cell(fdt, "cpu-version", pvr));
> + _FDT(fdt_property_cell(fdt, "dcache-block-size", 0x00000080));
> + _FDT(fdt_property_cell(fdt, "icache-block-size", 0x00000080));
> + _FDT(fdt_property_cell(fdt, "timebase-frequency", 512000000));
> + _FDT(fdt_property_cell(fdt, "clock-frequency", 0xddbab200));
> +
> + /* SLB size is hardwired as we currently assume POWERn */
> + _FDT(fdt_property_cell(fdt, "ibm,slb-size", 32));
> + /*
> + * HPT size is also hardwired; KVM currently fixes it at 16MB
> + * but the moment that changes we'll need to read it out of the
> + * kernel.
> + */
> + _FDT(fdt_property(fdt, "ibm,pft-size", pft_size_prop,
> + sizeof(pft_size_prop)));
> +
> + _FDT(fdt_property_string(fdt, "status", "okay"));
> + _FDT(fdt_property(fdt, "64-bit", NULL, 0));
> + /* A server for each thread in this core */
> + for (j = 0; j < SMT_THREADS; j++) {
> + servers_prop[j] = cpu_to_be32(i+j);
> + /*
> + * Hack borrowed from QEMU, direct the group queues back
> + * to cpu 0:
> + */
> + gservers_prop[j*2] = cpu_to_be32(i+j);
> + gservers_prop[j*2 + 1] = 0;
> + }
> + _FDT(fdt_property(fdt, "ibm,ppc-interrupt-server#s",
> + servers_prop, threads * sizeof(uint32_t)));
> + _FDT(fdt_property(fdt, "ibm,ppc-interrupt-gserver#s",
> + gservers_prop,
> + threads * 2 * sizeof(uint32_t)));
> + _FDT(fdt_property(fdt, "ibm,segment-page-sizes",
> + page_sizes_prop, sizeof(page_sizes_prop)));
> + _FDT(fdt_property(fdt, "ibm,processor-segment-sizes",
> + seg_sizes_prop, sizeof(seg_sizes_prop)));
> + /* And VMX / DFP */
> + _FDT(fdt_property_cell(fdt, "ibm,vmx", 0x2));
> + _FDT(fdt_property_cell(fdt, "ibm,dfp", 0x1));
> + _FDT(fdt_end_node(fdt));
> + }
> + _FDT(fdt_end_node(fdt));
> +
> + /* Finalise: */
> + _FDT(fdt_end_node(fdt)); /* Root node */
> + _FDT(fdt_finish(fdt));
>
> + _FDT(fdt_open_into(fdt, fdt_dest, FDT_MAX_SIZE));
> + _FDT(fdt_add_mem_rsv(fdt_dest, kvm->rtas_gra, kvm->rtas_size));
> + _FDT(fdt_pack(fdt_dest));
> }
>
> /**
--
David Gibson | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH V2 2/6] kvm tools: Add SPAPR PPC64 hcall & rtascall structure
2011-12-13 7:10 [PATCH V2 0/6] Add initial SPAPR PPC64 architecture support Matt Evans
2011-12-13 7:10 ` [PATCH V2 1/6] kvm tools: Generate SPAPR PPC64 guest device tree Matt Evans
@ 2011-12-13 7:10 ` Matt Evans
2011-12-14 2:32 ` David Gibson
2011-12-13 7:10 ` [PATCH V2 3/6] kvm tools: Add SPAPR PPC64 HV console Matt Evans
` (3 subsequent siblings)
5 siblings, 1 reply; 12+ messages in thread
From: Matt Evans @ 2011-12-13 7:10 UTC (permalink / raw)
To: kvm, kvm-ppc; +Cc: penberg, asias.hejun, levinsasha928, gorcunov, david, aik
This patch adds the basic structure for HV calls, their registration and some of
the simpler calls. A similar layout for RTAS calls is also added, again with
some of the simpler RTAS calls used by the guest. The SPAPR RTAS stub is
generated inline. Also, nodes for RTAS are added to the device tree.
Signed-off-by: Matt Evans <matt@ozlabs.org>
---
tools/kvm/Makefile | 2 +
tools/kvm/powerpc/kvm-cpu.c | 6 +
tools/kvm/powerpc/kvm.c | 46 ++++++++-
tools/kvm/powerpc/spapr.h | 105 ++++++++++++++++++
tools/kvm/powerpc/spapr_hcall.c | 132 ++++++++++++++++++++++
tools/kvm/powerpc/spapr_rtas.c | 230 +++++++++++++++++++++++++++++++++++++++
6 files changed, 520 insertions(+), 1 deletions(-)
create mode 100644 tools/kvm/powerpc/spapr.h
create mode 100644 tools/kvm/powerpc/spapr_hcall.c
create mode 100644 tools/kvm/powerpc/spapr_rtas.c
diff --git a/tools/kvm/Makefile b/tools/kvm/Makefile
index 4ee4805..e2a7190 100644
--- a/tools/kvm/Makefile
+++ b/tools/kvm/Makefile
@@ -131,6 +131,8 @@ ifeq ($(uname_M), ppc64)
OBJS += powerpc/irq.o
OBJS += powerpc/kvm.o
OBJS += powerpc/kvm-cpu.o
+ OBJS += powerpc/spapr_hcall.o
+ OBJS += powerpc/spapr_rtas.o
ARCH_INCLUDE := powerpc/include
CFLAGS += -m64
LIBS += -lfdt
diff --git a/tools/kvm/powerpc/kvm-cpu.c b/tools/kvm/powerpc/kvm-cpu.c
index ea99666..ef3db4d 100644
--- a/tools/kvm/powerpc/kvm-cpu.c
+++ b/tools/kvm/powerpc/kvm-cpu.c
@@ -14,6 +14,8 @@
#include "kvm/util.h"
#include "kvm/kvm.h"
+#include "spapr.h"
+
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <signal.h>
@@ -150,6 +152,10 @@ bool kvm_cpu__handle_exit(struct kvm_cpu *vcpu)
bool ret = true;
struct kvm_run *run = vcpu->kvm_run;
switch(run->exit_reason) {
+ case KVM_EXIT_PAPR_HCALL:
+ run->papr_hcall.ret = spapr_hypercall(vcpu, run->papr_hcall.nr,
+ (target_ulong*)run->papr_hcall.args);
+ break;
default:
ret = false;
}
diff --git a/tools/kvm/powerpc/kvm.c b/tools/kvm/powerpc/kvm.c
index 95ed1cc..b3570a9 100644
--- a/tools/kvm/powerpc/kvm.c
+++ b/tools/kvm/powerpc/kvm.c
@@ -14,6 +14,8 @@
#include "kvm/kvm.h"
#include "kvm/util.h"
+#include "spapr.h"
+
#include <linux/kvm.h>
#include <sys/types.h>
@@ -106,6 +108,11 @@ void kvm__arch_init(struct kvm *kvm, const char *kvm_dev, const char *hugetlbfs_
if (cap_ppc_rma == 2)
die("Need contiguous RMA allocation on this hardware, "
"which is not yet supported.");
+
+ /* Do these before FDT setup, IRQ setup, etc. */
+ /* FIXME: SPAPR-specific */
+ hypercall_init();
+ register_core_rtas();
}
void kvm__irq_line(struct kvm *kvm, int irq, int level)
@@ -182,6 +189,11 @@ static uint32_t mfpvr(void)
return r;
}
+/*
+ * Set up the FDT for the kernel: This function is currently fairly SPAPR-heavy,
+ * and whilst most PPC targets will require CPU/memory nodes, others like RTAS
+ * should eventually be added separately.
+ */
static void setup_fdt(struct kvm *kvm)
{
uint64_t mem_reg_property[] = { 0, cpu_to_be64(kvm->ram_size) };
@@ -208,6 +220,20 @@ static void setup_fdt(struct kvm *kvm)
_FDT(fdt_property_cell(fdt, "#address-cells", 0x2));
_FDT(fdt_property_cell(fdt, "#size-cells", 0x2));
+ /* RTAS */
+ _FDT(fdt_begin_node(fdt, "rtas"));
+ /* This is what the kernel uses to switch 'We're an LPAR'! */
+ _FDT(fdt_property(fdt, "ibm,hypertas-functions", hypertas_prop_kvm,
+ sizeof(hypertas_prop_kvm)));
+ _FDT(fdt_property_cell(fdt, "linux,rtas-base", kvm->rtas_gra));
+ _FDT(fdt_property_cell(fdt, "linux,rtas-entry", kvm->rtas_gra));
+ _FDT(fdt_property_cell(fdt, "rtas-size", kvm->rtas_size));
+ /* Now add properties for all RTAS tokens: */
+ if (spapr_rtas_fdt_setup(kvm, fdt))
+ die("Couldn't create RTAS FDT properties\n");
+
+ _FDT(fdt_end_node(fdt));
+
/* /chosen */
_FDT(fdt_begin_node(fdt, "chosen"));
/* cmdline */
@@ -319,7 +345,25 @@ static void setup_fdt(struct kvm *kvm)
*/
void kvm__arch_setup_firmware(struct kvm *kvm)
{
- /* Load RTAS */
+ /*
+ * Set up RTAS stub. All it is is a single hypercall:
+ * 0: 7c 64 1b 78 mr r4,r3
+ * 4: 3c 60 00 00 lis r3,0
+ * 8: 60 63 f0 00 ori r3,r3,61440
+ * c: 44 00 00 22 sc 1
+ * 10: 4e 80 00 20 blr
+ */
+ uint32_t *rtas = guest_flat_to_host(kvm, kvm->rtas_gra);
+
+ rtas[0] = 0x7c641b78;
+ rtas[1] = 0x3c600000;
+ rtas[2] = 0x6063f000;
+ rtas[3] = 0x44000022;
+ rtas[4] = 0x4e800020;
+ kvm->rtas_size = 20;
+
+ pr_info("Set up %ld bytes of RTAS at 0x%lx\n",
+ kvm->rtas_size, kvm->rtas_gra);
/* Load SLOF */
diff --git a/tools/kvm/powerpc/spapr.h b/tools/kvm/powerpc/spapr.h
new file mode 100644
index 0000000..57cece1
--- /dev/null
+++ b/tools/kvm/powerpc/spapr.h
@@ -0,0 +1,105 @@
+/*
+ * SPAPR definitions and declarations
+ *
+ * Borrowed heavily from QEMU's spapr.h,
+ * Copyright (c) 2010 David Gibson, IBM Corporation.
+ *
+ * Modifications by Matt Evans <matt@ozlabs.org>, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#if !defined(__HW_SPAPR_H__)
+#define __HW_SPAPR_H__
+
+#include <inttypes.h>
+#include "kvm/kvm.h"
+#include "kvm/kvm-cpu.h"
+
+typedef unsigned long target_ulong;
+typedef uintptr_t target_phys_addr_t;
+
+
+#define H_SUCCESS 0
+#define H_HARDWARE -1 /* Hardware error */
+#define H_FUNCTION -2 /* Function not supported */
+#define H_PRIVILEGE -3 /* Caller not privileged */
+#define H_PARAMETER -4 /* Parameter invalid, out-of-range or conflicting */
+
+/* pSeries hypervisor opcodes */
+#define H_SET_DABR 0x28
+#define H_LOGICAL_CI_LOAD 0x3c
+#define H_LOGICAL_CI_STORE 0x40
+#define H_LOGICAL_CACHE_LOAD 0x44
+#define H_LOGICAL_CACHE_STORE 0x48
+#define H_LOGICAL_ICBI 0x4c
+#define H_LOGICAL_DCBF 0x50
+#define H_GET_TERM_CHAR 0x54
+#define H_PUT_TERM_CHAR 0x58
+
+/* XICS/IRQ controller hcalls */
+#define H_EOI 0x64
+#define H_CPPR 0x68
+#define H_IPI 0x6c
+#define H_IPOLL 0x70
+#define H_XIRR 0x74
+
+#define MAX_HCALL_OPCODE H_XIRR
+
+/*
+ * The hcalls above are standardized in PAPR and implemented by pHyp
+ * as well.
+ *
+ * We also need some hcalls which are specific to qemu / KVM-on-POWER.
+ * So far we just need one for H_RTAS, but in future we'll need more
+ * for extensions like virtio. We put those into the 0xf000-0xfffc
+ * range which is reserved by PAPR for "platform-specific" hcalls.
+ */
+#define KVMPPC_HCALL_BASE 0xf000
+#define KVMPPC_H_RTAS (KVMPPC_HCALL_BASE + 0x0)
+#define KVMPPC_HCALL_MAX KVMPPC_H_RTAS
+
+#define DEBUG_SPAPR_HCALLS
+
+#ifdef DEBUG_SPAPR_HCALLS
+#define hcall_dprintf(fmt, ...) \
+ do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
+#else
+#define hcall_dprintf(fmt, ...) \
+ do { } while (0)
+#endif
+
+typedef target_ulong (*spapr_hcall_fn)(struct kvm_cpu *vcpu,
+ target_ulong opcode,
+ target_ulong *args);
+
+void hypercall_init(void);
+void register_core_rtas(void);
+
+void spapr_register_hypercall(target_ulong opcode, spapr_hcall_fn fn);
+target_ulong spapr_hypercall(struct kvm_cpu *vcpu, target_ulong opcode,
+ target_ulong *args);
+
+int spapr_rtas_fdt_setup(struct kvm *kvm, void *fdt);
+
+static inline uint32_t rtas_ld(struct kvm *kvm, target_ulong phys, int n)
+{
+ return *((uint32_t *)guest_flat_to_host(kvm, phys + 4*n));
+}
+
+static inline void rtas_st(struct kvm *kvm, target_ulong phys, int n, uint32_t val)
+{
+ *((uint32_t *)guest_flat_to_host(kvm, phys + 4*n)) = val;
+}
+
+typedef void (*spapr_rtas_fn)(struct kvm_cpu *vcpu, uint32_t token,
+ uint32_t nargs, target_ulong args,
+ uint32_t nret, target_ulong rets);
+void spapr_rtas_register(const char *name, spapr_rtas_fn fn);
+target_ulong spapr_rtas_call(struct kvm_cpu *vcpu,
+ uint32_t token, uint32_t nargs, target_ulong args,
+ uint32_t nret, target_ulong rets);
+
+#endif /* !defined (__HW_SPAPR_H__) */
diff --git a/tools/kvm/powerpc/spapr_hcall.c b/tools/kvm/powerpc/spapr_hcall.c
new file mode 100644
index 0000000..f948400
--- /dev/null
+++ b/tools/kvm/powerpc/spapr_hcall.c
@@ -0,0 +1,132 @@
+/*
+ * SPAPR hypercalls
+ *
+ * Borrowed heavily from QEMU's spapr_hcall.c,
+ * Copyright (c) 2010 David Gibson, IBM Corporation.
+ *
+ * Copyright (c) 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include "spapr.h"
+#include "kvm/util.h"
+#include "kvm/kvm.h"
+#include "kvm/kvm-cpu.h"
+
+#include <stdio.h>
+#include <assert.h>
+
+static spapr_hcall_fn papr_hypercall_table[(MAX_HCALL_OPCODE / 4) + 1];
+static spapr_hcall_fn kvmppc_hypercall_table[KVMPPC_HCALL_MAX -
+ KVMPPC_HCALL_BASE + 1];
+
+static target_ulong h_set_dabr(struct kvm_cpu *vcpu, target_ulong opcode, target_ulong *args)
+{
+ pr_warning("Implement %s!\n", __PRETTY_FUNCTION__);
+ return H_HARDWARE;
+}
+
+static target_ulong h_rtas(struct kvm_cpu *vcpu, target_ulong opcode, target_ulong *args)
+{
+ target_ulong rtas_r3 = args[0];
+ /*
+ * Pointer read from phys mem; these ptrs cannot be MMIO (!) so just
+ * reference guest RAM directly.
+ */
+ uint32_t token, nargs, nret;
+
+ token = rtas_ld(vcpu->kvm, rtas_r3, 0);
+ nargs = rtas_ld(vcpu->kvm, rtas_r3, 1);
+ nret = rtas_ld(vcpu->kvm, rtas_r3, 2);
+
+ return spapr_rtas_call(vcpu, token, nargs, rtas_r3 + 12,
+ nret, rtas_r3 + 12 + 4*nargs);
+}
+
+static target_ulong h_logical_load(struct kvm_cpu *vcpu, target_ulong opcode, target_ulong *args)
+{
+ /* SLOF will require these, though kernel doesn't. */
+ die(__PRETTY_FUNCTION__);
+ return H_PARAMETER;
+}
+
+static target_ulong h_logical_store(struct kvm_cpu *vcpu, target_ulong opcode, target_ulong *args)
+{
+ /* SLOF will require these, though kernel doesn't. */
+ die(__PRETTY_FUNCTION__);
+ return H_PARAMETER;
+}
+
+static target_ulong h_logical_icbi(struct kvm_cpu *vcpu, target_ulong opcode, target_ulong *args)
+{
+ /* Nothing to do on emulation, KVM will trap this in the kernel */
+ return H_SUCCESS;
+}
+
+static target_ulong h_logical_dcbf(struct kvm_cpu *vcpu, target_ulong opcode, target_ulong *args)
+{
+ /* Nothing to do on emulation, KVM will trap this in the kernel */
+ return H_SUCCESS;
+}
+
+void spapr_register_hypercall(target_ulong opcode, spapr_hcall_fn fn)
+{
+ spapr_hcall_fn *slot;
+
+ if (opcode <= MAX_HCALL_OPCODE) {
+ assert((opcode & 0x3) == 0);
+
+ slot = &papr_hypercall_table[opcode / 4];
+ } else {
+ assert((opcode >= KVMPPC_HCALL_BASE) &&
+ (opcode <= KVMPPC_HCALL_MAX));
+
+ slot = &kvmppc_hypercall_table[opcode - KVMPPC_HCALL_BASE];
+ }
+
+ assert(!(*slot) || (fn == *slot));
+ *slot = fn;
+}
+
+target_ulong spapr_hypercall(struct kvm_cpu *vcpu, target_ulong opcode,
+ target_ulong *args)
+{
+ if ((opcode <= MAX_HCALL_OPCODE)
+ && ((opcode & 0x3) == 0)) {
+ spapr_hcall_fn fn = papr_hypercall_table[opcode / 4];
+
+ if (fn) {
+ return fn(vcpu, opcode, args);
+ }
+ } else if ((opcode >= KVMPPC_HCALL_BASE) &&
+ (opcode <= KVMPPC_HCALL_MAX)) {
+ spapr_hcall_fn fn = kvmppc_hypercall_table[opcode -
+ KVMPPC_HCALL_BASE];
+
+ if (fn) {
+ return fn(vcpu, opcode, args);
+ }
+ }
+
+ hcall_dprintf("Unimplemented hcall 0x%lx\n", opcode);
+ return H_FUNCTION;
+}
+
+void hypercall_init(void)
+{
+ /* hcall-dabr */
+ spapr_register_hypercall(H_SET_DABR, h_set_dabr);
+
+ spapr_register_hypercall(H_LOGICAL_CI_LOAD, h_logical_load);
+ spapr_register_hypercall(H_LOGICAL_CI_STORE, h_logical_store);
+ spapr_register_hypercall(H_LOGICAL_CACHE_LOAD, h_logical_load);
+ spapr_register_hypercall(H_LOGICAL_CACHE_STORE, h_logical_store);
+ spapr_register_hypercall(H_LOGICAL_ICBI, h_logical_icbi);
+ spapr_register_hypercall(H_LOGICAL_DCBF, h_logical_dcbf);
+
+ /* KVM-PPC specific hcalls */
+ spapr_register_hypercall(KVMPPC_H_RTAS, h_rtas);
+}
diff --git a/tools/kvm/powerpc/spapr_rtas.c b/tools/kvm/powerpc/spapr_rtas.c
new file mode 100644
index 0000000..72c6b02
--- /dev/null
+++ b/tools/kvm/powerpc/spapr_rtas.c
@@ -0,0 +1,230 @@
+/*
+ * SPAPR base RTAS calls
+ *
+ * Borrowed heavily from QEMU's spapr_rtas.c
+ * Copyright (c) 2010-2011 David Gibson, IBM Corporation.
+ *
+ * Modifications copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include "kvm/kvm.h"
+#include "kvm/kvm-cpu.h"
+#include "kvm/util.h"
+#include "kvm/term.h"
+
+#include "spapr.h"
+
+#include <stdio.h>
+#include <assert.h>
+#include <libfdt.h>
+
+#define TOKEN_BASE 0x2000
+#define TOKEN_MAX 0x100
+
+#define RTAS_CONSOLE
+
+static struct rtas_call {
+ const char *name;
+ spapr_rtas_fn fn;
+} rtas_table[TOKEN_MAX];
+
+struct rtas_call *rtas_next = rtas_table;
+
+
+static void rtas_display_character(struct kvm_cpu *vcpu,
+ uint32_t token, uint32_t nargs,
+ target_ulong args,
+ uint32_t nret, target_ulong rets)
+{
+ char c = rtas_ld(vcpu->kvm, args, 0);
+ term_putc(CONSOLE_HV, &c, 1, 0);
+ rtas_st(vcpu->kvm, rets, 0, 0);
+}
+
+#ifdef RTAS_CONSOLE
+static void rtas_put_term_char(struct kvm_cpu *vcpu,
+ uint32_t token, uint32_t nargs,
+ target_ulong args,
+ uint32_t nret, target_ulong rets)
+{
+ char c = rtas_ld(vcpu->kvm, args, 0);
+ term_putc(CONSOLE_HV, &c, 1, 0);
+ rtas_st(vcpu->kvm, rets, 0, 0);
+}
+
+static void rtas_get_term_char(struct kvm_cpu *vcpu,
+ uint32_t token, uint32_t nargs,
+ target_ulong args,
+ uint32_t nret, target_ulong rets)
+{
+ int c;
+ if (term_readable(CONSOLE_HV, 0) &&
+ (c = term_getc(CONSOLE_HV, 0)) >= 0) {
+ rtas_st(vcpu->kvm, rets, 0, 0);
+ rtas_st(vcpu->kvm, rets, 1, c);
+ } else {
+ rtas_st(vcpu->kvm, rets, 0, -2);
+ }
+}
+#endif
+
+static void rtas_get_time_of_day(struct kvm_cpu *vcpu,
+ uint32_t token, uint32_t nargs,
+ target_ulong args,
+ uint32_t nret, target_ulong rets)
+{
+ struct tm tm;
+ time_t tnow;
+
+ if (nret != 8) {
+ rtas_st(vcpu->kvm, rets, 0, -3);
+ return;
+ }
+
+ tnow = time(NULL);
+ /* Guest time is currently not offset in any way. */
+ gmtime_r(&tnow, &tm);
+
+ rtas_st(vcpu->kvm, rets, 0, 0); /* Success */
+ rtas_st(vcpu->kvm, rets, 1, tm.tm_year + 1900);
+ rtas_st(vcpu->kvm, rets, 2, tm.tm_mon + 1);
+ rtas_st(vcpu->kvm, rets, 3, tm.tm_mday);
+ rtas_st(vcpu->kvm, rets, 4, tm.tm_hour);
+ rtas_st(vcpu->kvm, rets, 5, tm.tm_min);
+ rtas_st(vcpu->kvm, rets, 6, tm.tm_sec);
+ rtas_st(vcpu->kvm, rets, 7, 0);
+}
+
+static void rtas_set_time_of_day(struct kvm_cpu *vcpu,
+ uint32_t token, uint32_t nargs,
+ target_ulong args,
+ uint32_t nret, target_ulong rets)
+{
+ pr_warning("%s called; TOD set ignored.\n", __FUNCTION__);
+}
+
+static void rtas_power_off(struct kvm_cpu *vcpu,
+ uint32_t token, uint32_t nargs, target_ulong args,
+ uint32_t nret, target_ulong rets)
+{
+ if (nargs != 2 || nret != 1) {
+ rtas_st(vcpu->kvm, rets, 0, -3);
+ return;
+ }
+ kvm_cpu__reboot();
+}
+
+static void rtas_query_cpu_stopped_state(struct kvm_cpu *vcpu,
+ uint32_t token, uint32_t nargs,
+ target_ulong args,
+ uint32_t nret, target_ulong rets)
+{
+ unsigned long id;
+
+ if (nargs != 1 || nret != 2) {
+ rtas_st(vcpu->kvm, rets, 0, -3);
+ return;
+ }
+
+ id = rtas_ld(vcpu->kvm, args, 0);
+
+ /* We do start all CPUs. So just return true. */
+ rtas_st(vcpu->kvm, rets, 0, 0);
+ rtas_st(vcpu->kvm, rets, 1, 2);
+}
+
+static void rtas_start_cpu(struct kvm_cpu *vcpu,
+ uint32_t token, uint32_t nargs,
+ target_ulong args,
+ uint32_t nret, target_ulong rets)
+{
+ die(__FUNCTION__);
+}
+
+target_ulong spapr_rtas_call(struct kvm_cpu *vcpu,
+ uint32_t token, uint32_t nargs, target_ulong args,
+ uint32_t nret, target_ulong rets)
+{
+ if ((token >= TOKEN_BASE)
+ && ((token - TOKEN_BASE) < TOKEN_MAX)) {
+ struct rtas_call *call = rtas_table + (token - TOKEN_BASE);
+
+ if (call->fn) {
+ call->fn(vcpu, token, nargs, args, nret, rets);
+ return H_SUCCESS;
+ }
+ }
+
+ /*
+ * HACK: Some Linux early debug code uses RTAS display-character,
+ * but assumes the token value is 0xa (which it is on some real
+ * machines) without looking it up in the device tree. This
+ * special case makes this work
+ */
+ if (token == 0xa) {
+ rtas_display_character(vcpu, 0xa, nargs, args, nret, rets);
+ return H_SUCCESS;
+ }
+
+ hcall_dprintf("Unknown RTAS token 0x%x\n", token);
+ rtas_st(vcpu->kvm, rets, 0, -3);
+ return H_PARAMETER;
+}
+
+void spapr_rtas_register(const char *name, spapr_rtas_fn fn)
+{
+ assert(rtas_next < (rtas_table + TOKEN_MAX));
+
+ rtas_next->name = name;
+ rtas_next->fn = fn;
+
+ rtas_next++;
+}
+
+/*
+ * This is called from the context of an open /rtas node, in order to add
+ * properties for the rtas call tokens.
+ */
+int spapr_rtas_fdt_setup(struct kvm *kvm, void *fdt)
+{
+ int ret;
+ int i;
+
+ for (i = 0; i < TOKEN_MAX; i++) {
+ struct rtas_call *call = &rtas_table[i];
+
+ if (!call->fn) {
+ continue;
+ }
+
+ ret = fdt_property_cell(fdt, call->name, i + TOKEN_BASE);
+
+ if (ret < 0) {
+ pr_warning("Couldn't add rtas token for %s: %s\n",
+ call->name, fdt_strerror(ret));
+ return ret;
+ }
+
+ }
+ return 0;
+}
+
+void register_core_rtas(void)
+{
+ spapr_rtas_register("display-character", rtas_display_character);
+ spapr_rtas_register("get-time-of-day", rtas_get_time_of_day);
+ spapr_rtas_register("set-time-of-day", rtas_set_time_of_day);
+ spapr_rtas_register("power-off", rtas_power_off);
+ spapr_rtas_register("query-cpu-stopped-state",
+ rtas_query_cpu_stopped_state);
+ spapr_rtas_register("start-cpu", rtas_start_cpu);
+#ifdef RTAS_CONSOLE
+ /* These are unused: We do console I/O via hcalls, not rtas. */
+ spapr_rtas_register("put-term-char", rtas_put_term_char);
+ spapr_rtas_register("get-term-char", rtas_get_term_char);
+#endif
+}
--
1.7.0.4
^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH V2 2/6] kvm tools: Add SPAPR PPC64 hcall & rtascall structure
2011-12-13 7:10 ` [PATCH V2 2/6] kvm tools: Add SPAPR PPC64 hcall & rtascall structure Matt Evans
@ 2011-12-14 2:32 ` David Gibson
0 siblings, 0 replies; 12+ messages in thread
From: David Gibson @ 2011-12-14 2:32 UTC (permalink / raw)
To: Matt Evans
Cc: kvm, kvm-ppc, penberg, asias.hejun, levinsasha928, gorcunov, aik
On Tue, Dec 13, 2011 at 06:10:46PM +1100, Matt Evans wrote:
> This patch adds the basic structure for HV calls, their registration and some of
> the simpler calls. A similar layout for RTAS calls is also added, again with
> some of the simpler RTAS calls used by the guest. The SPAPR RTAS stub is
> generated inline. Also, nodes for RTAS are added to the device
> tree.
[snip]
> diff --git a/tools/kvm/powerpc/spapr.h b/tools/kvm/powerpc/spapr.h
> new file mode 100644
> index 0000000..57cece1
> --- /dev/null
> +++ b/tools/kvm/powerpc/spapr.h
> @@ -0,0 +1,105 @@
> +/*
> + * SPAPR definitions and declarations
> + *
> + * Borrowed heavily from QEMU's spapr.h,
> + * Copyright (c) 2010 David Gibson, IBM Corporation.
So, most of the content of this file in qemu, I in turn took from
arch/powerpc/include/asm/hvcall.h in the kernel tree. You might be
better off using that directly.
[snip]
> +static target_ulong h_logical_icbi(struct kvm_cpu *vcpu, target_ulong opcode, target_ulong *args)
> +{
> + /* Nothing to do on emulation, KVM will trap this in the kernel */
> + return H_SUCCESS;
Hcalls that need to be handled by the host kernel should probably dump
an error here rather than silently doing nothing: if the host kernel
does handle them, they should never reach userspace at all.
> +}
> +
> +static target_ulong h_logical_dcbf(struct kvm_cpu *vcpu, target_ulong opcode, target_ulong *args)
> +{
> + /* Nothing to do on emulation, KVM will trap this in the kernel */
> + return H_SUCCESS;
> +}
> +
> +void spapr_register_hypercall(target_ulong opcode, spapr_hcall_fn fn)
> +{
> + spapr_hcall_fn *slot;
> +
> + if (opcode <= MAX_HCALL_OPCODE) {
> + assert((opcode & 0x3) == 0);
> +
> + slot = &papr_hypercall_table[opcode / 4];
> + } else {
> + assert((opcode >= KVMPPC_HCALL_BASE) &&
> + (opcode <= KVMPPC_HCALL_MAX));
> +
> + slot = &kvmppc_hypercall_table[opcode - KVMPPC_HCALL_BASE];
> + }
> +
> + assert(!(*slot) || (fn == *slot));
> + *slot = fn;
> +}
> +
> +target_ulong spapr_hypercall(struct kvm_cpu *vcpu, target_ulong opcode,
> + target_ulong *args)
> +{
> + if ((opcode <= MAX_HCALL_OPCODE)
> + && ((opcode & 0x3) == 0)) {
> + spapr_hcall_fn fn = papr_hypercall_table[opcode / 4];
> +
> + if (fn) {
> + return fn(vcpu, opcode, args);
> + }
> + } else if ((opcode >= KVMPPC_HCALL_BASE) &&
> + (opcode <= KVMPPC_HCALL_MAX)) {
> + spapr_hcall_fn fn = kvmppc_hypercall_table[opcode -
> + KVMPPC_HCALL_BASE];
> +
> + if (fn) {
> + return fn(vcpu, opcode, args);
> + }
> + }
> +
> + hcall_dprintf("Unimplemented hcall 0x%lx\n", opcode);
> + return H_FUNCTION;
> +}
> +
> +void hypercall_init(void)
> +{
> + /* hcall-dabr */
> + spapr_register_hypercall(H_SET_DABR, h_set_dabr);
> +
> + spapr_register_hypercall(H_LOGICAL_CI_LOAD, h_logical_load);
> + spapr_register_hypercall(H_LOGICAL_CI_STORE, h_logical_store);
> + spapr_register_hypercall(H_LOGICAL_CACHE_LOAD, h_logical_load);
> + spapr_register_hypercall(H_LOGICAL_CACHE_STORE, h_logical_store);
> + spapr_register_hypercall(H_LOGICAL_ICBI, h_logical_icbi);
> + spapr_register_hypercall(H_LOGICAL_DCBF, h_logical_dcbf);
> +
> + /* KVM-PPC specific hcalls */
> + spapr_register_hypercall(KVMPPC_H_RTAS, h_rtas);
> +}
> diff --git a/tools/kvm/powerpc/spapr_rtas.c b/tools/kvm/powerpc/spapr_rtas.c
> new file mode 100644
> index 0000000..72c6b02
> --- /dev/null
> +++ b/tools/kvm/powerpc/spapr_rtas.c
> @@ -0,0 +1,230 @@
> +/*
> + * SPAPR base RTAS calls
> + *
> + * Borrowed heavily from QEMU's spapr_rtas.c
> + * Copyright (c) 2010-2011 David Gibson, IBM Corporation.
> + *
> + * Modifications copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms of the GNU General Public License version 2 as published
> + * by the Free Software Foundation.
> + */
> +
> +#include "kvm/kvm.h"
> +#include "kvm/kvm-cpu.h"
> +#include "kvm/util.h"
> +#include "kvm/term.h"
> +
> +#include "spapr.h"
> +
> +#include <stdio.h>
> +#include <assert.h>
> +#include <libfdt.h>
> +
> +#define TOKEN_BASE 0x2000
> +#define TOKEN_MAX 0x100
> +
> +#define RTAS_CONSOLE
> +
> +static struct rtas_call {
> + const char *name;
> + spapr_rtas_fn fn;
> +} rtas_table[TOKEN_MAX];
> +
> +struct rtas_call *rtas_next = rtas_table;
> +
> +
> +static void rtas_display_character(struct kvm_cpu *vcpu,
> + uint32_t token, uint32_t nargs,
> + target_ulong args,
> + uint32_t nret, target_ulong rets)
> +{
> + char c = rtas_ld(vcpu->kvm, args, 0);
> + term_putc(CONSOLE_HV, &c, 1, 0);
> + rtas_st(vcpu->kvm, rets, 0, 0);
> +}
> +
> +#ifdef RTAS_CONSOLE
> +static void rtas_put_term_char(struct kvm_cpu *vcpu,
> + uint32_t token, uint32_t nargs,
> + target_ulong args,
> + uint32_t nret, target_ulong rets)
> +{
> + char c = rtas_ld(vcpu->kvm, args, 0);
> + term_putc(CONSOLE_HV, &c, 1, 0);
> + rtas_st(vcpu->kvm, rets, 0, 0);
> +}
> +
> +static void rtas_get_term_char(struct kvm_cpu *vcpu,
> + uint32_t token, uint32_t nargs,
> + target_ulong args,
> + uint32_t nret, target_ulong rets)
> +{
> + int c;
> + if (term_readable(CONSOLE_HV, 0) &&
> + (c = term_getc(CONSOLE_HV, 0)) >= 0) {
> + rtas_st(vcpu->kvm, rets, 0, 0);
> + rtas_st(vcpu->kvm, rets, 1, c);
> + } else {
> + rtas_st(vcpu->kvm, rets, 0, -2);
> + }
> +}
> +#endif
> +
> +static void rtas_get_time_of_day(struct kvm_cpu *vcpu,
> + uint32_t token, uint32_t nargs,
> + target_ulong args,
> + uint32_t nret, target_ulong rets)
> +{
> + struct tm tm;
> + time_t tnow;
> +
> + if (nret != 8) {
> + rtas_st(vcpu->kvm, rets, 0, -3);
> + return;
> + }
> +
> + tnow = time(NULL);
> + /* Guest time is currently not offset in any way. */
> + gmtime_r(&tnow, &tm);
> +
> + rtas_st(vcpu->kvm, rets, 0, 0); /* Success */
> + rtas_st(vcpu->kvm, rets, 1, tm.tm_year + 1900);
> + rtas_st(vcpu->kvm, rets, 2, tm.tm_mon + 1);
> + rtas_st(vcpu->kvm, rets, 3, tm.tm_mday);
> + rtas_st(vcpu->kvm, rets, 4, tm.tm_hour);
> + rtas_st(vcpu->kvm, rets, 5, tm.tm_min);
> + rtas_st(vcpu->kvm, rets, 6, tm.tm_sec);
> + rtas_st(vcpu->kvm, rets, 7, 0);
> +}
> +
> +static void rtas_set_time_of_day(struct kvm_cpu *vcpu,
> + uint32_t token, uint32_t nargs,
> + target_ulong args,
> + uint32_t nret, target_ulong rets)
> +{
> + pr_warning("%s called; TOD set ignored.\n", __FUNCTION__);
> +}
> +
> +static void rtas_power_off(struct kvm_cpu *vcpu,
> + uint32_t token, uint32_t nargs, target_ulong args,
> + uint32_t nret, target_ulong rets)
> +{
> + if (nargs != 2 || nret != 1) {
> + rtas_st(vcpu->kvm, rets, 0, -3);
> + return;
> + }
> + kvm_cpu__reboot();
> +}
> +
> +static void rtas_query_cpu_stopped_state(struct kvm_cpu *vcpu,
> + uint32_t token, uint32_t nargs,
> + target_ulong args,
> + uint32_t nret, target_ulong rets)
> +{
> + unsigned long id;
> +
> + if (nargs != 1 || nret != 2) {
> + rtas_st(vcpu->kvm, rets, 0, -3);
> + return;
> + }
> +
> + id = rtas_ld(vcpu->kvm, args, 0);
> +
> + /* We do start all CPUs. So just return true. */
> + rtas_st(vcpu->kvm, rets, 0, 0);
> + rtas_st(vcpu->kvm, rets, 1, 2);
> +}
> +
> +static void rtas_start_cpu(struct kvm_cpu *vcpu,
> + uint32_t token, uint32_t nargs,
> + target_ulong args,
> + uint32_t nret, target_ulong rets)
> +{
> + die(__FUNCTION__);
> +}
> +
> +target_ulong spapr_rtas_call(struct kvm_cpu *vcpu,
> + uint32_t token, uint32_t nargs, target_ulong args,
> + uint32_t nret, target_ulong rets)
> +{
> + if ((token >= TOKEN_BASE)
> + && ((token - TOKEN_BASE) < TOKEN_MAX)) {
> + struct rtas_call *call = rtas_table + (token - TOKEN_BASE);
> +
> + if (call->fn) {
> + call->fn(vcpu, token, nargs, args, nret, rets);
> + return H_SUCCESS;
> + }
> + }
> +
> + /*
> + * HACK: Some Linux early debug code uses RTAS display-character,
> + * but assumes the token value is 0xa (which it is on some real
> + * machines) without looking it up in the device tree. This
> + * special case makes this work
> + */
> + if (token == 0xa) {
> + rtas_display_character(vcpu, 0xa, nargs, args, nret, rets);
> + return H_SUCCESS;
> + }
> +
> + hcall_dprintf("Unknown RTAS token 0x%x\n", token);
> + rtas_st(vcpu->kvm, rets, 0, -3);
> + return H_PARAMETER;
> +}
> +
> +void spapr_rtas_register(const char *name, spapr_rtas_fn fn)
> +{
> + assert(rtas_next < (rtas_table + TOKEN_MAX));
> +
> + rtas_next->name = name;
> + rtas_next->fn = fn;
> +
> + rtas_next++;
> +}
> +
> +/*
> + * This is called from the context of an open /rtas node, in order to add
> + * properties for the rtas call tokens.
> + */
> +int spapr_rtas_fdt_setup(struct kvm *kvm, void *fdt)
> +{
> + int ret;
> + int i;
> +
> + for (i = 0; i < TOKEN_MAX; i++) {
> + struct rtas_call *call = &rtas_table[i];
> +
> + if (!call->fn) {
> + continue;
> + }
> +
> + ret = fdt_property_cell(fdt, call->name, i + TOKEN_BASE);
> +
> + if (ret < 0) {
> + pr_warning("Couldn't add rtas token for %s: %s\n",
> + call->name, fdt_strerror(ret));
> + return ret;
> + }
> +
> + }
> + return 0;
> +}
> +
> +void register_core_rtas(void)
> +{
> + spapr_rtas_register("display-character", rtas_display_character);
> + spapr_rtas_register("get-time-of-day", rtas_get_time_of_day);
> + spapr_rtas_register("set-time-of-day", rtas_set_time_of_day);
> + spapr_rtas_register("power-off", rtas_power_off);
> + spapr_rtas_register("query-cpu-stopped-state",
> + rtas_query_cpu_stopped_state);
> + spapr_rtas_register("start-cpu", rtas_start_cpu);
> +#ifdef RTAS_CONSOLE
> + /* These are unused: We do console I/O via hcalls, not rtas. */
> + spapr_rtas_register("put-term-char", rtas_put_term_char);
> + spapr_rtas_register("get-term-char", rtas_get_term_char);
> +#endif
> +}
--
David Gibson | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH V2 3/6] kvm tools: Add SPAPR PPC64 HV console
2011-12-13 7:10 [PATCH V2 0/6] Add initial SPAPR PPC64 architecture support Matt Evans
2011-12-13 7:10 ` [PATCH V2 1/6] kvm tools: Generate SPAPR PPC64 guest device tree Matt Evans
2011-12-13 7:10 ` [PATCH V2 2/6] kvm tools: Add SPAPR PPC64 hcall & rtascall structure Matt Evans
@ 2011-12-13 7:10 ` Matt Evans
2011-12-13 7:10 ` [PATCH V2 4/6] kvm tools: Add PPC64 XICS interrupt controller support Matt Evans
` (2 subsequent siblings)
5 siblings, 0 replies; 12+ messages in thread
From: Matt Evans @ 2011-12-13 7:10 UTC (permalink / raw)
To: kvm, kvm-ppc; +Cc: penberg, asias.hejun, levinsasha928, gorcunov, david, aik
This adds the console code, and adds VIO HV terminal nodes to the
device tree so that the guest kernel will pick the console up.
Signed-off-by: Matt Evans <matt@ozlabs.org>
---
tools/kvm/Makefile | 1 +
tools/kvm/powerpc/kvm.c | 33 ++++++++++++
tools/kvm/powerpc/spapr_hvcons.c | 102 ++++++++++++++++++++++++++++++++++++++
tools/kvm/powerpc/spapr_hvcons.h | 19 +++++++
4 files changed, 155 insertions(+), 0 deletions(-)
create mode 100644 tools/kvm/powerpc/spapr_hvcons.c
create mode 100644 tools/kvm/powerpc/spapr_hvcons.h
diff --git a/tools/kvm/Makefile b/tools/kvm/Makefile
index e2a7190..0a576be 100644
--- a/tools/kvm/Makefile
+++ b/tools/kvm/Makefile
@@ -133,6 +133,7 @@ ifeq ($(uname_M), ppc64)
OBJS += powerpc/kvm-cpu.o
OBJS += powerpc/spapr_hcall.o
OBJS += powerpc/spapr_rtas.o
+ OBJS += powerpc/spapr_hvcons.o
ARCH_INCLUDE := powerpc/include
CFLAGS += -m64
LIBS += -lfdt
diff --git a/tools/kvm/powerpc/kvm.c b/tools/kvm/powerpc/kvm.c
index b3570a9..06fccef 100644
--- a/tools/kvm/powerpc/kvm.c
+++ b/tools/kvm/powerpc/kvm.c
@@ -15,6 +15,7 @@
#include "kvm/util.h"
#include "spapr.h"
+#include "spapr_hvcons.h"
#include <linux/kvm.h>
@@ -113,6 +114,8 @@ void kvm__arch_init(struct kvm *kvm, const char *kvm_dev, const char *hugetlbfs_
/* FIXME: SPAPR-specific */
hypercall_init();
register_core_rtas();
+ /* Now that hypercalls are initialised, register a couple for the console: */
+ spapr_hvcons_init();
}
void kvm__irq_line(struct kvm *kvm, int irq, int level)
@@ -126,6 +129,12 @@ void kvm__irq_trigger(struct kvm *kvm, int irq)
kvm__irq_line(kvm, irq, 0);
}
+void kvm__arch_periodic_poll(struct kvm *kvm)
+{
+ /* FIXME: Should register callbacks to platform-specific polls */
+ spapr_hvcons_poll(kvm);
+}
+
int load_flat_binary(struct kvm *kvm, int fd_kernel, int fd_initrd, const char *kernel_cmdline)
{
void *p;
@@ -248,6 +257,13 @@ static void setup_fdt(struct kvm *kvm)
_FDT(fdt_property(fdt, "linux,initrd-end",
&ird_end_prop, sizeof(ird_end_prop)));
}
+
+ /*
+ * stdout-path: This is assuming we're using the HV console. Also, the
+ * address is hardwired until we do a VIO bus.
+ */
+ _FDT(fdt_property_string(fdt, "linux,stdout-path",
+ "/vdevice/vty@30000000"));
_FDT(fdt_end_node(fdt));
/*
@@ -331,6 +347,23 @@ static void setup_fdt(struct kvm *kvm)
}
_FDT(fdt_end_node(fdt));
+ /*
+ * VIO: See comment in linux,stdout-path; we don't yet represent a VIO
+ * bus/address allocation so addresses are hardwired here.
+ */
+ _FDT(fdt_begin_node(fdt, "vdevice"));
+ _FDT(fdt_property_cell(fdt, "#address-cells", 0x1));
+ _FDT(fdt_property_cell(fdt, "#size-cells", 0x0));
+ _FDT(fdt_property_string(fdt, "device_type", "vdevice"));
+ _FDT(fdt_property_string(fdt, "compatible", "IBM,vdevice"));
+ _FDT(fdt_begin_node(fdt, "vty@30000000"));
+ _FDT(fdt_property_string(fdt, "name", "vty"));
+ _FDT(fdt_property_string(fdt, "device_type", "serial"));
+ _FDT(fdt_property_string(fdt, "compatible", "hvterm1"));
+ _FDT(fdt_property_cell(fdt, "reg", 0x30000000));
+ _FDT(fdt_end_node(fdt));
+ _FDT(fdt_end_node(fdt));
+
/* Finalise: */
_FDT(fdt_end_node(fdt)); /* Root node */
_FDT(fdt_finish(fdt));
diff --git a/tools/kvm/powerpc/spapr_hvcons.c b/tools/kvm/powerpc/spapr_hvcons.c
new file mode 100644
index 0000000..511dbe1
--- /dev/null
+++ b/tools/kvm/powerpc/spapr_hvcons.c
@@ -0,0 +1,102 @@
+/*
+ * SPAPR HV console
+ *
+ * Borrowed lightly from QEMU's spapr_vty.c, Copyright (c) 2010 David Gibson,
+ * IBM Corporation.
+ *
+ * Copyright (c) 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include "kvm/term.h"
+#include "kvm/kvm.h"
+#include "kvm/kvm-cpu.h"
+#include "kvm/util.h"
+#include "spapr.h"
+#include "spapr_hvcons.h"
+
+#include <stdio.h>
+#include <sys/uio.h>
+#include <errno.h>
+
+#include <linux/byteorder.h>
+
+union hv_chario {
+ struct {
+ uint64_t char0_7;
+ uint64_t char8_15;
+ } a;
+ uint8_t buf[16];
+};
+
+static unsigned long h_put_term_char(struct kvm_cpu *vcpu, unsigned long opcode, unsigned long *args)
+{
+ /* To do: Read register from args[0], and check it. */
+ unsigned long len = args[1];
+ union hv_chario data;
+ struct iovec iov;
+
+ if (len > 16) {
+ return H_PARAMETER;
+ }
+ data.a.char0_7 = cpu_to_be64(args[2]);
+ data.a.char8_15 = cpu_to_be64(args[3]);
+
+ iov.iov_base = data.buf;
+ iov.iov_len = len;
+ do {
+ int ret;
+
+ ret = term_putc_iov(CONSOLE_HV, &iov, 1, 0);
+ if (ret < 0) {
+ die("term_putc_iov error %d!\n", errno);
+ }
+ iov.iov_base += ret;
+ iov.iov_len -= ret;
+ } while (iov.iov_len > 0);
+
+ return H_SUCCESS;
+}
+
+
+static unsigned long h_get_term_char(struct kvm_cpu *vcpu, unsigned long opcode, unsigned long *args)
+{
+ /* To do: Read register from args[0], and check it. */
+ unsigned long *len = args + 0;
+ unsigned long *char0_7 = args + 1;
+ unsigned long *char8_15 = args + 2;
+ union hv_chario data;
+ struct iovec iov;
+
+ if (term_readable(CONSOLE_HV, 0)) {
+ iov.iov_base = data.buf;
+ iov.iov_len = 16;
+
+ *len = term_getc_iov(CONSOLE_HV, &iov, 1, 0);
+ *char0_7 = be64_to_cpu(data.a.char0_7);
+ *char8_15 = be64_to_cpu(data.a.char8_15);
+ } else {
+ *len = 0;
+ }
+
+ return H_SUCCESS;
+}
+
+void spapr_hvcons_poll(struct kvm *kvm)
+{
+ if (term_readable(CONSOLE_HV, 0)) {
+ /*
+ * We can inject an IRQ to guest here if we want. The guest
+ * will happily poll, though, so not required.
+ */
+ }
+}
+
+void spapr_hvcons_init(void)
+{
+ spapr_register_hypercall(H_PUT_TERM_CHAR, h_put_term_char);
+ spapr_register_hypercall(H_GET_TERM_CHAR, h_get_term_char);
+}
diff --git a/tools/kvm/powerpc/spapr_hvcons.h b/tools/kvm/powerpc/spapr_hvcons.h
new file mode 100644
index 0000000..d3e4414
--- /dev/null
+++ b/tools/kvm/powerpc/spapr_hvcons.h
@@ -0,0 +1,19 @@
+/*
+ * SPAPR HV console
+ *
+ * Copyright (c) 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#ifndef spapr_hvcons_H
+#define spapr_hvcons_H
+
+#include "kvm/kvm.h"
+
+void spapr_hvcons_init(void);
+void spapr_hvcons_poll(struct kvm *kvm);
+
+#endif
--
1.7.0.4
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH V2 4/6] kvm tools: Add PPC64 XICS interrupt controller support
2011-12-13 7:10 [PATCH V2 0/6] Add initial SPAPR PPC64 architecture support Matt Evans
` (2 preceding siblings ...)
2011-12-13 7:10 ` [PATCH V2 3/6] kvm tools: Add SPAPR PPC64 HV console Matt Evans
@ 2011-12-13 7:10 ` Matt Evans
2011-12-14 2:35 ` David Gibson
2011-12-13 7:10 ` [PATCH V2 5/6] kvm tools: Add PPC64 PCI Host Bridge Matt Evans
2011-12-13 7:10 ` [PATCH V2 6/6] kvm tools: Add PPC64 kvm_cpu__emulate_io() Matt Evans
5 siblings, 1 reply; 12+ messages in thread
From: Matt Evans @ 2011-12-13 7:10 UTC (permalink / raw)
To: kvm, kvm-ppc; +Cc: penberg, asias.hejun, levinsasha928, gorcunov, david, aik
This patch adds XICS emulation code (heavily borrowed from QEMU), and wires
this into kvm_cpu__irq() to fire a CPU IRQ via KVM. A device tree entry is
also added. IPIs work; xics_alloc_irqnum() is added to allocate an external
IRQ (which will later be used by the PHB PCI code); and finally, kvm__irq_line()
can be called to raise an IRQ on XICS.
Signed-off-by: Matt Evans <matt@ozlabs.org>
---
tools/kvm/Makefile | 1 +
tools/kvm/powerpc/include/kvm/kvm-arch.h | 1 +
tools/kvm/powerpc/include/kvm/kvm-cpu-arch.h | 2 +
tools/kvm/powerpc/irq.c | 17 +-
tools/kvm/powerpc/kvm-cpu.c | 11 +
tools/kvm/powerpc/kvm.c | 26 +-
tools/kvm/powerpc/xics.c | 514 ++++++++++++++++++++++++++
tools/kvm/powerpc/xics.h | 23 ++
8 files changed, 589 insertions(+), 6 deletions(-)
create mode 100644 tools/kvm/powerpc/xics.c
create mode 100644 tools/kvm/powerpc/xics.h
diff --git a/tools/kvm/Makefile b/tools/kvm/Makefile
index 0a576be..0d42acf 100644
--- a/tools/kvm/Makefile
+++ b/tools/kvm/Makefile
@@ -134,6 +134,7 @@ ifeq ($(uname_M), ppc64)
OBJS += powerpc/spapr_hcall.o
OBJS += powerpc/spapr_rtas.o
OBJS += powerpc/spapr_hvcons.o
+ OBJS += powerpc/xics.o
ARCH_INCLUDE := powerpc/include
CFLAGS += -m64
LIBS += -lfdt
diff --git a/tools/kvm/powerpc/include/kvm/kvm-arch.h b/tools/kvm/powerpc/include/kvm/kvm-arch.h
index 33a3827..e070c3f 100644
--- a/tools/kvm/powerpc/include/kvm/kvm-arch.h
+++ b/tools/kvm/powerpc/include/kvm/kvm-arch.h
@@ -67,6 +67,7 @@ struct kvm {
unsigned long initrd_gra;
unsigned long initrd_size;
const char *name;
+ struct icp_state *icp;
};
/* Helper for the various bits of code that generate FDT nodes */
diff --git a/tools/kvm/powerpc/include/kvm/kvm-cpu-arch.h b/tools/kvm/powerpc/include/kvm/kvm-cpu-arch.h
index 64e4510..c1c6539 100644
--- a/tools/kvm/powerpc/include/kvm/kvm-cpu-arch.h
+++ b/tools/kvm/powerpc/include/kvm/kvm-cpu-arch.h
@@ -36,6 +36,8 @@
#define MSR_RI (1UL<<1)
#define MSR_LE (1UL<<0)
+#define POWER7_EXT_IRQ 0
+
struct kvm;
struct kvm_cpu {
diff --git a/tools/kvm/powerpc/irq.c b/tools/kvm/powerpc/irq.c
index 46aa64f..a1047d4 100644
--- a/tools/kvm/powerpc/irq.c
+++ b/tools/kvm/powerpc/irq.c
@@ -21,6 +21,15 @@
#include <stddef.h>
#include <stdlib.h>
+#include "xics.h"
+
+#define XICS_IRQS 1024
+
+/*
+ * FIXME: The code in this file assumes an SPAPR guest, using XICS. Make
+ * generic & cope with multiple PPC platform types.
+ */
+
int irq__register_device(u32 dev, u8 *num, u8 *pin, u8 *line)
{
fprintf(stderr, "irq__register_device(%d, [%d], [%d], [%d]\n",
@@ -30,7 +39,13 @@ int irq__register_device(u32 dev, u8 *num, u8 *pin, u8 *line)
void irq__init(struct kvm *kvm)
{
- fprintf(stderr, __func__);
+ /*
+ * kvm->nrcpus is now valid; for /now/, pass
+ * this to xics_system_init(), which assumes servers
+ * are numbered 0..nrcpus-1. This may not really be true,
+ * but it is OK currently.
+ */
+ kvm->icp = xics_system_init(XICS_IRQS, kvm->nrcpus);
}
int irq__add_msix_route(struct kvm *kvm, struct msi_msg *msg)
diff --git a/tools/kvm/powerpc/kvm-cpu.c b/tools/kvm/powerpc/kvm-cpu.c
index ef3db4d..5ef1cbf 100644
--- a/tools/kvm/powerpc/kvm-cpu.c
+++ b/tools/kvm/powerpc/kvm-cpu.c
@@ -15,6 +15,7 @@
#include "kvm/kvm.h"
#include "spapr.h"
+#include "xics.h"
#include <sys/ioctl.h>
#include <sys/mman.h>
@@ -89,6 +90,9 @@ struct kvm_cpu *kvm_cpu__init(struct kvm *kvm, unsigned long cpu_id)
*/
vcpu->is_running = true;
+ /* Register with IRQ controller (FIXME, assumes XICS) */
+ xics_cpu_register(vcpu);
+
return vcpu;
}
@@ -141,6 +145,13 @@ void kvm_cpu__reset_vcpu(struct kvm_cpu *vcpu)
/* kvm_cpu__irq - set KVM's IRQ flag on this vcpu */
void kvm_cpu__irq(struct kvm_cpu *vcpu, int pin, int level)
{
+ unsigned int virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
+
+ /* FIXME: POWER-specific */
+ if (pin != POWER7_EXT_IRQ)
+ return;
+ if (ioctl(vcpu->vcpu_fd, KVM_INTERRUPT, &virq) < 0)
+ pr_warning("Could not KVM_INTERRUPT.");
}
void kvm_cpu__arch_nmi(struct kvm_cpu *cpu)
diff --git a/tools/kvm/powerpc/kvm.c b/tools/kvm/powerpc/kvm.c
index 06fccef..30443c7 100644
--- a/tools/kvm/powerpc/kvm.c
+++ b/tools/kvm/powerpc/kvm.c
@@ -39,9 +39,13 @@
#define HUGETLBFS_PATH "/var/lib/hugetlbfs/global/pagesize-16MB/"
+#define PHANDLE_XICP 0x00001111
+
static char kern_cmdline[2048];
struct kvm_ext kvm_req_ext[] = {
+ { DEFINE_KVM_EXT(KVM_CAP_PPC_UNSET_IRQ) },
+ { DEFINE_KVM_EXT(KVM_CAP_PPC_IRQ_LEVEL) },
{ 0, 0 }
};
@@ -118,11 +122,6 @@ void kvm__arch_init(struct kvm *kvm, const char *kvm_dev, const char *hugetlbfs_
spapr_hvcons_init();
}
-void kvm__irq_line(struct kvm *kvm, int irq, int level)
-{
- fprintf(stderr, "irq_line(%d, %d)\n", irq, level);
-}
-
void kvm__irq_trigger(struct kvm *kvm, int irq)
{
kvm__irq_line(kvm, irq, 1);
@@ -207,6 +206,7 @@ static void setup_fdt(struct kvm *kvm)
{
uint64_t mem_reg_property[] = { 0, cpu_to_be64(kvm->ram_size) };
int smp_cpus = kvm->nrcpus;
+ uint32_t int_server_ranges_prop[] = {0, cpu_to_be32(smp_cpus)};
char hypertas_prop_kvm[] = "hcall-pft\0hcall-term\0"
"hcall-dabr\0hcall-interrupt\0hcall-tce\0hcall-vio\0"
"hcall-splpar\0hcall-bulk";
@@ -347,6 +347,22 @@ static void setup_fdt(struct kvm *kvm)
}
_FDT(fdt_end_node(fdt));
+ /* IRQ controller */
+ _FDT(fdt_begin_node(fdt, "interrupt-controller@0"));
+
+ _FDT(fdt_property_string(fdt, "device_type",
+ "PowerPC-External-Interrupt-Presentation"));
+ _FDT(fdt_property_string(fdt, "compatible", "IBM,ppc-xicp"));
+ _FDT(fdt_property_cell(fdt, "reg", 0));
+ _FDT(fdt_property(fdt, "interrupt-controller", NULL, 0));
+ _FDT(fdt_property(fdt, "ibm,interrupt-server-ranges",
+ int_server_ranges_prop,
+ sizeof(int_server_ranges_prop)));
+ _FDT(fdt_property_cell(fdt, "#interrupt-cells", 2));
+ _FDT(fdt_property_cell(fdt, "linux,phandle", PHANDLE_XICP));
+ _FDT(fdt_property_cell(fdt, "phandle", PHANDLE_XICP));
+ _FDT(fdt_end_node(fdt));
+
/*
* VIO: See comment in linux,stdout-path; we don't yet represent a VIO
* bus/address allocation so addresses are hardwired here.
diff --git a/tools/kvm/powerpc/xics.c b/tools/kvm/powerpc/xics.c
new file mode 100644
index 0000000..2d70d3c
--- /dev/null
+++ b/tools/kvm/powerpc/xics.c
@@ -0,0 +1,514 @@
+/*
+ * PAPR Virtualized Interrupt System, aka ICS/ICP aka xics
+ *
+ * Borrowed heavily from QEMU's xics.c,
+ * Copyright (c) 2010,2011 David Gibson, IBM Corporation.
+ *
+ * Modifications copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include "spapr.h"
+#include "xics.h"
+#include "kvm/util.h"
+
+#include <stdio.h>
+#include <malloc.h>
+
+
+/* #define DEBUG_XICS yes */
+#ifdef DEBUG_XICS
+#define xics_dprintf(fmt, ...) \
+ do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
+#else
+#define xics_dprintf(fmt, ...) \
+ do { } while (0)
+#endif
+
+/*
+ * ICP: Presentation layer
+ */
+
+struct icp_server_state {
+ uint32_t xirr;
+ uint8_t pending_priority;
+ uint8_t mfrr;
+ struct kvm_cpu *cpu;
+};
+
+#define XICS_IRQ_OFFSET 16
+#define XISR_MASK 0x00ffffff
+#define CPPR_MASK 0xff000000
+
+#define XISR(ss) (((ss)->xirr) & XISR_MASK)
+#define CPPR(ss) (((ss)->xirr) >> 24)
+
+struct ics_state;
+
+struct icp_state {
+ unsigned long nr_servers;
+ struct icp_server_state *ss;
+ struct ics_state *ics;
+};
+
+static void ics_reject(struct ics_state *ics, int nr);
+static void ics_resend(struct ics_state *ics);
+static void ics_eoi(struct ics_state *ics, int nr);
+
+static inline void cpu_irq_raise(struct kvm_cpu *vcpu)
+{
+ xics_dprintf("INT1[%p]\n", vcpu);
+ kvm_cpu__irq(vcpu, POWER7_EXT_IRQ, 1);
+}
+
+static inline void cpu_irq_lower(struct kvm_cpu *vcpu)
+{
+ xics_dprintf("INT0[%p]\n", vcpu);
+ kvm_cpu__irq(vcpu, POWER7_EXT_IRQ, 0);
+}
+
+static void icp_check_ipi(struct icp_state *icp, int server)
+{
+ struct icp_server_state *ss = icp->ss + server;
+
+ if (XISR(ss) && (ss->pending_priority <= ss->mfrr)) {
+ return;
+ }
+
+ if (XISR(ss)) {
+ ics_reject(icp->ics, XISR(ss));
+ }
+
+ ss->xirr = (ss->xirr & ~XISR_MASK) | XICS_IPI;
+ ss->pending_priority = ss->mfrr;
+ cpu_irq_raise(ss->cpu);
+}
+
+static void icp_resend(struct icp_state *icp, int server)
+{
+ struct icp_server_state *ss = icp->ss + server;
+
+ if (ss->mfrr < CPPR(ss)) {
+ icp_check_ipi(icp, server);
+ }
+ ics_resend(icp->ics);
+}
+
+static void icp_set_cppr(struct icp_state *icp, int server, uint8_t cppr)
+{
+ struct icp_server_state *ss = icp->ss + server;
+ uint8_t old_cppr;
+ uint32_t old_xisr;
+
+ old_cppr = CPPR(ss);
+ ss->xirr = (ss->xirr & ~CPPR_MASK) | (cppr << 24);
+
+ if (cppr < old_cppr) {
+ if (XISR(ss) && (cppr <= ss->pending_priority)) {
+ old_xisr = XISR(ss);
+ ss->xirr &= ~XISR_MASK; /* Clear XISR */
+ cpu_irq_lower(ss->cpu);
+ ics_reject(icp->ics, old_xisr);
+ }
+ } else {
+ if (!XISR(ss)) {
+ icp_resend(icp, server);
+ }
+ }
+}
+
+static void icp_set_mfrr(struct icp_state *icp, int nr, uint8_t mfrr)
+{
+ struct icp_server_state *ss = icp->ss + nr;
+
+ ss->mfrr = mfrr;
+ if (mfrr < CPPR(ss)) {
+ icp_check_ipi(icp, nr);
+ }
+}
+
+static uint32_t icp_accept(struct icp_server_state *ss)
+{
+ uint32_t xirr;
+
+ cpu_irq_lower(ss->cpu);
+ xirr = ss->xirr;
+ ss->xirr = ss->pending_priority << 24;
+ return xirr;
+}
+
+static void icp_eoi(struct icp_state *icp, int server, uint32_t xirr)
+{
+ struct icp_server_state *ss = icp->ss + server;
+
+ ics_eoi(icp->ics, xirr & XISR_MASK);
+ /* Send EOI -> ICS */
+ ss->xirr = (ss->xirr & ~CPPR_MASK) | (xirr & CPPR_MASK);
+ if (!XISR(ss)) {
+ icp_resend(icp, server);
+ }
+}
+
+static void icp_irq(struct icp_state *icp, int server, int nr, uint8_t priority)
+{
+ struct icp_server_state *ss = icp->ss + server;
+ xics_dprintf("icp_irq(nr %d, server %d, prio 0x%x)\n", nr, server, priority);
+ if ((priority >= CPPR(ss))
+ || (XISR(ss) && (ss->pending_priority <= priority))) {
+ xics_dprintf("reject %d, CPPR 0x%x, XISR 0x%x, pprio 0x%x, prio 0x%x\n",
+ nr, CPPR(ss), XISR(ss), ss->pending_priority, priority);
+ ics_reject(icp->ics, nr);
+ } else {
+ if (XISR(ss)) {
+ xics_dprintf("reject %d, CPPR 0x%x, XISR 0x%x, pprio 0x%x, prio 0x%x\n",
+ nr, CPPR(ss), XISR(ss), ss->pending_priority, priority);
+ ics_reject(icp->ics, XISR(ss));
+ }
+ ss->xirr = (ss->xirr & ~XISR_MASK) | (nr & XISR_MASK);
+ ss->pending_priority = priority;
+ cpu_irq_raise(ss->cpu);
+ }
+}
+
+/*
+ * ICS: Source layer
+ */
+
+struct ics_irq_state {
+ int server;
+ uint8_t priority;
+ uint8_t saved_priority;
+ int rejected:1;
+ int masked_pending:1;
+};
+
+struct ics_state {
+ unsigned int nr_irqs;
+ unsigned int offset;
+ struct ics_irq_state *irqs;
+ struct icp_state *icp;
+};
+
+static int ics_valid_irq(struct ics_state *ics, uint32_t nr)
+{
+ return (nr >= ics->offset)
+ && (nr < (ics->offset + ics->nr_irqs));
+}
+
+static void ics_set_irq_msi(struct ics_state *ics, int srcno, int val)
+{
+ struct ics_irq_state *irq = ics->irqs + srcno;
+
+ if (val) {
+ if (irq->priority == 0xff) {
+ xics_dprintf(" irq pri ff, masked pending\n");
+ irq->masked_pending = 1;
+ } else {
+ icp_irq(ics->icp, irq->server, srcno + ics->offset, irq->priority);
+ }
+ }
+}
+
+static void ics_reject_msi(struct ics_state *ics, int nr)
+{
+ struct ics_irq_state *irq = ics->irqs + nr - ics->offset;
+
+ irq->rejected = 1;
+}
+
+static void ics_resend_msi(struct ics_state *ics)
+{
+ unsigned int i;
+
+ for (i = 0; i < ics->nr_irqs; i++) {
+ struct ics_irq_state *irq = ics->irqs + i;
+
+ /* FIXME: filter by server#? */
+ if (irq->rejected) {
+ irq->rejected = 0;
+ if (irq->priority != 0xff) {
+ icp_irq(ics->icp, irq->server, i + ics->offset, irq->priority);
+ }
+ }
+ }
+}
+
+static void ics_write_xive_msi(struct ics_state *ics, int nr, int server,
+ uint8_t priority)
+{
+ struct ics_irq_state *irq = ics->irqs + nr - ics->offset;
+
+ irq->server = server;
+ irq->priority = priority;
+ xics_dprintf("ics_write_xive_msi(nr %d, server %d, pri 0x%x)\n", nr, server, priority);
+
+ if (!irq->masked_pending || (priority == 0xff)) {
+ return;
+ }
+
+ irq->masked_pending = 0;
+ icp_irq(ics->icp, server, nr, priority);
+}
+
+static void ics_reject(struct ics_state *ics, int nr)
+{
+ ics_reject_msi(ics, nr);
+}
+
+static void ics_resend(struct ics_state *ics)
+{
+ ics_resend_msi(ics);
+}
+
+static void ics_eoi(struct ics_state *ics, int nr)
+{
+}
+
+/*
+ * Exported functions
+ */
+
+static int allocated_irqnum = XICS_IRQ_OFFSET;
+
+/*
+ * xics_alloc_irqnum(): This is hacky. The problem boils down to the PCI device
+ * code, which just calls kvm__irq_line( .. pcidev->pci_hdr.irq_line ..) at will.
+ * Each PCI device's IRQ line is allocated by irq__register_device() (which
+ * allocates both an IRQ and a PCI device number).
+ *
+ * In future I'd like to at least mimic some kind of 'upstream IRQ controller'
+ * whereby PCI devices let their PHB know when they want to IRQ, and that
+ * percolates up.
+ *
+ * For now, allocate a REAL xics irq number and (via irq__register_device) push
+ * that into the config space. 8 bits only though!
+ */
+int xics_alloc_irqnum(void)
+{
+ int irq = allocated_irqnum++;
+
+ if (irq > 255)
+ die("Huge numbers of IRQs aren't supported with the daft kvmtool IRQ system.");
+
+ return irq;
+}
+
+static target_ulong h_cppr(struct kvm_cpu *vcpu,
+ target_ulong opcode, target_ulong *args)
+{
+ target_ulong cppr = args[0];
+
+ xics_dprintf("h_cppr(%lx)\n", cppr);
+ icp_set_cppr(vcpu->kvm->icp, vcpu->cpu_id, cppr);
+ return H_SUCCESS;
+}
+
+static target_ulong h_ipi(struct kvm_cpu *vcpu,
+ target_ulong opcode, target_ulong *args)
+{
+ target_ulong server = args[0];
+ target_ulong mfrr = args[1];
+
+ xics_dprintf("h_ipi(%lx, %lx)\n", server, mfrr);
+ if (server >= vcpu->kvm->icp->nr_servers) {
+ return H_PARAMETER;
+ }
+
+ icp_set_mfrr(vcpu->kvm->icp, server, mfrr);
+ return H_SUCCESS;
+}
+
+static target_ulong h_xirr(struct kvm_cpu *vcpu,
+ target_ulong opcode, target_ulong *args)
+{
+ uint32_t xirr = icp_accept(vcpu->kvm->icp->ss + vcpu->cpu_id);
+
+ xics_dprintf("h_xirr() = %x\n", xirr);
+ args[0] = xirr;
+ return H_SUCCESS;
+}
+
+/*
+ * H_EOI hypercall handler: forwards args[0] (the XIRR value supplied by the
+ * guest) to icp_eoi() for the entry indexed by vcpu->cpu_id and returns
+ * H_SUCCESS.  opcode is unused.
+ */
+static target_ulong h_eoi(struct kvm_cpu *vcpu,
+			  target_ulong opcode, target_ulong *args)
+{
+	struct icp_state *icp = vcpu->kvm->icp;
+	target_ulong guest_xirr = args[0];
+
+	xics_dprintf("h_eoi(%lx)\n", guest_xirr);
+	icp_eoi(icp, vcpu->cpu_id, guest_xirr);
+
+	return H_SUCCESS;
+}
+
+/*
+ * RTAS "ibm,set-xive" handler.  Expects three argument cells (irq number,
+ * server, priority) and one return cell.  Return cell 0 is set to 0 on
+ * success, or to -3 when the argument/return counts are wrong, the irq
+ * fails ics_valid_irq(), the server is out of range, or priority > 0xff.
+ * token is unused.
+ */
+static void rtas_set_xive(struct kvm_cpu *vcpu, uint32_t token,
+			  uint32_t nargs, target_ulong args,
+			  uint32_t nret, target_ulong rets)
+{
+	struct kvm *kvm = vcpu->kvm;
+	struct ics_state *ics = kvm->icp->ics;
+	uint32_t irq, srv, prio;
+
+	if (!(nargs == 3 && nret == 1)) {
+		rtas_st(kvm, rets, 0, -3);
+		return;
+	}
+
+	irq = rtas_ld(kvm, args, 0);
+	srv = rtas_ld(kvm, args, 1);
+	prio = rtas_ld(kvm, args, 2);
+
+	xics_dprintf("rtas_set_xive(%x,%x,%x)\n", irq, srv, prio);
+
+	/* All three values must be in range before anything is written. */
+	if (ics_valid_irq(ics, irq) && (srv < ics->icp->nr_servers)
+	    && (prio <= 0xff)) {
+		ics_write_xive_msi(ics, irq, srv, prio);
+		rtas_st(kvm, rets, 0, 0); /* Success */
+		return;
+	}
+
+	rtas_st(kvm, rets, 0, -3);
+}
+
+/*
+ * RTAS "ibm,get-xive" handler.  Expects one argument cell (irq number) and
+ * three return cells.  On success return cell 0 is 0 and cells 1 and 2
+ * carry the irq's recorded server and priority; on a bad argument/return
+ * count or an irq rejected by ics_valid_irq(), cell 0 is set to -3.
+ * token is unused.
+ */
+static void rtas_get_xive(struct kvm_cpu *vcpu, uint32_t token,
+			  uint32_t nargs, target_ulong args,
+			  uint32_t nret, target_ulong rets)
+{
+	struct kvm *kvm = vcpu->kvm;
+	struct ics_state *ics = kvm->icp->ics;
+	const struct ics_irq_state *state;
+	uint32_t irq;
+
+	if (!(nargs == 1 && nret == 3)) {
+		rtas_st(kvm, rets, 0, -3);
+		return;
+	}
+
+	irq = rtas_ld(kvm, args, 0);
+	if (!ics_valid_irq(ics, irq)) {
+		rtas_st(kvm, rets, 0, -3);
+		return;
+	}
+
+	/* irq is a global number; the irqs[] array is indexed relative to offset. */
+	state = &ics->irqs[irq - ics->offset];
+
+	rtas_st(kvm, rets, 0, 0); /* Success */
+	rtas_st(kvm, rets, 1, state->server);
+	rtas_st(kvm, rets, 2, state->priority);
+}
+
+/*
+ * RTAS "ibm,int-off" handler.  Only validates its input: one argument cell
+ * (irq number) and one return cell are expected, and the irq must pass
+ * ics_valid_irq().  Return cell 0 is 0 when validation passes and -3
+ * otherwise; no interrupt state is changed.  token is unused.
+ */
+static void rtas_int_off(struct kvm_cpu *vcpu, uint32_t token,
+			 uint32_t nargs, target_ulong args,
+			 uint32_t nret, target_ulong rets)
+{
+	struct kvm *kvm = vcpu->kvm;
+	struct ics_state *ics = kvm->icp->ics;
+	int status = -3;
+
+	if (nargs == 1 && nret == 1) {
+		uint32_t irq = rtas_ld(kvm, args, 0);
+
+		/* ME: QEMU wrote xive_msi here, in #if 0. Deleted. */
+		if (ics_valid_irq(ics, irq))
+			status = 0; /* Success */
+	}
+
+	rtas_st(kvm, rets, 0, status);
+}
+
+/*
+ * RTAS "ibm,int-on" handler.  Only validates its input: one argument cell
+ * (irq number) and one return cell are expected, and the irq must pass
+ * ics_valid_irq().  Return cell 0 is 0 when validation passes and -3
+ * otherwise; no interrupt state is changed.  token is unused.
+ */
+static void rtas_int_on(struct kvm_cpu *vcpu, uint32_t token,
+			uint32_t nargs, target_ulong args,
+			uint32_t nret, target_ulong rets)
+{
+	struct kvm *kvm = vcpu->kvm;
+	struct ics_state *ics = kvm->icp->ics;
+	int status = -3;
+
+	if (nargs == 1 && nret == 1) {
+		uint32_t irq = rtas_ld(kvm, args, 0);
+
+		/* ME: QEMU wrote xive_msi here, in #if 0. Deleted. */
+		if (ics_valid_irq(ics, irq))
+			status = 0; /* Success */
+	}
+
+	rtas_st(kvm, rets, 0, status);
+}
+
+/*
+ * Record vcpu in the ICP server slot indexed by vcpu->cpu_id.  A cpu_id at
+ * or beyond icp->nr_servers is fatal.
+ */
+void xics_cpu_register(struct kvm_cpu *vcpu)
+{
+	struct icp_state *icp = vcpu->kvm->icp;
+
+	if (vcpu->cpu_id >= icp->nr_servers)
+		die("Setting invalid server for cpuid %ld\n", vcpu->cpu_id);
+	else
+		icp->ss[vcpu->cpu_id].cpu = vcpu;
+}
+
+/*
+ * Allocate and initialise the XICS state (one ICP with nr_cpus + 1 server
+ * slots and one ICS with nr_irqs sources), then register the XICS
+ * hypercalls and RTAS calls.
+ *
+ * Every allocation is checked; running out of memory is fatal via die()
+ * instead of dereferencing a NULL pointer.  calloc() is used for the arrays
+ * so the element-count multiplication is overflow-checked.
+ *
+ * Returns the new icp_state; its ->ics member points at the ICS and the
+ * ICS's ->icp member points back at it.
+ */
+struct icp_state *xics_system_init(unsigned int nr_irqs, unsigned int nr_cpus)
+{
+	unsigned int i;
+	struct icp_state *icp;
+	struct ics_state *ics;
+
+	icp = malloc(sizeof(*icp));
+	if (!icp)
+		die("xics: out of memory allocating ICP state");
+
+	icp->nr_servers = nr_cpus + 1;
+	icp->ss = calloc(icp->nr_servers, sizeof(struct icp_server_state));
+	if (!icp->ss)
+		die("xics: out of memory allocating ICP server state");
+
+	for (i = 0; i < icp->nr_servers; i++) {
+		icp->ss[i].xirr = 0;
+		icp->ss[i].pending_priority = 0;
+		icp->ss[i].cpu = NULL;
+		icp->ss[i].mfrr = 0xff;
+	}
+
+	/*
+	 * icp->ss[cpu_id].cpu is filled in later, by each CPU calling
+	 * xics_cpu_register().
+	 */
+
+	ics = malloc(sizeof(*ics));
+	if (!ics)
+		die("xics: out of memory allocating ICS state");
+
+	ics->nr_irqs = nr_irqs;
+	ics->offset = XICS_IRQ_OFFSET;
+	ics->irqs = calloc(nr_irqs, sizeof(struct ics_irq_state));
+	/* calloc(0, ...) may legitimately return NULL; only a real failure is fatal. */
+	if (!ics->irqs && nr_irqs != 0)
+		die("xics: out of memory allocating ICS irq state");
+
+	icp->ics = ics;
+	ics->icp = icp;
+
+	for (i = 0; i < nr_irqs; i++) {
+		ics->irqs[i].server = 0;
+		ics->irqs[i].priority = 0xff;
+		ics->irqs[i].saved_priority = 0xff;
+		ics->irqs[i].rejected = 0;
+		ics->irqs[i].masked_pending = 0;
+	}
+
+	spapr_register_hypercall(H_CPPR, h_cppr);
+	spapr_register_hypercall(H_IPI, h_ipi);
+	spapr_register_hypercall(H_XIRR, h_xirr);
+	spapr_register_hypercall(H_EOI, h_eoi);
+
+	spapr_rtas_register("ibm,set-xive", rtas_set_xive);
+	spapr_rtas_register("ibm,get-xive", rtas_get_xive);
+	spapr_rtas_register("ibm,int-off", rtas_int_off);
+	spapr_rtas_register("ibm,int-on", rtas_int_on);
+
+	return icp;
+}
+
+/*
+ * Raise or lower a XICS interrupt.  irq is a global XICS number; it is
+ * converted to an ICS-local index by subtracting ics->offset before being
+ * passed to ics_set_irq_msi() together with level.
+ */
+void kvm__irq_line(struct kvm *kvm, int irq, int level)
+{
+	struct ics_state *ics = kvm->icp->ics;
+
+	xics_dprintf("Raising IRQ %d -> %d\n", irq, level);
+
+	/*
+	 * Route event to ICS, which routes to ICP, which eventually does a
+	 * kvm_cpu__irq(vcpu, POWER7_EXT_IRQ, 1)
+	 */
+	ics_set_irq_msi(ics, irq - ics->offset, level);
+}
diff --git a/tools/kvm/powerpc/xics.h b/tools/kvm/powerpc/xics.h
new file mode 100644
index 0000000..144915b
--- /dev/null
+++ b/tools/kvm/powerpc/xics.h
@@ -0,0 +1,23 @@
+/*
+ * PAPR Virtualized Interrupt System, aka ICS/ICP aka xics
+ *
+ * Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#ifndef XICS_H
+#define XICS_H
+
+#define XICS_IPI 0x2
+
+struct kvm_cpu;
+struct icp_state;
+
+struct icp_state *xics_system_init(unsigned int nr_irqs, unsigned int nr_cpus);
+void xics_cpu_register(struct kvm_cpu *vcpu);
+int xics_alloc_irqnum(void);
+
+#endif
--
1.7.0.4
^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH V2 4/6] kvm tools: Add PPC64 XICS interrupt controller support
2011-12-13 7:10 ` [PATCH V2 4/6] kvm tools: Add PPC64 XICS interrupt controller support Matt Evans
@ 2011-12-14 2:35 ` David Gibson
2011-12-20 1:16 ` Matt Evans
0 siblings, 1 reply; 12+ messages in thread
From: David Gibson @ 2011-12-14 2:35 UTC (permalink / raw)
To: Matt Evans
Cc: kvm, kvm-ppc, penberg, asias.hejun, levinsasha928, gorcunov, aik
On Tue, Dec 13, 2011 at 06:10:48PM +1100, Matt Evans wrote:
> This patch adds XICS emulation code (heavily borrowed from QEMU), and wires
> this into kvm_cpu__irq() to fire a CPU IRQ via KVM. A device tree entry is
> also added. IPIs work, xics_alloc_irqnum() is added to allocate an external
> IRQ (which will later be used by the PHB PCI code) and finally, kvm__irq_line()
> can be called to raise an IRQ on XICS.
Hrm, looks like you took a somewhat old version of xics.c from qemu.
It dangerously uses the same variable names for global irq numbers and
numbers local to one ics unit. It used to have at least one bug
caused by confusing the two, which I'm not sure if you've also copied.
--
David Gibson | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH V2 4/6] kvm tools: Add PPC64 XICS interrupt controller support
2011-12-14 2:35 ` David Gibson
@ 2011-12-20 1:16 ` Matt Evans
2011-12-21 0:39 ` David Gibson
0 siblings, 1 reply; 12+ messages in thread
From: Matt Evans @ 2011-12-20 1:16 UTC (permalink / raw)
To: David Gibson
Cc: kvm, kvm-ppc, penberg, asias.hejun, levinsasha928, gorcunov, aik
Hi David,
On 14/12/11 13:35, David Gibson wrote:
> On Tue, Dec 13, 2011 at 06:10:48PM +1100, Matt Evans wrote:
>> This patch adds XICS emulation code (heavily borrowed from QEMU), and wires
>> this into kvm_cpu__irq() to fire a CPU IRQ via KVM. A device tree entry is
>> also added. IPIs work, xics_alloc_irqnum() is added to allocate an external
>> IRQ (which will later be used by the PHB PCI code) and finally, kvm__irq_line()
>> can be called to raise an IRQ on XICS.
>
> Hrm, looks like you took a somewhat old version of xics.c from qemu.
> It dangerously uses the same variable names for global irq numbers and
> numbers local to one ics unit. It used to have at least one bug
> caused by confusing the two, which I'm not sure if you've also copied.
Just had a look at the diffs between this and hw/xics.c from the master branch
in your qemu-impreza.git (which I based the kvmtool stuff on) and I can't see
anything standing out.
Is there a particular commit/patch/variable name you have in mind that I can
search for?
Thanks!
Matt
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH V2 4/6] kvm tools: Add PPC64 XICS interrupt controller support
2011-12-20 1:16 ` Matt Evans
@ 2011-12-21 0:39 ` David Gibson
0 siblings, 0 replies; 12+ messages in thread
From: David Gibson @ 2011-12-21 0:39 UTC (permalink / raw)
To: Matt Evans
Cc: kvm, kvm-ppc, penberg, asias.hejun, levinsasha928, gorcunov, aik
On Tue, Dec 20, 2011 at 12:16:40PM +1100, Matt Evans wrote:
> Hi David,
>
> On 14/12/11 13:35, David Gibson wrote:
> > On Tue, Dec 13, 2011 at 06:10:48PM +1100, Matt Evans wrote:
> >> This patch adds XICS emulation code (heavily borrowed from QEMU), and wires
> >> this into kvm_cpu__irq() to fire a CPU IRQ via KVM. A device tree entry is
> >> also added. IPIs work, xics_alloc_irqnum() is added to allocate an external
> >> IRQ (which will later be used by the PHB PCI code) and finally, kvm__irq_line()
> >> can be called to raise an IRQ on XICS.
> >
> > Hrm, looks like you took a somewhat old version of xics.c from qemu.
> > It dangerously uses the same variable names for global irq numbers and
> > numbers local to one ics unit. It used to have at least one bug
> > caused by confusing the two, which I'm not sure if you've also copied.
>
> Just had a look at the diffs between this and hw/xics.c from the master branch
> in your qemu-impreza.git (which I based the kvmtool stuff on) and I can't see
> anything standing out.
>
> Is there a particular commit/patch/variable name you have in mind that I can
> search for?
Sorry, my mistake, I was looking in the wrong place.
--
David Gibson | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH V2 5/6] kvm tools: Add PPC64 PCI Host Bridge
2011-12-13 7:10 [PATCH V2 0/6] Add initial SPAPR PPC64 architecture support Matt Evans
` (3 preceding siblings ...)
2011-12-13 7:10 ` [PATCH V2 4/6] kvm tools: Add PPC64 XICS interrupt controller support Matt Evans
@ 2011-12-13 7:10 ` Matt Evans
2011-12-13 7:10 ` [PATCH V2 6/6] kvm tools: Add PPC64 kvm_cpu__emulate_io() Matt Evans
5 siblings, 0 replies; 12+ messages in thread
From: Matt Evans @ 2011-12-13 7:10 UTC (permalink / raw)
To: kvm, kvm-ppc; +Cc: penberg, asias.hejun, levinsasha928, gorcunov, david, aik
This provides the PCI bridge, definitions for the address layout of the windows
and wires in IRQs. Once PCI devices are all registered, they are enumerated and
DT nodes generated for each.
Signed-off-by: Matt Evans <matt@ozlabs.org>
---
tools/kvm/powerpc/include/kvm/kvm-arch.h | 3 +
tools/kvm/powerpc/irq.c | 18 ++-
tools/kvm/powerpc/kvm.c | 11 +
tools/kvm/powerpc/spapr.h | 6 +
tools/kvm/powerpc/spapr_pci.c | 423 ++++++++++++++++++++++++++++++
tools/kvm/powerpc/spapr_pci.h | 38 +++
6 files changed, 497 insertions(+), 2 deletions(-)
create mode 100644 tools/kvm/powerpc/spapr_pci.c
create mode 100644 tools/kvm/powerpc/spapr_pci.h
diff --git a/tools/kvm/powerpc/include/kvm/kvm-arch.h b/tools/kvm/powerpc/include/kvm/kvm-arch.h
index e070c3f..26e3231 100644
--- a/tools/kvm/powerpc/include/kvm/kvm-arch.h
+++ b/tools/kvm/powerpc/include/kvm/kvm-arch.h
@@ -42,6 +42,8 @@
*/
#define KVM_PCI_MMIO_AREA 0x1000000
+struct spapr_phb;
+
struct kvm {
int sys_fd; /* For system ioctls(), i.e. /dev/kvm */
int vm_fd; /* For VM ioctls() */
@@ -68,6 +70,7 @@ struct kvm {
unsigned long initrd_size;
const char *name;
struct icp_state *icp;
+ struct spapr_phb *phb;
};
/* Helper for the various bits of code that generate FDT nodes */
diff --git a/tools/kvm/powerpc/irq.c b/tools/kvm/powerpc/irq.c
index a1047d4..dbe1ad2 100644
--- a/tools/kvm/powerpc/irq.c
+++ b/tools/kvm/powerpc/irq.c
@@ -21,7 +21,10 @@
#include <stddef.h>
#include <stdlib.h>
+#include "kvm/pci.h"
+
#include "xics.h"
+#include "spapr_pci.h"
#define XICS_IRQS 1024
@@ -30,10 +33,21 @@
* generic & cope with multiple PPC platform types.
*/
+static int pci_devs = 0;
+
int irq__register_device(u32 dev, u8 *num, u8 *pin, u8 *line)
{
- fprintf(stderr, "irq__register_device(%d, [%d], [%d], [%d]\n",
- dev, *num, *pin, *line);
+ if (pci_devs >= PCI_MAX_DEVICES)
+ die("Hit PCI device limit!\n");
+
+ *num = pci_devs++;
+
+ *pin = 1;
+ /*
+ * Have I said how nasty I find this? Line should be dontcare... PHB
+ * should determine which CPU/XICS IRQ to fire.
+ */
+ *line = xics_alloc_irqnum();
return 0;
}
diff --git a/tools/kvm/powerpc/kvm.c b/tools/kvm/powerpc/kvm.c
index 30443c7..ebd1845 100644
--- a/tools/kvm/powerpc/kvm.c
+++ b/tools/kvm/powerpc/kvm.c
@@ -16,6 +16,7 @@
#include "spapr.h"
#include "spapr_hvcons.h"
+#include "spapr_pci.h"
#include <linux/kvm.h>
@@ -120,6 +121,11 @@ void kvm__arch_init(struct kvm *kvm, const char *kvm_dev, const char *hugetlbfs_
register_core_rtas();
/* Now that hypercalls are initialised, register a couple for the console: */
spapr_hvcons_init();
+ spapr_create_phb(kvm, "pci", SPAPR_PCI_BUID,
+ SPAPR_PCI_MEM_WIN_ADDR,
+ SPAPR_PCI_MEM_WIN_SIZE,
+ SPAPR_PCI_IO_WIN_ADDR,
+ SPAPR_PCI_IO_WIN_SIZE);
}
void kvm__irq_trigger(struct kvm *kvm, int irq)
@@ -385,6 +391,11 @@ static void setup_fdt(struct kvm *kvm)
_FDT(fdt_finish(fdt));
_FDT(fdt_open_into(fdt, fdt_dest, FDT_MAX_SIZE));
+
+ /* PCI */
+ if (spapr_populate_pci_devices(kvm, PHANDLE_XICP, fdt_dest))
+ die("Fail populating PCI device nodes");
+
_FDT(fdt_add_mem_rsv(fdt_dest, kvm->rtas_gra, kvm->rtas_size));
_FDT(fdt_pack(fdt_dest));
}
diff --git a/tools/kvm/powerpc/spapr.h b/tools/kvm/powerpc/spapr.h
index 57cece1..731668c 100644
--- a/tools/kvm/powerpc/spapr.h
+++ b/tools/kvm/powerpc/spapr.h
@@ -102,4 +102,10 @@ target_ulong spapr_rtas_call(struct kvm_cpu *vcpu,
uint32_t token, uint32_t nargs, target_ulong args,
uint32_t nret, target_ulong rets);
+#define SPAPR_PCI_BUID 0x800000020000001ULL
+#define SPAPR_PCI_MEM_WIN_ADDR (KVM_MMIO_START + 0xA0000000)
+#define SPAPR_PCI_MEM_WIN_SIZE 0x20000000
+#define SPAPR_PCI_IO_WIN_ADDR (KVM_MMIO_START + 0x80000000)
+#define SPAPR_PCI_IO_WIN_SIZE 0x2000000
+
#endif /* !defined (__HW_SPAPR_H__) */
diff --git a/tools/kvm/powerpc/spapr_pci.c b/tools/kvm/powerpc/spapr_pci.c
new file mode 100644
index 0000000..445a02a
--- /dev/null
+++ b/tools/kvm/powerpc/spapr_pci.c
@@ -0,0 +1,423 @@
+/*
+ * SPAPR PHB emulation, RTAS interface to PCI config space, device tree nodes
+ * for enumerated devices.
+ *
+ * Borrowed heavily from QEMU's spapr_pci.c,
+ * Copyright (c) 2011 Alexey Kardashevskiy, IBM Corporation.
+ * Copyright (c) 2011 David Gibson, IBM Corporation.
+ *
+ * Modifications copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include "spapr.h"
+#include "spapr_pci.h"
+#include "kvm/util.h"
+#include "kvm/pci.h"
+
+#include <linux/pci_regs.h>
+#include <libfdt.h>
+#include <linux/byteorder.h>
+
+
+/* #define DEBUG_PHB yes */
+#ifdef DEBUG_PHB
+#define phb_dprintf(fmt, ...) \
+ do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
+#else
+#define phb_dprintf(fmt, ...) \
+ do { } while (0)
+#endif
+
+static const uint32_t bars[] = {
+ PCI_BASE_ADDRESS_0, PCI_BASE_ADDRESS_1,
+ PCI_BASE_ADDRESS_2, PCI_BASE_ADDRESS_3,
+ PCI_BASE_ADDRESS_4, PCI_BASE_ADDRESS_5
+ /*, PCI_ROM_ADDRESS*/
+};
+
+#define PCI_NUM_REGIONS 7
+
+/*
+ * Macros to operate with address in OF binding to PCI.
+ *
+ * b_x(x, p, l) keeps the low l bits of x and shifts them up to bit position
+ * p.  The mask and the shift are done in unsigned arithmetic so that placing
+ * a 1 in bit 31 (b_n(1)) never left-shifts into the sign bit of a signed
+ * int, which would be undefined behaviour.
+ */
+#define b_x(x, p, l)	(((x) & ((1U << (l)) - 1)) << (p))
+#define b_n(x)		b_x((x), 31, 1) /* 0 if relocatable */
+#define b_p(x)		b_x((x), 30, 1) /* 1 if prefetchable */
+#define b_t(x)		b_x((x), 29, 1) /* 1 if the address is aliased */
+#define b_ss(x)		b_x((x), 24, 2) /* the space code */
+#define b_bbbbbbbb(x)	b_x((x), 16, 8) /* bus number */
+#define b_ddddd(x)	b_x((x), 11, 5) /* device number */
+#define b_fff(x)	b_x((x), 8, 3) /* function number */
+#define b_rrrrrrrr(x)	b_x((x), 0, 8) /* register number */
+
+#define SS_M64 3
+#define SS_M32 2
+#define SS_IO 1
+#define SS_CONFIG 0
+
+
+static struct spapr_phb phb;
+
+
+/*
+ * RTAS "ibm,read-pci-config" handler.  Argument cells: 0 = config address,
+ * 1 and 2 = high and low halves of the BUID, 3 = access size in bytes.
+ * Return cell 0 is 0 on success with the value read in cell 1, or -1 when
+ * the BUID does not match this PHB, no device is found for the address, or
+ * size > 4.  token, nargs and nret are not examined.
+ */
+static void rtas_ibm_read_pci_config(struct kvm_cpu *vcpu,
+				     uint32_t token, uint32_t nargs,
+				     target_ulong args,
+				     uint32_t nret, target_ulong rets)
+{
+	struct kvm *kvm = vcpu->kvm;
+	union pci_config_address addr = { .w = rtas_ld(kvm, args, 0) };
+	uint64_t buid_hi = rtas_ld(kvm, args, 1);
+	uint64_t buid_lo = rtas_ld(kvm, args, 2);
+	uint64_t buid = (buid_hi << 32) | buid_lo;
+	struct pci_device_header *dev = pci__find_dev(addr.device_number);
+	uint32_t size = rtas_ld(kvm, args, 3);
+	uint32_t val = 0;
+
+	if (buid != phb.buid || !dev || (size > 4)) {
+		phb_dprintf("- cfgRd buid 0x%lx cfg addr 0x%x size %d not found\n",
+			    buid, addr.w, size);
+
+		rtas_st(kvm, rets, 0, -1);
+		return;
+	}
+
+	pci__config_rd(kvm, addr, &val, size);
+
+	/* It appears this wants a byteswapped result... */
+	if (size == 4)
+		val = le32_to_cpu(val);
+	else if (size == 2)
+		val = le16_to_cpu(val>>16);
+	else if (size == 1)
+		val = val >> 24;
+
+	phb_dprintf("- cfgRd buid 0x%lx addr 0x%x (/%d): b%d,d%d,f%d,r0x%x, val 0x%x\n",
+		    buid, addr.w, size, addr.bus_number, addr.device_number, addr.function_number,
+		    addr.register_number, val);
+
+	rtas_st(kvm, rets, 0, 0);
+	rtas_st(kvm, rets, 1, val);
+}
+
+/*
+ * RTAS "read-pci-config" handler.  Argument cells: 0 = config address,
+ * 1 = access size in bytes.  Return cell 0 is 0 on success with the value
+ * read in cell 1, or -1 when no device is found for the address or
+ * size > 4.  token, nargs and nret are not examined.
+ */
+static void rtas_read_pci_config(struct kvm_cpu *vcpu,
+				 uint32_t token, uint32_t nargs,
+				 target_ulong args,
+				 uint32_t nret, target_ulong rets)
+{
+	/*
+	 * Start from zero (as rtas_ibm_read_pci_config() does): a size that
+	 * does not fill all four bytes must not hand uninitialised stack
+	 * contents back to the guest.
+	 */
+	uint32_t val = 0;
+	union pci_config_address addr = { .w = rtas_ld(vcpu->kvm, args, 0) };
+	struct pci_device_header *dev = pci__find_dev(addr.device_number);
+	uint32_t size = rtas_ld(vcpu->kvm, args, 1);
+
+	if (!dev || (size > 4)) {
+		rtas_st(vcpu->kvm, rets, 0, -1);
+		return;
+	}
+	pci__config_rd(vcpu->kvm, addr, &val, size);
+	switch (size) {
+	case 4:
+		val = le32_to_cpu(val);
+		break;
+	case 2:
+		val = le16_to_cpu(val>>16); /* We're yuck-endian. */
+		break;
+	case 1:
+		val = val >> 24;
+		break;
+	default:
+		/* Sizes 0 and 3 pass the check above; val is returned as read. */
+		break;
+	}
+	phb_dprintf("- cfgRd addr 0x%x size %d, val 0x%x\n", addr.w, size, val);
+	rtas_st(vcpu->kvm, rets, 0, 0);
+	rtas_st(vcpu->kvm, rets, 1, val);
+}
+
+/*
+ * RTAS "ibm,write-pci-config" handler.  Argument cells: 0 = config address,
+ * 1 and 2 = high and low halves of the BUID, 3 = access size in bytes,
+ * 4 = value to write.  Return cell 0 is 0 on success, or -1 when the BUID
+ * does not match this PHB, no device is found for the address, or size > 4.
+ * token, nargs and nret are not examined.
+ */
+static void rtas_ibm_write_pci_config(struct kvm_cpu *vcpu,
+				      uint32_t token, uint32_t nargs,
+				      target_ulong args,
+				      uint32_t nret, target_ulong rets)
+{
+	uint64_t buid = ((uint64_t)rtas_ld(vcpu->kvm, args, 1) << 32) | rtas_ld(vcpu->kvm, args, 2);
+	union pci_config_address addr = { .w = rtas_ld(vcpu->kvm, args, 0) };
+	struct pci_device_header *dev = pci__find_dev(addr.device_number);
+	uint32_t size = rtas_ld(vcpu->kvm, args, 3);
+	uint32_t val = rtas_ld(vcpu->kvm, args, 4);
+
+	if (buid != phb.buid || !dev || (size > 4)) {
+		phb_dprintf("- cfgWr buid 0x%lx cfg addr 0x%x/%d error (val 0x%x)\n",
+			    buid, addr.w, size, val);
+
+		rtas_st(vcpu->kvm, rets, 0, -1);
+		return;
+	}
+	phb_dprintf("- cfgWr buid 0x%lx addr 0x%x (/%d): b%d,d%d,f%d,r0x%x, val 0x%x\n",
+		    buid, addr.w, size, addr.bus_number, addr.device_number, addr.function_number,
+		    addr.register_number, val);
+	/*
+	 * pci__config_wr() is given &val plus a byte count, so (assuming it
+	 * consumes the first 'size' bytes at that address) the data has to
+	 * sit at the most-significant end of val, mirroring how the read
+	 * handlers pull a 1-byte result out with val >> 24.  The 1-byte case
+	 * therefore shifts the value up; shifting it down by 24 would discard
+	 * it and always write zero.
+	 */
+	switch (size) {
+	case 4:
+		val = le32_to_cpu(val);
+		break;
+	case 2:
+		val = le16_to_cpu(val) << 16;
+		break;
+	case 1:
+		val = val << 24;
+		break;
+	default:
+		break;
+	}
+	pci__config_wr(vcpu->kvm, addr, &val, size);
+	rtas_st(vcpu->kvm, rets, 0, 0);
+}
+
+/*
+ * RTAS "write-pci-config" handler.  Argument cells: 0 = config address,
+ * 1 = access size in bytes, 2 = value to write.  Return cell 0 is 0 on
+ * success, or -1 when no device is found for the address or size > 4.
+ * token, nargs and nret are not examined.
+ */
+static void rtas_write_pci_config(struct kvm_cpu *vcpu,
+				  uint32_t token, uint32_t nargs,
+				  target_ulong args,
+				  uint32_t nret, target_ulong rets)
+{
+	union pci_config_address addr = { .w = rtas_ld(vcpu->kvm, args, 0) };
+	struct pci_device_header *dev = pci__find_dev(addr.device_number);
+	uint32_t size = rtas_ld(vcpu->kvm, args, 1);
+	uint32_t val = rtas_ld(vcpu->kvm, args, 2);
+
+	if (!dev || (size > 4)) {
+		rtas_st(vcpu->kvm, rets, 0, -1);
+		return;
+	}
+
+	phb_dprintf("- cfgWr addr 0x%x (/%d): b%d,d%d,f%d,r0x%x, val 0x%x\n",
+		    addr.w, size, addr.bus_number, addr.device_number, addr.function_number,
+		    addr.register_number, val);
+	/*
+	 * pci__config_wr() is given &val plus a byte count, so (assuming it
+	 * consumes the first 'size' bytes at that address) the data has to
+	 * sit at the most-significant end of val, mirroring how the read
+	 * handlers pull a 1-byte result out with val >> 24.  The 1-byte case
+	 * therefore shifts the value up; shifting it down by 24 would discard
+	 * it and always write zero.
+	 */
+	switch (size) {
+	case 4:
+		val = le32_to_cpu(val);
+		break;
+	case 2:
+		val = le16_to_cpu(val) << 16;
+		break;
+	case 1:
+		val = val << 24;
+		break;
+	default:
+		break;
+	}
+	pci__config_wr(vcpu->kvm, addr, &val, size);
+	rtas_st(vcpu->kvm, rets, 0, 0);
+}
+
+/*
+ * Set up the single, file-static PHB: record its BUID and the memory and
+ * I/O window addresses/sizes, publish it through kvm->phb, and register the
+ * four PCI config-space RTAS calls.  busname is accepted but not used.
+ */
+void spapr_create_phb(struct kvm *kvm,
+		      const char *busname, uint64_t buid,
+		      uint64_t mem_win_addr, uint64_t mem_win_size,
+		      uint64_t io_win_addr, uint64_t io_win_size)
+{
+	phb = (struct spapr_phb) {
+		.buid		= buid,
+		.mem_addr	= mem_win_addr,
+		.mem_size	= mem_win_size,
+		.io_addr	= io_win_addr,
+		.io_size	= io_win_size,
+	};
+	kvm->phb = &phb;
+
+	/*
+	 * Since kvmtool doesn't really have any concept of buses etc.,
+	 * there's nothing to register here. Just register RTAS.
+	 */
+	spapr_rtas_register("read-pci-config", rtas_read_pci_config);
+	spapr_rtas_register("write-pci-config", rtas_write_pci_config);
+	spapr_rtas_register("ibm,read-pci-config", rtas_ibm_read_pci_config);
+	spapr_rtas_register("ibm,write-pci-config", rtas_ibm_write_pci_config);
+}
+
+/*
+ * Map a BAR value to its space code: SS_IO for an I/O BAR, SS_M64 for a
+ * memory BAR with the 64-bit type bit set, SS_M32 for any other memory BAR.
+ */
+static uint32_t bar_to_ss(unsigned long bar)
+{
+	const int is_io = (bar & PCI_BASE_ADDRESS_SPACE) ==
+			  PCI_BASE_ADDRESS_SPACE_IO;
+
+	if (is_io)
+		return SS_IO;
+
+	return (bar & PCI_BASE_ADDRESS_MEM_TYPE_64) ? SS_M64 : SS_M32;
+}
+
+/*
+ * Strip the flag bits from a BAR value, leaving only the address: the I/O
+ * mask is applied to I/O BARs and the memory mask to everything else.
+ */
+static unsigned long bar_to_addr(unsigned long bar)
+{
+	unsigned long mask = PCI_BASE_ADDRESS_MEM_MASK;
+
+	if ((bar & PCI_BASE_ADDRESS_SPACE) == PCI_BASE_ADDRESS_SPACE_IO)
+		mask = PCI_BASE_ADDRESS_IO_MASK;
+
+	return bar & mask;
+}
+
+/*
+ * Add the PHB node ("pci@<buid>") to the device tree in fdt, then one child
+ * node per device returned by pci__find_dev(), and finally the PHB's
+ * "interrupt-map" with one entry per device found.
+ *
+ * kvm is not used directly; the PHB parameters come from the file-static
+ * phb set up by spapr_create_phb().  xics_phandle is written into every
+ * interrupt-map entry as the interrupt parent.
+ *
+ * Returns 0 on success.  Any libfdt failure is fatal via die()/_FDT().
+ */
+int spapr_populate_pci_devices(struct kvm *kvm,
+			       uint32_t xics_phandle,
+			       void *fdt)
+{
+	int bus_off, node_off = 0, devid, fn, i, n, devices;
+	char nodename[256];
+	struct {
+		uint32_t hi;
+		uint64_t addr;
+		uint64_t size;
+	} __attribute__((packed)) reg[PCI_NUM_REGIONS + 1],
+	  assigned_addresses[PCI_NUM_REGIONS];
+	uint32_t bus_range[] = { cpu_to_be32(0), cpu_to_be32(0xff) };
+	struct {
+		uint32_t hi;
+		uint64_t child;
+		uint64_t parent;
+		uint64_t size;
+	} __attribute__((packed)) ranges[] = {
+		{
+			cpu_to_be32(b_ss(1)), cpu_to_be64(0),
+			cpu_to_be64(phb.io_addr),
+			cpu_to_be64(phb.io_size),
+		},
+		{
+			cpu_to_be32(b_ss(2)), cpu_to_be64(0),
+			cpu_to_be64(phb.mem_addr),
+			cpu_to_be64(phb.mem_size),
+		},
+	};
+	uint64_t bus_reg[] = { cpu_to_be64(phb.buid), 0 };
+	uint32_t interrupt_map_mask[] = {
+		cpu_to_be32(b_ddddd(-1)|b_fff(-1)), 0x0, 0x0, 0x0};
+	uint32_t interrupt_map[SPAPR_PCI_NUM_LSI][7];
+
+	/* Start populating the FDT */
+	snprintf(nodename, sizeof(nodename), "pci@%" PRIx64, phb.buid);
+	bus_off = fdt_add_subnode(fdt, 0, nodename);
+	if (bus_off < 0) {
+		die("error making bus subnode, %s\n", fdt_strerror(bus_off));
+		return bus_off;
+	}
+
+	/* Write PHB properties */
+	_FDT(fdt_setprop_string(fdt, bus_off, "device_type", "pci"));
+	_FDT(fdt_setprop_string(fdt, bus_off, "compatible", "IBM,Logical_PHB"));
+	_FDT(fdt_setprop_cell(fdt, bus_off, "#address-cells", 0x3));
+	_FDT(fdt_setprop_cell(fdt, bus_off, "#size-cells", 0x2));
+	_FDT(fdt_setprop_cell(fdt, bus_off, "#interrupt-cells", 0x1));
+	_FDT(fdt_setprop(fdt, bus_off, "used-by-rtas", NULL, 0));
+	_FDT(fdt_setprop(fdt, bus_off, "bus-range", &bus_range, sizeof(bus_range)));
+	_FDT(fdt_setprop(fdt, bus_off, "ranges", &ranges, sizeof(ranges)));
+	_FDT(fdt_setprop(fdt, bus_off, "reg", &bus_reg, sizeof(bus_reg)));
+	_FDT(fdt_setprop(fdt, bus_off, "interrupt-map-mask",
+			 &interrupt_map_mask, sizeof(interrupt_map_mask)));
+
+	/* Populate PCI devices and allocate IRQs */
+	devices = 0;
+
+	for (devid = 0; devid < PCI_MAX_DEVICES; devid++) {
+		uint32_t *irqmap;
+		struct pci_device_header *hdr = pci__find_dev(devid);
+
+		if (!hdr)
+			continue;
+
+		fn = 0; /* kvmtool doesn't yet do multifunction devices */
+
+		snprintf(nodename, sizeof(nodename), "pci@%u,%u", devid, fn);
+
+		/*
+		 * Allocate interrupt from the map.  'devices' never exceeds
+		 * devid, so rejecting devid >= SPAPR_PCI_NUM_LSI keeps the
+		 * interrupt_map[] index strictly inside the array.
+		 */
+		if (devid >= SPAPR_PCI_NUM_LSI) {
+			die("Unexpected behaviour in spapr_populate_pci_devices,"
+			    "wrong devid %u\n", devid);
+		}
+		irqmap = interrupt_map[devices];
+		irqmap[0] = cpu_to_be32(b_ddddd(devid)|b_fff(fn));
+		irqmap[1] = 0;
+		irqmap[2] = 0;
+		irqmap[3] = 0;
+		irqmap[4] = cpu_to_be32(xics_phandle);
+		/*
+		 * This is nasty; the PCI devs are set up such that their own
+		 * header's irq_line indicates the direct XICS IRQ number to
+		 * use. There REALLY needs to be a hierarchical system in place
+		 * to 'raise' an IRQ on the bridge which indexes/looks up which
+		 * XICS IRQ to fire.
+		 */
+		irqmap[5] = cpu_to_be32(hdr->irq_line);
+		irqmap[6] = cpu_to_be32(0x8);
+
+		/* Add node to FDT */
+		node_off = fdt_add_subnode(fdt, bus_off, nodename);
+		if (node_off < 0) {
+			/* Report the error from this call, not the earlier bus_off. */
+			die("error making node subnode, %s\n", fdt_strerror(node_off));
+			return node_off;
+		}
+
+		_FDT(fdt_setprop_cell(fdt, node_off, "vendor-id",
+				      le16_to_cpu(hdr->vendor_id)));
+		_FDT(fdt_setprop_cell(fdt, node_off, "device-id",
+				      le16_to_cpu(hdr->device_id)));
+		_FDT(fdt_setprop_cell(fdt, node_off, "revision-id",
+				      hdr->revision_id));
+		_FDT(fdt_setprop_cell(fdt, node_off, "class-code",
+				      hdr->class[0] | (hdr->class[1] << 8) | (hdr->class[2] << 16)));
+		_FDT(fdt_setprop_cell(fdt, node_off, "subsystem-id",
+				      le16_to_cpu(hdr->subsys_id)));
+		_FDT(fdt_setprop_cell(fdt, node_off, "subsystem-vendor-id",
+				      le16_to_cpu(hdr->subsys_vendor_id)));
+
+		/* Config space region comes first */
+		reg[0].hi = cpu_to_be32(
+			b_n(0) |
+			b_p(0) |
+			b_t(0) |
+			b_ss(SS_CONFIG) |
+			b_bbbbbbbb(0) |
+			b_ddddd(devid) |
+			b_fff(fn));
+		reg[0].addr = 0;
+		reg[0].size = 0;
+
+		n = 0;
+		/* Six BARs, no ROM supported, addresses are 32bit */
+		for (i = 0; i < 6; ++i) {
+			if (0 == hdr->bar[i]) {
+				continue;
+			}
+
+			reg[n+1].hi = cpu_to_be32(
+				b_n(0) |
+				b_p(0) |
+				b_t(0) |
+				b_ss(bar_to_ss(le32_to_cpu(hdr->bar[i]))) |
+				b_bbbbbbbb(0) |
+				b_ddddd(devid) |
+				b_fff(fn) |
+				b_rrrrrrrr(bars[i]));
+			reg[n+1].addr = 0;
+			reg[n+1].size = cpu_to_be64(hdr->bar_size[i]);
+
+			assigned_addresses[n].hi = cpu_to_be32(
+				b_n(1) |
+				b_p(0) |
+				b_t(0) |
+				b_ss(bar_to_ss(le32_to_cpu(hdr->bar[i]))) |
+				b_bbbbbbbb(0) |
+				b_ddddd(devid) |
+				b_fff(fn) |
+				b_rrrrrrrr(bars[i]));
+
+			/*
+			 * Writing zeroes to assigned_addresses causes the guest kernel to
+			 * reassign BARs
+			 */
+			assigned_addresses[n].addr = cpu_to_be64(bar_to_addr(le32_to_cpu(hdr->bar[i])));
+			assigned_addresses[n].size = reg[n+1].size;
+
+			++n;
+		}
+		_FDT(fdt_setprop(fdt, node_off, "reg", reg, sizeof(reg[0])*(n+1)));
+		_FDT(fdt_setprop(fdt, node_off, "assigned-addresses",
+				 assigned_addresses,
+				 sizeof(assigned_addresses[0])*(n)));
+		_FDT(fdt_setprop_cell(fdt, node_off, "interrupts",
+				      hdr->irq_pin));
+
+		/* We don't set ibm,dma-window property as we don't have an IOMMU. */
+
+		++devices;
+	}
+
+	/* Write interrupt map */
+	_FDT(fdt_setprop(fdt, bus_off, "interrupt-map", &interrupt_map,
+			 devices * sizeof(interrupt_map[0])));
+
+	return 0;
+}
diff --git a/tools/kvm/powerpc/spapr_pci.h b/tools/kvm/powerpc/spapr_pci.h
new file mode 100644
index 0000000..8a71f71
--- /dev/null
+++ b/tools/kvm/powerpc/spapr_pci.h
@@ -0,0 +1,38 @@
+/*
+ * SPAPR PHB definitions
+ *
+ * Modifications by Matt Evans <matt@ozlabs.org>, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#ifndef SPAPR_PCI_H
+#define SPAPR_PCI_H
+
+#include "kvm/kvm.h"
+#include <inttypes.h>
+
+/* With XICS, we can easily accommodate 1 IRQ per PCI device. */
+
+#define SPAPR_PCI_NUM_LSI 256
+
+struct spapr_phb { /* Parameters of the emulated PHB, filled in by spapr_create_phb(). */
+ uint64_t buid; /* bus unit ID, as passed to spapr_create_phb() */
+ uint64_t mem_addr; /* memory window address */
+ uint64_t mem_size; /* memory window size */
+ uint64_t io_addr; /* I/O window address */
+ uint64_t io_size; /* I/O window size */
+};
+
+void spapr_create_phb(struct kvm *kvm,
+ const char *busname, uint64_t buid,
+ uint64_t mem_win_addr, uint64_t mem_win_size,
+ uint64_t io_win_addr, uint64_t io_win_size);
+
+int spapr_populate_pci_devices(struct kvm *kvm,
+ uint32_t xics_phandle,
+ void *fdt);
+
+#endif
--
1.7.0.4
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH V2 6/6] kvm tools: Add PPC64 kvm_cpu__emulate_io()
2011-12-13 7:10 [PATCH V2 0/6] Add initial SPAPR PPC64 architecture support Matt Evans
` (4 preceding siblings ...)
2011-12-13 7:10 ` [PATCH V2 5/6] kvm tools: Add PPC64 PCI Host Bridge Matt Evans
@ 2011-12-13 7:10 ` Matt Evans
5 siblings, 0 replies; 12+ messages in thread
From: Matt Evans @ 2011-12-13 7:10 UTC (permalink / raw)
To: kvm, kvm-ppc; +Cc: penberg, asias.hejun, levinsasha928, gorcunov, david, aik
This is the final piece of the puzzle for PPC SPAPR PCI; this
function splits MMIO accesses into the two PHB windows & directs
things to MMIO/IO emulation as appropriate.
Signed-off-by: Matt Evans <matt@ozlabs.org>
---
tools/kvm/Makefile | 1 +
tools/kvm/powerpc/include/kvm/kvm-cpu-arch.h | 10 ++++++++-
tools/kvm/powerpc/kvm-cpu.c | 29 ++++++++++++++++++++++++++
3 files changed, 39 insertions(+), 1 deletions(-)
diff --git a/tools/kvm/Makefile b/tools/kvm/Makefile
index 0d42acf..6f3485b 100644
--- a/tools/kvm/Makefile
+++ b/tools/kvm/Makefile
@@ -134,6 +134,7 @@ ifeq ($(uname_M), ppc64)
OBJS += powerpc/spapr_hcall.o
OBJS += powerpc/spapr_rtas.o
OBJS += powerpc/spapr_hvcons.o
+ OBJS += powerpc/spapr_pci.o
OBJS += powerpc/xics.o
ARCH_INCLUDE := powerpc/include
CFLAGS += -m64
diff --git a/tools/kvm/powerpc/include/kvm/kvm-cpu-arch.h b/tools/kvm/powerpc/include/kvm/kvm-cpu-arch.h
index c1c6539..7520c04 100644
--- a/tools/kvm/powerpc/include/kvm/kvm-cpu-arch.h
+++ b/tools/kvm/powerpc/include/kvm/kvm-cpu-arch.h
@@ -14,7 +14,7 @@
/* Architecture-specific kvm_cpu definitions. */
#include <linux/kvm.h> /* for struct kvm_regs */
-
+#include <stdbool.h>
#include <pthread.h>
#define MSR_SF (1UL<<63)
@@ -65,4 +65,12 @@ struct kvm_cpu {
void kvm_cpu__irq(struct kvm_cpu *vcpu, int pin, int level);
+/* This is never actually called on PPC. */
+static inline bool kvm_cpu__emulate_io(struct kvm *kvm, u16 port, void *data, int direction, int size, u32 count)
+{
+ return false; /* stub: ignores every argument and unconditionally returns false */
+}
+
+bool kvm_cpu__emulate_mmio(struct kvm *kvm, u64 phys_addr, u8 *data, u32 len, u8 is_write);
+
#endif /* KVM__KVM_CPU_ARCH_H */
diff --git a/tools/kvm/powerpc/kvm-cpu.c b/tools/kvm/powerpc/kvm-cpu.c
index 5ef1cbf..4da4990 100644
--- a/tools/kvm/powerpc/kvm-cpu.c
+++ b/tools/kvm/powerpc/kvm-cpu.c
@@ -24,6 +24,7 @@
#include <string.h>
#include <errno.h>
#include <stdio.h>
+#include <assert.h>
static int debug_fd;
@@ -173,6 +174,34 @@ bool kvm_cpu__handle_exit(struct kvm_cpu *vcpu)
return ret;
}
+/*
+ * Dispatch a guest MMIO access to the matching PHB window.  An address
+ * inside the PCI I/O window is forwarded to kvm__emulate_io() and one inside
+ * the PCI memory window to kvm__emulate_mmio(), in both cases with the
+ * window base subtracted.  Any other address is logged and false is
+ * returned; otherwise the emulation routine's result is returned.
+ *
+ * FIXME: This function will need to be split in order to support
+ * various PowerPC platforms/PHB types, etc. It currently assumes SPAPR
+ * PPC64 guest.
+ */
+bool kvm_cpu__emulate_mmio(struct kvm *kvm, u64 phys_addr, u8 *data, u32 len, u8 is_write)
+{
+	if ((phys_addr >= SPAPR_PCI_IO_WIN_ADDR) &&
+	    (phys_addr < SPAPR_PCI_IO_WIN_ADDR + SPAPR_PCI_IO_WIN_SIZE)) {
+		int direction = is_write ? KVM_EXIT_IO_OUT : KVM_EXIT_IO_IN;
+
+		return kvm__emulate_io(kvm, phys_addr - SPAPR_PCI_IO_WIN_ADDR,
+				       data, direction, len, 1);
+	}
+
+	if ((phys_addr >= SPAPR_PCI_MEM_WIN_ADDR) &&
+	    (phys_addr < SPAPR_PCI_MEM_WIN_ADDR + SPAPR_PCI_MEM_WIN_SIZE)) {
+		return kvm__emulate_mmio(kvm, phys_addr - SPAPR_PCI_MEM_WIN_ADDR,
+					 data, len, is_write);
+	}
+
+	pr_warning("MMIO %s unknown address %llx (size %d)!\n",
+		   is_write ? "write to" : "read from",
+		   phys_addr, len);
+	return false;
+}
+
#define CONDSTR_BIT(m, b) (((m) & MSR_##b) ? #b" " : "")
void kvm_cpu__show_registers(struct kvm_cpu *vcpu)
--
1.7.0.4
^ permalink raw reply related [flat|nested] 12+ messages in thread