From: Alexander Graf <agraf@suse.de>
To: David Gibson <david@gibson.dropbear.id.au>
Cc: paulus@samba.org, qemu-devel@nongnu.org, anton@samba.org
Subject: [Qemu-devel] Re: [PATCH 15/26] Virtual hash page table handling on pSeries machine
Date: Wed, 16 Mar 2011 16:03:47 +0100 [thread overview]
Message-ID: <4D80D153.7050606@suse.de> (raw)
In-Reply-To: <1300251423-6715-16-git-send-email-david@gibson.dropbear.id.au>
On 03/16/2011 05:56 AM, David Gibson wrote:
> On pSeries logical partitions, excepting the old POWER4-style full system
> partitions, the guest does not have direct access to the hardware page
> table. Instead, the pagetable exists in hypervisor memory, and the guest
> must manipulate it with hypercalls.
>
> However, our current pSeries emulation more closely resembles the old
> style where the guest must set up and handle the pagetables itself. This
> patch converts it to act like a modern partition.
>
> This involves two things: first, the hash translation path is modified to
> permit the hash table to be stored externally to the emulated machine's
> RAM. The pSeries machine init code configures the CPUs to use this mode.
>
> Secondly, we emulate the PAPR hypercalls for manipulating the external
> hashed page table.
>
> Signed-off-by: David Gibson<dwg@au1.ibm.com>
> ---
> hw/spapr.c | 32 ++++++-
> hw/spapr_hcall.c | 247 +++++++++++++++++++++++++++++++++++++++++++++++++++
> target-ppc/cpu.h | 2 +
> target-ppc/helper.c | 36 ++++++--
> 4 files changed, 305 insertions(+), 12 deletions(-)
>
> diff --git a/hw/spapr.c b/hw/spapr.c
> index 25e4a9e..c3d9286 100644
> --- a/hw/spapr.c
> +++ b/hw/spapr.c
> @@ -50,12 +50,15 @@ static void *spapr_create_fdt(int *fdt_size, ram_addr_t ramsize,
> sPAPREnvironment *spapr,
> target_phys_addr_t initrd_base,
> target_phys_addr_t initrd_size,
> - const char *kernel_cmdline)
> + const char *kernel_cmdline,
> + long hash_shift)
> {
> void *fdt;
> uint64_t mem_reg_property[] = { 0, cpu_to_be64(ramsize) };
> uint32_t start_prop = cpu_to_be32(initrd_base);
> uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size);
> + uint32_t pft_size_prop[] = {0, cpu_to_be32(hash_shift)};
> + char hypertas_prop[] = "hcall-pft\0hcall-term";
> int i;
> char *modelname;
> int ret;
> @@ -138,6 +141,7 @@ static void *spapr_create_fdt(int *fdt_size, ram_addr_t ramsize,
> * full emu, for kvm we should copy it from the host */
> _FDT((fdt_property_cell(fdt, "clock-frequency", 1000000000)));
> _FDT((fdt_property_cell(fdt, "ibm,slb-size", env->slb_nr)));
> + _FDT((fdt_property(fdt, "ibm,pft-size", pft_size_prop, sizeof(pft_size_prop))));
> _FDT((fdt_property_string(fdt, "status", "okay")));
> _FDT((fdt_property(fdt, "64-bit", NULL, 0)));
>
> @@ -153,6 +157,14 @@ static void *spapr_create_fdt(int *fdt_size, ram_addr_t ramsize,
>
> _FDT((fdt_end_node(fdt)));
>
> + /* RTAS */
> + _FDT((fdt_begin_node(fdt, "rtas")));
> +
> + _FDT((fdt_property(fdt, "ibm,hypertas-functions", hypertas_prop,
> + sizeof(hypertas_prop))));
> +
> + _FDT((fdt_end_node(fdt)));
> +
> /* vdevice */
> _FDT((fdt_begin_node(fdt, "vdevice")));
>
> @@ -203,12 +215,13 @@ static void ppc_spapr_init(ram_addr_t ram_size,
> const char *cpu_model)
> {
> CPUState *envs[MAX_CPUS];
> - void *fdt;
> + void *fdt, *htab;
> int i;
> ram_addr_t ram_offset;
> target_phys_addr_t fdt_addr;
> uint32_t kernel_base, initrd_base;
> - long kernel_size, initrd_size;
> + long kernel_size, initrd_size, htab_size;
> + long pteg_shift = 17;
> int fdt_size;
> sPAPREnvironment *spapr;
>
> @@ -248,6 +261,16 @@ static void ppc_spapr_init(ram_addr_t ram_size,
> ram_offset = qemu_ram_alloc(NULL, "ppc_spapr.ram", ram_size);
> cpu_register_physical_memory(0, ram_size, ram_offset);
>
> + /* allocate hash page table */
> + htab_size = 1ULL<< (pteg_shift + 7);
Linux makes the htab size depend on the amount of RAM provided.
Shouldn't we do the same?
> + htab = qemu_mallocz(htab_size);
> +
> + for (i = 0; i< smp_cpus; i++) {
> + envs[i]->external_htab = htab;
> + envs[i]->htab_base = -1;
> + envs[i]->htab_mask = htab_size - 1;
> + }
> +
> spapr->vio_bus = spapr_vio_bus_init();
>
> for (i = 0; i< MAX_SERIAL_PORTS; i++) {
> @@ -293,7 +316,8 @@ static void ppc_spapr_init(ram_addr_t ram_size,
>
> /* Prepare the device tree */
> fdt = spapr_create_fdt(&fdt_size, ram_size, cpu_model, envs, spapr,
> - initrd_base, initrd_size, kernel_cmdline);
> + initrd_base, initrd_size, kernel_cmdline,
> + pteg_shift + 7);
> if (!fdt) {
> hw_error("Couldn't create pSeries device tree\n");
> exit(1);
> diff --git a/hw/spapr_hcall.c b/hw/spapr_hcall.c
> index 6ddac00..2b14000 100644
> --- a/hw/spapr_hcall.c
> +++ b/hw/spapr_hcall.c
> @@ -1,8 +1,246 @@
> #include "sysemu.h"
> #include "cpu.h"
> #include "qemu-char.h"
> +#include "sysemu.h"
> +#include "qemu-char.h"
> +#include "exec-all.h"
> #include "hw/spapr.h"
>
> +#define HPTES_PER_GROUP 8
> +
> +#define HPTE_V_SSIZE_SHIFT 62
> +#define HPTE_V_AVPN_SHIFT 7
> +#define HPTE_V_AVPN 0x3fffffffffffff80ULL
> +#define HPTE_V_AVPN_VAL(x) (((x)& HPTE_V_AVPN)>> HPTE_V_AVPN_SHIFT)
> +#define HPTE_V_COMPARE(x,y) (!(((x) ^ (y))& 0xffffffffffffff80UL))
> +#define HPTE_V_BOLTED 0x0000000000000010ULL
> +#define HPTE_V_LOCK 0x0000000000000008ULL
> +#define HPTE_V_LARGE 0x0000000000000004ULL
> +#define HPTE_V_SECONDARY 0x0000000000000002ULL
> +#define HPTE_V_VALID 0x0000000000000001ULL
> +
> +#define HPTE_R_PP0 0x8000000000000000ULL
> +#define HPTE_R_TS 0x4000000000000000ULL
> +#define HPTE_R_KEY_HI 0x3000000000000000ULL
> +#define HPTE_R_RPN_SHIFT 12
> +#define HPTE_R_RPN 0x3ffffffffffff000ULL
> +#define HPTE_R_FLAGS 0x00000000000003ffULL
> +#define HPTE_R_PP 0x0000000000000003ULL
> +#define HPTE_R_N 0x0000000000000004ULL
> +#define HPTE_R_G 0x0000000000000008ULL
> +#define HPTE_R_M 0x0000000000000010ULL
> +#define HPTE_R_I 0x0000000000000020ULL
> +#define HPTE_R_W 0x0000000000000040ULL
> +#define HPTE_R_WIMG 0x0000000000000078ULL
> +#define HPTE_R_C 0x0000000000000080ULL
> +#define HPTE_R_R 0x0000000000000100ULL
> +#define HPTE_R_KEY_LO 0x0000000000000e00ULL
> +
> +#define HPTE_V_1TB_SEG 0x4000000000000000ULL
> +#define HPTE_V_VRMA_MASK 0x4001ffffff000000ULL
> +
> +#define HPTE_V_HVLOCK 0x40ULL
> +
> +static inline int lock_hpte(void *hpte, target_ulong bits)
> +{
> + uint64_t pteh;
> +
> + pteh = ldq_p(hpte);
> +
> + /* FIXME: probably need some sort of lockage for SMP */
Guest SMP doesn't get mapped to host SMP. So you're safe here.
> + if (pteh& bits) {
> + return 0;
> + }
> + stq_p(hpte, pteh | HPTE_V_HVLOCK);
> + return 1;
> +}
> +
> +static target_ulong compute_tlbie_rb(target_ulong v, target_ulong r,
> + target_ulong pte_index)
> +{
> + target_ulong rb, va_low;
> +
> + rb = (v& ~0x7fULL)<< 16; /* AVA field */
> + va_low = pte_index>> 3;
> + if (v& HPTE_V_SECONDARY)
Braces
> + va_low = ~va_low;
> + /* xor vsid from AVA */
> + if (!(v& HPTE_V_1TB_SEG))
Braces
> + va_low ^= v>> 12;
> + else
> + va_low ^= v>> 24;
> + va_low&= 0x7ff;
> + if (v& HPTE_V_LARGE) {
> + rb |= 1; /* L field */
> +#if 0 /* Disable that P7 specific bit for now */
> + if (r& 0xff000) {
> + /* non-16MB large page, must be 64k */
> + /* (masks depend on page size) */
> + rb |= 0x1000; /* page encoding in LP field */
> + rb |= (va_low& 0x7f)<< 16; /* 7b of VA in AVA/LP field */
> + rb |= (va_low& 0xfe); /* AVAL field */
> + }
> +#endif
> + } else {
> + /* 4kB page */
> + rb |= (va_low& 0x7ff)<< 12; /* remaining 11b of AVA */
> + }
> + rb |= (v>> 54)& 0x300; /* B field */
> + return rb;
> +}
> +
> +static target_ulong h_enter(CPUState *env, sPAPREnvironment *spapr,
> + target_ulong opcode, target_ulong *args)
> +{
> + target_ulong flags = args[0];
> + target_ulong pte_index = args[1];
> + target_ulong pteh = args[2];
> + target_ulong ptel = args[3];
> + target_ulong porder;
> + target_ulong i, pa;
> + uint8_t *hpte;
> +
> + /* only handle 4k and 16M pages for now */
> + porder = 12;
> + if (pteh& HPTE_V_LARGE) {
> + if ((ptel& 0xf000) == 0x1000) {
> + /* 64k page */
According to the comment above and the #if 0 block in compute_tlbie_rb(),
you don't support 64k pages, do you?
> + porder = 16;
> + } else if ((ptel& 0xff000) == 0) {
> + /* 16M page */
> + porder = 24;
> + /* lowest AVA bit must be 0 for 16M pages */
> + if (pteh& 0x80)
Braces
> + return H_PARAMETER;
> + } else {
> + return H_PARAMETER;
> + }
> + }
> +
> + pa = ptel& HPTE_R_RPN;
> + /* FIXME: bounds check the pa? */
> +
> + /* Check WIMG */
> + if ((ptel& HPTE_R_WIMG) != HPTE_R_M)
Braces
> + return H_PARAMETER;
> + pteh&= ~0x60ULL;
> +
> + if ((pte_index * HASH_PTE_SIZE_64)& ~env->htab_mask)
Braces
> + return H_PARAMETER;
> + if (likely((flags& H_EXACT) == 0)) {
> + pte_index&= ~7ULL;
> + hpte = env->external_htab + (pte_index * HASH_PTE_SIZE_64);
> + for (i = 0; ; ++i) {
> + if (i == 8)
Braces
> + return H_PTEG_FULL;
> + if (((ldq_p(hpte)& HPTE_V_VALID) == 0)&&
> + lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID)) {
> + break;
> + }
> + hpte += HASH_PTE_SIZE_64;
> + }
> + } else {
> + i = 0;
> + hpte = env->external_htab + (pte_index * HASH_PTE_SIZE_64);
> + if (!lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID)) {
> + return H_PTEG_FULL;
> + }
> + }
> + stq_p(hpte + (HASH_PTE_SIZE_64/2), ptel);
> + /* eieio(); FIXME: need some sort of barrier for smp? */
see above :)
> + stq_p(hpte, pteh);
> +
> + assert (!(ldq_p(hpte)& HPTE_V_HVLOCK));
> + args[0] = pte_index + i;
> + return H_SUCCESS;
> +}
> +
> +static target_ulong h_remove(CPUState *env, sPAPREnvironment *spapr,
> + target_ulong opcode, target_ulong *args)
> +{
> + target_ulong flags = args[0];
> + target_ulong pte_index = args[1];
> + target_ulong avpn = args[2];
> + uint8_t *hpte;
> + target_ulong v, r, rb;
> +
> + if ((pte_index * HASH_PTE_SIZE_64)& ~env->htab_mask) {
> + return H_PARAMETER;
> + }
> +
> + hpte = env->external_htab + (pte_index * HASH_PTE_SIZE_64);
> + while (!lock_hpte(hpte, HPTE_V_HVLOCK)) {
> + /* We have no real concurrency in qemu soft-emulation, so we
> + * will never actually have a contested lock */
> + assert(0);
> + }
> +
> + v = ldq_p(hpte);
> + r = ldq_p(hpte + (HASH_PTE_SIZE_64/2));
> +
> + if ((v& HPTE_V_VALID) == 0 ||
> + ((flags& H_AVPN)&& (v& ~0x7fULL) != avpn) ||
> + ((flags& H_ANDCOND)&& (v& avpn) != 0)) {
> + stq_p(hpte, v& ~HPTE_V_HVLOCK);
> + assert (!(ldq_p(hpte)& HPTE_V_HVLOCK));
> + return H_NOT_FOUND;
> + }
> + args[0] = v& ~HPTE_V_HVLOCK;
> + args[1] = r;
> + stq_p(hpte, 0);
> + rb = compute_tlbie_rb(v, r, pte_index);
> +// ppc_tlb_invalidate_one(env, rb);
Huh?
> + tlb_flush(env, 1);
> + assert (!(ldq_p(hpte)& HPTE_V_HVLOCK));
> + return H_SUCCESS;
> +}
> +
> +static target_ulong h_protect(CPUState *env, sPAPREnvironment *spapr,
> + target_ulong opcode, target_ulong *args)
> +{
> + target_ulong flags = args[0];
> + target_ulong pte_index = args[1];
> + target_ulong avpn = args[2];
> + uint8_t *hpte;
> + target_ulong v, r, rb;
> +
> + if ((pte_index * HASH_PTE_SIZE_64)& ~env->htab_mask) {
> + return H_PARAMETER;
> + }
> +
> + hpte = env->external_htab + (pte_index * HASH_PTE_SIZE_64);
> + while (!lock_hpte(hpte, HPTE_V_HVLOCK)) {
> + /* We have no real concurrency in qemu soft-emulation, so we
> + * will never actually have a contested lock */
> + assert(0);
> + }
> +
> + v = ldq_p(hpte);
> + r = ldq_p(hpte + (HASH_PTE_SIZE_64/2));
> +
> + if ((v& HPTE_V_VALID) == 0 ||
> + ((flags& H_AVPN)&& (v& ~0x7fULL) != avpn)) {
> + stq_p(hpte, v& ~HPTE_V_HVLOCK);
> + assert (!(ldq_p(hpte)& HPTE_V_HVLOCK));
> + return H_NOT_FOUND;
> + }
> +
> + r&= ~(HPTE_R_PP0 | HPTE_R_PP | HPTE_R_N |
> + HPTE_R_KEY_HI | HPTE_R_KEY_LO);
> + r |= (flags<< 55)& HPTE_R_PP0;
> + r |= (flags<< 48)& HPTE_R_KEY_HI;
> + r |= flags& (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO);
> + rb = compute_tlbie_rb(v, r, pte_index);
> + stq_p(hpte, v& ~HPTE_V_VALID);
> + //ppc_tlb_invalidate_one(env, rb);
Huh?
> + tlb_flush(env, 1);
Wow, why do you need a full tlb flush here?
Alex
next prev parent reply other threads:[~2011-03-16 15:03 UTC|newest]
Thread overview: 82+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-03-16 4:56 [Qemu-devel] Implement emulation of pSeries logical partitions (v3) David Gibson
2011-03-16 4:56 ` [Qemu-devel] [PATCH 01/26] Clean up PowerPC SLB handling code David Gibson
2011-03-16 4:56 ` [Qemu-devel] [PATCH 02/26] Allow qemu_devtree_setprop() to take arbitrary values David Gibson
2011-03-16 4:56 ` [Qemu-devel] [PATCH 03/26] Add a hook to allow hypercalls to be emulated on PowerPC David Gibson
2011-03-16 13:46 ` [Qemu-devel] " Alexander Graf
2011-03-16 16:58 ` Stefan Hajnoczi
2011-03-17 2:26 ` David Gibson
2011-03-16 20:44 ` [Qemu-devel] " Anthony Liguori
2011-03-17 4:55 ` David Gibson
2011-03-17 13:20 ` Anthony Liguori
2011-03-18 4:03 ` David Gibson
2011-03-18 6:57 ` Alexander Graf
2011-03-16 4:56 ` [Qemu-devel] [PATCH 04/26] Implement PowerPC slbmfee and slbmfev instructions David Gibson
2011-03-16 4:56 ` [Qemu-devel] [PATCH 05/26] Implement missing parts of the logic for the POWER PURR David Gibson
2011-03-16 4:56 ` [Qemu-devel] [PATCH 06/26] Correct ppc popcntb logic, implement popcntw and popcntd David Gibson
2011-03-16 4:56 ` [Qemu-devel] [PATCH 07/26] Clean up slb_lookup() function David Gibson
2011-03-16 4:56 ` [Qemu-devel] [PATCH 08/26] Parse SDR1 on mtspr instead of at translate time David Gibson
2011-03-16 4:56 ` [Qemu-devel] [PATCH 09/26] Use "hash" more consistently in ppc mmu code David Gibson
2011-03-16 4:56 ` [Qemu-devel] [PATCH 10/26] Better factor the ppc hash translation path David Gibson
2011-03-16 4:56 ` [Qemu-devel] [PATCH 11/26] Support 1T segments on ppc David Gibson
2011-03-16 4:56 ` [Qemu-devel] [PATCH 12/26] Add POWER7 support for ppc David Gibson
2011-03-16 4:56 ` [Qemu-devel] [PATCH 13/26] Start implementing pSeries logical partition machine David Gibson
2011-03-16 14:30 ` [Qemu-devel] " Alexander Graf
2011-03-16 21:59 ` [Qemu-devel] " Anthony Liguori
2011-03-16 23:46 ` Alexander Graf
2011-03-17 3:08 ` David Gibson
2011-03-16 4:56 ` [Qemu-devel] [PATCH 14/26] Implement the bus structure for PAPR virtual IO David Gibson
2011-03-16 14:43 ` [Qemu-devel] " Alexander Graf
2011-03-16 22:04 ` [Qemu-devel] " Anthony Liguori
2011-03-17 3:19 ` David Gibson
2011-03-16 4:56 ` [Qemu-devel] [PATCH 15/26] Virtual hash page table handling on pSeries machine David Gibson
2011-03-16 15:03 ` Alexander Graf [this message]
2011-03-17 1:03 ` [Qemu-devel] Re: [PATCH 15/26] Virtual hash page table handling on pSeries machine' David Gibson
2011-03-17 7:35 ` Alexander Graf
2011-03-16 4:56 ` [Qemu-devel] [PATCH 16/26] Implement hcall based RTAS for pSeries machines David Gibson
2011-03-16 15:08 ` [Qemu-devel] " Alexander Graf
2011-03-17 1:22 ` David Gibson
2011-03-17 7:36 ` Alexander Graf
2011-03-16 22:08 ` [Qemu-devel] " Anthony Liguori
2011-03-16 4:56 ` [Qemu-devel] [PATCH 17/26] Implement assorted pSeries hcalls and RTAS methods David Gibson
2011-03-16 4:56 ` [Qemu-devel] [PATCH 18/26] Implement the PAPR (pSeries) virtualized interrupt controller (xics) David Gibson
2011-03-16 15:47 ` [Qemu-devel] " Alexander Graf
2011-03-17 1:29 ` David Gibson
2011-03-17 7:37 ` Alexander Graf
2011-03-16 22:16 ` [Qemu-devel] " Anthony Liguori
2011-03-17 1:34 ` David Gibson
2011-03-17 13:13 ` Anthony Liguori
2011-03-23 3:48 ` David Gibson
2011-03-16 4:56 ` [Qemu-devel] [PATCH 19/26] Add PAPR H_VIO_SIGNAL hypercall and infrastructure for VIO interrupts David Gibson
2011-03-16 15:49 ` [Qemu-devel] " Alexander Graf
2011-03-17 1:38 ` David Gibson
2011-03-17 7:38 ` Alexander Graf
2011-03-16 4:56 ` [Qemu-devel] [PATCH 20/26] Add (virtual) interrupt to PAPR virtual tty device David Gibson
2011-03-16 4:56 ` [Qemu-devel] [PATCH 21/26] Implement TCE translation for sPAPR VIO David Gibson
2011-03-16 16:03 ` [Qemu-devel] " Alexander Graf
2011-03-16 20:05 ` Benjamin Herrenschmidt
2011-03-16 20:21 ` Anthony Liguori
2011-03-16 20:22 ` Anthony Liguori
2011-03-16 20:36 ` Benjamin Herrenschmidt
2011-03-17 1:43 ` David Gibson
2011-03-16 22:20 ` [Qemu-devel] " Anthony Liguori
2011-03-18 1:58 ` David Gibson
2011-03-16 4:56 ` [Qemu-devel] [PATCH 22/26] Implement sPAPR Virtual LAN (ibmveth) David Gibson
2011-03-16 16:12 ` [Qemu-devel] " Alexander Graf
2011-03-17 2:04 ` David Gibson
2011-03-16 22:29 ` [Qemu-devel] " Anthony Liguori
2011-03-17 2:09 ` David Gibson
2011-03-16 4:57 ` [Qemu-devel] [PATCH 23/26] Implement PAPR CRQ hypercalls David Gibson
2011-03-16 16:15 ` [Qemu-devel] " Alexander Graf
2011-03-16 4:57 ` [Qemu-devel] [PATCH 24/26] Implement PAPR virtual SCSI interface (ibmvscsi) David Gibson
2011-03-16 16:41 ` [Qemu-devel] " Alexander Graf
2011-03-16 16:51 ` Anthony Liguori
2011-03-16 20:08 ` Benjamin Herrenschmidt
2011-03-16 20:19 ` Anthony Liguori
2011-03-16 4:57 ` [Qemu-devel] [PATCH 25/26] Add a PAPR TCE-bypass mechanism for the pSeries machine David Gibson
2011-03-16 16:43 ` [Qemu-devel] " Alexander Graf
2011-03-17 2:21 ` David Gibson
2011-03-17 3:25 ` Benjamin Herrenschmidt
2011-03-17 7:44 ` Alexander Graf
2011-03-17 8:44 ` Benjamin Herrenschmidt
2011-03-17 9:37 ` Alexander Graf
2011-03-16 4:57 ` [Qemu-devel] [PATCH 26/26] Implement PAPR VPA functions for pSeries shared processor partitions David Gibson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4D80D153.7050606@suse.de \
--to=agraf@suse.de \
--cc=anton@samba.org \
--cc=david@gibson.dropbear.id.au \
--cc=paulus@samba.org \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).