From mboxrd@z Thu Jan  1 00:00:00 1970
From: Alexander Graf <agraf@suse.de>
Date: Thu, 10 May 2012 17:49:33 +0000
Subject: Re: [PATCH 1/2] ppc64: Rudimentary Support for extra page sizes on server CPUs
Message-Id: <4FABFFAD.3040602@suse.de>
List-Id: <kvm-ppc.vger.kernel.org>
References: <1335505422.4578.3.camel@pasglop> <1335505900.4578.9.camel@pasglop>
In-Reply-To: <1335505900.4578.9.camel@pasglop>
MIME-Version: 1.0
Content-Type: text/plain; charset="us-ascii"
Content-Transfer-Encoding: 7bit
To: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: kvm@vger.kernel.org, kvm-ppc <kvm-ppc@vger.kernel.org>

On 04/27/2012 07:51 AM, Benjamin Herrenschmidt wrote:
> More recent Power server chips (i.e. based on the 64 bit hash MMU)
> support more than just the traditional 4k and 16M page sizes.  This
> can get quite complicated, because which page sizes are supported,
> which combinations are supported within an MMU segment and how these
> page sizes are encoded both in the SLB entry and the hash PTE can vary
> depending on the CPU model (they are not specified by the
> architecture).  In addition the firmware or hypervisor may not permit
> use of certain page sizes, for various reasons.  Whether various page
> sizes are supported on KVM, for example, depends on whether the PR or
> HV variant of KVM is in use, and on the page size of the memory
> backing the guest's RAM.
>
> This patch adds information to the CPUState and cpu defs to describe
> the supported page sizes and encodings.  Since TCG does not yet
> support any extended page sizes, we just set this to NULL in the
> static CPU definitions, expanding this to the default 4k and 16M page
> sizes when we initialize the cpu state.  When using KVM, however, we
> instead determine available page sizes using the new
> KVM_PPC_GET_SMMU_INFO call.  For old kernels without that call, we use
> some defaults, with some guesswork which should do the right thing for
> existing HV and PR implementations.  The fallback might not be correct
> for future versions, but that's ok, because they'll have
> KVM_PPC_GET_SMMU_INFO.
>
> Signed-off-by: Benjamin Herrenschmidt<benh@kernel.crashing.org>
> Signed-off-by: David Gibson<david@gibson.dropbear.id.au>
> ---
>
> This patch, to compile, requires a linux-headers/linux/kvm.h which
> has the new capabilities and ioctl's defined (ie, from the kernel
> side patch I posted earlier). This means getting the numbers locked
> down with Avi
>
> diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
> index 84c9674..d5891e4 100644
> --- a/target-ppc/cpu.h
> +++ b/target-ppc/cpu.h
> @@ -874,6 +874,29 @@ enum {
>   #define DBELL_PIRTAG_MASK              0x3fff
>
>   /*****************************************************************************/
> +/* Segment page size information, used by recent hash MMUs
> + * The format of this structure mirrors kvm_ppc_smmu_info
> + */
> +
> +#define PPC_PAGE_SIZES_MAX_SZ	8
> +
> +struct ppc_one_page_size {
> +    uint32_t page_shift;  /* Page shift (or 0) */
> +    uint32_t pte_enc;	  /* Encoding in the HPTE (>>12) */
> +};
> +
> +struct ppc_one_seg_page_size {
> +    uint32_t page_shift;  /* Base page shift of segment (or 0) */
> +    uint32_t slb_enc;     /* SLB encoding for BookS */
> +    struct ppc_one_page_size enc[PPC_PAGE_SIZES_MAX_SZ];
> +};
> +
> +struct ppc_segment_page_sizes {
> +    struct ppc_one_seg_page_size sps[PPC_PAGE_SIZES_MAX_SZ];
> +};
> +
> +
> +/*****************************************************************************/
>   /* The whole PowerPC CPU context */
>   #define NB_MMU_MODES 3
>
> @@ -889,6 +912,9 @@ struct ppc_def_t {
>       powerpc_input_t bus_model;
>       uint32_t flags;
>       int bfd_mach;
> +#if defined(TARGET_PPC64)
> +    const struct ppc_segment_page_sizes *sps;
> +#endif
>       void (*init_proc)(CPUPPCState *env);
>       int  (*check_pow)(CPUPPCState *env);
>   };
> @@ -1012,6 +1038,9 @@ struct CPUPPCState {
>       uint32_t flags;
>       uint64_t insns_flags;
>       uint64_t insns_flags2;
> +#if defined(TARGET_PPC64)
> +    struct ppc_segment_page_sizes sps;
> +#endif
>
>   #if defined(TARGET_PPC64)&&  !defined(CONFIG_USER_ONLY)
>       target_phys_addr_t vpa;
> diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
> index 0ab7630..77aa186 100644
> --- a/target-ppc/kvm.c
> +++ b/target-ppc/kvm.c
> @@ -18,6 +18,7 @@
>   #include<sys/types.h>
>   #include<sys/ioctl.h>
>   #include<sys/mman.h>
> +#include<sys/vfs.h>
>
>   #include<linux/kvm.h>
>
> @@ -167,10 +168,208 @@ static int kvm_booke206_tlb_init(CPUPPCState *env)
>       return 0;
>   }
>
> +
> +#if defined (TARGET_PPC64)
> +static void kvm_get_fallback_smmu_info(CPUPPCState *env,
> +                                       struct kvm_ppc_smmu_info *info)
> +{
> +    memset(info, 0, sizeof(*info));
> +
> +    /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
> +     * need to "guess" what the supported page sizes are.
> +     *
> +     * For that to work we make a few assumptions:
> +     *
> +     * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
> +     *   KVM which only supports 4K and 16M pages, but supports them
> +     *   regardless of the backing store characteritics. We also don't
> +     *   support 1T segments.
> +     *
> +     *   This is safe as if HV KVM ever supports that capability or PR
> +     *   KVM grows supports for more page/segment sizes, those versions
> +     *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
> +     *   will not hit this fallback
> +     *
> +     * - Else we are running HV KVM. This means we only support page
> +     *   sizes that fit in the backing store. Additionally we only
> +     *   advertize 64K pages if the processor is ARCH 2.06 and we assume
> +     *   P7 encodings for the SLB and hash table. Here too, we assume
> +     *   support for any newer processor will mean a kernel that
> +     *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
> +     *   this fallback.
> +     */
> +    if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
> +        /* No flags */
> +        info->flags = 0;
> +
> +        /* Standard 4k base page size segment */
> +        info->sps[0].page_shift = 12;
> +        info->sps[0].slb_enc = 0;
> +        info->sps[0].enc[0].page_shift = 12;
> +        info->sps[0].enc[0].pte_enc = 0;
> +
> +        /* Standard 16M large page size segment */
> +        info->sps[1].page_shift = 24;
> +        info->sps[1].slb_enc = SLB_VSID_L;
> +        info->sps[1].enc[0].page_shift = 24;
> +        info->sps[1].enc[0].pte_enc = 0;
> +    } else {
> +	int i = 0;
> +
> +	/* HV KVM has backing store size restrictions */
> +        info->flags = KVM_PPC_PAGE_SIZES_REAL;
> +
> +        if (env->mmu_model&  POWERPC_MMU_1TSEG)
> +            info->flags = KVM_PPC_1T_SEGMENTS;
> +
> +        /* Standard 4k base page size segment */
> +        info->sps[i].page_shift = 12;
> +        info->sps[i].slb_enc = 0;
> +        info->sps[i].enc[0].page_shift = 12;
> +        info->sps[i].enc[0].pte_enc = 0;
> +	i++;
> +
> +        /* 64K on MMU 2.06 */
> +        if (env->mmu_model = POWERPC_MMU_2_06) {
> +            info->sps[i].page_shift = 16;
> +            info->sps[i].slb_enc = 0x110;
> +            info->sps[i].enc[0].page_shift = 16;
> +            info->sps[i].enc[0].pte_enc = 1;
> +	    i++;
> +	}
> +
> +        /* Standard 16M large page size segment */
> +        info->sps[i].page_shift = 24;
> +        info->sps[i].slb_enc = SLB_VSID_L;
> +        info->sps[i].enc[0].page_shift = 24;
> +        info->sps[i].enc[0].pte_enc = 0;
> +    }
> +}
> +
> +static void kvm_get_smmu_info(CPUPPCState *env, struct kvm_ppc_smmu_info *info)
> +{
> +    int ret;
> +
> +    if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
> +        ret = kvm_vm_ioctl(env->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
> +        if (ret = 0) {
> +            return;
> +        }
> +    }
> +
> +    kvm_get_fallback_smmu_info(env, info);
> +}
> +
> +static long getrampagesize(void)
> +{
> +    struct statfs fs;
> +    int ret;
> +
> +    if (!mem_path) {
> +        /* guest RAM is backed by normal anonymous pages */
> +        return getpagesize();
> +    }
> +
> +    do {
> +        ret = statfs(mem_path,&fs);
> +    } while (ret != 0&&  errno = EINTR);
> +
> +    if (ret != 0) {
> +        fprintf(stderr, "Couldn't statfs() memory path: %s\n",
> +                strerror(errno));
> +        exit(1);
> +    }
> +
> +#define HUGETLBFS_MAGIC       0x958458f6
> +
> +    if (fs.f_type != HUGETLBFS_MAGIC) {
> +        /* Explicit mempath, but it's ordinary pages */
> +        return getpagesize();
> +    }
> +
> +    /* It's hugepage, return the huge page size */
> +    return fs.f_bsize;
> +}
> +
> +static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
> +{
> +    if (!(flags&  KVM_PPC_PAGE_SIZES_REAL))
> +        return true;
> +
> +    return (1ul<<  shift)<= rampgsize;
> +}
> +
> +static void kvm_fixup_page_sizes(CPUPPCState *env)
> +{
> +    static struct kvm_ppc_smmu_info smmu_info;
> +    static bool has_smmu_info;

Do we have to cache these? This is not exactly a fast path...

> +    long rampagesize;
> +    int iq, ik, jq, jk;
> +
> +    /* We only handle page sizes for 64-bit server guests for now */
> +    if (!(env->mmu_model&  POWERPC_MMU_64)) {
> +        return;
> +    }
> +
> +    /* Collect MMU info from kernel if not already */
> +    if (!has_smmu_info) {
> +        kvm_get_smmu_info(env,&smmu_info);
> +        has_smmu_info = true;
> +    }
> +
> +    rampagesize = getrampagesize();
> +
> +    /* Convert to QEMU form */
> +    memset(&env->sps, 0, sizeof(env->sps));
> +
> +    for (ik = iq = 0; ik<  KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
> +        struct ppc_one_seg_page_size *qsps =&env->sps.sps[iq];
> +        struct kvm_ppc_one_seg_page_size *ksps =&smmu_info.sps[ik];
> +
> +        if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
> +                                 ksps->page_shift)) {
> +            continue;
> +        }
> +        qsps->page_shift = ksps->page_shift;
> +        qsps->slb_enc = ksps->slb_enc;
> +        for (jk = jq = 0; jk<  KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
> +            if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
> +                                     ksps->enc[jk].page_shift)) {
> +                continue;
> +            }
> +            qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
> +            qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
> +            if (++jq>= PPC_PAGE_SIZES_MAX_SZ) {
> +                break;
> +            }
> +        }
> +        if (++iq>= PPC_PAGE_SIZES_MAX_SZ) {
> +            break;
> +        }
> +    }
> +    env->slb_nr = smmu_info.slb_size;

What happens in the fallback case here?

> +    if (smmu_info.flags&  KVM_PPC_1T_SEGMENTS) {
> +        env->mmu_model |= POWERPC_MMU_1TSEG;
> +    } else {
> +        env->mmu_model&= ~POWERPC_MMU_1TSEG;
> +    }
> +}
> +#else /* defined (TARGET_PPC64) */
> +
> +static inline void kvm_fixup_page_sizes(CPUPPCState *env)
> +{
> +}
> +
> +#endif /* !defined (TARGET_PPC64) */
> +
>   int kvm_arch_init_vcpu(CPUPPCState *cenv)
>   {
>       int ret;
>
> +    /* Gather server mmu info from KVM and update the CPU state*/
> +    kvm_fixup_page_sizes(cenv);
> +
> +    /* Synchronize sregs with kvm */
>       ret = kvm_arch_sync_sregs(cenv);
>       if (ret) {
>           return ret;
> diff --git a/target-ppc/kvm_ppc.h b/target-ppc/kvm_ppc.h
> index 34ecad3..e2f8703 100644
> --- a/target-ppc/kvm_ppc.h
> +++ b/target-ppc/kvm_ppc.h
> @@ -58,6 +58,11 @@ static inline int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_l
>       return -1;
>   }
>
> +static inline int kvmppc_read_segment_page_sizes(uint32_t *prop, int maxcells)
> +{
> +    return -1;
> +}
> +
>   static inline int kvmppc_set_interrupt(CPUPPCState *env, int irq, int level)
>   {
>       return -1;
> diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
> index ba4b84d..6b5079d 100644
> --- a/target-ppc/translate_init.c
> +++ b/target-ppc/translate_init.c
> @@ -9926,6 +9926,27 @@ int cpu_ppc_register_internal (CPUPPCState *env, const ppc_def_t *def)
>       env->bfd_mach = def->bfd_mach;
>       env->check_pow = def->check_pow;
>
> +#if defined(TARGET_PPC64)
> +    if (def->sps)
> +        memcpy(&env->sps, def->sps, sizeof(*def->sps));

I never know if *def->... would dereference def or the complete 
construct. How about sizeof(env->sps)?


Alex

> +    else if (env->mmu_model&  POWERPC_MMU_64) {
> +        /* Use default sets of page sizes */
> +        static const struct ppc_segment_page_sizes defsps = {
> +            .sps = {
> +                { .page_shift = 12, /* 4K */
> +                  .slb_enc = 0,
> +                  .enc = { { .page_shift = 12, .pte_enc = 0 } }
> +                },
> +                { .page_shift = 24, /* 16M */
> +                  .slb_enc = 0x100,
> +                  .enc = { { .page_shift = 24, .pte_enc = 0 } }
> +                },
> +            },
> +        };
> +        memcpy(&env->sps,&defsps, sizeof(defsps));
> +    }
> +#endif /* defined(TARGET_PPC64) */
> +
>       if (kvm_enabled()) {
>           if (kvmppc_fixup_cpu(env) != 0) {
>               fprintf(stderr, "Unable to virtualize selected CPU with KVM\n");
>
>
> --
> To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html