From mboxrd@z Thu Jan 1 00:00:00 1970 From: Alexander Graf Date: Thu, 10 May 2012 17:49:33 +0000 Subject: Re: [PATCH 1/2] ppc64: Rudimentary Support for extra page sizes on server CPUs Message-Id: <4FABFFAD.3040602@suse.de> List-Id: References: <1335505422.4578.3.camel@pasglop> <1335505900.4578.9.camel@pasglop> In-Reply-To: <1335505900.4578.9.camel@pasglop> MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit To: Benjamin Herrenschmidt Cc: kvm@vger.kernel.org, kvm-ppc On 04/27/2012 07:51 AM, Benjamin Herrenschmidt wrote: > More recent Power server chips (i.e. based on the 64 bit hash MMU) > support more than just the traditional 4k and 16M page sizes. This > can get quite complicated, because which page sizes are supported, > which combinations are supported within an MMU segment and how these > page sizes are encoded both in the SLB entry and the hash PTE can vary > depending on the CPU model (they are not specified by the > architecture). In addition the firmware or hypervisor may not permit > use of certain page sizes, for various reasons. Whether various page > sizes are supported on KVM, for example, depends on whether the PR or > HV variant of KVM is in use, and on the page size of the memory > backing the guest's RAM. > > This patch adds information to the CPUState and cpu defs to describe > the supported page sizes and encodings. Since TCG does not yet > support any extended page sizes, we just set this to NULL in the > static CPU definitions, expanding this to the default 4k and 16M page > sizes when we initialize the cpu state. When using KVM, however, we > instead determine available page sizes using the new > KVM_PPC_GET_SMMU_INFO call. For old kernels without that call, we use > some defaults, with some guesswork which should do the right thing for > existing HV and PR implementations. The fallback might not be correct > for future versions, but that's ok, because they'll have > KVM_PPC_GET_SMMU_INFO. 
> > Signed-off-by: Benjamin Herrenschmidt > Signed-off-by: David Gibson > --- > > This patch, to compile, requires a linux-headers/linux/kvm.h which > has the new capabilities and ioctl's defined (ie, from the kernel > side patch I posted earlier). This means getting the numbers locked > down with Avi > > diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h > index 84c9674..d5891e4 100644 > --- a/target-ppc/cpu.h > +++ b/target-ppc/cpu.h > @@ -874,6 +874,29 @@ enum { > #define DBELL_PIRTAG_MASK 0x3fff > > /*****************************************************************************/ > +/* Segment page size information, used by recent hash MMUs > + * The format of this structure mirrors kvm_ppc_smmu_info > + */ > + > +#define PPC_PAGE_SIZES_MAX_SZ 8 > + > +struct ppc_one_page_size { > + uint32_t page_shift; /* Page shift (or 0) */ > + uint32_t pte_enc; /* Encoding in the HPTE (>>12) */ > +}; > + > +struct ppc_one_seg_page_size { > + uint32_t page_shift; /* Base page shift of segment (or 0) */ > + uint32_t slb_enc; /* SLB encoding for BookS */ > + struct ppc_one_page_size enc[PPC_PAGE_SIZES_MAX_SZ]; > +}; > + > +struct ppc_segment_page_sizes { > + struct ppc_one_seg_page_size sps[PPC_PAGE_SIZES_MAX_SZ]; > +}; > + > + > +/*****************************************************************************/ > /* The whole PowerPC CPU context */ > #define NB_MMU_MODES 3 > > @@ -889,6 +912,9 @@ struct ppc_def_t { > powerpc_input_t bus_model; > uint32_t flags; > int bfd_mach; > +#if defined(TARGET_PPC64) > + const struct ppc_segment_page_sizes *sps; > +#endif > void (*init_proc)(CPUPPCState *env); > int (*check_pow)(CPUPPCState *env); > }; > @@ -1012,6 +1038,9 @@ struct CPUPPCState { > uint32_t flags; > uint64_t insns_flags; > uint64_t insns_flags2; > +#if defined(TARGET_PPC64) > + struct ppc_segment_page_sizes sps; > +#endif > > #if defined(TARGET_PPC64)&& !defined(CONFIG_USER_ONLY) > target_phys_addr_t vpa; > diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c > index 
0ab7630..77aa186 100644 > --- a/target-ppc/kvm.c > +++ b/target-ppc/kvm.c > @@ -18,6 +18,7 @@ > #include > #include > #include > +#include > > #include > > @@ -167,10 +168,208 @@ static int kvm_booke206_tlb_init(CPUPPCState *env) > return 0; > } > > + > +#if defined (TARGET_PPC64) > +static void kvm_get_fallback_smmu_info(CPUPPCState *env, > + struct kvm_ppc_smmu_info *info) > +{ > + memset(info, 0, sizeof(*info)); > + > + /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so > + * need to "guess" what the supported page sizes are. > + * > + * For that to work we make a few assumptions: > + * > + * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR" > + * KVM which only supports 4K and 16M pages, but supports them > + * regardless of the backing store characteritics. We also don't > + * support 1T segments. > + * > + * This is safe as if HV KVM ever supports that capability or PR > + * KVM grows supports for more page/segment sizes, those versions > + * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we > + * will not hit this fallback > + * > + * - Else we are running HV KVM. This means we only support page > + * sizes that fit in the backing store. Additionally we only > + * advertize 64K pages if the processor is ARCH 2.06 and we assume > + * P7 encodings for the SLB and hash table. Here too, we assume > + * support for any newer processor will mean a kernel that > + * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit > + * this fallback. 
> + */ > + if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_PVINFO)) { > + /* No flags */ > + info->flags = 0; > + > + /* Standard 4k base page size segment */ > + info->sps[0].page_shift = 12; > + info->sps[0].slb_enc = 0; > + info->sps[0].enc[0].page_shift = 12; > + info->sps[0].enc[0].pte_enc = 0; > + > + /* Standard 16M large page size segment */ > + info->sps[1].page_shift = 24; > + info->sps[1].slb_enc = SLB_VSID_L; > + info->sps[1].enc[0].page_shift = 24; > + info->sps[1].enc[0].pte_enc = 0; > + } else { > + int i = 0; > + > + /* HV KVM has backing store size restrictions */ > + info->flags = KVM_PPC_PAGE_SIZES_REAL; > + > + if (env->mmu_model & POWERPC_MMU_1TSEG) > + info->flags |= KVM_PPC_1T_SEGMENTS; > + > + /* Standard 4k base page size segment */ > + info->sps[i].page_shift = 12; > + info->sps[i].slb_enc = 0; > + info->sps[i].enc[0].page_shift = 12; > + info->sps[i].enc[0].pte_enc = 0; > + i++; > + > + /* 64K on MMU 2.06 */ > + if (env->mmu_model == POWERPC_MMU_2_06) { > + info->sps[i].page_shift = 16; > + info->sps[i].slb_enc = 0x110; > + info->sps[i].enc[0].page_shift = 16; > + info->sps[i].enc[0].pte_enc = 1; > + i++; > + } > + > + /* Standard 16M large page size segment */ > + info->sps[i].page_shift = 24; > + info->sps[i].slb_enc = SLB_VSID_L; > + info->sps[i].enc[0].page_shift = 24; > + info->sps[i].enc[0].pte_enc = 0; > + } > +} > + > +static void kvm_get_smmu_info(CPUPPCState *env, struct kvm_ppc_smmu_info *info) > +{ > + int ret; > + > + if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) { > + ret = kvm_vm_ioctl(env->kvm_state, KVM_PPC_GET_SMMU_INFO, info); > + if (ret == 0) { > + return; > + } > + } > + > + kvm_get_fallback_smmu_info(env, info); > +} > + > +static long getrampagesize(void) > +{ > + struct statfs fs; > + int ret; > + > + if (!mem_path) { > + /* guest RAM is backed by normal anonymous pages */ > + return getpagesize(); > + } > + > + do { > + ret = statfs(mem_path, &fs); > + } while (ret != 0 && errno == EINTR); 
> + if (ret != 0) { > + fprintf(stderr, "Couldn't statfs() memory path: %s\n", > + strerror(errno)); > + exit(1); > + } > + > +#define HUGETLBFS_MAGIC 0x958458f6 > + > + if (fs.f_type != HUGETLBFS_MAGIC) { > + /* Explicit mempath, but it's ordinary pages */ > + return getpagesize(); > + } > + > + /* It's hugepage, return the huge page size */ > + return fs.f_bsize; > +} > + > +static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift) > +{ > + if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) > + return true; > + > + return (1ul << shift) <= rampgsize; > +} > + > +static void kvm_fixup_page_sizes(CPUPPCState *env) > +{ > + static struct kvm_ppc_smmu_info smmu_info; > + static bool has_smmu_info; Do we have to cache these? This is not exactly a fast path... > + long rampagesize; > + int iq, ik, jq, jk; > + > + /* We only handle page sizes for 64-bit server guests for now */ > + if (!(env->mmu_model & POWERPC_MMU_64)) { > + return; > + } > + > + /* Collect MMU info from kernel if not already */ > + if (!has_smmu_info) { > + kvm_get_smmu_info(env, &smmu_info); > + has_smmu_info = true; > + } > + > + rampagesize = getrampagesize(); > + > + /* Convert to QEMU form */ > + memset(&env->sps, 0, sizeof(env->sps)); > + > + for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) { > + struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq]; > + struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik]; > + > + if (!kvm_valid_page_size(smmu_info.flags, rampagesize, > + ksps->page_shift)) { > + continue; > + } > + qsps->page_shift = ksps->page_shift; > + qsps->slb_enc = ksps->slb_enc; > + for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) { > + if (!kvm_valid_page_size(smmu_info.flags, rampagesize, > + ksps->enc[jk].page_shift)) { > + continue; > + } > + qsps->enc[jq].page_shift = ksps->enc[jk].page_shift; > + qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc; > + if (++jq >= PPC_PAGE_SIZES_MAX_SZ) { > + break; > + } > + } > + if (++iq >= PPC_PAGE_SIZES_MAX_SZ) { > + 
break; > + } > + } > + env->slb_nr = smmu_info.slb_size; What happens in the fallback case here? > + if (smmu_info.flags & KVM_PPC_1T_SEGMENTS) { > + env->mmu_model |= POWERPC_MMU_1TSEG; > + } else { > + env->mmu_model &= ~POWERPC_MMU_1TSEG; > + } > +} > +#else /* defined (TARGET_PPC64) */ > + > +static inline void kvm_fixup_page_sizes(CPUPPCState *env) > +{ > +} > + > +#endif /* !defined (TARGET_PPC64) */ > + > int kvm_arch_init_vcpu(CPUPPCState *cenv) > { > int ret; > > + /* Gather server mmu info from KVM and update the CPU state*/ > + kvm_fixup_page_sizes(cenv); > + > + /* Synchronize sregs with kvm */ > ret = kvm_arch_sync_sregs(cenv); > if (ret) { > return ret; > diff --git a/target-ppc/kvm_ppc.h b/target-ppc/kvm_ppc.h > index 34ecad3..e2f8703 100644 > --- a/target-ppc/kvm_ppc.h > +++ b/target-ppc/kvm_ppc.h > @@ -58,6 +58,11 @@ static inline int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_l > return -1; > } > > +static inline int kvmppc_read_segment_page_sizes(uint32_t *prop, int maxcells) > +{ > + return -1; > +} > + > static inline int kvmppc_set_interrupt(CPUPPCState *env, int irq, int level) > { > return -1; > diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c > index ba4b84d..6b5079d 100644 > --- a/target-ppc/translate_init.c > +++ b/target-ppc/translate_init.c > @@ -9926,6 +9926,27 @@ int cpu_ppc_register_internal (CPUPPCState *env, const ppc_def_t *def) > env->bfd_mach = def->bfd_mach; > env->check_pow = def->check_pow; > > +#if defined(TARGET_PPC64) > + if (def->sps) > + memcpy(&env->sps, def->sps, sizeof(*def->sps)); I never know if *def->... would dereference def or the complete construct. How about sizeof(env->sps)? 
Alex > + else if (env->mmu_model& POWERPC_MMU_64) { > + /* Use default sets of page sizes */ > + static const struct ppc_segment_page_sizes defsps = { > + .sps = { > + { .page_shift = 12, /* 4K */ > + .slb_enc = 0, > + .enc = { { .page_shift = 12, .pte_enc = 0 } } > + }, > + { .page_shift = 24, /* 16M */ > + .slb_enc = 0x100, > + .enc = { { .page_shift = 24, .pte_enc = 0 } } > + }, > + }, > + }; > + memcpy(&env->sps,&defsps, sizeof(defsps)); > + } > +#endif /* defined(TARGET_PPC64) */ > + > if (kvm_enabled()) { > if (kvmppc_fixup_cpu(env) != 0) { > fprintf(stderr, "Unable to virtualize selected CPU with KVM\n"); > > > -- > To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html