From: Alexander Graf
Subject: Re: [RFC PATCH 2/4] KVM: PPC: Book3E: Handle LRAT error exception
Date: Fri, 04 Jul 2014 10:15:07 +0200
Message-ID: <53B6628B.6020001@suse.de>
References: <1404398727-12844-1-git-send-email-mihai.caraman@freescale.com>
 <1404398727-12844-3-git-send-email-mihai.caraman@freescale.com>
In-Reply-To: <1404398727-12844-3-git-send-email-mihai.caraman@freescale.com>
To: Mihai Caraman, kvm-ppc@vger.kernel.org
Cc: kvm@vger.kernel.org, linuxppc-dev@lists.ozlabs.org

On 03.07.14 16:45, Mihai Caraman wrote:
> Handle the LRAT error exception, with support for LRAT mapping and
> invalidation.
>
> Signed-off-by: Mihai Caraman
> ---
>  arch/powerpc/include/asm/kvm_host.h   |   1 +
>  arch/powerpc/include/asm/kvm_ppc.h    |   2 +
>  arch/powerpc/include/asm/mmu-book3e.h |   3 +
>  arch/powerpc/include/asm/reg_booke.h  |  13 ++++
>  arch/powerpc/kernel/asm-offsets.c     |   1 +
>  arch/powerpc/kvm/booke.c              |  40 +++++++++++
>  arch/powerpc/kvm/bookehv_interrupts.S |   9 ++-
>  arch/powerpc/kvm/e500_mmu_host.c      | 125 ++++++++++++++++++++++++++++++++++
>  arch/powerpc/kvm/e500mc.c             |   2 +
>  9 files changed, 195 insertions(+), 1 deletion(-)
>
> diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
> index bb66d8b..7b6b2ec 100644
> --- a/arch/powerpc/include/asm/kvm_host.h
> +++ b/arch/powerpc/include/asm/kvm_host.h
> @@ -433,6 +433,7 @@ struct kvm_vcpu_arch {
>  	u32 eplc;
>  	u32 epsc;
>  	u32 oldpir;
> +	u64 fault_lper;
>  #endif
>
>  #if defined(CONFIG_BOOKE)
> diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
> index 9c89cdd..2730a29 100644
> --- a/arch/powerpc/include/asm/kvm_ppc.h
> +++ b/arch/powerpc/include/asm/kvm_ppc.h
> @@ -86,6 +86,8 @@ extern gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int gtlb_index,
>  			gva_t eaddr);
>  extern void kvmppc_mmu_dtlb_miss(struct kvm_vcpu *vcpu);
>  extern void kvmppc_mmu_itlb_miss(struct kvm_vcpu *vcpu);
> +extern void kvmppc_lrat_map(struct kvm_vcpu *vcpu, gfn_t gfn);
> +extern void kvmppc_lrat_invalidate(struct kvm_vcpu *vcpu);
>
>  extern struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm,
>  			unsigned int id);
> diff --git a/arch/powerpc/include/asm/mmu-book3e.h b/arch/powerpc/include/asm/mmu-book3e.h
> index 088fd9f..ac6acf7 100644
> --- a/arch/powerpc/include/asm/mmu-book3e.h
> +++ b/arch/powerpc/include/asm/mmu-book3e.h
> @@ -40,6 +40,8 @@
>
>  /* MAS registers bit definitions */
>
> +#define MAS0_ATSEL		0x80000000
> +#define MAS0_ATSEL_SHIFT	31
>  #define MAS0_TLBSEL_MASK	0x30000000
>  #define MAS0_TLBSEL_SHIFT	28
>  #define MAS0_TLBSEL(x)		(((x) << MAS0_TLBSEL_SHIFT) & MAS0_TLBSEL_MASK)
> @@ -53,6 +55,7 @@
>  #define MAS0_WQ_CLR_RSRV	0x00002000
>
>  #define MAS1_VALID		0x80000000
> +#define MAS1_VALID_SHIFT	31
>  #define MAS1_IPROT		0x40000000
>  #define MAS1_TID(x)		(((x) << 16) & 0x3FFF0000)
>  #define MAS1_IND		0x00002000
> diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h
> index 75bda23..783d617 100644
> --- a/arch/powerpc/include/asm/reg_booke.h
> +++ b/arch/powerpc/include/asm/reg_booke.h
> @@ -43,6 +43,8 @@
>
>  /* Special Purpose Registers (SPRNs)*/
>  #define SPRN_DECAR	0x036	/* Decrementer Auto Reload Register */
> +#define SPRN_LPER	0x038	/* Logical Page Exception Register */
> +#define SPRN_LPERU	0x039	/* Logical Page Exception Register Upper */
>  #define SPRN_IVPR	0x03F	/* Interrupt Vector Prefix Register */
>  #define SPRN_USPRG0	0x100	/* User Special Purpose Register General 0 */
>  #define SPRN_SPRG3R	0x103	/* Special Purpose Register General 3 Read */
> @@ -358,6 +360,9 @@
>  #define ESR_ILK		0x00100000	/* Instr. Cache Locking */
>  #define ESR_PUO		0x00040000	/* Unimplemented Operation exception */
>  #define ESR_BO		0x00020000	/* Byte Ordering */
> +#define ESR_DATA	0x00000400	/* Page Table Data Access */
> +#define ESR_TLBI	0x00000200	/* Page Table TLB Ineligible */
> +#define ESR_PT		0x00000100	/* Page Table Translation */
>  #define ESR_SPV		0x00000080	/* Signal Processing operation */
>
>  /* Bit definitions related to the DBCR0. */
> @@ -649,6 +654,14 @@
>  #define EPC_EPID		0x00003fff
>  #define EPC_EPID_SHIFT		0
>
> +/* Bit definitions for LPER */
> +#define LPER_ALPN		0x000FFFFFFFFFF000ULL
> +#define LPER_ALPN_SHIFT		12
> +#define LPER_WIMGE		0x00000F80
> +#define LPER_WIMGE_SHIFT	7
> +#define LPER_LPS		0x0000000F
> +#define LPER_LPS_SHIFT		0
> +
>  /*
>   * The IBM-403 is an even more odd special case, as it is much
>   * older than the IBM-405 series.  We put these down here incase someone
> diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
> index f5995a9..be6e329 100644
> --- a/arch/powerpc/kernel/asm-offsets.c
> +++ b/arch/powerpc/kernel/asm-offsets.c
> @@ -713,6 +713,7 @@ int main(void)
>  	DEFINE(VCPU_HOST_MAS4, offsetof(struct kvm_vcpu, arch.host_mas4));
>  	DEFINE(VCPU_HOST_MAS6, offsetof(struct kvm_vcpu, arch.host_mas6));
>  	DEFINE(VCPU_EPLC, offsetof(struct kvm_vcpu, arch.eplc));
> +	DEFINE(VCPU_FAULT_LPER, offsetof(struct kvm_vcpu, arch.fault_lper));
>  #endif
>
>  #ifdef CONFIG_KVM_EXIT_TIMING
> diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
> index a192975..ab1077f 100644
> --- a/arch/powerpc/kvm/booke.c
> +++ b/arch/powerpc/kvm/booke.c
> @@ -1286,6 +1286,46 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
>  		break;
>  	}
>
> +#ifdef CONFIG_KVM_BOOKE_HV
> +	case BOOKE_INTERRUPT_LRAT_ERROR:
> +	{
> +		gfn_t gfn;
> +
> +		/*
> +		 * Guest TLB management instructions (EPCR.DGTMI == 0) are not
> +		 * supported for now
> +		 */
> +		if (!(vcpu->arch.fault_esr & ESR_PT)) {
> +			WARN(1, "%s: Guest TLB management instructions not supported!\n", __func__);

Wouldn't this allow a guest to flood the host's kernel log? WARN_ONCE() or a
ratelimited warning would at least bound that.

> +			break;
> +		}
> +
> +		gfn = (vcpu->arch.fault_lper & LPER_ALPN) >> LPER_ALPN_SHIFT;

Maybe add an #ifdef and #error check to make sure that LPER_ALPN_SHIFT ==
PAGE_SHIFT? (See the sketch at the end of this mail.)

> +
> +		idx = srcu_read_lock(&vcpu->kvm->srcu);
> +
> +		if (kvm_is_visible_gfn(vcpu->kvm, gfn)) {
> +			kvmppc_lrat_map(vcpu, gfn);
> +			r = RESUME_GUEST;
> +		} else if (vcpu->arch.fault_esr & ESR_DATA) {
> +			vcpu->arch.paddr_accessed = (gfn << PAGE_SHIFT)
> +				| (vcpu->arch.fault_dear & (PAGE_SIZE - 1));
> +			vcpu->arch.vaddr_accessed =
> +				vcpu->arch.fault_dear;
> +
> +			r = kvmppc_emulate_mmio(run, vcpu);
> +			kvmppc_account_exit(vcpu, MMIO_EXITS);

It's a shame we have to duplicate that logic from the normal TLB miss path,
but I can't see any good way to combine them either.
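
For what it's worth, the common tail could go into a small helper -- a rough
sketch only, kvmppc_complete_mmio_exit() is a name I'm making up here, and
both this path and the DTLB miss path in booke.c would call it:

/*
 * Sketch: shared tail for exit paths that end up emulating MMIO.
 * Hypothetical helper, not part of this patch.
 */
static int kvmppc_complete_mmio_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
                                     gpa_t gpaddr, gva_t eaddr)
{
        int r;

        vcpu->arch.paddr_accessed = gpaddr;
        vcpu->arch.vaddr_accessed = eaddr;
        r = kvmppc_emulate_mmio(run, vcpu);
        kvmppc_account_exit(vcpu, MMIO_EXITS);

        return r;
}

Probably borderline for just two callers, so feel free to ignore -- but it
would at least keep the exit accounting in one place.
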
> +		} else {
> +			kvmppc_booke_queue_irqprio(vcpu,
> +						   BOOKE_IRQPRIO_MACHINE_CHECK);
> +			r = RESUME_GUEST;
> +		}
> +
> +		srcu_read_unlock(&vcpu->kvm->srcu, idx);
> +		break;
> +	}
> +#endif
> +
>  	case BOOKE_INTERRUPT_DEBUG: {
>  		r = kvmppc_handle_debug(run, vcpu);
>  		if (r == RESUME_HOST)
> diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S
> index b3ecdd6..341c3a8 100644
> --- a/arch/powerpc/kvm/bookehv_interrupts.S
> +++ b/arch/powerpc/kvm/bookehv_interrupts.S
> @@ -64,6 +64,7 @@
>  #define NEED_EMU	0x00000001 /* emulation -- save nv regs */
>  #define NEED_DEAR	0x00000002 /* save faulting DEAR */
>  #define NEED_ESR	0x00000004 /* save faulting ESR */
> +#define NEED_LPER	0x00000008 /* save faulting LPER */
>
>  /*
>   * On entry:
> @@ -203,6 +204,12 @@
>  	PPC_STL	r9, VCPU_FAULT_DEAR(r4)
>  	.endif
>
> +	/* Only supported on 64-bit cores for now */
> +	.if	\flags & NEED_LPER
> +	mfspr	r7, SPRN_LPER
> +	std	r7, VCPU_FAULT_LPER(r4)
> +	.endif
> +
>  	b	kvmppc_resume_host
>  .endm
>
> @@ -325,7 +332,7 @@ kvm_handler BOOKE_INTERRUPT_DEBUG, EX_PARAMS(DBG), \
>  kvm_handler BOOKE_INTERRUPT_DEBUG, EX_PARAMS(CRIT), \
>  	SPRN_CSRR0, SPRN_CSRR1, 0
>  kvm_handler BOOKE_INTERRUPT_LRAT_ERROR, EX_PARAMS(GEN), \
> -	SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR | NEED_ESR)
> +	SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR | NEED_ESR | NEED_LPER)
>  #else
>  /*
>   * For input register values, see arch/powerpc/include/asm/kvm_booke_hv_asm.h
> diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c
> index 79677d7..be1454b 100644
> --- a/arch/powerpc/kvm/e500_mmu_host.c
> +++ b/arch/powerpc/kvm/e500_mmu_host.c
> @@ -95,6 +95,131 @@ static inline void __write_host_tlbe(struct kvm_book3e_206_tlb_entry *stlbe,
>  			stlbe->mas2, stlbe->mas7_3);
>  }
>
> +#ifdef CONFIG_KVM_BOOKE_HV
> +#ifdef CONFIG_64BIT
> +static inline int lrat_next(void)

No inline in .c files please. Just make them "static".

> +{
> +	int this, next;
> +
> +	this = local_paca->tcd.lrat_next;
> +	next = (this + 1) % local_paca->tcd.lrat_max;

Can we assume that lrat_max is always a power of 2? IIRC modulo operations
with a variable divisor can be quite expensive. If we could instead do

  next = (this + 1) & local_paca->tcd.lrat_mask;

we'd be faster and wouldn't rely on division helpers. (There's a fuller
sketch of this further down in this mail.)

> +	local_paca->tcd.lrat_next = next;
> +
> +	return this;
> +}
> +
> +static inline int lrat_size(void)
> +{
> +	return local_paca->tcd.lrat_max;
> +}
> +#else
> +/* LRAT is only supported in 64-bit kernels for now */
> +static inline int lrat_next(void)
> +{
> +	BUG();
> +}
> +
> +static inline int lrat_size(void)
> +{
> +	return 0;
> +}
> +#endif
> +
> +void write_host_lrate(int tsize, gfn_t gfn, unsigned long pfn, uint32_t lpid,
> +		      int valid, int lrat_entry)
> +{
> +	struct kvm_book3e_206_tlb_entry stlbe;
> +	int esel = lrat_entry;
> +	unsigned long flags;
> +
> +	stlbe.mas1 = (valid ? MAS1_VALID : 0) | MAS1_TSIZE(tsize);
> +	stlbe.mas2 = ((u64)gfn << PAGE_SHIFT);
> +	stlbe.mas7_3 = ((u64)pfn << PAGE_SHIFT);
> +	stlbe.mas8 = MAS8_TGS | lpid;
> +
> +	local_irq_save(flags);
> +	/* book3e_tlb_lock(); */

Hm? Commented-out lock calls shouldn't go in -- either take the lock or add a
comment explaining why it isn't needed here.
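
And coming back to lrat_next(): here's the mask variant spelled out -- a
minimal sketch, assuming lrat_max is always a power of two and a precomputed
lrat_mask (= lrat_max - 1) field in the tcd struct, which doesn't exist today:

/* Sketch only: requires a new lrat_mask field and power-of-2 lrat_max */
static int lrat_next(void)
{
        int this = local_paca->tcd.lrat_next;

        local_paca->tcd.lrat_next = (this + 1) & local_paca->tcd.lrat_mask;
        return this;
}

If lrat_max can't be guaranteed to be a power of two, a compare-and-reset to 0
would still beat the modulo.
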
> +
> +	if (esel == -1)
> +		esel = lrat_next();
> +	__write_host_tlbe(&stlbe, MAS0_ATSEL | MAS0_ESEL(esel));
> +
> +	/* book3e_tlb_unlock(); */
> +	local_irq_restore(flags);
> +}
> +
> +void kvmppc_lrat_map(struct kvm_vcpu *vcpu, gfn_t gfn)
> +{
> +	struct kvm_memory_slot *slot;
> +	unsigned long pfn;
> +	unsigned long hva;
> +	struct vm_area_struct *vma;
> +	unsigned long psize;
> +	int tsize;
> +	unsigned long tsize_pages;
> +
> +	slot = gfn_to_memslot(vcpu->kvm, gfn);
> +	if (!slot) {
> +		pr_err_ratelimited("%s: couldn't find memslot for gfn %lx!\n",
> +				   __func__, (long)gfn);
> +		return;
> +	}
> +
> +	hva = slot->userspace_addr;
> +
> +	down_read(&current->mm->mmap_sem);
> +	vma = find_vma(current->mm, hva);
> +	if (vma && (hva >= vma->vm_start)) {
> +		psize = vma_kernel_pagesize(vma);
> +	} else {
> +		pr_err_ratelimited("%s: couldn't find virtual memory address for gfn %lx!\n", __func__, (long)gfn);
> +		up_read(&current->mm->mmap_sem);
> +		return;
> +	}
> +	up_read(&current->mm->mmap_sem);
> +
> +	pfn = gfn_to_pfn_memslot(slot, gfn);
> +	if (is_error_noslot_pfn(pfn)) {
> +		pr_err_ratelimited("%s: couldn't get real page for gfn %lx!\n",
> +				   __func__, (long)gfn);
> +		return;
> +	}
> +
> +	tsize = __ilog2(psize) - 10;
> +	tsize_pages = 1 << (tsize + 10 - PAGE_SHIFT);
> +	gfn &= ~(tsize_pages - 1);
> +	pfn &= ~(tsize_pages - 1);
> +
> +	write_host_lrate(tsize, gfn, pfn, vcpu->kvm->arch.lpid, 1, -1);
> +	kvm_release_pfn_clean(pfn);

Don't we have to keep the page pinned so it doesn't get swapped out?

Also note the error path above: the original version returned from the
find_vma() failure branch without dropping mmap_sem.

Alex

> +}
> +
> +void kvmppc_lrat_invalidate(struct kvm_vcpu *vcpu)
> +{
> +	uint32_t mas0, mas1 = 0;
> +	int esel;
> +	unsigned long flags;
> +
> +	local_irq_save(flags);
> +	/* book3e_tlb_lock(); */
> +
> +	/* LRAT does not have a dedicated instruction for invalidation */
> +	for (esel = 0; esel < lrat_size(); esel++) {
> +		mas0 = MAS0_ATSEL | MAS0_ESEL(esel);
> +		mtspr(SPRN_MAS0, mas0);
> +		asm volatile("isync; tlbre" : : : "memory");
> +		mas1 = mfspr(SPRN_MAS1) & ~MAS1_VALID;
> +		mtspr(SPRN_MAS1, mas1);
> +		asm volatile("isync; tlbwe" : : : "memory");
> +	}
> +	/* Must clear mas8 for other host tlbwe's */
> +	mtspr(SPRN_MAS8, 0);
> +	isync();
> +
> +	/* book3e_tlb_unlock(); */
> +	local_irq_restore(flags);
> +}
> +#endif
> +
>  /*
>   * Acquire a mas0 with victim hint, as if we just took a TLB miss.
>   *
> diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c
> index b1d9939..5622d9a 100644
> --- a/arch/powerpc/kvm/e500mc.c
> +++ b/arch/powerpc/kvm/e500mc.c
> @@ -99,6 +99,8 @@ void kvmppc_e500_tlbil_all(struct kvmppc_vcpu_e500 *vcpu_e500)
>  	asm volatile("tlbilxlpid");
>  	mtspr(SPRN_MAS5, 0);
>  	local_irq_restore(flags);
> +
> +	kvmppc_lrat_invalidate(&vcpu_e500->vcpu);
>  }
>
>  void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid)
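
One more follow-up on the LPER_ALPN_SHIFT == PAGE_SHIFT check I asked for
above -- I was thinking of something along these lines, as a compile-time
check (either form works):

/* The gfn extraction from LPER assumes 4k pages */
#if LPER_ALPN_SHIFT != PAGE_SHIFT
#error LRAT error handling assumes LPER_ALPN_SHIFT == PAGE_SHIFT
#endif

or, inside the LRAT error handler:

	BUILD_BUG_ON(LPER_ALPN_SHIFT != PAGE_SHIFT);

That way a 64k-page config fails the build instead of silently computing
bogus gfns.
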