LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 3/9] arch/powerpc: Convert virtual address to vpn
From: Aneesh Kumar K.V @ 2012-07-05  8:14 UTC (permalink / raw)
  To: benh, paulus; +Cc: linuxppc-dev, Aneesh Kumar K.V
In-Reply-To: <1341476058-5405-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

This patch converts various functions to take a virtual page number
instead of a virtual address. The virtual page number is the virtual
address shifted right by VPN_SHIFT (12) bits. This enables us to have an
address range of up to 76 bits.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/mmu-hash64.h     |   70 +++++++++++++++++----
 arch/powerpc/include/asm/pte-hash64-64k.h |   18 +++---
 arch/powerpc/kvm/book3s_64_mmu_host.c     |    2 +-
 arch/powerpc/mm/hash_low_64.S             |   97 ++++++++++++++++++-----------
 arch/powerpc/mm/hash_native_64.c          |   40 +++++++++---
 arch/powerpc/mm/hash_utils_64.c           |    6 +-
 arch/powerpc/mm/hugetlbpage-hash64.c      |    2 +-
 arch/powerpc/mm/tlb_hash64.c              |    2 +-
 arch/powerpc/platforms/cell/beat_htab.c   |    2 +-
 arch/powerpc/platforms/pseries/lpar.c     |   20 +-----
 10 files changed, 168 insertions(+), 91 deletions(-)

diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h
index 1c65a59..1c984a6 100644
--- a/arch/powerpc/include/asm/mmu-hash64.h
+++ b/arch/powerpc/include/asm/mmu-hash64.h
@@ -14,6 +14,7 @@
 
 #include <asm/asm-compat.h>
 #include <asm/page.h>
+#include <asm/bug.h>
 
 /*
  * Segment table
@@ -154,9 +155,25 @@ struct mmu_psize_def
 #define MMU_SEGSIZE_256M	0
 #define MMU_SEGSIZE_1T		1
 
+/*
+ * Encode page number shift.
+ * In order to fit the 78 bit va in a 64 bit variable we shift the va by
+ * 12 bits. This enables us to address up to a 76 bit va.
+ * For the hpt hash from a va we can ignore the page size bits of va and for
+ * hpte encoding we ignore up to 23 bits of va. So ignoring the lower 12 bits
+ * ensures we work in all cases including 4k page size.
+ */
+#define VPN_SHIFT	12
 
 #ifndef __ASSEMBLY__
 
+static inline int segment_shift(int ssize)
+{
+	if (ssize == MMU_SEGSIZE_256M)
+		return SID_SHIFT;
+	return SID_SHIFT_1T;
+}
+
 /*
  * The current system page and segment sizes
  */
@@ -180,6 +197,30 @@ extern unsigned long tce_alloc_start, tce_alloc_end;
 extern int mmu_ci_restrictions;
 
 /*
+ * This computes the AVPN and B fields of the first dword of a HPTE,
+ * for use when we want to match an existing PTE.  The bottom 7 bits
+ * of the returned value are zero.
+ */
+static inline unsigned long hpte_encode_avpn(unsigned long vpn, int psize,
+					     int ssize)
+{
+	unsigned long v;
+	/*
+	 * The AVA field omits the low-order 23 bits of the 78 bits VA.
+	 * These bits are not needed in the PTE, because the
+	 * low-order b of these bits are part of the byte offset
+	 * into the virtual page and, if b < 23, the high-order
+	 * 23-b of these bits are always used in selecting the
+	 * PTEGs to be searched
+	 */
+	BUG_ON(VPN_SHIFT > 23);
+	v = (vpn >> (23 - VPN_SHIFT)) & ~(mmu_psize_defs[psize].avpnm);
+	v <<= HPTE_V_AVPN_SHIFT;
+	v |= ((unsigned long) ssize) << HPTE_V_SSIZE_SHIFT;
+	return v;
+}
+
+/*
  * This function sets the AVPN and L fields of the HPTE  appropriately
  * for the page size
  */
@@ -187,11 +228,9 @@ static inline unsigned long hpte_encode_v(unsigned long va, int psize,
 					  int ssize)
 {
 	unsigned long v;
-	v = (va >> 23) & ~(mmu_psize_defs[psize].avpnm);
-	v <<= HPTE_V_AVPN_SHIFT;
+	v = hpte_encode_avpn(va, psize, ssize);
 	if (psize != MMU_PAGE_4K)
 		v |= HPTE_V_LARGE;
-	v |= ((unsigned long) ssize) << HPTE_V_SSIZE_SHIFT;
 	return v;
 }
 
@@ -216,14 +255,16 @@ static inline unsigned long hpte_encode_r(unsigned long pa, int psize)
 }
 
 /*
- * Build a VA given VSID, EA and segment size
+ * Build a VPN_SHIFT bit shifted va given VSID, EA and segment size.
  */
-static inline unsigned long hpt_va(unsigned long ea, unsigned long vsid,
+static inline unsigned long hpt_vpn(unsigned long ea, unsigned long vsid,
 				   int ssize)
 {
-	if (ssize == MMU_SEGSIZE_256M)
-		return (vsid << 28) | (ea & 0xfffffffUL);
-	return (vsid << 40) | (ea & 0xffffffffffUL);
+	unsigned long mask;
+	int s_shift = segment_shift(ssize);
+
+	mask = (1ul << (s_shift - VPN_SHIFT)) - 1;
+	return (vsid << (s_shift - VPN_SHIFT)) | ((ea >> VPN_SHIFT) & mask);
 }
 
 /*
@@ -233,13 +274,20 @@ static inline unsigned long hpt_va(unsigned long ea, unsigned long vsid,
 static inline unsigned long hpt_hash(unsigned long va, unsigned int shift,
 				     int ssize)
 {
+	int mask;
 	unsigned long hash, vsid;
 
+	BUG_ON(shift < VPN_SHIFT);
+
 	if (ssize == MMU_SEGSIZE_256M) {
-		hash = (va >> 28) ^ ((va & 0x0fffffffUL) >> shift);
+		mask = (1ul << (SID_SHIFT - VPN_SHIFT)) - 1;
+		hash = ((va >> (SID_SHIFT - VPN_SHIFT)) & 0x0000007fffffffff) ^
+			(((va & mask) >> (shift - VPN_SHIFT)) & 0xffff);
 	} else {
-		vsid = va >> 40;
-		hash = vsid ^ (vsid << 25) ^ ((va & 0xffffffffffUL) >> shift);
+		mask = (1ul << (SID_SHIFT_1T - VPN_SHIFT)) - 1;
+		vsid = va >> (SID_SHIFT_1T - VPN_SHIFT);
+		hash = (vsid & 0xffffff) ^ ((vsid << 25) & 0x7fffffffff) ^
+			(((va & mask) >> (shift - VPN_SHIFT)) & 0xfffffff);
 	}
 	return hash & 0x7fffffffffUL;
 }
diff --git a/arch/powerpc/include/asm/pte-hash64-64k.h b/arch/powerpc/include/asm/pte-hash64-64k.h
index 59247e8..eedf427 100644
--- a/arch/powerpc/include/asm/pte-hash64-64k.h
+++ b/arch/powerpc/include/asm/pte-hash64-64k.h
@@ -58,14 +58,16 @@
 /* Trick: we set __end to va + 64k, which happens works for
  * a 16M page as well as we want only one iteration
  */
-#define pte_iterate_hashed_subpages(rpte, psize, va, index, shift)	    \
-        do {                                                                \
-                unsigned long __end = va + PAGE_SIZE;                       \
-                unsigned __split = (psize == MMU_PAGE_4K ||                 \
-				    psize == MMU_PAGE_64K_AP);              \
-                shift = mmu_psize_defs[psize].shift;                        \
-		for (index = 0; va < __end; index++, va += (1L << shift)) { \
-		        if (!__split || __rpte_sub_valid(rpte, index)) do { \
+#define pte_iterate_hashed_subpages(rpte, psize, vpn, index, shift)	\
+	do {								\
+		unsigned long __end = vpn + (1UL << (PAGE_SHIFT - VPN_SHIFT));	\
+		unsigned __split = (psize == MMU_PAGE_4K ||		\
+				    psize == MMU_PAGE_64K_AP);		\
+		shift = mmu_psize_defs[psize].shift;			\
+		for (index = 0; vpn < __end; index++,			\
+			     vpn += (1L << (shift - VPN_SHIFT))) {	\
+			if (!__split || __rpte_sub_valid(rpte, index))	\
+				do {
 
 #define pte_iterate_hashed_end() } while(0); } } while(0)
 
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c
index 10fc8ec..9d184f1 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
@@ -117,7 +117,7 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
 	}
 
 	vsid = map->host_vsid;
-	va = hpt_va(orig_pte->eaddr, vsid, MMU_SEGSIZE_256M);
+	va = hpt_vpn(orig_pte->eaddr, vsid, MMU_SEGSIZE_256M);
 
 	if (!orig_pte->may_write)
 		rflags |= HPTE_R_PP;
diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S
index a242b5d..534cc26 100644
--- a/arch/powerpc/mm/hash_low_64.S
+++ b/arch/powerpc/mm/hash_low_64.S
@@ -71,7 +71,7 @@ _GLOBAL(__hash_page_4K)
 	/* Save non-volatile registers.
 	 * r31 will hold "old PTE"
 	 * r30 is "new PTE"
-	 * r29 is "va"
+	 * r29 is vpn
 	 * r28 is a hash value
 	 * r27 is hashtab mask (maybe dynamic patched instead ?)
 	 */
@@ -119,10 +119,10 @@ BEGIN_FTR_SECTION
 	cmpdi	r9,0			/* check segment size */
 	bne	3f
 END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
-	/* Calc va and put it in r29 */
-	rldicr	r29,r5,28,63-28
-	rldicl	r3,r3,0,36
-	or	r29,r3,r29
+	/* Calc vpn and put it in r29 */
+	sldi	r29,r5,SID_SHIFT - VPN_SHIFT
+	rldicl  r28,r3,64 - VPN_SHIFT,64 - (SID_SHIFT - VPN_SHIFT)
+	or	r29,r28,r29
 
 	/* Calculate hash value for primary slot and store it in r28 */
 	rldicl	r5,r5,0,25		/* vsid & 0x0000007fffffffff */
@@ -130,14 +130,19 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
 	xor	r28,r5,r0
 	b	4f
 
-3:	/* Calc VA and hash in r29 and r28 for 1T segment */
-	sldi	r29,r5,40		/* vsid << 40 */
-	clrldi	r3,r3,24		/* ea & 0xffffffffff */
+3:	/* Calc vpn and put it in r29 */
+	sldi	r29,r5,SID_SHIFT_1T - VPN_SHIFT
+	rldicl  r28,r3,64 - VPN_SHIFT,64 - (SID_SHIFT_1T - VPN_SHIFT)
+	or	r29,r28,r29
+
+	/*
+	 * calculate hash value for primary slot and
+	 * store it in r28 for 1T segment
+	 */
 	rldic	r28,r5,25,25		/* (vsid << 25) & 0x7fffffffff */
 	clrldi	r5,r5,40		/* vsid & 0xffffff */
 	rldicl	r0,r3,64-12,36		/* (ea >> 12) & 0xfffffff */
 	xor	r28,r28,r5
-	or	r29,r3,r29		/* VA */
 	xor	r28,r28,r0		/* hash */
 
 	/* Convert linux PTE bits into HW equivalents */
@@ -193,7 +198,7 @@ htab_insert_pte:
 
 	/* Call ppc_md.hpte_insert */
 	ld	r6,STK_PARM(r4)(r1)	/* Retrieve new pp bits */
-	mr	r4,r29			/* Retrieve va */
+	mr	r4,r29			/* Retrieve vpn */
 	li	r7,0			/* !bolted, !secondary */
 	li	r8,MMU_PAGE_4K		/* page size */
 	ld	r9,STK_PARM(r9)(r1)	/* segment size */
@@ -216,7 +221,7 @@ _GLOBAL(htab_call_hpte_insert1)
 	
 	/* Call ppc_md.hpte_insert */
 	ld	r6,STK_PARM(r4)(r1)	/* Retrieve new pp bits */
-	mr	r4,r29			/* Retrieve va */
+	mr	r4,r29			/* Retrieve vpn */
 	li	r7,HPTE_V_SECONDARY	/* !bolted, secondary */
 	li	r8,MMU_PAGE_4K		/* page size */
 	ld	r9,STK_PARM(r9)(r1)	/* segment size */
@@ -286,7 +291,7 @@ htab_modify_pte:
 	add	r3,r0,r3	/* add slot idx */
 
 	/* Call ppc_md.hpte_updatepp */
-	mr	r5,r29			/* va */
+	mr	r5,r29			/* vpn */
 	li	r6,MMU_PAGE_4K		/* page size */
 	ld	r7,STK_PARM(r9)(r1)	/* segment size */
 	ld	r8,STK_PARM(r8)(r1)	/* get "local" param */
@@ -347,7 +352,7 @@ _GLOBAL(__hash_page_4K)
 	/* Save non-volatile registers.
 	 * r31 will hold "old PTE"
 	 * r30 is "new PTE"
-	 * r29 is "va"
+	 * r29 is vpn
 	 * r28 is a hash value
 	 * r27 is hashtab mask (maybe dynamic patched instead ?)
 	 * r26 is the hidx mask
@@ -402,10 +407,14 @@ BEGIN_FTR_SECTION
 	cmpdi	r9,0			/* check segment size */
 	bne	3f
 END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
-	/* Calc va and put it in r29 */
-	rldicr	r29,r5,28,63-28		/* r29 = (vsid << 28) */
-	rldicl	r3,r3,0,36		/* r3 = (ea & 0x0fffffff) */
-	or	r29,r3,r29		/* r29 = va */
+	/* Calc vpn and put it in r29 */
+	sldi	r29,r5,SID_SHIFT - VPN_SHIFT
+	/*
+	 * clrldi r3,r3,64 - SID_SHIFT -->  ea & 0xfffffff
+	 * srdi	 r28,r3,VPN_SHIFT
+	 */
+	rldicl  r28,r3,64 - VPN_SHIFT,64 - (SID_SHIFT - VPN_SHIFT)
+	or	r29,r28,r29
 
 	/* Calculate hash value for primary slot and store it in r28 */
 	rldicl	r5,r5,0,25		/* vsid & 0x0000007fffffffff */
@@ -413,14 +422,23 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
 	xor	r28,r5,r0
 	b	4f
 
-3:	/* Calc VA and hash in r29 and r28 for 1T segment */
-	sldi	r29,r5,40		/* vsid << 40 */
-	clrldi	r3,r3,24		/* ea & 0xffffffffff */
+3:	/* Calc vpn and put it in r29 */
+	sldi	r29,r5,SID_SHIFT_1T - VPN_SHIFT
+	/*
+	 * clrldi r3,r3,64 - SID_SHIFT_1T -->  ea & 0xffffffffff
+	 * srdi	r28,r3,VPN_SHIFT
+	 */
+	rldicl  r28,r3,64 - VPN_SHIFT,64 - (SID_SHIFT_1T - VPN_SHIFT)
+	or	r29,r28,r29
+
+	/*
+	 * Calculate hash value for primary slot and
+	 * store it in r28  for 1T segment
+	 */
 	rldic	r28,r5,25,25		/* (vsid << 25) & 0x7fffffffff */
 	clrldi	r5,r5,40		/* vsid & 0xffffff */
 	rldicl	r0,r3,64-12,36		/* (ea >> 12) & 0xfffffff */
 	xor	r28,r28,r5
-	or	r29,r3,r29		/* VA */
 	xor	r28,r28,r0		/* hash */
 
 	/* Convert linux PTE bits into HW equivalents */
@@ -496,7 +514,7 @@ htab_special_pfn:
 
 	/* Call ppc_md.hpte_insert */
 	ld	r6,STK_PARM(r4)(r1)	/* Retrieve new pp bits */
-	mr	r4,r29			/* Retrieve va */
+	mr	r4,r29			/* Retrieve vpn */
 	li	r7,0			/* !bolted, !secondary */
 	li	r8,MMU_PAGE_4K		/* page size */
 	ld	r9,STK_PARM(r9)(r1)	/* segment size */
@@ -523,7 +541,7 @@ _GLOBAL(htab_call_hpte_insert1)
 
 	/* Call ppc_md.hpte_insert */
 	ld	r6,STK_PARM(r4)(r1)	/* Retrieve new pp bits */
-	mr	r4,r29			/* Retrieve va */
+	mr	r4,r29			/* Retrieve vpn */
 	li	r7,HPTE_V_SECONDARY	/* !bolted, secondary */
 	li	r8,MMU_PAGE_4K		/* page size */
 	ld	r9,STK_PARM(r9)(r1)	/* segment size */
@@ -555,7 +573,7 @@ _GLOBAL(htab_call_hpte_remove)
 	 * useless now that the segment has been switched to 4k pages.
 	 */
 htab_inval_old_hpte:
-	mr	r3,r29			/* virtual addr */
+	mr	r3,r29			/* vpn */
 	mr	r4,r31			/* PTE.pte */
 	li	r5,0			/* PTE.hidx */
 	li	r6,MMU_PAGE_64K		/* psize */
@@ -628,7 +646,7 @@ htab_modify_pte:
 	add	r3,r0,r3	/* add slot idx */
 
 	/* Call ppc_md.hpte_updatepp */
-	mr	r5,r29			/* va */
+	mr	r5,r29			/* vpn */
 	li	r6,MMU_PAGE_4K		/* page size */
 	ld	r7,STK_PARM(r9)(r1)	/* segment size */
 	ld	r8,STK_PARM(r8)(r1)	/* get "local" param */
@@ -684,7 +702,7 @@ _GLOBAL(__hash_page_64K)
 	/* Save non-volatile registers.
 	 * r31 will hold "old PTE"
 	 * r30 is "new PTE"
-	 * r29 is "va"
+	 * r29 is vpn
 	 * r28 is a hash value
 	 * r27 is hashtab mask (maybe dynamic patched instead ?)
 	 */
@@ -737,10 +755,10 @@ BEGIN_FTR_SECTION
 	cmpdi	r9,0			/* check segment size */
 	bne	3f
 END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
-	/* Calc va and put it in r29 */
-	rldicr	r29,r5,28,63-28
-	rldicl	r3,r3,0,36
-	or	r29,r3,r29
+	/* Calc vpn and put it in r29 */
+	sldi	r29,r5,SID_SHIFT - VPN_SHIFT
+	rldicl  r28,r3,64 - VPN_SHIFT,64 - (SID_SHIFT - VPN_SHIFT)
+	or	r29,r28,r29
 
 	/* Calculate hash value for primary slot and store it in r28 */
 	rldicl	r5,r5,0,25		/* vsid & 0x0000007fffffffff */
@@ -748,14 +766,19 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
 	xor	r28,r5,r0
 	b	4f
 
-3:	/* Calc VA and hash in r29 and r28 for 1T segment */
-	sldi	r29,r5,40		/* vsid << 40 */
-	clrldi	r3,r3,24		/* ea & 0xffffffffff */
+3:	/* Calc vpn and put it in r29 */
+	sldi	r29,r5,SID_SHIFT_1T - VPN_SHIFT
+	rldicl  r28,r3,64 - VPN_SHIFT,64 - (SID_SHIFT_1T - VPN_SHIFT)
+	or	r29,r28,r29
+
+	/*
+	 * calculate hash value for primary slot and
+	 * store it in r28 for 1T segment
+	 */
 	rldic	r28,r5,25,25		/* (vsid << 25) & 0x7fffffffff */
 	clrldi	r5,r5,40		/* vsid & 0xffffff */
 	rldicl	r0,r3,64-16,40		/* (ea >> 16) & 0xffffff */
 	xor	r28,r28,r5
-	or	r29,r3,r29		/* VA */
 	xor	r28,r28,r0		/* hash */
 
 	/* Convert linux PTE bits into HW equivalents */
@@ -814,7 +837,7 @@ ht64_insert_pte:
 
 	/* Call ppc_md.hpte_insert */
 	ld	r6,STK_PARM(r4)(r1)	/* Retrieve new pp bits */
-	mr	r4,r29			/* Retrieve va */
+	mr	r4,r29			/* Retrieve vpn */
 	li	r7,0			/* !bolted, !secondary */
 	li	r8,MMU_PAGE_64K
 	ld	r9,STK_PARM(r9)(r1)	/* segment size */
@@ -837,7 +860,7 @@ _GLOBAL(ht64_call_hpte_insert1)
 
 	/* Call ppc_md.hpte_insert */
 	ld	r6,STK_PARM(r4)(r1)	/* Retrieve new pp bits */
-	mr	r4,r29			/* Retrieve va */
+	mr	r4,r29			/* Retrieve vpn */
 	li	r7,HPTE_V_SECONDARY	/* !bolted, secondary */
 	li	r8,MMU_PAGE_64K
 	ld	r9,STK_PARM(r9)(r1)	/* segment size */
@@ -907,7 +930,7 @@ ht64_modify_pte:
 	add	r3,r0,r3	/* add slot idx */
 
 	/* Call ppc_md.hpte_updatepp */
-	mr	r5,r29			/* va */
+	mr	r5,r29			/* vpn */
 	li	r6,MMU_PAGE_64K
 	ld	r7,STK_PARM(r9)(r1)	/* segment size */
 	ld	r8,STK_PARM(r8)(r1)	/* get "local" param */
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 660b8bb..aa6f4f0 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -39,22 +39,35 @@
 
 DEFINE_RAW_SPINLOCK(native_tlbie_lock);
 
-static inline void __tlbie(unsigned long va, int psize, int ssize)
+static inline void __tlbie(unsigned long vpn, int psize, int ssize)
 {
+	unsigned long va;
 	unsigned int penc;
 
+	/*
+	 * We need 14 to 65 bits of va for a tlbie of a 4K page.
+	 * With vpn we ignore the lower VPN_SHIFT bits already.
+	 * And the top two bits are already ignored because we can
+	 * only accommodate 76 bits in a 64 bit vpn with a VPN_SHIFT
+	 * of 12.
+	 */
+	BUG_ON((77 - 65) > VPN_SHIFT);
+	va = vpn << VPN_SHIFT;
+
+#if 1 /* FIXME!! What is this about ? */
 	/* clear top 16 bits, non SLS segment */
 	va &= ~(0xffffULL << 48);
+#endif
 
 	switch (psize) {
 	case MMU_PAGE_4K:
-		va &= ~0xffful;
 		va |= ssize << 8;
 		asm volatile(ASM_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0), %2)
 			     : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206)
 			     : "memory");
 		break;
 	default:
+		/* We need 14 to 14 + i bits of va */
 		penc = mmu_psize_defs[psize].penc;
 		va &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
 		va |= penc << 12;
@@ -67,21 +80,27 @@ static inline void __tlbie(unsigned long va, int psize, int ssize)
 	}
 }
 
-static inline void __tlbiel(unsigned long va, int psize, int ssize)
+static inline void __tlbiel(unsigned long vpn, int psize, int ssize)
 {
+	unsigned long va;
 	unsigned int penc;
 
+	BUG_ON((77 - 65) > VPN_SHIFT);
+	va = vpn << VPN_SHIFT;
+
+#if 1 /*FIXME!! What is this about ? */
 	/* clear top 16 bits, non SLS segment */
 	va &= ~(0xffffULL << 48);
+#endif
 
 	switch (psize) {
 	case MMU_PAGE_4K:
-		va &= ~0xffful;
 		va |= ssize << 8;
 		asm volatile(".long 0x7c000224 | (%0 << 11) | (0 << 21)"
 			     : : "r"(va) : "memory");
 		break;
 	default:
+		/* We need 14 to 14 + i bits of va */
 		penc = mmu_psize_defs[psize].penc;
 		va &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
 		va |= penc << 12;
@@ -234,7 +253,7 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
 
 	want_v = hpte_encode_v(va, psize, ssize);
 
-	DBG_LOW("    update(va=%016lx, avpnv=%016lx, hash=%016lx, newpp=%x)",
+	DBG_LOW("    update(va=%016lx, avpnv=%016lx, group=%lx, newpp=%lx)",
 		va, want_v & HPTE_V_AVPN, slot, newpp);
 
 	native_lock_hpte(hptep);
@@ -300,7 +319,7 @@ static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
 	struct hash_pte *hptep;
 
 	vsid = get_kernel_vsid(ea, ssize);
-	va = hpt_va(ea, vsid, ssize);
+	va = hpt_vpn(ea, vsid, ssize);
 
 	slot = native_hpte_find(va, psize, ssize);
 	if (slot == -1)
@@ -325,7 +344,7 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long va,
 
 	local_irq_save(flags);
 
-	DBG_LOW("    invalidate(va=%016lx, hash: %x)\n", va, slot);
+	DBG_LOW("    invalidate(va=%016lx, hash: %lx)\n", va, slot);
 
 	want_v = hpte_encode_v(va, psize, ssize);
 	native_lock_hpte(hptep);
@@ -399,7 +418,7 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
 			vpi = (vsid ^ pteg) & htab_hash_mask;
 			seg_off |= vpi << shift;
 		}
-		*va = vsid << SID_SHIFT | seg_off;
+		*va = vsid << (SID_SHIFT - VPN_SHIFT) | seg_off >> VPN_SHIFT;
 	case MMU_SEGSIZE_1T:
 		/* We only have 40 - 23 bits of seg_off in avpn */
 		seg_off = (avpn & 0x1ffff) << 23;
@@ -408,7 +427,7 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
 			vpi = (vsid ^ (vsid << 25) ^ pteg) & htab_hash_mask;
 			seg_off |= vpi << shift;
 		}
-		*va = vsid << SID_SHIFT_1T | seg_off;
+		*va = vsid << (SID_SHIFT_1T - VPN_SHIFT) | seg_off >> VPN_SHIFT;
 	default:
 		*va = size = 0;
 	}
@@ -425,9 +444,10 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
  */
 static void native_hpte_clear(void)
 {
+	unsigned long va = 0;
 	unsigned long slot, slots, flags;
 	struct hash_pte *hptep = htab_address;
-	unsigned long hpte_v, va;
+	unsigned long hpte_v;
 	unsigned long pteg_count;
 	int psize, ssize;
 
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 377e5cb..975c7d1 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -192,7 +192,7 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
 	     vaddr += step, paddr += step) {
 		unsigned long hash, hpteg;
 		unsigned long vsid = get_kernel_vsid(vaddr, ssize);
-		unsigned long va = hpt_va(vaddr, vsid, ssize);
+		unsigned long va  = hpt_vpn(vaddr, vsid, ssize);
 		unsigned long tprot = prot;
 
 		/* Make kernel text executable */
@@ -1208,7 +1208,7 @@ static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi)
 {
 	unsigned long hash, hpteg;
 	unsigned long vsid = get_kernel_vsid(vaddr, mmu_kernel_ssize);
-	unsigned long va = hpt_va(vaddr, vsid, mmu_kernel_ssize);
+	unsigned long va = hpt_vpn(vaddr, vsid, mmu_kernel_ssize);
 	unsigned long mode = htab_convert_pte_flags(PAGE_KERNEL);
 	int ret;
 
@@ -1229,7 +1229,7 @@ static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long lmi)
 {
 	unsigned long hash, hidx, slot;
 	unsigned long vsid = get_kernel_vsid(vaddr, mmu_kernel_ssize);
-	unsigned long va = hpt_va(vaddr, vsid, mmu_kernel_ssize);
+	unsigned long va = hpt_vpn(vaddr, vsid, mmu_kernel_ssize);
 
 	hash = hpt_hash(va, PAGE_SHIFT, mmu_kernel_ssize);
 	spin_lock(&linear_map_hash_lock);
diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c
index cc5c273..1331403 100644
--- a/arch/powerpc/mm/hugetlbpage-hash64.c
+++ b/arch/powerpc/mm/hugetlbpage-hash64.c
@@ -25,7 +25,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
 	BUG_ON(shift != mmu_psize_defs[mmu_psize].shift);
 
 	/* Search the Linux page table for a match with va */
-	va = hpt_va(ea, vsid, ssize);
+	va = hpt_vpn(ea, vsid, ssize);
 
 	/* At this point, we have a pte (old_pte) which can be used to build
 	 * or update an HPTE. There are 2 cases:
diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c
index 31f1820..321c585 100644
--- a/arch/powerpc/mm/tlb_hash64.c
+++ b/arch/powerpc/mm/tlb_hash64.c
@@ -86,7 +86,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
 		vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
 		ssize = mmu_kernel_ssize;
 	}
-	vaddr = hpt_va(addr, vsid, ssize);
+	vaddr = hpt_vpn(addr, vsid, ssize);
 	rpte = __real_pte(__pte(pte), ptep);
 
 	/*
diff --git a/arch/powerpc/platforms/cell/beat_htab.c b/arch/powerpc/platforms/cell/beat_htab.c
index b83077e..c8c7bf6 100644
--- a/arch/powerpc/platforms/cell/beat_htab.c
+++ b/arch/powerpc/platforms/cell/beat_htab.c
@@ -259,7 +259,7 @@ static void beat_lpar_hpte_updateboltedpp(unsigned long newpp,
 	u64 dummy0, dummy1;
 
 	vsid = get_kernel_vsid(ea, MMU_SEGSIZE_256M);
-	va = hpt_va(ea, vsid, MMU_SEGSIZE_256M);
+	va = hpt_vpn(ea, vsid, MMU_SEGSIZE_256M);
 
 	raw_spin_lock(&beat_htab_lock);
 	slot = beat_lpar_hpte_find(va, psize);
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 5f3ef87..2127529 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -227,22 +227,6 @@ static void pSeries_lpar_hptab_clear(void)
 }
 
 /*
- * This computes the AVPN and B fields of the first dword of a HPTE,
- * for use when we want to match an existing PTE.  The bottom 7 bits
- * of the returned value are zero.
- */
-static inline unsigned long hpte_encode_avpn(unsigned long va, int psize,
-					     int ssize)
-{
-	unsigned long v;
-
-	v = (va >> 23) & ~(mmu_psize_defs[psize].avpnm);
-	v <<= HPTE_V_AVPN_SHIFT;
-	v |= ((unsigned long) ssize) << HPTE_V_SSIZE_SHIFT;
-	return v;
-}
-
-/*
  * NOTE: for updatepp ops we are fortunate that the linux "newpp" bits and
  * the low 3 bits of flags happen to line up.  So no transform is needed.
  * We can probably optimize here and assume the high bits of newpp are
@@ -326,7 +310,7 @@ static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp,
 	unsigned long lpar_rc, slot, vsid, va, flags;
 
 	vsid = get_kernel_vsid(ea, ssize);
-	va = hpt_va(ea, vsid, ssize);
+	va = hpt_vpn(ea, vsid, ssize);
 
 	slot = pSeries_lpar_hpte_find(va, psize, ssize);
 	BUG_ON(slot == -1);
@@ -361,7 +345,7 @@ static void pSeries_lpar_hpte_removebolted(unsigned long ea,
 	unsigned long slot, vsid, va;
 
 	vsid = get_kernel_vsid(ea, ssize);
-	va = hpt_va(ea, vsid, ssize);
+	va = hpt_vpn(ea, vsid, ssize);
 
 	slot = pSeries_lpar_hpte_find(va, psize, ssize);
 	BUG_ON(slot == -1);
-- 
1.7.10

^ permalink raw reply related

* [PATCH 2/9] arch/powerpc: Simplify hpte_decode
From: Aneesh Kumar K.V @ 2012-07-05  8:14 UTC (permalink / raw)
  To: benh, paulus; +Cc: linuxppc-dev, Aneesh Kumar K.V
In-Reply-To: <1341476058-5405-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

This patch simplifies hpte_decode to make it easier to switch from
virtual address to virtual page number in a later patch.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/mm/hash_native_64.c |   49 ++++++++++++++++++++++----------------
 1 file changed, 28 insertions(+), 21 deletions(-)

diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 90039bc..660b8bb 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -351,9 +351,10 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long va,
 static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
 			int *psize, int *ssize, unsigned long *va)
 {
+	unsigned long avpn, pteg, vpi;
 	unsigned long hpte_r = hpte->r;
 	unsigned long hpte_v = hpte->v;
-	unsigned long avpn;
+	unsigned long vsid, seg_off;
 	int i, size, shift, penc;
 
 	if (!(hpte_v & HPTE_V_LARGE))
@@ -380,32 +381,38 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
 	}
 
 	/* This works for all page sizes, and for 256M and 1T segments */
+	*ssize = hpte_v >> HPTE_V_SSIZE_SHIFT;
 	shift = mmu_psize_defs[size].shift;
-	avpn = (HPTE_V_AVPN_VAL(hpte_v) & ~mmu_psize_defs[size].avpnm) << 23;
-
-	if (shift < 23) {
-		unsigned long vpi, vsid, pteg;
 
-		pteg = slot / HPTES_PER_GROUP;
-		if (hpte_v & HPTE_V_SECONDARY)
-			pteg = ~pteg;
-		switch (hpte_v >> HPTE_V_SSIZE_SHIFT) {
-		case MMU_SEGSIZE_256M:
-			vpi = ((avpn >> 28) ^ pteg) & htab_hash_mask;
-			break;
-		case MMU_SEGSIZE_1T:
-			vsid = avpn >> 40;
+	avpn = (HPTE_V_AVPN_VAL(hpte_v) & ~mmu_psize_defs[size].avpnm);
+	pteg = slot / HPTES_PER_GROUP;
+	if (hpte_v & HPTE_V_SECONDARY)
+		pteg = ~pteg;
+
+	switch (*ssize) {
+	case MMU_SEGSIZE_256M:
+		/* We only have 28 - 23 bits of seg_off in avpn */
+		seg_off = (avpn & 0x1f) << 23;
+		vsid    =  avpn >> 5;
+		/* We can find more bits from the pteg value */
+		if (shift < 23) {
+			vpi = (vsid ^ pteg) & htab_hash_mask;
+			seg_off |= vpi << shift;
+		}
+		*va = vsid << SID_SHIFT | seg_off;
+	case MMU_SEGSIZE_1T:
+		/* We only have 40 - 23 bits of seg_off in avpn */
+		seg_off = (avpn & 0x1ffff) << 23;
+		vsid    = avpn >> 17;
+		if (shift < 23) {
 			vpi = (vsid ^ (vsid << 25) ^ pteg) & htab_hash_mask;
-			break;
-		default:
-			avpn = vpi = size = 0;
+			seg_off |= vpi << shift;
 		}
-		avpn |= (vpi << mmu_psize_defs[size].shift);
+		*va = vsid << SID_SHIFT_1T | seg_off;
+	default:
+		*va = size = 0;
 	}
-
-	*va = avpn;
 	*psize = size;
-	*ssize = hpte_v >> HPTE_V_SSIZE_SHIFT;
 }
 
 /*
-- 
1.7.10

^ permalink raw reply related

* [PATCH 1/9] arch/powerpc: Use hpt_va to compute virtual address
From: Aneesh Kumar K.V @ 2012-07-05  8:14 UTC (permalink / raw)
  To: benh, paulus; +Cc: linuxppc-dev, Aneesh Kumar K.V
In-Reply-To: <1341476058-5405-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

Don't open-code the virtual address computation; use hpt_va() instead.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/cell/beat_htab.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/cell/beat_htab.c b/arch/powerpc/platforms/cell/beat_htab.c
index 943c9d3..b83077e 100644
--- a/arch/powerpc/platforms/cell/beat_htab.c
+++ b/arch/powerpc/platforms/cell/beat_htab.c
@@ -259,7 +259,7 @@ static void beat_lpar_hpte_updateboltedpp(unsigned long newpp,
 	u64 dummy0, dummy1;
 
 	vsid = get_kernel_vsid(ea, MMU_SEGSIZE_256M);
-	va = (vsid << 28) | (ea & 0x0fffffff);
+	va = hpt_va(ea, vsid, MMU_SEGSIZE_256M);
 
 	raw_spin_lock(&beat_htab_lock);
 	slot = beat_lpar_hpte_find(va, psize);
-- 
1.7.10

^ permalink raw reply related

* Re: [PATCH v3] printk: Have printk() never buffer its data
From: Michael Neuling @ 2012-07-05  7:03 UTC (permalink / raw)
  To: Kay Sievers
  Cc: Greg Kroah-Hartman, LKML, Steven Rostedt, Paul E. McKenney,
	linuxppc-dev, Joe Perches, Andrew Morton, Wu Fengguang,
	Linus Torvalds, Ingo Molnar
In-Reply-To: <1340726856.977.6.camel@mop>

> On Mon, 2012-06-25 at 18:40 -0700, Linus Torvalds wrote:
> > On Mon, Jun 25, 2012 at 5:56 PM, Kay Sievers <kay@vrfy.org> wrote:
> > >
> > > Buffering has nice effects though:
> > > It makes continuation lines appear as one record in the buffer, not as
> > > n individual prints with n headers.
> > 
> > As I already mentioned, buffering for *logging* is different from
> > buffering for *printing*.
> > 
> > I think it might be a great idea to buffer for logging in order to
> > generate one individual buffer record there.
> > 
> > But it needs to be printed as it is generated.
> 
> That's a good idea.
> 
> Something like this could work - only minimally tested at this moment.

This breaks some powerpc configs and is in Linus' tree now as
084681d14e.

When we have printks without a newline (like show_regs()), it
sometimes:
1) drops the console output for that line (dmesg is fine).  Patch to fix
   this below.
2) adds an unnecessary newline to both console and dmesg.  I have no fix
   for this currently.
Reverting this patch fixes both problems.

Post rc4 is way too late to be adding largely untested features like
this.

I suggest we revert this until the next merge window.

Mikey

diff --git a/kernel/printk.c b/kernel/printk.c
index dba1821..91659da 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -1335,7 +1335,7 @@ static void cont_flush(void)
        if (cont.len == 0)
                return;
 
-       log_store(cont.facility, cont.level, LOG_NOCONS, cont.ts_nsec,
+       log_store(cont.facility, cont.level, LOG_DEFAULT, cont.ts_nsec,
                  NULL, 0, cont.buf, cont.len);
 
        cont.flushed = true;



> 
> 
> From: Kay Sievers <kay@vrfy.org>
> Subject: printk: flush continuation lines immediately to console
> 
> Continuation lines are buffered internally to merge the chunked
> printk()s into a single record, and to isolate potentially racy
> continuation users from usual terminated line users.
> 
> This though, has the effect that partial lines are not printed to
> the console in the moment they are emitted. In case the kernel
> crashes in the meantime, the potentially interesting printed
> information will never reach the consoles.
> 
> Here we share the continuation buffer with the console copy logic,
> and partial lines are immediately flushed to the consoles, but still
> buffered internally to improve the integrity of the messages.
> ---
> 
>  kernel/printk.c |  129 +++++++++++++++++++++++++++++++++++++++++++------------
>  1 file changed, 101 insertions(+), 28 deletions(-)
> 
> diff --git a/kernel/printk.c b/kernel/printk.c
> index a2276b9..92c34bd1 100644
> --- a/kernel/printk.c
> +++ b/kernel/printk.c
> @@ -193,12 +193,19 @@ static int console_may_schedule;
>   * separated by ',', and find the message after the ';' character.
>   */
>  
> +enum log_flags {
> +	LOG_DEFAULT = 0,
> +	LOG_NOCONS = 1,		/* already flushed, do not print to console */
> +};
> +
>  struct log {
>  	u64 ts_nsec;		/* timestamp in nanoseconds */
>  	u16 len;		/* length of entire record */
>  	u16 text_len;		/* length of text buffer */
>  	u16 dict_len;		/* length of dictionary buffer */
> -	u16 level;		/* syslog level + facility */
> +	u8 facility;		/* syslog facility */
> +	u8 flags:5;		/* internal record flags */
> +	u8 level:3;		/* syslog level */
>  };
>  
>  /*
> @@ -286,6 +293,7 @@ static u32 log_next(u32 idx)
>  
>  /* insert record into the buffer, discard old ones, update heads */
>  static void log_store(int facility, int level,
> +		      enum log_flags flags, u64 ts_nsec,
>  		      const char *dict, u16 dict_len,
>  		      const char *text, u16 text_len)
>  {
> @@ -329,8 +337,13 @@ static void log_store(int facility, int level,
>  	msg->text_len = text_len;
>  	memcpy(log_dict(msg), dict, dict_len);
>  	msg->dict_len = dict_len;
> -	msg->level = (facility << 3) | (level & 7);
> -	msg->ts_nsec = local_clock();
> +	msg->facility = facility;
> +	msg->level = level & 7;
> +	msg->flags = flags & 0x1f;
> +	if (ts_nsec > 0)
> +		msg->ts_nsec = ts_nsec;
> +	else
> +		msg->ts_nsec = local_clock();
>  	memset(log_dict(msg) + dict_len, 0, pad_len);
>  	msg->len = sizeof(struct log) + text_len + dict_len + pad_len;
>  
> @@ -446,7 +459,7 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
>  	ts_usec = msg->ts_nsec;
>  	do_div(ts_usec, 1000);
>  	len = sprintf(user->buf, "%u,%llu,%llu;",
> -		      msg->level, user->seq, ts_usec);
> +		      (msg->facility << 3) | msg->level, user->seq, ts_usec);
>  
>  	/* escape non-printable characters */
>  	for (i = 0; i < msg->text_len; i++) {
> @@ -787,6 +800,21 @@ static bool printk_time;
>  #endif
>  module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR);
>  
> +static size_t print_time(u64 ts, char *buf)
> +{
> +	unsigned long rem_nsec;
> +
> +	if (!printk_time)
> +		return 0;	/* timestamps disabled: no prefix at all */
> +
> +	if (!buf)
> +		return 15;	/* no buffer: minimum width of the prefix below */
> +
> +	rem_nsec = do_div(ts, 1000000000);	/* ts -> seconds, remainder in ns */
> +	return sprintf(buf, "[%5lu.%06lu] ",
> +		       (unsigned long)ts, rem_nsec / 1000);	/* ns -> us */
> +}
> +
>  static size_t print_prefix(const struct log *msg, bool syslog, char *buf)
>  {
>  	size_t len = 0;
> @@ -803,18 +831,7 @@ static size_t print_prefix(const struct log *msg, bool syslog, char *buf)
>  		}
>  	}
>  
> -	if (printk_time) {
> -		if (buf) {
> -			unsigned long long ts = msg->ts_nsec;
> -			unsigned long rem_nsec = do_div(ts, 1000000000);
> -
> -			len += sprintf(buf + len, "[%5lu.%06lu] ",
> -					 (unsigned long) ts, rem_nsec / 1000);
> -		} else {
> -			len += 15;
> -		}
> -	}
> -
> +	len += print_time(msg->ts_nsec, buf ? buf + len : NULL);
>  	return len;
>  }
>  
> @@ -1272,15 +1289,25 @@ static inline void printk_delay(void)
>  	}
>  }
>  
> +/*
> + * Continuation lines are buffered, and not committed to the record buffer
> + * until the line is complete, or a race forces a flush. The line fragments
> + * though, are printed immediately to the consoles to ensure everything has
> + * reached the console in case of a kernel crash.
> + */
> +static char cont_buf[LOG_LINE_MAX];
> +static size_t cont_len;
> +static size_t cont_cons;
> +static size_t cont_cons_len;
> +static int cont_level;
> +static u64 cont_ts_nsec;
> +static struct task_struct *cont_task;
> +
>  asmlinkage int vprintk_emit(int facility, int level,
>  			    const char *dict, size_t dictlen,
>  			    const char *fmt, va_list args)
>  {
>  	static int recursion_bug;
> -	static char cont_buf[LOG_LINE_MAX];
> -	static size_t cont_len;
> -	static int cont_level;
> -	static struct task_struct *cont_task;
>  	static char textbuf[LOG_LINE_MAX];
>  	char *text = textbuf;
>  	size_t text_len;
> @@ -1326,7 +1353,8 @@ asmlinkage int vprintk_emit(int facility, int level,
>  		recursion_bug = 0;
>  		printed_len += strlen(recursion_msg);
>  		/* emit KERN_CRIT message */
> -		log_store(0, 2, NULL, 0, recursion_msg, printed_len);
> +		log_store(0, 2, LOG_DEFAULT, 0,
> +			  NULL, 0, recursion_msg, printed_len);
>  	}
>  
>  	/*
> @@ -1369,13 +1397,17 @@ asmlinkage int vprintk_emit(int facility, int level,
>  			 * Flush earlier buffer, which is either from a
>  			 * different thread, or when we got a new prefix.
>  			 */
> -			log_store(facility, cont_level, NULL, 0, cont_buf, cont_len);
> +			log_store(facility, cont_level,
> +				  LOG_NOCONS, cont_ts_nsec,
> +				  NULL, 0, cont_buf, cont_len);
>  			cont_len = 0;
>  		}
>  
>  		if (!cont_len) {
> +			cont_cons = 0;
>  			cont_level = level;
>  			cont_task = current;
> +			cont_ts_nsec = local_clock();
>  		}
>  
>  		/* buffer or append to earlier buffer from the same thread */
> @@ -1383,6 +1415,8 @@ asmlinkage int vprintk_emit(int facility, int level,
>  			text_len = sizeof(cont_buf) - cont_len;
>  		memcpy(cont_buf + cont_len, text, text_len);
>  		cont_len += text_len;
> +		cont_cons_len = cont_len;
> +		printed_len = text_len;
>  	} else {
>  		if (cont_len && cont_task == current) {
>  			if (prefix) {
> @@ -1392,6 +1426,7 @@ asmlinkage int vprintk_emit(int facility, int level,
>  				 * with an interrupt.
>  				 */
>  				log_store(facility, cont_level,
> +					  LOG_NOCONS, cont_ts_nsec,
>  					  NULL, 0, cont_buf, cont_len);
>  				cont_len = 0;
>  			}
> @@ -1401,14 +1436,15 @@ asmlinkage int vprintk_emit(int facility, int level,
>  				text_len = sizeof(cont_buf) - cont_len;
>  			memcpy(cont_buf + cont_len, text, text_len);
>  			cont_len += text_len;
> +			cont_cons_len = cont_len;
>  			log_store(facility, cont_level,
> +				  LOG_NOCONS, cont_ts_nsec,
>  				  NULL, 0, cont_buf, cont_len);
>  			cont_len = 0;
> -			cont_task = NULL;
> -			printed_len = cont_len;
> +			printed_len = text_len;
>  		} else {
>  			/* ordinary single and terminated line */
> -			log_store(facility, level,
> +			log_store(facility, level, LOG_DEFAULT, 0,
>  				  dict, dictlen, text, text_len);
>  			printed_len = text_len;
>  		}
> @@ -1795,6 +1831,7 @@ static u32 console_idx;
>   */
>  void console_unlock(void)
>  {
> +	static char text[LOG_LINE_MAX];
>  	static u64 seen_seq;
>  	unsigned long flags;
>  	bool wake_klogd = false;
> @@ -1807,10 +1844,37 @@ void console_unlock(void)
>  
>  	console_may_schedule = 0;
>  
> +	/* flush buffered message fragment immediately to console */
> +	raw_spin_lock_irqsave(&logbuf_lock, flags);
> +	if (cont_cons_len && (cont_cons < cont_cons_len || !cont_len)) {
> +		size_t textlen = 0;
> +		size_t len;
> +
> +		len = cont_cons_len - cont_cons;
> +		if (!cont_cons)
> +			textlen = print_time(cont_ts_nsec, text);
> +		memcpy(text + textlen, cont_buf + cont_cons, len);
> +		textlen += len;
> +		cont_cons = cont_cons_len;
> +
> +		if (!cont_len) {
> +			/* last chunk of line; terminate */
> +			text[textlen++] = '\n';
> +			cont_cons_len = 0;
> +		}
> +		raw_spin_unlock(&logbuf_lock);
> +
> +		stop_critical_timings();
> +		call_console_drivers(cont_level, text, textlen);
> +		start_critical_timings();
> +
> +		local_irq_restore(flags);
> +	} else
> +		raw_spin_unlock_irqrestore(&logbuf_lock, flags);
> +
>  again:
>  	for (;;) {
>  		struct log *msg;
> -		static char text[LOG_LINE_MAX];
>  		size_t len;
>  		int level;
>  
> @@ -1825,13 +1889,22 @@ again:
>  			console_seq = log_first_seq;
>  			console_idx = log_first_idx;
>  		}
> -
> +skip:
>  		if (console_seq == log_next_seq)
>  			break;
>  
>  		msg = log_from_idx(console_idx);
> -		level = msg->level & 7;
> +		if (msg->flags & LOG_NOCONS) {
> +			/*
> +			 * Skip record we have buffered and already printed
> +			 * directly to the console when we received it.
> +			 */
> +			console_idx = log_next(console_idx);
> +			console_seq++;
> +			goto skip;
> +		}
>  
> +		level = msg->level;
>  		len = msg_print_text(msg, false, text, sizeof(text));
>  
>  		console_idx = log_next(console_idx);
> 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
> 

^ permalink raw reply related

* [PATCH] powerpc: Add VDSO version of getcpu
From: Anton Blanchard @ 2012-07-05  6:37 UTC (permalink / raw)
  To: benh, paulus; +Cc: linuxppc-dev


We have a request for a fast method of getting CPU and NUMA node IDs
from userspace. This patch implements a getcpu VDSO function,
similar to x86.

Ben suggested we use SPRG3 which is userspace readable. SPRG3 can be
modified by a KVM guest, so we save the SPRG3 value in the paca and
restore it when transitioning from the guest to the host.

I have a glibc patch that implements sched_getcpu on top of this.
Testing on a POWER7:

baseline: 538 cycles 
vdso:      30 cycles

Signed-off-by: Anton Blanchard <anton@samba.org>
---

v2:
- Fix kvm
- add vdso32 version on 64bit kernel

Index: linux-build/arch/powerpc/include/asm/reg.h
===================================================================
--- linux-build.orig/arch/powerpc/include/asm/reg.h	2012-07-05 16:33:46.191266813 +1000
+++ linux-build/arch/powerpc/include/asm/reg.h	2012-07-05 16:34:27.036005107 +1000
@@ -491,6 +491,7 @@
 #define SPRN_SPRG1	0x111	/* Special Purpose Register General 1 */
 #define SPRN_SPRG2	0x112	/* Special Purpose Register General 2 */
 #define SPRN_SPRG3	0x113	/* Special Purpose Register General 3 */
+#define SPRN_USPRG3	0x103	/* SPRG3 userspace read */
 #define SPRN_SPRG4	0x114	/* Special Purpose Register General 4 */
 #define SPRN_SPRG5	0x115	/* Special Purpose Register General 5 */
 #define SPRN_SPRG6	0x116	/* Special Purpose Register General 6 */
@@ -753,14 +754,14 @@
  * 64-bit server:
  *	- SPRG0 unused (reserved for HV on Power4)
  *	- SPRG2 scratch for exception vectors
- *	- SPRG3 unused (user visible)
+ *	- SPRG3 CPU and NUMA node for VDSO getcpu (user visible)
  *      - HSPRG0 stores PACA in HV mode
  *      - HSPRG1 scratch for "HV" exceptions
  *
  * 64-bit embedded
  *	- SPRG0 generic exception scratch
  *	- SPRG2 TLB exception stack
- *	- SPRG3 unused (user visible)
+ *	- SPRG3 CPU and NUMA node for VDSO getcpu (user visible)
  *	- SPRG4 unused (user visible)
  *	- SPRG6 TLB miss scratch (user visible, sorry !)
  *	- SPRG7 critical exception scratch
Index: linux-build/arch/powerpc/kernel/vdso.c
===================================================================
--- linux-build.orig/arch/powerpc/kernel/vdso.c	2012-07-05 16:33:46.311268984 +1000
+++ linux-build/arch/powerpc/kernel/vdso.c	2012-07-05 16:34:27.036005107 +1000
@@ -706,6 +706,34 @@ static void __init vdso_setup_syscall_ma
 	}
 }
 
+#ifdef CONFIG_PPC64
+int __cpuinit vdso_getcpu_init(void)
+{
+	unsigned long cpu, node, val;
+
+	/*
+	 * SPRG3 contains the CPU in the bottom 16 bits and the NUMA node in
+	 * the next 16 bits. The VDSO uses this to implement getcpu().
+	 */
+	cpu = get_cpu();
+	WARN_ON_ONCE(cpu > 0xffff);
+
+	node = cpu_to_node(cpu);
+	WARN_ON_ONCE(node > 0xffff);
+
+	val = (cpu & 0xffff) | ((node & 0xffff) << 16);	/* 16 bits each, as checked above */
+	mtspr(SPRN_SPRG3, val);
+#ifdef CONFIG_KVM_BOOK3S_HANDLER
+	get_paca()->kvm_hstate.sprg3 = val;	/* copy reloaded into SPRG3 on guest exit */
+#endif
+
+	put_cpu();
+
+	return 0;
+}
+/* We need to call this before SMP init */
+early_initcall(vdso_getcpu_init);
+#endif
 
 static int __init vdso_init(void)
 {
Index: linux-build/arch/powerpc/kernel/vdso64/Makefile
===================================================================
--- linux-build.orig/arch/powerpc/kernel/vdso64/Makefile	2012-07-05 16:33:46.279268404 +1000
+++ linux-build/arch/powerpc/kernel/vdso64/Makefile	2012-07-05 16:34:27.036005107 +1000
@@ -1,6 +1,6 @@
 # List of files in the vdso, has to be asm only for now
 
-obj-vdso64 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o
+obj-vdso64 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o getcpu.o
 
 # Build rules
 
Index: linux-build/arch/powerpc/kernel/vdso64/vdso64.lds.S
===================================================================
--- linux-build.orig/arch/powerpc/kernel/vdso64/vdso64.lds.S	2012-07-05 16:33:46.295268693 +1000
+++ linux-build/arch/powerpc/kernel/vdso64/vdso64.lds.S	2012-07-05 16:34:27.036005107 +1000
@@ -146,6 +146,7 @@ VERSION
 		__kernel_sync_dicache;
 		__kernel_sync_dicache_p5;
 		__kernel_sigtramp_rt64;
+		__kernel_getcpu;
 
 	local: *;
 	};
Index: linux-build/arch/powerpc/kernel/vdso64/getcpu.S
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-build/arch/powerpc/kernel/vdso64/getcpu.S	2012-07-05 16:34:27.036005107 +1000
@@ -0,0 +1,45 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2012
+ *
+ * Author: Anton Blanchard <anton@au.ibm.com>
+ */
+#include <asm/ppc_asm.h>
+#include <asm/vdso.h>
+
+	.text
+/*
+ * Exact prototype of getcpu
+ *
+ * int __kernel_getcpu(unsigned *cpu, unsigned *node);
+ *
+ */
+V_FUNCTION_BEGIN(__kernel_getcpu)
+  .cfi_startproc
+	mfspr	r5,SPRN_USPRG3		/* r5 = SPRG3 via its user-readable number */
+	cmpdi	cr0,r3,0		/* cr0: is the cpu pointer NULL? */
+	cmpdi	cr1,r4,0		/* cr1: is the node pointer NULL? */
+	clrlwi  r6,r5,16		/* r6 = low 16 bits: CPU number */
+	rlwinm  r7,r5,16,31-15,31-0	/* r7 = next 16 bits: NUMA node */
+	beq	cr0,1f			/* cpu == NULL: skip the store */
+	stw	r6,0(r3)
+1:	beq	cr1,2f			/* node == NULL: skip the store */
+	stw	r7,0(r4)
+2:	crclr	cr0*4+so		/* clear the SO bit of cr0 */
+	li	r3,0			/* always success */
+	blr
+  .cfi_endproc
+V_FUNCTION_END(__kernel_getcpu)
Index: linux-build/arch/powerpc/kernel/smp.c
===================================================================
--- linux-build.orig/arch/powerpc/kernel/smp.c	2012-07-05 16:33:46.259268043 +1000
+++ linux-build/arch/powerpc/kernel/smp.c	2012-07-05 16:34:27.036005107 +1000
@@ -48,6 +48,7 @@
 #ifdef CONFIG_PPC64
 #include <asm/paca.h>
 #endif
+#include <asm/vdso.h>
 #include <asm/debug.h>
 
 #ifdef DEBUG
@@ -570,6 +571,8 @@ void __devinit start_secondary(void *unu
 #ifdef CONFIG_PPC64
 	if (system_state == SYSTEM_RUNNING)
 		vdso_data->processorCount++;
+
+	vdso_getcpu_init();
 #endif
 	ipi_call_lock();
 	notify_cpu_starting(cpu);
Index: linux-build/arch/powerpc/include/asm/vdso.h
===================================================================
--- linux-build.orig/arch/powerpc/include/asm/vdso.h	2012-07-05 16:33:46.179266598 +1000
+++ linux-build/arch/powerpc/include/asm/vdso.h	2012-07-05 16:34:27.036005107 +1000
@@ -22,6 +22,8 @@ extern unsigned long vdso64_rt_sigtramp;
 extern unsigned long vdso32_sigtramp;
 extern unsigned long vdso32_rt_sigtramp;
 
+int __cpuinit vdso_getcpu_init(void);
+
 #else /* __ASSEMBLY__ */
 
 #ifdef __VDSO64__
Index: linux-build/arch/powerpc/kernel/vdso32/Makefile
===================================================================
--- linux-build.orig/arch/powerpc/kernel/vdso32/Makefile	2012-07-05 16:33:46.231267536 +1000
+++ linux-build/arch/powerpc/kernel/vdso32/Makefile	2012-07-05 16:34:27.040005179 +1000
@@ -1,7 +1,9 @@
 
 # List of files in the vdso, has to be asm only for now
 
-obj-vdso32 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o
+obj-vdso32-$(CONFIG_PPC64) = getcpu.o
+obj-vdso32 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o \
+		$(obj-vdso32-y)
 
 # Build rules
 
Index: linux-build/arch/powerpc/kernel/vdso32/vdso32.lds.S
===================================================================
--- linux-build.orig/arch/powerpc/kernel/vdso32/vdso32.lds.S	2012-07-05 16:33:46.247267827 +1000
+++ linux-build/arch/powerpc/kernel/vdso32/vdso32.lds.S	2012-07-05 16:34:27.040005179 +1000
@@ -147,6 +147,9 @@ VERSION
 		__kernel_sync_dicache_p5;
 		__kernel_sigtramp32;
 		__kernel_sigtramp_rt32;
+#ifdef CONFIG_PPC64
+		__kernel_getcpu;
+#endif
 
 	local: *;
 	};
Index: linux-build/arch/powerpc/kernel/vdso32/getcpu.S
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-build/arch/powerpc/kernel/vdso32/getcpu.S	2012-07-05 16:34:27.040005179 +1000
@@ -0,0 +1,45 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2012
+ *
+ * Author: Anton Blanchard <anton@au.ibm.com>
+ */
+#include <asm/ppc_asm.h>
+#include <asm/vdso.h>
+
+	.text
+/*
+ * Exact prototype of getcpu
+ *
+ * int __kernel_getcpu(unsigned *cpu, unsigned *node);
+ *
+ */
+V_FUNCTION_BEGIN(__kernel_getcpu)
+  .cfi_startproc
+	mfspr	r5,SPRN_USPRG3		/* r5 = SPRG3 via its user-readable number */
+	cmpwi	cr0,r3,0		/* 32-bit cpu pointer: test the low word only */
+	cmpwi	cr1,r4,0		/* 32-bit node pointer: test the low word only */
+	clrlwi  r6,r5,16		/* r6 = low 16 bits: CPU number */
+	rlwinm  r7,r5,16,31-15,31-0	/* r7 = next 16 bits: NUMA node */
+	beq	cr0,1f			/* cpu == NULL: skip the store */
+	stw	r6,0(r3)
+1:	beq	cr1,2f			/* node == NULL: skip the store */
+	stw	r7,0(r4)
+2:	crclr	cr0*4+so		/* clear the SO bit of cr0 */
+	li	r3,0			/* always success */
+	blr
+  .cfi_endproc
+V_FUNCTION_END(__kernel_getcpu)
Index: linux-build/arch/powerpc/include/asm/kvm_book3s_asm.h
===================================================================
--- linux-build.orig/arch/powerpc/include/asm/kvm_book3s_asm.h	2012-07-05 16:33:46.163266308 +1000
+++ linux-build/arch/powerpc/include/asm/kvm_book3s_asm.h	2012-07-05 16:34:27.040005179 +1000
@@ -74,6 +74,7 @@ struct kvmppc_host_state {
 	ulong vmhandler;
 	ulong scratch0;
 	ulong scratch1;
+	ulong sprg3;
 	u8 in_guest;
 	u8 restore_hid5;
 	u8 napping;
Index: linux-build/arch/powerpc/kernel/asm-offsets.c
===================================================================
--- linux-build.orig/arch/powerpc/kernel/asm-offsets.c	2012-07-05 16:33:46.319269127 +1000
+++ linux-build/arch/powerpc/kernel/asm-offsets.c	2012-07-05 16:34:27.040005179 +1000
@@ -533,6 +533,7 @@ int main(void)
 	HSTATE_FIELD(HSTATE_VMHANDLER, vmhandler);
 	HSTATE_FIELD(HSTATE_SCRATCH0, scratch0);
 	HSTATE_FIELD(HSTATE_SCRATCH1, scratch1);
+	HSTATE_FIELD(HSTATE_SPRG3, sprg3);
 	HSTATE_FIELD(HSTATE_IN_GUEST, in_guest);
 	HSTATE_FIELD(HSTATE_RESTORE_HID5, restore_hid5);
 	HSTATE_FIELD(HSTATE_NAPPING, napping);
Index: linux-build/arch/powerpc/kvm/book3s_hv_rmhandlers.S
===================================================================
--- linux-build.orig/arch/powerpc/kvm/book3s_hv_rmhandlers.S	2012-07-05 16:33:46.327269273 +1000
+++ linux-build/arch/powerpc/kvm/book3s_hv_rmhandlers.S	2012-07-05 16:34:27.040005179 +1000
@@ -1067,6 +1067,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 	mtspr	SPRN_DABR,r5
 	mtspr	SPRN_DABRX,r6
 
+	/* Restore SPRG3 */
+	ld	r3,HSTATE_SPRG3(r13)
+	mtspr	SPRN_SPRG3,r3
+
 	/*
 	 * Reload DEC.  HDEC interrupts were disabled when
 	 * we reloaded the host's LPCR value.

^ permalink raw reply

* Re: linux-next: build failure after merge of the kvm-ppc tree
From: Alexander Graf @ 2012-07-05  6:02 UTC (permalink / raw)
  To: Stephen Rothwell; +Cc: linux-next, ppc-dev, linux-kernel, Stuart Yoder
In-Reply-To: <20120705154929.219789032ef59f9243d0b8a5@canb.auug.org.au>


On 05.07.2012, at 07:49, Stephen Rothwell wrote:

> Hi Alexander,
>
> After merging the kvm-ppc tree, today's linux-next build (powerpc
> ppc64_defconfig) failed like this:
>
> arch/powerpc/kernel/exceptions-64s.S: Assembler messages:
> arch/powerpc/kernel/exceptions-64s.S:479: Error: wrong number of operands
> arch/powerpc/kernel/exceptions-64s.S:486: Error: wrong number of operands
> arch/powerpc/kernel/exceptions-64s.S:486: Error: wrong number of operands
> arch/powerpc/kernel/exceptions-64s.S:487: Error: wrong number of operands
> arch/powerpc/kernel/exceptions-64s.S:487: Error: wrong number of operands
> arch/powerpc/kernel/exceptions-64s.S:494: Error: wrong number of operands
> arch/powerpc/kernel/exceptions-64s.S:494: Error: wrong number of operands
>
> Caused by commit bc8cc4993e19 ("PPC: use CURRENT_THREAD_INFO instead of
> open coded assembly").  The fact that this patch has problems was already
> pointed out on the linuxppc-dev mailing list and it clearly hasn't been
> build tested.
>
> I have used the version of the kvm-ppc tree from next-20120703 for today.

Ah, yes, thanks. I kept the old, broken version in my tree to make sure
I have the dependencies for (and don't forget) the (hopefully good)
patches that follow on top, but was planning to replace it once we have
a new version of the CURRENT_THREAD_INFO one.

Stuart, could you please send a new version of the CURRENT_THREAD_INFO
patch?


Alex

^ permalink raw reply

* linux-next: build failure after merge of the kvm-ppc tree
From: Stephen Rothwell @ 2012-07-05  5:49 UTC (permalink / raw)
  To: Alexander Graf; +Cc: linux-next, ppc-dev, linux-kernel, Stuart Yoder

[-- Attachment #1: Type: text/plain, Size: 1093 bytes --]

Hi Alexander,

After merging the kvm-ppc tree, today's linux-next build (powerpc
ppc64_defconfig) failed like this:

arch/powerpc/kernel/exceptions-64s.S: Assembler messages:
arch/powerpc/kernel/exceptions-64s.S:479: Error: wrong number of operands
arch/powerpc/kernel/exceptions-64s.S:486: Error: wrong number of operands
arch/powerpc/kernel/exceptions-64s.S:486: Error: wrong number of operands
arch/powerpc/kernel/exceptions-64s.S:487: Error: wrong number of operands
arch/powerpc/kernel/exceptions-64s.S:487: Error: wrong number of operands
arch/powerpc/kernel/exceptions-64s.S:494: Error: wrong number of operands
arch/powerpc/kernel/exceptions-64s.S:494: Error: wrong number of operands

Caused by commit bc8cc4993e19 ("PPC: use CURRENT_THREAD_INFO instead of
open coded assembly").  The fact that this patch has problems was already
pointed out on the linuxppc-dev mailing list and it clearly hasn't been
build tested.

I have used the version of the kvm-ppc tree from next-20120703 for today.
-- 
Cheers,
Stephen Rothwell                    sfr@canb.auug.org.au

[-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply

* linux-next: manual merge of the kvm-ppc tree with Linus' tree
From: Stephen Rothwell @ 2012-07-05  5:37 UTC (permalink / raw)
  To: Alexander Graf
  Cc: linux-kernel, Stuart Yoder, Tiejun Chen, linux-next, ppc-dev

[-- Attachment #1: Type: text/plain, Size: 2628 bytes --]

Hi Alexander,

Today's linux-next merge of the kvm-ppc tree got a conflict in
arch/powerpc/kernel/entry_64.S between commit c58ce2b1e3c7 ("ppc64: fix
missing to check all bits of _TIF_USER_WORK_MASK in preempt") from Linus'
tree and commit bc8cc4993e19 ("PPC: use CURRENT_THREAD_INFO instead of
open coded assembly") from the kvm-ppc tree.

Just context changes.  I fixed it up (see below) and can carry the fix as
necessary.
-- 
Cheers,
Stephen Rothwell                    sfr@canb.auug.org.au

diff --cc arch/powerpc/kernel/entry_64.S
index cf38a17,ba943b9..0000000
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@@ -559,54 -558,27 +559,54 @@@ _GLOBAL(ret_from_except_lite
  	mtmsrd	r10,1		  /* Update machine state */
  #endif /* CONFIG_PPC_BOOK3E */
  
- 	clrrdi	r9,r1,THREAD_SHIFT	/* current_thread_info() */
 -#ifdef CONFIG_PREEMPT
+ 	CURRENT_THREAD_INFO(r9, r1)
 -	li	r0,_TIF_NEED_RESCHED	/* bits to check */
  	ld	r3,_MSR(r1)
  	ld	r4,TI_FLAGS(r9)
 -	/* Move MSR_PR bit in r3 to _TIF_SIGPENDING position in r0 */
 -	rlwimi	r0,r3,32+TIF_SIGPENDING-MSR_PR_LG,_TIF_SIGPENDING
 -	and.	r0,r4,r0	/* check NEED_RESCHED and maybe SIGPENDING */
 -	bne	do_work
 -
 -#else /* !CONFIG_PREEMPT */
 -	ld	r3,_MSR(r1)	/* Returning to user mode? */
  	andi.	r3,r3,MSR_PR
 -	beq	restore		/* if not, just restore regs and return */
 +	beq	resume_kernel
  
  	/* Check current_thread_info()->flags */
 +	andi.	r0,r4,_TIF_USER_WORK_MASK
 +	beq	restore
 +
 +	andi.	r0,r4,_TIF_NEED_RESCHED
 +	beq	1f
 +	bl	.restore_interrupts
 +	bl	.schedule
 +	b	.ret_from_except_lite
 +
 +1:	bl	.save_nvgprs
 +	bl	.restore_interrupts
 +	addi	r3,r1,STACK_FRAME_OVERHEAD
 +	bl	.do_notify_resume
 +	b	.ret_from_except
 +
 +resume_kernel:
 +#ifdef CONFIG_PREEMPT
 +	/* Check if we need to preempt */
 +	andi.	r0,r4,_TIF_NEED_RESCHED
 +	beq+	restore
 +	/* Check that preempt_count() == 0 and interrupts are enabled */
 +	lwz	r8,TI_PREEMPT(r9)
 +	cmpwi	cr1,r8,0
 +	ld	r0,SOFTE(r1)
 +	cmpdi	r0,0
 +	crandc	eq,cr1*4+eq,eq
 +	bne	restore
 +
 +	/*
 +	 * Here we are preempting the current task. We want to make
 +	 * sure we are soft-disabled first
 +	 */
 +	SOFT_DISABLE_INTS(r3,r4)
 +1:	bl	.preempt_schedule_irq
 +
 +	/* Re-test flags and eventually loop */
- 	clrrdi	r9,r1,THREAD_SHIFT
+ 	CURRENT_THREAD_INFO(r9, r1)
  	ld	r4,TI_FLAGS(r9)
 -	andi.	r0,r4,_TIF_USER_WORK_MASK
 -	bne	do_work
 -#endif /* !CONFIG_PREEMPT */
 +	andi.	r0,r4,_TIF_NEED_RESCHED
 +	bne	1b
 +#endif /* CONFIG_PREEMPT */
  
  	.globl	fast_exc_return_irq
  fast_exc_return_irq:

[-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply

* Re: [PATCH powerpc 2/2] kfree the cache name  of pgtable cache if SLUB is used
From: Li Zhong @ 2012-07-05  1:41 UTC (permalink / raw)
  To: Glauber Costa
  Cc: LKML, Pekka Enberg, linux-mm, Paul Mackerras, Matt Mackall,
	Christoph Lameter, PowerPC email list
In-Reply-To: <4FF439D0.1000603@parallels.com>

On Wed, 2012-07-04 at 16:40 +0400, Glauber Costa wrote:
> On 07/04/2012 01:00 PM, Li Zhong wrote:
> > On Tue, 2012-07-03 at 15:36 -0500, Christoph Lameter wrote:
> >> > Looking through the emails it seems that there is an issue with alias
> >> > strings. 
> > To be more precise, there seems no big issue currently. I just wanted to
> > make following usage of kmem_cache_create (SLUB) possible:
> > 
> > 	name = some string kmalloced
> > 	kmem_cache_create(name, ...)
> > 	kfree(name);
> 
> Out of curiosity: Why?
> This is not (currently) possible with the other allocators (may change
> with christoph's unification patches), so you would be making your code
> slub-dependent.
> 

For slub itself, I think it's not good that: in some cases, the name
string could be kfreed ( if it was kmalloced ) immediately after calling
the cache create; in some other case, the name string needs to be kept
valid until some init calls finished. 

I agree with you that it would make the code slub-dependent, so I'm now
working on the consistency of the other allocators regarding this name
string duplicating thing. 

^ permalink raw reply

* Re: [Qemu-ppc] [RFC PATCH 12/17] PowerPC: booke64: Add DO_KVM kernel hooks
From: Benjamin Herrenschmidt @ 2012-07-04 22:25 UTC (permalink / raw)
  To: Alexander Graf
  Cc: qemu-ppc@nongnu.org List, Mihai Caraman, linuxppc-dev, KVM list,
	<kvm-ppc@vger.kernel.org>
In-Reply-To: <1B2CBB56-7180-4A73-8E51-6538A725F710@suse.de>

On Wed, 2012-07-04 at 16:29 +0200, Alexander Graf wrote:
 
> > +#ifdef CONFIG_KVM_BOOKE_HV
> > +#define KVM_BOOKE_HV_MFSPR(reg, spr)				\
> > +	BEGIN_FTR_SECTION					\
> > +		mfspr	reg, spr;			  	\
> > +	END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
> > +#else
> > +#define KVM_BOOKE_HV_MFSPR(reg, spr)
> > +#endif
> 
> Bleks - this is ugly. Do we really need to open-code the #ifdef here?
> Can't the feature section code determine that the feature is disabled
> and just always not include the code?

You can't but in any case I don't see the point of the conditional here,
we'll eventually have to load srr1 no ? We can move the load up to here
in all cases or can't we ? If really not, we could have it inside DO_KVM
and be done with it no ?

> > +
> > /* Exception prolog code for all exceptions */
> > -#define EXCEPTION_PROLOG(n, type, srr0, srr1, addition)		     	    \
> > +#define EXCEPTION_PROLOG(n, intnum, type, srr0, srr1, addition)		    \
> > 	mtspr	SPRN_SPRG_##type##_SCRATCH,r13;	/* get spare registers */   \
> > 	mfspr	r13,SPRN_SPRG_PACA;	/* get PACA */			    \
> > 	std	r10,PACA_EX##type+EX_R10(r13);				    \
> > 	std	r11,PACA_EX##type+EX_R11(r13);				    \
> > 	mfcr	r10;			/* save CR */			    \
> > +	KVM_BOOKE_HV_MFSPR(r11,srr1);			    		    \
> > +	DO_KVM	intnum,srr1;				    		    \
> 
> So if DO_KVM already knows srr1, why explicitly do something with it
> the line above, and not in DO_KVM itself?

Yeah that or just move things around in the prolog.

> > 	addition;			/* additional code for that exc. */ \
> > 	std	r1,PACA_EX##type+EX_R1(r13); /* save old r1 in the PACA */  \
> > 	stw	r10,PACA_EX##type+EX_CR(r13); /* save old CR in the PACA */ \
> > @@ -69,17 +82,21 @@
> > 	ld	r1,PACA_MC_STACK(r13);					    \
> > 	subi	r1,r1,SPECIAL_EXC_FRAME_SIZE;
> > 
> > -#define NORMAL_EXCEPTION_PROLOG(n, addition)				    \
> > -	EXCEPTION_PROLOG(n, GEN, SPRN_SRR0, SPRN_SRR1, addition##_GEN(n))
> > +#define NORMAL_EXCEPTION_PROLOG(n, intnum, addition)			    \
> > +	EXCEPTION_PROLOG(n, intnum, GEN, SPRN_SRR0, SPRN_SRR1,		    \
> 
> > Why would we want to pass in 2 numbers? Let's please confine this onto
> a single ID per interrupt vector. Either we use the hardcoded ones
> available here in the KVM code or we use the KVM ones instead of the
> hardcoded ones here. But not both please. Just because it's like that
> on 32bit doesn't count as an excuse :).

Right. Also I already objected to the explicit passing of the srr's
anyway.

Cheers,
Ben.

^ permalink raw reply

* Re: [Qemu-ppc] [RFC PATCH 09/17] KVM: PPC64: booke: Hard disable interrupts when entering guest
From: Benjamin Herrenschmidt @ 2012-07-04 22:21 UTC (permalink / raw)
  To: Alexander Graf
  Cc: qemu-ppc@nongnu.org List, Mihai Caraman, linuxppc-dev, KVM list,
	<kvm-ppc@vger.kernel.org>
In-Reply-To: <10CBFB35-5A18-4EA8-A129-58CCC4CFBB83@suse.de>

On Wed, 2012-07-04 at 16:14 +0200, Alexander Graf wrote:
> > +#ifdef CONFIG_64BIT
> > +#define _hard_irq_disable() hard_irq_disable()
> > +#else
> > +#define _hard_irq_disable() local_irq_disable()
> > +#endif
> 
> So you only swap out the disable bit, but not the enable one? Ben,
> would this work out?

hard_irq_disable() does both a soft and a hard disable. local_irq_enable() will
see that irqs are hard disabled and will hard enable.

However, there's a nastier discrepancy above: local_irq_disable will
properly inform lockdep that we are disabling, while hard_irq_disable
won't.

Arguably we might want to fix that inside hard_irq_disable() itself...

Also you need to be careful. If you are coming with interrupts already
enabled, it's fine, but if you have interrupts soft disabled, then
you hard disable, before you enter the guest you probably want to
check if anything was left "pending" and cancel the entering of the
guest if that is the case.

Cheers,
Ben.

^ permalink raw reply

* RE: [Qemu-ppc] [RFC PATCH 15/17] KVM: PPC64: bookehv: Add support for interrupt handling
From: Caraman Mihai Claudiu-B02008 @ 2012-07-04 18:21 UTC (permalink / raw)
  To: Alexander Graf
  Cc: qemu-ppc@nongnu.org, linuxppc-dev@lists.ozlabs.org,
	kvm@vger.kernel.org, kvm-ppc@vger.kernel.org
In-Reply-To: <5C22DF4D-4F41-4A25-9E01-C69CF02B0C52@suse.de>

>On 04.07.2012, at 17:37, Caraman Mihai Claudiu-B02008 wrote:=0A=
>=0A=
>>> -----Original Message-----=0A=
>>> From: kvm-ppc-owner@vger.kernel.org [mailto:kvm-ppc-=0A=
>>> owner@vger.kernel.org] On Behalf Of Alexander Graf=0A=
>>> Sent: Wednesday, July 04, 2012 6:14 PM=0A=
>>> To: Caraman Mihai Claudiu-B02008=0A=
>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; linuxppc-=0A=
>>> dev@lists.ozlabs.org; qemu-ppc@nongnu.org=0A=
>>> Subject: Re: [Qemu-ppc] [RFC PATCH 15/17] KVM: PPC64: bookehv: Add=0A=
>>> support for interrupt handling=0A=
>>>=0A=
>>> Is this code so vastly different from the 32bit variant that they can't=
=0A=
>>> be the same with a few simple ifdef's here and there?=0A=
>>=0A=
>> As you can see from input register values things are quite different. I =
strived=0A=
>> to keep the code common, the only divergence is in the kvm_handler defin=
itions.=0A=
>=0A=
>What a shame :(. A lot of it looks very very similar.=0A=
=0A=
The Devil is in the details ;)

-Mike

^ permalink raw reply

* RE: [Qemu-ppc] [RFC PATCH 12/17] PowerPC: booke64: Add DO_KVM kernel hooks
From: Caraman Mihai Claudiu-B02008 @ 2012-07-04 18:15 UTC (permalink / raw)
  To: Alexander Graf
  Cc: qemu-ppc@nongnu.org List, linuxppc-dev, KVM list,
	<kvm-ppc@vger.kernel.org>
In-Reply-To: <2526CB9E-4B5A-40EA-9CFC-DFCA4B09F375@suse.de>

>________________________________________=0A=
>From: Alexander Graf [agraf@suse.de]=0A=
>Sent: Wednesday, July 04, 2012 6:45 PM=0A=
>To: Caraman Mihai Claudiu-B02008=0A=
>Cc: <kvm-ppc@vger.kernel.org>; KVM list; linuxppc-dev; qemu-ppc@nongnu.org=
 List; Benjamin Herrenschmidt=0A=
>Subject: Re: [Qemu-ppc] [RFC PATCH 12/17] PowerPC: booke64: Add DO_KVM ker=
nel hooks=0A=
>=0A=
>On 04.07.2012, at 17:27, Caraman Mihai Claudiu-B02008 wrote:=0A=
>=0A=
>>> -----Original Message-----=0A=
>>> From: Alexander Graf [mailto:agraf@suse.de]=0A=
>>> Sent: Wednesday, July 04, 2012 5:30 PM=0A=
>>> To: Caraman Mihai Claudiu-B02008=0A=
>>> Cc: <kvm-ppc@vger.kernel.org>; KVM list; linuxppc-dev; qemu-=0A=
>>> ppc@nongnu.org List; Benjamin Herrenschmidt=0A=
>>> Subject: Re: [Qemu-ppc] [RFC PATCH 12/17] PowerPC: booke64: Add DO_KVM=
=0A=
>>> kernel hooks=0A=
>>>=0A=
>>>=0A=
>>> On 25.06.2012, at 14:26, Mihai Caraman wrote:=0A=
>>>=0A=
>>>> Hook DO_KVM macro to 64-bit booke in a optimal way similar to 32-bit=
=0A=
>>> booke=0A=
>>>> see head_fsl_booke.S file. Extend interrupt handlers' parameter list=
=0A=
>>> with=0A=
>>>> interrupt vector numbers to accomodate the macro. Rework Guest Doorbel=
l=0A=
>>>> handler to use the proper GSRRx save/restore registers.=0A=
>>>> Only the bolted version of tlb miss handers is addressed now.=0A=
>>>>=0A=
>>>> Signed-off-by: Mihai Caraman <mihai.caraman@freescale.com>=0A=
>>>> ---=0A=
>>>> arch/powerpc/kernel/exceptions-64e.S |  114 ++++++++++++++++++++++++--=
-=0A=
>>> -------=0A=
>>>> arch/powerpc/mm/tlb_low_64e.S        |   14 +++-=0A=
>>>> 2 files changed, 92 insertions(+), 36 deletions(-)=0A=
>>>>=0A=
>>>> diff --git a/arch/powerpc/kernel/exceptions-64e.S=0A=
>>> b/arch/powerpc/kernel/exceptions-64e.S=0A=
>>>> index 06f7aec..a60f81f 100644=0A=
>>>> --- a/arch/powerpc/kernel/exceptions-64e.S=0A=
>>>> +++ b/arch/powerpc/kernel/exceptions-64e.S=0A=
>>>> @@ -25,6 +25,8 @@=0A=
>>>> #include <asm/ppc-opcode.h>=0A=
>>>> #include <asm/mmu.h>=0A=
>>>> #include <asm/hw_irq.h>=0A=
>>>> +#include <asm/kvm_asm.h>=0A=
>>>> +#include <asm/kvm_booke_hv_asm.h>=0A=
>>>>=0A=
>>>> /* XXX This will ultimately add space for a special exception save=0A=
>>>> *     structure used to save things like SRR0/SRR1, SPRGs, MAS, etc...=
=0A=
>>>> @@ -34,13 +36,24 @@=0A=
>>>> */=0A=
>>>> #define     SPECIAL_EXC_FRAME_SIZE  INT_FRAME_SIZE=0A=
>>>>=0A=
>>>> +#ifdef CONFIG_KVM_BOOKE_HV=0A=
>>>> +#define KVM_BOOKE_HV_MFSPR(reg, spr)                               \=
=0A=
>>>> +   BEGIN_FTR_SECTION                                       \=0A=
>>>> +           mfspr   reg, spr;                               \=0A=
>>>> +   END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)=0A=
>>>> +#else=0A=
>>>> +#define KVM_BOOKE_HV_MFSPR(reg, spr)=0A=
>>>> +#endif=0A=
>>>=0A=
>>> Bleks - this is ugly.=0A=
>>=0A=
>> I agree :) But I opted to keep the optimizations done for 32-bit.=0A=
>>=0A=
>>> Do we really need to open-code the #ifdef here?=0A=
>>=0A=
>> 32-bit implementation fortunately use asm macros, we can't nest defines.=
=0A=
>>=0A=
>>> Can't the feature section code determine that the feature is disabled a=
nd=0A=
>>> just always not include the code?=0A=
>>=0A=
>> CPU_FTR_EMB_HV is set even if KVM is not configured.=0A=
>=0A=
>I don't get the point then. Why not have the whole DO_KVM masked under FTR=
_SECTION_IFSET(CPU_FTR_EMB_HV)? Are there book3s_64 implementations without=
 HV? =0A=
=0A=
I guess you refer to book3e_64. I don't know all implementations, but the
Embedded.HV category is optional.
=0A=
>Can't we just mfspr unconditionally in DO_KVM?=0A=
=0A=
I think Scott is better placed to answer this question; I don't know why he
opted for the other approach.
=0A=
>>>> -/* Guest Doorbell */=0A=
>>>> -   MASKABLE_EXCEPTION(0x2c0, guest_doorbell, .unknown_exception,=0A=
>>> ACK_NONE)=0A=
>>>> +/*=0A=
>>>> + * Guest doorbell interrupt=0A=
>>>> + * This general exception use GSRRx save/restore registers=0A=
>>>> + */=0A=
>>>> +   START_EXCEPTION(guest_doorbell);=0A=
>>>> +   EXCEPTION_PROLOG(0x2c0, BOOKE_INTERRUPT_GUEST_DBELL, GEN,=0A=
>>>> +                    SPRN_GSRR0, SPRN_GSRR1, PROLOG_ADDITION_NONE)=0A=
>>>> +   EXCEPTION_COMMON(0x2c0, PACA_EXGEN, INTS_KEEP)=0A=
>>>> +   addi    r3,r1,STACK_FRAME_OVERHEAD=0A=
>>>> +   bl      .save_nvgprs=0A=
>>>> +   INTS_RESTORE_HARD=0A=
>>>> +   bl      .unknown_exception=0A=
>>>> +   b       .ret_from_except=0A=
>>>=0A=
>>> This is independent of DO_KVM, right?=0A=
>>=0A=
>> Yes, just kvm_handler definitions in bookehv_interrupts.S depends on thi=
s.=0A=
>=0A=
>Then please split it out into a separate patch.=0A=
=0A=
Can you be more precise? Are you referring to the guest_doorbell exception
handler?
=0A=
>>>> -.macro tlb_prolog_bolted addr=0A=
>>>> +.macro tlb_prolog_bolted intnum addr=0A=
>>>>     mtspr   SPRN_SPRG_TLB_SCRATCH,r13=0A=
>>>>     mfspr   r13,SPRN_SPRG_PACA=0A=
>>>>     std     r10,PACA_EXTLB+EX_TLB_R10(r13)=0A=
>>>>     mfcr    r10=0A=
>>>>     std     r11,PACA_EXTLB+EX_TLB_R11(r13)=0A=
>>>> +#ifdef CONFIG_KVM_BOOKE_HV=0A=
>>>> +BEGIN_FTR_SECTION=0A=
>>>> +   mfspr   r11, SPRN_SRR1=0A=
>>>> +END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)=0A=
>>>> +#endif=0A=
>>>=0A=
>>> This thing really should vanish behind DO_KVM :)=0A=
>>=0A=
>> Then let's do it first for 32-bit ;)=0A=
>=0A=
>You could #ifdef it in DO_KVM for 64-bit for now. IIRC it's not done on 32=
-bit because the register value is used even beyond DO_KVM there.=0A=
=0A=
Nope, 32-bit code is also guarded by CONFIG_KVM_BOOKE_HV.

-Mike

^ permalink raw reply

* Re: [Qemu-ppc] [RFC PATCH 15/17] KVM: PPC64: bookehv: Add support for interrupt handling
From: Alexander Graf @ 2012-07-04 15:46 UTC (permalink / raw)
  To: Caraman Mihai Claudiu-B02008
  Cc: qemu-ppc@nongnu.org, linuxppc-dev@lists.ozlabs.org,
	kvm@vger.kernel.org, kvm-ppc@vger.kernel.org
In-Reply-To: <300B73AA675FCE4A93EB4FC1D42459FF15A6E8@039-SN2MPN1-013.039d.mgd.msft.net>


On 04.07.2012, at 17:37, Caraman Mihai Claudiu-B02008 wrote:

>> -----Original Message-----
>> From: kvm-ppc-owner@vger.kernel.org [mailto:kvm-ppc-
>> owner@vger.kernel.org] On Behalf Of Alexander Graf
>> Sent: Wednesday, July 04, 2012 6:14 PM
>> To: Caraman Mihai Claudiu-B02008
>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; linuxppc-
>> dev@lists.ozlabs.org; qemu-ppc@nongnu.org
>> Subject: Re: [Qemu-ppc] [RFC PATCH 15/17] KVM: PPC64: bookehv: Add
>> support for interrupt handling
>>=20
>>=20
>> On 25.06.2012, at 14:26, Mihai Caraman wrote:
>>=20
>>> Add bookehv interrupt handling support for 64-bit hosts. Change =
common
>> stack
>>> layout to refer PPC_LR_STKOFF kernel constant. Dispatch the 64-bit
>> execution
>>> flow to the existing kvm_handler_common asm macro. Update input
>> register
>>> values documentation.
>>> Only the bolted version of TLB miss exception handlers is supported
>> now.
>>>=20
>>> Signed-off-by: Mihai Caraman <mihai.caraman@freescale.com>
>>> ---
>>> arch/powerpc/include/asm/kvm_booke_hv_asm.h |   12 +++-
>>> arch/powerpc/kvm/bookehv_interrupts.S       |  120
>> +++++++++++++++++++++++++--
>>> 2 files changed, 122 insertions(+), 10 deletions(-)
>>>=20
>>> diff --git a/arch/powerpc/include/asm/kvm_booke_hv_asm.h
>> b/arch/powerpc/include/asm/kvm_booke_hv_asm.h
>>> index 30a600f..8be6f87 100644
>>> --- a/arch/powerpc/include/asm/kvm_booke_hv_asm.h
>>> +++ b/arch/powerpc/include/asm/kvm_booke_hv_asm.h
>>> @@ -1,5 +1,5 @@
>>> /*
>>> - * Copyright 2010-2011 Freescale Semiconductor, Inc.
>>> + * Copyright 2010-2012 Freescale Semiconductor, Inc.
>>> *
>>> * This program is free software; you can redistribute it and/or =
modify
>>> * it under the terms of the GNU General Public License, version 2, =
as
>>> @@ -17,6 +17,7 @@
>>> * there are no exceptions for which we fall through directly to
>>> * the normal host handler.
>>> *
>>> + * 32-bit host
>>> * Expected inputs (normal exceptions):
>>> *   SCRATCH0 =3D saved r10
>>> *   r10 =3D thread struct
>>> @@ -33,6 +34,15 @@
>>> *   *(r8 + GPR9) =3D saved r9
>>> *   *(r8 + GPR10) =3D saved r10 (r10 not yet clobbered)
>>> *   *(r8 + GPR11) =3D saved r11
>>> + *
>>> + * 64-bit host
>>> + * Expected inputs (exception types GEN/DBG/CRIT/MC):
>>> + *  r13 =3D PACA_POINTER
>>> + *  r10 =3D saved CR
>>> + *  SPRN_SPRG_##type##_SCRATCH =3D saved r13
>>> + *  *(r13 + PACA_EX##type + EX_R10) =3D saved r10
>>> + *  *(r13 + PACA_EX##type + EX_R11) =3D saved r11
>>> + * Only the bolted version of TLB miss exception handlers is =
supported
>> now.
>>> */
>>> .macro DO_KVM intno srr1
>>> #ifdef CONFIG_KVM_BOOKE_HV
>>> diff --git a/arch/powerpc/kvm/bookehv_interrupts.S
>> b/arch/powerpc/kvm/bookehv_interrupts.S
>>> index dff8ed4..04097de 100644
>>> --- a/arch/powerpc/kvm/bookehv_interrupts.S
>>> +++ b/arch/powerpc/kvm/bookehv_interrupts.S
>>> @@ -12,10 +12,11 @@
>>> * along with this program; if not, write to the Free Software
>>> * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  =
02110-1301,
>> USA.
>>> *
>>> - * Copyright (C) 2010-2011 Freescale Semiconductor, Inc.
>>> + * Copyright (C) 2010-2012 Freescale Semiconductor, Inc.
>>> *
>>> * Author: Varun Sethi <varun.sethi@freescale.com>
>>> * Author: Scott Wood <scotwood@freescale.com>
>>> + * Author: Mihai Caraman <mihai.caraman@freescale.com>
>>> *
>>> * This file is derived from arch/powerpc/kvm/booke_interrupts.S
>>> */
>>> @@ -30,7 +31,11 @@
>>> #include <asm/bitsperlong.h>
>>> #include <asm/thread_info.h>
>>>=20
>>> +#ifdef CONFIG_64BIT
>>> +#include <asm/exception-64e.h>
>>> +#else
>>> #include "../kernel/head_booke.h" /* for THREAD_NORMSAVE() */
>>> +#endif
>>>=20
>>>=20
>>> +#ifdef CONFIG_64BIT
>>> +/*
>>> + * For input register values, see
>> arch/powerpc/include/asm/kvm_booke_hv_asm.h
>>> + */
>>> +.macro kvm_handler intno scratch, paca_ex, ex_r10, ex_r11, srr0, =
srr1,
>> flags
>>> + _GLOBAL(kvmppc_handler_\intno\()_\srr1)
>>=20
>> Is this code so vastly different from the 32bit variant that they =
can't
>> be the same with a few simple ifdef's here and there?
>=20
> As you can see from input register values things are quite different. =
I strived
> to keep the code common, the only divergence is in the kvm_handler =
definitions.

What a shame :(. A lot of it looks very very similar.


Alex

^ permalink raw reply

* Re: [Qemu-ppc] [RFC PATCH 12/17] PowerPC: booke64: Add DO_KVM kernel hooks
From: Alexander Graf @ 2012-07-04 15:45 UTC (permalink / raw)
  To: Caraman Mihai Claudiu-B02008
  Cc: qemu-ppc@nongnu.org List, linuxppc-dev, KVM list,
	<kvm-ppc@vger.kernel.org>
In-Reply-To: <300B73AA675FCE4A93EB4FC1D42459FF15A6C6@039-SN2MPN1-013.039d.mgd.msft.net>


On 04.07.2012, at 17:27, Caraman Mihai Claudiu-B02008 wrote:

>> -----Original Message-----
>> From: Alexander Graf [mailto:agraf@suse.de]
>> Sent: Wednesday, July 04, 2012 5:30 PM
>> To: Caraman Mihai Claudiu-B02008
>> Cc: <kvm-ppc@vger.kernel.org>; KVM list; linuxppc-dev; qemu-
>> ppc@nongnu.org List; Benjamin Herrenschmidt
>> Subject: Re: [Qemu-ppc] [RFC PATCH 12/17] PowerPC: booke64: Add =
DO_KVM
>> kernel hooks
>>=20
>>=20
>> On 25.06.2012, at 14:26, Mihai Caraman wrote:
>>=20
>>> Hook DO_KVM macro to 64-bit booke in a optimal way similar to 32-bit
>> booke
>>> see head_fsl_booke.S file. Extend interrupt handlers' parameter list
>> with
>>> interrupt vector numbers to accomodate the macro. Rework Guest =
Doorbell
>>> handler to use the proper GSRRx save/restore registers.
>>> Only the bolted version of tlb miss handers is addressed now.
>>>=20
>>> Signed-off-by: Mihai Caraman <mihai.caraman@freescale.com>
>>> ---
>>> arch/powerpc/kernel/exceptions-64e.S |  114 =
++++++++++++++++++++++++---
>> -------
>>> arch/powerpc/mm/tlb_low_64e.S        |   14 +++-
>>> 2 files changed, 92 insertions(+), 36 deletions(-)
>>>=20
>>> diff --git a/arch/powerpc/kernel/exceptions-64e.S
>> b/arch/powerpc/kernel/exceptions-64e.S
>>> index 06f7aec..a60f81f 100644
>>> --- a/arch/powerpc/kernel/exceptions-64e.S
>>> +++ b/arch/powerpc/kernel/exceptions-64e.S
>>> @@ -25,6 +25,8 @@
>>> #include <asm/ppc-opcode.h>
>>> #include <asm/mmu.h>
>>> #include <asm/hw_irq.h>
>>> +#include <asm/kvm_asm.h>
>>> +#include <asm/kvm_booke_hv_asm.h>
>>>=20
>>> /* XXX This will ultimately add space for a special exception save
>>> *     structure used to save things like SRR0/SRR1, SPRGs, MAS, =
etc...
>>> @@ -34,13 +36,24 @@
>>> */
>>> #define	SPECIAL_EXC_FRAME_SIZE	INT_FRAME_SIZE
>>>=20
>>> +#ifdef CONFIG_KVM_BOOKE_HV
>>> +#define KVM_BOOKE_HV_MFSPR(reg, spr)				=
\
>>> +	BEGIN_FTR_SECTION					\
>>> +		mfspr	reg, spr;			  	\
>>> +	END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
>>> +#else
>>> +#define KVM_BOOKE_HV_MFSPR(reg, spr)
>>> +#endif
>>=20
>> Bleks - this is ugly.
>=20
> I agree :) But I opted to keep the optimizations done for 32-bit.
>=20
>> Do we really need to open-code the #ifdef here?
>=20
> 32-bit implementation fortunately use asm macros, we can't nest =
defines.
>=20
>> Can't the feature section code determine that the feature is disabled =
and
>> just always not include the code?
>=20
> CPU_FTR_EMB_HV is set even if KVM is not configured.

I don't get the point then. Why not have the whole DO_KVM masked under
FTR_SECTION_IFSET(CPU_FTR_EMB_HV)? Are there book3s_64 implementations
without HV? Can't we just mfspr unconditionally in DO_KVM?

>=20
>>=20
>>> +
>>> /* Exception prolog code for all exceptions */
>>> -#define EXCEPTION_PROLOG(n, type, srr0, srr1, addition)
>> \
>>> +#define EXCEPTION_PROLOG(n, intnum, type, srr0, srr1, addition)
>> 	    \
>>> 	mtspr	SPRN_SPRG_##type##_SCRATCH,r13;	/* get spare registers =
*/
>> \
>>> 	mfspr	r13,SPRN_SPRG_PACA;	/* get PACA */			 =
   \
>>> 	std	r10,PACA_EX##type+EX_R10(r13);				 =
   \
>>> 	std	r11,PACA_EX##type+EX_R11(r13);				 =
   \
>>> 	mfcr	r10;			/* save CR */			 =
   \
>>> +	KVM_BOOKE_HV_MFSPR(r11,srr1);			    		 =
   \
>>> +	DO_KVM	intnum,srr1;				    		 =
   \
>>=20
>> So if DO_KVM already knows srr1, why explicitly do something with it =
the
>> line above, and not in DO_KVM itself?
>=20
> srr1 is used to expand the interrupt handler symbol name while r11 is =
used
> for the actual MSR[GS] optimal check:
> 	mtocrf	0x80, r11

Right, so basically we want

#ifdef CONFIG_KVM
mfspr r11, spr
mtocrf 0x80, r11
beq ...
#endif

right?

>=20
>>> -/* Guest Doorbell */
>>> -	MASKABLE_EXCEPTION(0x2c0, guest_doorbell, .unknown_exception,
>> ACK_NONE)
>>> +/*
>>> + *	Guest doorbell interrupt
>>> + *	This general exception use GSRRx save/restore registers
>>> + */
>>> +	START_EXCEPTION(guest_doorbell);
>>> +	EXCEPTION_PROLOG(0x2c0, BOOKE_INTERRUPT_GUEST_DBELL, GEN,
>>> +			 SPRN_GSRR0, SPRN_GSRR1, PROLOG_ADDITION_NONE)
>>> +	EXCEPTION_COMMON(0x2c0, PACA_EXGEN, INTS_KEEP)
>>> +	addi	r3,r1,STACK_FRAME_OVERHEAD
>>> +	bl	.save_nvgprs
>>> +	INTS_RESTORE_HARD
>>> +	bl	.unknown_exception
>>> +	b	.ret_from_except
>>=20
>> This is independent of DO_KVM, right?
>=20
> Yes, just kvm_handler definitions in bookehv_interrupts.S depends on =
this.

Then please split it out into a separate patch.

>=20
>>=20
>>>=20
>>> /* Guest Doorbell critical Interrupt */
>>> 	START_EXCEPTION(guest_doorbell_crit);
>>> -	CRIT_EXCEPTION_PROLOG(0x2e0, PROLOG_ADDITION_NONE)
>>> +	CRIT_EXCEPTION_PROLOG(0x2e0, BOOKE_INTERRUPT_GUEST_DBELL_CRIT,
>>> +			      PROLOG_ADDITION_NONE)
>>=20
>> Shouldn't this one also use GSRR?
>=20
> No, this is a critical exception.

Ah, right. Looked at the wrong bit, sorry :).

>=20
>>>=20
>>> -.macro tlb_prolog_bolted addr
>>> +.macro tlb_prolog_bolted intnum addr
>>> 	mtspr	SPRN_SPRG_TLB_SCRATCH,r13
>>> 	mfspr	r13,SPRN_SPRG_PACA
>>> 	std	r10,PACA_EXTLB+EX_TLB_R10(r13)
>>> 	mfcr	r10
>>> 	std	r11,PACA_EXTLB+EX_TLB_R11(r13)
>>> +#ifdef CONFIG_KVM_BOOKE_HV
>>> +BEGIN_FTR_SECTION
>>> +	mfspr	r11, SPRN_SRR1
>>> +END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
>>> +#endif
>>=20
>> This thing really should vanish behind DO_KVM :)
>=20
> Then let's do it first for 32-bit ;)

You could #ifdef it in DO_KVM for 64-bit for now. IIRC it's not done on
32-bit because the register value is used even beyond DO_KVM there.


Alex

^ permalink raw reply

* RE: [Qemu-ppc] [RFC PATCH 15/17] KVM: PPC64: bookehv: Add support for interrupt handling
From: Caraman Mihai Claudiu-B02008 @ 2012-07-04 15:37 UTC (permalink / raw)
  To: Alexander Graf
  Cc: qemu-ppc@nongnu.org, linuxppc-dev@lists.ozlabs.org,
	kvm@vger.kernel.org, kvm-ppc@vger.kernel.org
In-Reply-To: <FB7DFF5C-5E14-4202-9180-7968294F6A56@suse.de>

> -----Original Message-----
> From: kvm-ppc-owner@vger.kernel.org [mailto:kvm-ppc-
> owner@vger.kernel.org] On Behalf Of Alexander Graf
> Sent: Wednesday, July 04, 2012 6:14 PM
> To: Caraman Mihai Claudiu-B02008
> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; linuxppc-
> dev@lists.ozlabs.org; qemu-ppc@nongnu.org
> Subject: Re: [Qemu-ppc] [RFC PATCH 15/17] KVM: PPC64: bookehv: Add
> support for interrupt handling
>=20
>=20
> On 25.06.2012, at 14:26, Mihai Caraman wrote:
>=20
> > Add bookehv interrupt handling support for 64-bit hosts. Change common
> stack
> > layout to refer PPC_LR_STKOFF kernel constant. Dispatch the 64-bit
> execution
> > flow to the existing kvm_handler_common asm macro. Update input
> register
> > values documentation.
> > Only the bolted version of TLB miss exception handlers is supported
> now.
> >
> > Signed-off-by: Mihai Caraman <mihai.caraman@freescale.com>
> > ---
> > arch/powerpc/include/asm/kvm_booke_hv_asm.h |   12 +++-
> > arch/powerpc/kvm/bookehv_interrupts.S       |  120
> +++++++++++++++++++++++++--
> > 2 files changed, 122 insertions(+), 10 deletions(-)
> >
> > diff --git a/arch/powerpc/include/asm/kvm_booke_hv_asm.h
> b/arch/powerpc/include/asm/kvm_booke_hv_asm.h
> > index 30a600f..8be6f87 100644
> > --- a/arch/powerpc/include/asm/kvm_booke_hv_asm.h
> > +++ b/arch/powerpc/include/asm/kvm_booke_hv_asm.h
> > @@ -1,5 +1,5 @@
> > /*
> > - * Copyright 2010-2011 Freescale Semiconductor, Inc.
> > + * Copyright 2010-2012 Freescale Semiconductor, Inc.
> >  *
> >  * This program is free software; you can redistribute it and/or modify
> >  * it under the terms of the GNU General Public License, version 2, as
> > @@ -17,6 +17,7 @@
> >  * there are no exceptions for which we fall through directly to
> >  * the normal host handler.
> >  *
> > + * 32-bit host
> >  * Expected inputs (normal exceptions):
> >  *   SCRATCH0 =3D saved r10
> >  *   r10 =3D thread struct
> > @@ -33,6 +34,15 @@
> >  *   *(r8 + GPR9) =3D saved r9
> >  *   *(r8 + GPR10) =3D saved r10 (r10 not yet clobbered)
> >  *   *(r8 + GPR11) =3D saved r11
> > + *
> > + * 64-bit host
> > + * Expected inputs (exception types GEN/DBG/CRIT/MC):
> > + *  r13 =3D PACA_POINTER
> > + *  r10 =3D saved CR
> > + *  SPRN_SPRG_##type##_SCRATCH =3D saved r13
> > + *  *(r13 + PACA_EX##type + EX_R10) =3D saved r10
> > + *  *(r13 + PACA_EX##type + EX_R11) =3D saved r11
> > + * Only the bolted version of TLB miss exception handlers is supported
> now.
> >  */
> > .macro DO_KVM intno srr1
> > #ifdef CONFIG_KVM_BOOKE_HV
> > diff --git a/arch/powerpc/kvm/bookehv_interrupts.S
> b/arch/powerpc/kvm/bookehv_interrupts.S
> > index dff8ed4..04097de 100644
> > --- a/arch/powerpc/kvm/bookehv_interrupts.S
> > +++ b/arch/powerpc/kvm/bookehv_interrupts.S
> > @@ -12,10 +12,11 @@
> >  * along with this program; if not, write to the Free Software
> >  * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301,
> USA.
> >  *
> > - * Copyright (C) 2010-2011 Freescale Semiconductor, Inc.
> > + * Copyright (C) 2010-2012 Freescale Semiconductor, Inc.
> >  *
> >  * Author: Varun Sethi <varun.sethi@freescale.com>
> >  * Author: Scott Wood <scotwood@freescale.com>
> > + * Author: Mihai Caraman <mihai.caraman@freescale.com>
> >  *
> >  * This file is derived from arch/powerpc/kvm/booke_interrupts.S
> >  */
> > @@ -30,7 +31,11 @@
> > #include <asm/bitsperlong.h>
> > #include <asm/thread_info.h>
> >
> > +#ifdef CONFIG_64BIT
> > +#include <asm/exception-64e.h>
> > +#else
> > #include "../kernel/head_booke.h" /* for THREAD_NORMSAVE() */
> > +#endif
> >
> >
> > +#ifdef CONFIG_64BIT
> > +/*
> > + * For input register values, see
> arch/powerpc/include/asm/kvm_booke_hv_asm.h
> > + */
> > +.macro kvm_handler intno scratch, paca_ex, ex_r10, ex_r11, srr0, srr1,
> flags
> > + _GLOBAL(kvmppc_handler_\intno\()_\srr1)
>=20
> Is this code so vastly different from the 32bit variant that they can't
> be the same with a few simple ifdef's here and there?

As you can see from the input register values, things are quite different. I
strived to keep the code common; the only divergence is in the kvm_handler
definitions.

-Mike

^ permalink raw reply

* RE: [Qemu-ppc] [RFC PATCH 12/17] PowerPC: booke64: Add DO_KVM kernel hooks
From: Caraman Mihai Claudiu-B02008 @ 2012-07-04 15:27 UTC (permalink / raw)
  To: Alexander Graf
  Cc: qemu-ppc@nongnu.org List, linuxppc-dev, KVM list,
	<kvm-ppc@vger.kernel.org>
In-Reply-To: <1B2CBB56-7180-4A73-8E51-6538A725F710@suse.de>

> -----Original Message-----
> From: Alexander Graf [mailto:agraf@suse.de]
> Sent: Wednesday, July 04, 2012 5:30 PM
> To: Caraman Mihai Claudiu-B02008
> Cc: <kvm-ppc@vger.kernel.org>; KVM list; linuxppc-dev; qemu-
> ppc@nongnu.org List; Benjamin Herrenschmidt
> Subject: Re: [Qemu-ppc] [RFC PATCH 12/17] PowerPC: booke64: Add DO_KVM
> kernel hooks
>=20
>=20
> On 25.06.2012, at 14:26, Mihai Caraman wrote:
>=20
> > Hook DO_KVM macro to 64-bit booke in a optimal way similar to 32-bit
> booke
> > see head_fsl_booke.S file. Extend interrupt handlers' parameter list
> with
> > interrupt vector numbers to accomodate the macro. Rework Guest Doorbell
> > handler to use the proper GSRRx save/restore registers.
> > Only the bolted version of tlb miss handers is addressed now.
> >
> > Signed-off-by: Mihai Caraman <mihai.caraman@freescale.com>
> > ---
> > arch/powerpc/kernel/exceptions-64e.S |  114 ++++++++++++++++++++++++---
> -------
> > arch/powerpc/mm/tlb_low_64e.S        |   14 +++-
> > 2 files changed, 92 insertions(+), 36 deletions(-)
> >
> > diff --git a/arch/powerpc/kernel/exceptions-64e.S
> b/arch/powerpc/kernel/exceptions-64e.S
> > index 06f7aec..a60f81f 100644
> > --- a/arch/powerpc/kernel/exceptions-64e.S
> > +++ b/arch/powerpc/kernel/exceptions-64e.S
> > @@ -25,6 +25,8 @@
> > #include <asm/ppc-opcode.h>
> > #include <asm/mmu.h>
> > #include <asm/hw_irq.h>
> > +#include <asm/kvm_asm.h>
> > +#include <asm/kvm_booke_hv_asm.h>
> >
> > /* XXX This will ultimately add space for a special exception save
> >  *     structure used to save things like SRR0/SRR1, SPRGs, MAS, etc...
> > @@ -34,13 +36,24 @@
> >  */
> > #define	SPECIAL_EXC_FRAME_SIZE	INT_FRAME_SIZE
> >
> > +#ifdef CONFIG_KVM_BOOKE_HV
> > +#define KVM_BOOKE_HV_MFSPR(reg, spr)				\
> > +	BEGIN_FTR_SECTION					\
> > +		mfspr	reg, spr;			  	\
> > +	END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
> > +#else
> > +#define KVM_BOOKE_HV_MFSPR(reg, spr)
> > +#endif
>=20
> Bleks - this is ugly.

I agree :) But I opted to keep the optimizations done for 32-bit.

> Do we really need to open-code the #ifdef here?

32-bit implementation fortunately use asm macros, we can't nest defines.

> Can't the feature section code determine that the feature is disabled and
> just always not include the code?

CPU_FTR_EMB_HV is set even if KVM is not configured.

>=20
> > +
> > /* Exception prolog code for all exceptions */
> > -#define EXCEPTION_PROLOG(n, type, srr0, srr1, addition)
> \
> > +#define EXCEPTION_PROLOG(n, intnum, type, srr0, srr1, addition)
> 	    \
> > 	mtspr	SPRN_SPRG_##type##_SCRATCH,r13;	/* get spare registers */
> \
> > 	mfspr	r13,SPRN_SPRG_PACA;	/* get PACA */			    \
> > 	std	r10,PACA_EX##type+EX_R10(r13);				    \
> > 	std	r11,PACA_EX##type+EX_R11(r13);				    \
> > 	mfcr	r10;			/* save CR */			    \
> > +	KVM_BOOKE_HV_MFSPR(r11,srr1);			    		    \
> > +	DO_KVM	intnum,srr1;				    		    \
>=20
> So if DO_KVM already knows srr1, why explicitly do something with it the
> line above, and not in DO_KVM itself?

srr1 is used to expand the interrupt handler symbol name while r11 is used
for the actual MSR[GS] optimal check:
	mtocrf	0x80, r11

> > -/* Guest Doorbell */
> > -	MASKABLE_EXCEPTION(0x2c0, guest_doorbell, .unknown_exception,
> ACK_NONE)
> > +/*
> > + *	Guest doorbell interrupt
> > + *	This general exception use GSRRx save/restore registers
> > + */
> > +	START_EXCEPTION(guest_doorbell);
> > +	EXCEPTION_PROLOG(0x2c0, BOOKE_INTERRUPT_GUEST_DBELL, GEN,
> > +			 SPRN_GSRR0, SPRN_GSRR1, PROLOG_ADDITION_NONE)
> > +	EXCEPTION_COMMON(0x2c0, PACA_EXGEN, INTS_KEEP)
> > +	addi	r3,r1,STACK_FRAME_OVERHEAD
> > +	bl	.save_nvgprs
> > +	INTS_RESTORE_HARD
> > +	bl	.unknown_exception
> > +	b	.ret_from_except
>=20
> This is independent of DO_KVM, right?

Yes, just kvm_handler definitions in bookehv_interrupts.S depends on this.

>=20
> >
> > /* Guest Doorbell critical Interrupt */
> > 	START_EXCEPTION(guest_doorbell_crit);
> > -	CRIT_EXCEPTION_PROLOG(0x2e0, PROLOG_ADDITION_NONE)
> > +	CRIT_EXCEPTION_PROLOG(0x2e0, BOOKE_INTERRUPT_GUEST_DBELL_CRIT,
> > +			      PROLOG_ADDITION_NONE)
>=20
> Shouldn't this one also use GSRR?

No, this is a critical exception.

> >
> > -.macro tlb_prolog_bolted addr
> > +.macro tlb_prolog_bolted intnum addr
> > 	mtspr	SPRN_SPRG_TLB_SCRATCH,r13
> > 	mfspr	r13,SPRN_SPRG_PACA
> > 	std	r10,PACA_EXTLB+EX_TLB_R10(r13)
> > 	mfcr	r10
> > 	std	r11,PACA_EXTLB+EX_TLB_R11(r13)
> > +#ifdef CONFIG_KVM_BOOKE_HV
> > +BEGIN_FTR_SECTION
> > +	mfspr	r11, SPRN_SRR1
> > +END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
> > +#endif
>=20
> This thing really should vanish behind DO_KVM :)

Then let's do it first for 32-bit ;)

-Mike

^ permalink raw reply

* Re: [PATCH v7 1/5] powerpc/85xx: implement hardware timebase sync
From: Tabi Timur-B04825 @ 2012-07-04 15:19 UTC (permalink / raw)
  To: Zhao Chenhui-B35336
  Cc: Wood Scott-B07421, Li Yang-R58472, Zhao Chenhui-B35336,
	linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org
In-Reply-To: <20120704034545.GA6196@localhost.localdomain>

Zhao Chenhui wrote:
> On Tue, Jul 03, 2012 at 10:17:12PM -0500, Tabi Timur-B04825 wrote:
>> Zhao Chenhui wrote:
>>> If the guts variable is NULL, it indicates there is an error in dts or kernel.
>>> We should fix the error, rather than ignore it.
>>
>> And that's why there's a warning message.  Crashing the kernel is not
>> going to fix anything.
>>
>
> This error likely crashes the kernel somewhere.

Can you test this, please?

The point I'm trying to make is that it's wrong to intentionally halt the
kernel unless you're sure that it's the best option.  A missing device
tree node is supposed to only disable a given feature, not break everything.

--=20
Timur Tabi
Linux kernel developer at Freescale

^ permalink raw reply

* Re: [Qemu-ppc] [RFC PATCH 15/17] KVM: PPC64: bookehv: Add support for interrupt handling
From: Alexander Graf @ 2012-07-04 15:13 UTC (permalink / raw)
  To: Mihai Caraman; +Cc: qemu-ppc, linuxppc-dev, kvm, kvm-ppc
In-Reply-To: <1340627195-11544-16-git-send-email-mihai.caraman@freescale.com>


On 25.06.2012, at 14:26, Mihai Caraman wrote:

> Add bookehv interrupt handling support for 64-bit hosts. Change common =
stack
> layout to refer PPC_LR_STKOFF kernel constant. Dispatch the 64-bit =
execution
> flow to the existing kvm_handler_common asm macro. Update input =
register
> values documentation.
> Only the bolted version of TLB miss exception handlers is supported =
now.
>=20
> Signed-off-by: Mihai Caraman <mihai.caraman@freescale.com>
> ---
> arch/powerpc/include/asm/kvm_booke_hv_asm.h |   12 +++-
> arch/powerpc/kvm/bookehv_interrupts.S       |  120 =
+++++++++++++++++++++++++--
> 2 files changed, 122 insertions(+), 10 deletions(-)
>=20
> diff --git a/arch/powerpc/include/asm/kvm_booke_hv_asm.h =
b/arch/powerpc/include/asm/kvm_booke_hv_asm.h
> index 30a600f..8be6f87 100644
> --- a/arch/powerpc/include/asm/kvm_booke_hv_asm.h
> +++ b/arch/powerpc/include/asm/kvm_booke_hv_asm.h
> @@ -1,5 +1,5 @@
> /*
> - * Copyright 2010-2011 Freescale Semiconductor, Inc.
> + * Copyright 2010-2012 Freescale Semiconductor, Inc.
>  *
>  * This program is free software; you can redistribute it and/or =
modify
>  * it under the terms of the GNU General Public License, version 2, as
> @@ -17,6 +17,7 @@
>  * there are no exceptions for which we fall through directly to
>  * the normal host handler.
>  *
> + * 32-bit host
>  * Expected inputs (normal exceptions):
>  *   SCRATCH0 =3D saved r10
>  *   r10 =3D thread struct
> @@ -33,6 +34,15 @@
>  *   *(r8 + GPR9) =3D saved r9
>  *   *(r8 + GPR10) =3D saved r10 (r10 not yet clobbered)
>  *   *(r8 + GPR11) =3D saved r11
> + *
> + * 64-bit host
> + * Expected inputs (exception types GEN/DBG/CRIT/MC):
> + *  r13 =3D PACA_POINTER
> + *  r10 =3D saved CR
> + *  SPRN_SPRG_##type##_SCRATCH =3D saved r13
> + *  *(r13 + PACA_EX##type + EX_R10) =3D saved r10
> + *  *(r13 + PACA_EX##type + EX_R11) =3D saved r11
> + * Only the bolted version of TLB miss exception handlers is =
supported now.
>  */
> .macro DO_KVM intno srr1
> #ifdef CONFIG_KVM_BOOKE_HV
> diff --git a/arch/powerpc/kvm/bookehv_interrupts.S =
b/arch/powerpc/kvm/bookehv_interrupts.S
> index dff8ed4..04097de 100644
> --- a/arch/powerpc/kvm/bookehv_interrupts.S
> +++ b/arch/powerpc/kvm/bookehv_interrupts.S
> @@ -12,10 +12,11 @@
>  * along with this program; if not, write to the Free Software
>  * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  =
02110-1301, USA.
>  *
> - * Copyright (C) 2010-2011 Freescale Semiconductor, Inc.
> + * Copyright (C) 2010-2012 Freescale Semiconductor, Inc.
>  *
>  * Author: Varun Sethi <varun.sethi@freescale.com>
>  * Author: Scott Wood <scotwood@freescale.com>
> + * Author: Mihai Caraman <mihai.caraman@freescale.com>
>  *
>  * This file is derived from arch/powerpc/kvm/booke_interrupts.S
>  */
> @@ -30,7 +31,11 @@
> #include <asm/bitsperlong.h>
> #include <asm/thread_info.h>
>=20
> +#ifdef CONFIG_64BIT
> +#include <asm/exception-64e.h>
> +#else
> #include "../kernel/head_booke.h" /* for THREAD_NORMSAVE() */
> +#endif
>=20
> #define LONGBYTES		(BITS_PER_LONG / 8)
>=20
> @@ -38,20 +43,21 @@
> #define VCPU_GUEST_SPRG(n)	(VCPU_GUEST_SPRGS + (n * LONGBYTES))
>=20
> /* The host stack layout: */
> -#define HOST_R1         (0 * LONGBYTES) /* Implied by stwu. */
> -#define HOST_CALLEE_LR  (1 * LONGBYTES)
> -#define HOST_RUN        (2 * LONGBYTES) /* struct kvm_run */
> +#define HOST_R1         0 /* Implied by stwu. */
> +#define HOST_CALLEE_LR  PPC_LR_STKOFF
> +#define HOST_RUN        (HOST_CALLEE_LR + LONGBYTES)
> /*
>  * r2 is special: it holds 'current', and it made nonvolatile in the
>  * kernel with the -ffixed-r2 gcc option.
>  */
> -#define HOST_R2         (3 * LONGBYTES)
> -#define HOST_CR         (4 * LONGBYTES)
> -#define HOST_NV_GPRS    (5 * LONGBYTES)
> +#define HOST_R2         (HOST_RUN + LONGBYTES)
> +#define HOST_CR         (HOST_R2 + LONGBYTES)
> +#define HOST_NV_GPRS    (HOST_CR + LONGBYTES)
> #define HOST_NV_GPR(n)  (HOST_NV_GPRS + ((n - 14) * LONGBYTES))
> #define HOST_MIN_STACK_SIZE (HOST_NV_GPR(31) + LONGBYTES)
> #define HOST_STACK_SIZE ((HOST_MIN_STACK_SIZE + 15) & ~15) /* Align. =
*/
> -#define HOST_STACK_LR   (HOST_STACK_SIZE + LONGBYTES) /* In caller =
stack frame. */
> +/* LR in caller stack frame. */
> +#define HOST_STACK_LR	(HOST_STACK_SIZE + PPC_LR_STKOFF)
>=20
> #define NEED_EMU		0x00000001 /* emulation -- save nv regs =
*/
> #define NEED_DEAR		0x00000002 /* save faulting DEAR */
> @@ -202,6 +208,102 @@
> 	b	kvmppc_resume_host
> .endm
>=20
> +#ifdef CONFIG_64BIT
> +/*
> + * For input register values, see =
arch/powerpc/include/asm/kvm_booke_hv_asm.h
> + */
> +.macro kvm_handler intno scratch, paca_ex, ex_r10, ex_r11, srr0, =
srr1, flags
> + _GLOBAL(kvmppc_handler_\intno\()_\srr1)

Is this code so vastly different from the 32-bit variant that the two can't
be the same with a few simple #ifdefs here and there?


Alex

^ permalink raw reply

* Re: [RFC PATCH 02/17] KVM: PPC64: booke: Add EPCR support in mtspr/mfspr emulation
From: Alexander Graf @ 2012-07-04 14:53 UTC (permalink / raw)
  To: Caraman Mihai Claudiu-B02008
  Cc: qemu-ppc@nongnu.org, linuxppc-dev@lists.ozlabs.org,
	kvm@vger.kernel.org, kvm-ppc@vger.kernel.org
In-Reply-To: <300B73AA675FCE4A93EB4FC1D42459FF15A48E@039-SN2MPN1-013.039d.mgd.msft.net>


On 04.07.2012, at 16:14, Caraman Mihai Claudiu-B02008 wrote:

>> -----Original Message-----
>> From: kvm-ppc-owner@vger.kernel.org [mailto:kvm-ppc-
>> owner@vger.kernel.org] On Behalf Of Alexander Graf
>> Sent: Wednesday, July 04, 2012 4:22 PM
>> To: Caraman Mihai Claudiu-B02008
>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; linuxppc-
>> dev@lists.ozlabs.org; qemu-ppc@nongnu.org
>> Subject: Re: [RFC PATCH 02/17] KVM: PPC64: booke: Add EPCR support in
>> mtspr/mfspr emulation
>>=20
>>=20
>> On 25.06.2012, at 14:26, Mihai Caraman wrote:
>>=20
>>> Add EPCR support in booke mtspr/mfspr emulation. EPCR register is
>> defined
>>> only for 64-bit and HV categories, so it shoud be available only on =
64-
>> bit
>>> virtual processors. Undefine the support for 32-bit builds.
>>> Define a reusable setter function for vcpu's EPCR.
>>>=20
>>> Signed-off-by: Mihai Caraman <mihai.caraman@freescale.com>
>>> ---
>>> arch/powerpc/kvm/booke.c         |   12 +++++++++++-
>>> arch/powerpc/kvm/booke.h         |    6 ++++++
>>> arch/powerpc/kvm/booke_emulate.c |   13 ++++++++++++-
>>> 3 files changed, 29 insertions(+), 2 deletions(-)
>>>=20
>>> diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
>>> index 72f13f4..f9fa260 100644
>>> --- a/arch/powerpc/kvm/booke.c
>>> +++ b/arch/powerpc/kvm/booke.c
>>> @@ -13,7 +13,7 @@
>>> * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  =
02110-1301,
>> USA.
>>> *
>>> * Copyright IBM Corp. 2007
>>> - * Copyright 2010-2011 Freescale Semiconductor, Inc.
>>> + * Copyright 2010-2012 Freescale Semiconductor, Inc.
>>> *
>>> * Authors: Hollis Blanchard <hollisb@us.ibm.com>
>>> *          Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
>>> @@ -1243,6 +1243,16 @@ void kvmppc_core_commit_memory_region(struct =
kvm
>> *kvm,
>>> {
>>> }
>>>=20
>>> +#ifdef CONFIG_64BIT
>>> +void kvmppc_set_epcr(struct kvm_vcpu *vcpu, u32 new_epcr)
>>> +{
>>> +	vcpu->arch.epcr =3D new_epcr;
>>> +	vcpu->arch.shadow_epcr &=3D ~SPRN_EPCR_GICM;
>>> +	if (vcpu->arch.epcr  & SPRN_EPCR_ICM)
>>> +		vcpu->arch.shadow_epcr |=3D SPRN_EPCR_GICM;
>>=20
>> Why would the setter be #ifdef CONFIG_64BIT? EPCR exists on e500mc =
too,
>> no? Please only #ifdef the GICM bits out.
>=20
> kvmppc_set_epcr deals with guest EPCR and EPCR does not exist on a =
virtual e500mc
> as detailed in patch's comment. All callers are also guarded by #ifdef =
CONFIG_64BIT,
> my assumption was that we will not support a virtual core with 64-bit =
category
> on a 32-bit host.

My main concern is that every #ifdef potentially breaks things without us
knowing. So the fewer #ifdefs we have, the better off we are. The spec only
says that we don't _have_ to implement EPCR for non-HV, non-64-bit systems.
It doesn't forbid doing so, right?


Alex

^ permalink raw reply

* Re: [Qemu-ppc] [RFC PATCH 12/17] PowerPC: booke64: Add DO_KVM kernel hooks
From: Alexander Graf @ 2012-07-04 14:29 UTC (permalink / raw)
  To: Mihai Caraman
  Cc: qemu-ppc@nongnu.org List, linuxppc-dev, KVM list,
	<kvm-ppc@vger.kernel.org>
In-Reply-To: <1340627195-11544-13-git-send-email-mihai.caraman@freescale.com>


On 25.06.2012, at 14:26, Mihai Caraman wrote:

> Hook the DO_KVM macro into 64-bit booke in an optimal way, similar to
> 32-bit booke (see the head_fsl_booke.S file). Extend the interrupt
> handlers' parameter list with interrupt vector numbers to accommodate the
> macro. Rework the Guest Doorbell handler to use the proper GSRRx
> save/restore registers.
> Only the bolted version of the TLB miss handlers is addressed now.
>=20
> Signed-off-by: Mihai Caraman <mihai.caraman@freescale.com>
> ---
> arch/powerpc/kernel/exceptions-64e.S |  114 =
++++++++++++++++++++++++----------
> arch/powerpc/mm/tlb_low_64e.S        |   14 +++-
> 2 files changed, 92 insertions(+), 36 deletions(-)
>=20
> diff --git a/arch/powerpc/kernel/exceptions-64e.S =
b/arch/powerpc/kernel/exceptions-64e.S
> index 06f7aec..a60f81f 100644
> --- a/arch/powerpc/kernel/exceptions-64e.S
> +++ b/arch/powerpc/kernel/exceptions-64e.S
> @@ -25,6 +25,8 @@
> #include <asm/ppc-opcode.h>
> #include <asm/mmu.h>
> #include <asm/hw_irq.h>
> +#include <asm/kvm_asm.h>
> +#include <asm/kvm_booke_hv_asm.h>
>=20
> /* XXX This will ultimately add space for a special exception save
>  *     structure used to save things like SRR0/SRR1, SPRGs, MAS, =
etc...
> @@ -34,13 +36,24 @@
>  */
> #define	SPECIAL_EXC_FRAME_SIZE	INT_FRAME_SIZE
>=20
> +#ifdef CONFIG_KVM_BOOKE_HV
> +#define KVM_BOOKE_HV_MFSPR(reg, spr)				\
> +	BEGIN_FTR_SECTION					\
> +		mfspr	reg, spr;			  	\
> +	END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
> +#else
> +#define KVM_BOOKE_HV_MFSPR(reg, spr)
> +#endif

Bleks - this is ugly. Do we really need to open-code the #ifdef here?
Can't the feature section code determine that the feature is disabled and
simply never include the code?

> +
> /* Exception prolog code for all exceptions */
> -#define EXCEPTION_PROLOG(n, type, srr0, srr1, addition)		 =
    	    \
> +#define EXCEPTION_PROLOG(n, intnum, type, srr0, srr1, addition)		=
    \
> 	mtspr	SPRN_SPRG_##type##_SCRATCH,r13;	/* get spare registers =
*/   \
> 	mfspr	r13,SPRN_SPRG_PACA;	/* get PACA */			 =
   \
> 	std	r10,PACA_EX##type+EX_R10(r13);				 =
   \
> 	std	r11,PACA_EX##type+EX_R11(r13);				 =
   \
> 	mfcr	r10;			/* save CR */			 =
   \
> +	KVM_BOOKE_HV_MFSPR(r11,srr1);			    		 =
   \
> +	DO_KVM	intnum,srr1;				    		 =
   \

So if DO_KVM already knows srr1, why explicitly do something with it on the
line above, and not in DO_KVM itself?

> 	addition;			/* additional code for that exc. =
*/ \
> 	std	r1,PACA_EX##type+EX_R1(r13); /* save old r1 in the PACA =
*/  \
> 	stw	r10,PACA_EX##type+EX_CR(r13); /* save old CR in the PACA =
*/ \
> @@ -69,17 +82,21 @@
> 	ld	r1,PACA_MC_STACK(r13);					 =
   \
> 	subi	r1,r1,SPECIAL_EXC_FRAME_SIZE;
>=20
> -#define NORMAL_EXCEPTION_PROLOG(n, addition)				 =
   \
> -	EXCEPTION_PROLOG(n, GEN, SPRN_SRR0, SPRN_SRR1, =
addition##_GEN(n))
> +#define NORMAL_EXCEPTION_PROLOG(n, intnum, addition)			 =
   \
> +	EXCEPTION_PROLOG(n, intnum, GEN, SPRN_SRR0, SPRN_SRR1,		 =
   \

Why would we want to pass in 2 numbers? Let's please confine this to a
single ID per interrupt vector. Either we use the hardcoded ones available
here in the KVM code, or we use the KVM ones instead of the hardcoded ones
here. But not both, please. Just because it's like that on 32-bit doesn't
count as an excuse :).

> +					 addition##_GEN(n))
>=20
> -#define CRIT_EXCEPTION_PROLOG(n, addition)				 =
   \
> -	EXCEPTION_PROLOG(n, CRIT, SPRN_CSRR0, SPRN_CSRR1, =
addition##_CRIT(n))
> +#define CRIT_EXCEPTION_PROLOG(n, intnum, addition)			 =
   \
> +	EXCEPTION_PROLOG(n, intnum, CRIT, SPRN_CSRR0, SPRN_CSRR1, 	 =
   \
> +					 addition##_CRIT(n))
>=20
> -#define DBG_EXCEPTION_PROLOG(n, addition)				 =
   \
> -	EXCEPTION_PROLOG(n, DBG, SPRN_DSRR0, SPRN_DSRR1, =
addition##_DBG(n))
> +#define DBG_EXCEPTION_PROLOG(n, intnum, addition)			 =
   \
> +	EXCEPTION_PROLOG(n, intnum, DBG, SPRN_DSRR0, SPRN_DSRR1, 	 =
   \
> +					 addition##_DBG(n))
>=20
> -#define MC_EXCEPTION_PROLOG(n, addition)				 =
   \
> -	EXCEPTION_PROLOG(n, MC, SPRN_MCSRR0, SPRN_MCSRR1, =
addition##_MC(n))
> +#define MC_EXCEPTION_PROLOG(n, intnum, addition)			 =
   \
> +	EXCEPTION_PROLOG(n, intnum, MC, SPRN_MCSRR0, SPRN_MCSRR1, 	 =
   \
> +					 addition##_MC(n))
>=20
>=20
> /* Variants of the "addition" argument for the prolog
> @@ -226,9 +243,9 @@ exc_##n##_bad_stack:					=
		    \
> 1:
>=20
>=20
> -#define MASKABLE_EXCEPTION(trapnum, label, hdlr, ack)			=
\
> +#define MASKABLE_EXCEPTION(trapnum, intnum, label, hdlr, ack)		=
\
> 	START_EXCEPTION(label);						=
\
> -	NORMAL_EXCEPTION_PROLOG(trapnum, PROLOG_ADDITION_MASKABLE)	=
\
> +	NORMAL_EXCEPTION_PROLOG(trapnum, intnum, =
PROLOG_ADDITION_MASKABLE)\
> 	EXCEPTION_COMMON(trapnum, PACA_EXGEN, INTS_DISABLE)		=
\
> 	ack(r8);							=
\
> 	CHECK_NAPPING();						=
\
> @@ -279,7 +296,8 @@ interrupt_end_book3e:
>=20
> /* Critical Input Interrupt */
> 	START_EXCEPTION(critical_input);
> -	CRIT_EXCEPTION_PROLOG(0x100, PROLOG_ADDITION_NONE)
> +	CRIT_EXCEPTION_PROLOG(0x100, BOOKE_INTERRUPT_CRITICAL,
> +			      PROLOG_ADDITION_NONE)
> //	EXCEPTION_COMMON(0x100, PACA_EXCRIT, INTS_DISABLE)
> //	bl	special_reg_save_crit
> //	CHECK_NAPPING();
> @@ -290,7 +308,8 @@ interrupt_end_book3e:
>=20
> /* Machine Check Interrupt */
> 	START_EXCEPTION(machine_check);
> -	MC_EXCEPTION_PROLOG(0x200, PROLOG_ADDITION_NONE)
> +	MC_EXCEPTION_PROLOG(0x200, BOOKE_INTERRUPT_MACHINE_CHECK,
> +			    PROLOG_ADDITION_NONE)
> //	EXCEPTION_COMMON(0x200, PACA_EXMC, INTS_DISABLE)
> //	bl	special_reg_save_mc
> //	addi	r3,r1,STACK_FRAME_OVERHEAD
> @@ -301,7 +320,8 @@ interrupt_end_book3e:
>=20
> /* Data Storage Interrupt */
> 	START_EXCEPTION(data_storage)
> -	NORMAL_EXCEPTION_PROLOG(0x300, PROLOG_ADDITION_2REGS)
> +	NORMAL_EXCEPTION_PROLOG(0x300, BOOKE_INTERRUPT_DATA_STORAGE,
> +				PROLOG_ADDITION_2REGS)
> 	mfspr	r14,SPRN_DEAR
> 	mfspr	r15,SPRN_ESR
> 	EXCEPTION_COMMON(0x300, PACA_EXGEN, INTS_DISABLE)
> @@ -309,18 +329,21 @@ interrupt_end_book3e:
>=20
> /* Instruction Storage Interrupt */
> 	START_EXCEPTION(instruction_storage);
> -	NORMAL_EXCEPTION_PROLOG(0x400, PROLOG_ADDITION_2REGS)
> +	NORMAL_EXCEPTION_PROLOG(0x400, BOOKE_INTERRUPT_INST_STORAGE,
> +				PROLOG_ADDITION_2REGS)
> 	li	r15,0
> 	mr	r14,r10
> 	EXCEPTION_COMMON(0x400, PACA_EXGEN, INTS_DISABLE)
> 	b	storage_fault_common
>=20
> /* External Input Interrupt */
> -	MASKABLE_EXCEPTION(0x500, external_input, .do_IRQ, ACK_NONE)
> +	MASKABLE_EXCEPTION(0x500, BOOKE_INTERRUPT_EXTERNAL,
> +			   external_input, .do_IRQ, ACK_NONE)
>=20
> /* Alignment */
> 	START_EXCEPTION(alignment);
> -	NORMAL_EXCEPTION_PROLOG(0x600, PROLOG_ADDITION_2REGS)
> +	NORMAL_EXCEPTION_PROLOG(0x600, BOOKE_INTERRUPT_ALIGNMENT,
> +				PROLOG_ADDITION_2REGS)
> 	mfspr	r14,SPRN_DEAR
> 	mfspr	r15,SPRN_ESR
> 	EXCEPTION_COMMON(0x600, PACA_EXGEN, INTS_KEEP)
> @@ -328,7 +351,8 @@ interrupt_end_book3e:
>=20
> /* Program Interrupt */
> 	START_EXCEPTION(program);
> -	NORMAL_EXCEPTION_PROLOG(0x700, PROLOG_ADDITION_1REG)
> +	NORMAL_EXCEPTION_PROLOG(0x700, BOOKE_INTERRUPT_PROGRAM,
> +				PROLOG_ADDITION_1REG)
> 	mfspr	r14,SPRN_ESR
> 	EXCEPTION_COMMON(0x700, PACA_EXGEN, INTS_DISABLE)
> 	std	r14,_DSISR(r1)
> @@ -340,7 +364,8 @@ interrupt_end_book3e:
>=20
> /* Floating Point Unavailable Interrupt */
> 	START_EXCEPTION(fp_unavailable);
> -	NORMAL_EXCEPTION_PROLOG(0x800, PROLOG_ADDITION_NONE)
> +	NORMAL_EXCEPTION_PROLOG(0x800, BOOKE_INTERRUPT_FP_UNAVAIL,
> +				PROLOG_ADDITION_NONE)
> 	/* we can probably do a shorter exception entry for that one... =
*/
> 	EXCEPTION_COMMON(0x800, PACA_EXGEN, INTS_KEEP)
> 	ld	r12,_MSR(r1)
> @@ -355,14 +380,17 @@ interrupt_end_book3e:
> 	b	.ret_from_except
>=20
> /* Decrementer Interrupt */
> -	MASKABLE_EXCEPTION(0x900, decrementer, .timer_interrupt, =
ACK_DEC)
> +	MASKABLE_EXCEPTION(0x900, BOOKE_INTERRUPT_DECREMENTER,
> +			   decrementer, .timer_interrupt, ACK_DEC)
>=20
> /* Fixed Interval Timer Interrupt */
> -	MASKABLE_EXCEPTION(0x980, fixed_interval, .unknown_exception, =
ACK_FIT)
> +	MASKABLE_EXCEPTION(0x980, BOOKE_INTERRUPT_FIT,
> +			   fixed_interval, .unknown_exception, ACK_FIT)
>=20
> /* Watchdog Timer Interrupt */
> 	START_EXCEPTION(watchdog);
> -	CRIT_EXCEPTION_PROLOG(0x9f0, PROLOG_ADDITION_NONE)
> +	CRIT_EXCEPTION_PROLOG(0x9f0, BOOKE_INTERRUPT_WATCHDOG,
> +			      PROLOG_ADDITION_NONE)
> //	EXCEPTION_COMMON(0x9f0, PACA_EXCRIT, INTS_DISABLE)
> //	bl	special_reg_save_crit
> //	CHECK_NAPPING();
> @@ -381,7 +409,8 @@ interrupt_end_book3e:
>=20
> /* Auxiliary Processor Unavailable Interrupt */
> 	START_EXCEPTION(ap_unavailable);
> -	NORMAL_EXCEPTION_PROLOG(0xf20, PROLOG_ADDITION_NONE)
> +	NORMAL_EXCEPTION_PROLOG(0xf20, BOOKE_INTERRUPT_AP_UNAVAIL,
> +				PROLOG_ADDITION_NONE)
> 	EXCEPTION_COMMON(0xf20, PACA_EXGEN, INTS_DISABLE)
> 	bl	.save_nvgprs
> 	addi	r3,r1,STACK_FRAME_OVERHEAD
> @@ -390,7 +419,8 @@ interrupt_end_book3e:
>=20
> /* Debug exception as a critical interrupt*/
> 	START_EXCEPTION(debug_crit);
> -	CRIT_EXCEPTION_PROLOG(0xd00, PROLOG_ADDITION_2REGS)
> +	CRIT_EXCEPTION_PROLOG(0xd00, BOOKE_INTERRUPT_DEBUG,
> +			      PROLOG_ADDITION_2REGS)
>=20
> 	/*
> 	 * If there is a single step or branch-taken exception in an
> @@ -455,7 +485,8 @@ kernel_dbg_exc:
>=20
> /* Debug exception as a debug interrupt*/
> 	START_EXCEPTION(debug_debug);
> -	DBG_EXCEPTION_PROLOG(0xd08, PROLOG_ADDITION_2REGS)
> +	DBG_EXCEPTION_PROLOG(0xd00, BOOKE_INTERRUPT_DEBUG,
> +						 PROLOG_ADDITION_2REGS)
>=20
> 	/*
> 	 * If there is a single step or branch-taken exception in an
> @@ -516,18 +547,21 @@ kernel_dbg_exc:
> 	b	.ret_from_except
>=20
> 	START_EXCEPTION(perfmon);
> -	NORMAL_EXCEPTION_PROLOG(0x260, PROLOG_ADDITION_NONE)
> +	NORMAL_EXCEPTION_PROLOG(0x260, =
BOOKE_INTERRUPT_PERFORMANCE_MONITOR,
> +				PROLOG_ADDITION_NONE)
> 	EXCEPTION_COMMON(0x260, PACA_EXGEN, INTS_DISABLE)
> 	addi	r3,r1,STACK_FRAME_OVERHEAD
> 	bl	.performance_monitor_exception
> 	b	.ret_from_except_lite
>=20
> /* Doorbell interrupt */
> -	MASKABLE_EXCEPTION(0x280, doorbell, .doorbell_exception, =
ACK_NONE)
> +	MASKABLE_EXCEPTION(0x280, BOOKE_INTERRUPT_DOORBELL,
> +			   doorbell, .doorbell_exception, ACK_NONE)
>=20
> /* Doorbell critical Interrupt */
> 	START_EXCEPTION(doorbell_crit);
> -	CRIT_EXCEPTION_PROLOG(0x2a0, PROLOG_ADDITION_NONE)
> +	CRIT_EXCEPTION_PROLOG(0x2a0, BOOKE_INTERRUPT_DOORBELL_CRITICAL,
> +			      PROLOG_ADDITION_NONE)
> //	EXCEPTION_COMMON(0x2a0, PACA_EXCRIT, INTS_DISABLE)
> //	bl	special_reg_save_crit
> //	CHECK_NAPPING();
> @@ -536,12 +570,24 @@ kernel_dbg_exc:
> //	b	ret_from_crit_except
> 	b	.
>=20
> -/* Guest Doorbell */
> -	MASKABLE_EXCEPTION(0x2c0, guest_doorbell, .unknown_exception, =
ACK_NONE)
> +/*
> + *	Guest doorbell interrupt
> + *	This general exception use GSRRx save/restore registers
> + */
> +	START_EXCEPTION(guest_doorbell);
> +	EXCEPTION_PROLOG(0x2c0, BOOKE_INTERRUPT_GUEST_DBELL, GEN,
> +			 SPRN_GSRR0, SPRN_GSRR1, PROLOG_ADDITION_NONE)
> +	EXCEPTION_COMMON(0x2c0, PACA_EXGEN, INTS_KEEP)
> +	addi	r3,r1,STACK_FRAME_OVERHEAD
> +	bl	.save_nvgprs
> +	INTS_RESTORE_HARD
> +	bl	.unknown_exception
> +	b	.ret_from_except

This is independent of DO_KVM, right?

>=20
> /* Guest Doorbell critical Interrupt */
> 	START_EXCEPTION(guest_doorbell_crit);
> -	CRIT_EXCEPTION_PROLOG(0x2e0, PROLOG_ADDITION_NONE)
> +	CRIT_EXCEPTION_PROLOG(0x2e0, BOOKE_INTERRUPT_GUEST_DBELL_CRIT,
> +			      PROLOG_ADDITION_NONE)

Shouldn't this one also use GSRR?

> //	EXCEPTION_COMMON(0x2e0, PACA_EXCRIT, INTS_DISABLE)
> //	bl	special_reg_save_crit
> //	CHECK_NAPPING();
> @@ -552,7 +598,8 @@ kernel_dbg_exc:
>=20
> /* Hypervisor call */
> 	START_EXCEPTION(hypercall);
> -	NORMAL_EXCEPTION_PROLOG(0x310, PROLOG_ADDITION_NONE)
> +	NORMAL_EXCEPTION_PROLOG(0x310, BOOKE_INTERRUPT_HV_SYSCALL,
> +			        PROLOG_ADDITION_NONE)
> 	EXCEPTION_COMMON(0x310, PACA_EXGEN, INTS_KEEP)
> 	addi	r3,r1,STACK_FRAME_OVERHEAD
> 	bl	.save_nvgprs
> @@ -562,7 +609,8 @@ kernel_dbg_exc:
>=20
> /* Embedded Hypervisor priviledged  */
> 	START_EXCEPTION(ehpriv);
> -	NORMAL_EXCEPTION_PROLOG(0x320, PROLOG_ADDITION_NONE)
> +	NORMAL_EXCEPTION_PROLOG(0x320, BOOKE_INTERRUPT_HV_PRIV,
> +			        PROLOG_ADDITION_NONE)
> 	EXCEPTION_COMMON(0x320, PACA_EXGEN, INTS_KEEP)
> 	addi	r3,r1,STACK_FRAME_OVERHEAD
> 	bl	.save_nvgprs
> diff --git a/arch/powerpc/mm/tlb_low_64e.S =
b/arch/powerpc/mm/tlb_low_64e.S
> index ff672bd..88feaaa 100644
> --- a/arch/powerpc/mm/tlb_low_64e.S
> +++ b/arch/powerpc/mm/tlb_low_64e.S
> @@ -20,6 +20,8 @@
> #include <asm/pgtable.h>
> #include <asm/exception-64e.h>
> #include <asm/ppc-opcode.h>
> +#include <asm/kvm_asm.h>
> +#include <asm/kvm_booke_hv_asm.h>
>=20
> #ifdef CONFIG_PPC_64K_PAGES
> #define VPTE_PMD_SHIFT	(PTE_INDEX_SIZE+1)
> @@ -37,12 +39,18 @@
>  *                                                                    =
*
>  =
**********************************************************************/
>=20
> -.macro tlb_prolog_bolted addr
> +.macro tlb_prolog_bolted intnum addr
> 	mtspr	SPRN_SPRG_TLB_SCRATCH,r13
> 	mfspr	r13,SPRN_SPRG_PACA
> 	std	r10,PACA_EXTLB+EX_TLB_R10(r13)
> 	mfcr	r10
> 	std	r11,PACA_EXTLB+EX_TLB_R11(r13)
> +#ifdef CONFIG_KVM_BOOKE_HV
> +BEGIN_FTR_SECTION
> +	mfspr	r11, SPRN_SRR1
> +END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
> +#endif

This thing really should vanish behind DO_KVM :)

Alex

> +	DO_KVM	\intnum, SPRN_SRR1
> 	std	r16,PACA_EXTLB+EX_TLB_R16(r13)
> 	mfspr	r16,\addr		/* get faulting address */
> 	std	r14,PACA_EXTLB+EX_TLB_R14(r13)
> @@ -66,7 +74,7 @@
>=20
> /* Data TLB miss */
> 	START_EXCEPTION(data_tlb_miss_bolted)
> -	tlb_prolog_bolted SPRN_DEAR
> +	tlb_prolog_bolted BOOKE_INTERRUPT_DTLB_MISS SPRN_DEAR
>=20
> 	/* We need _PAGE_PRESENT and  _PAGE_ACCESSED set */
>=20
> @@ -214,7 +222,7 @@ itlb_miss_fault_bolted:
>=20
> /* Instruction TLB miss */
> 	START_EXCEPTION(instruction_tlb_miss_bolted)
> -	tlb_prolog_bolted SPRN_SRR0
> +	tlb_prolog_bolted BOOKE_INTERRUPT_ITLB_MISS SPRN_SRR0
>=20
> 	rldicl.	r10,r16,64-PGTABLE_EADDR_SIZE,PGTABLE_EADDR_SIZE+4
> 	srdi	r15,r16,60		/* get region */
> --=20
> 1.7.4.1
>=20
>=20
>=20

^ permalink raw reply

* Re: [Qemu-ppc] [RFC PATCH 09/17] KVM: PPC64: booke: Hard disable interrupts when entering guest
From: Alexander Graf @ 2012-07-04 14:14 UTC (permalink / raw)
  To: Mihai Caraman
  Cc: qemu-ppc@nongnu.org List, linuxppc-dev, KVM list,
	<kvm-ppc@vger.kernel.org>
In-Reply-To: <1340627195-11544-10-git-send-email-mihai.caraman@freescale.com>


On 25.06.2012, at 14:26, Mihai Caraman wrote:

> A 64-bit host runs with lazy interrupt disabling, so local_irq_disable()
> does not disable interrupts right away and does not provide the protection
> against preemption required by __kvmppc_vcpu_run(). Define a macro for
> 64-bit to use hard_irq_disable().
>=20
> Signed-off-by: Mihai Caraman <mihai.caraman@freescale.com>
> ---
> arch/powerpc/kvm/booke.c |   14 ++++++++++----
> 1 files changed, 10 insertions(+), 4 deletions(-)
>=20
> diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
> index 93b48e0..db05692 100644
> --- a/arch/powerpc/kvm/booke.c
> +++ b/arch/powerpc/kvm/booke.c
> @@ -45,6 +45,12 @@ unsigned long kvmppc_booke_handlers;
> #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
> #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
>=20
> +#ifdef CONFIG_64BIT
> +#define _hard_irq_disable() hard_irq_disable()
> +#else
> +#define _hard_irq_disable() local_irq_disable()
> +#endif

So you only swap out the disable bit, but not the enable one? Ben, would
this work out?


Alex

> +
> struct kvm_stats_debugfs_item debugfs_entries[] =3D {
> 	{ "mmio",       VCPU_STAT(mmio_exits) },
> 	{ "dcr",        VCPU_STAT(dcr_exits) },
> @@ -456,7 +462,7 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu =
*vcpu)
> 		local_irq_enable();
> 		kvm_vcpu_block(vcpu);
> 		clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
> -		local_irq_disable();
> +		_hard_irq_disable();
>=20
> 		kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS);
> 		r =3D 1;
> @@ -480,7 +486,7 @@ static int kvmppc_prepare_to_enter(struct kvm_vcpu =
*vcpu)
> 		if (need_resched()) {
> 			local_irq_enable();
> 			cond_resched();
> -			local_irq_disable();
> +			_hard_irq_disable();
> 			continue;
> 		}
>=20
> @@ -515,7 +521,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, =
struct kvm_vcpu *vcpu)
> 		return -EINVAL;
> 	}
>=20
> -	local_irq_disable();
> +	_hard_irq_disable();
> 	if (kvmppc_prepare_to_enter(vcpu)) {
> 		kvm_run->exit_reason =3D KVM_EXIT_INTR;
> 		ret =3D -EINTR;
> @@ -955,7 +961,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct =
kvm_vcpu *vcpu,
> 	 * aren't already exiting to userspace for some other reason.
> 	 */
> 	if (!(r & RESUME_HOST)) {
> -		local_irq_disable();
> +		_hard_irq_disable();
> 		if (kvmppc_prepare_to_enter(vcpu)) {
> 			run->exit_reason =3D KVM_EXIT_INTR;
> 			r =3D (-EINTR << 2) | RESUME_HOST | (r & =
RESUME_FLAG_NV);
> --=20
> 1.7.4.1
>=20
>=20
>=20

^ permalink raw reply

* RE: [RFC PATCH 02/17] KVM: PPC64: booke: Add EPCR support in mtspr/mfspr emulation
From: Caraman Mihai Claudiu-B02008 @ 2012-07-04 14:14 UTC (permalink / raw)
  To: Alexander Graf
  Cc: qemu-ppc@nongnu.org, linuxppc-dev@lists.ozlabs.org,
	kvm@vger.kernel.org, kvm-ppc@vger.kernel.org
In-Reply-To: <868ABA98-C55B-435B-BC09-01DC4067A3BE@suse.de>

> -----Original Message-----
> From: kvm-ppc-owner@vger.kernel.org [mailto:kvm-ppc-
> owner@vger.kernel.org] On Behalf Of Alexander Graf
> Sent: Wednesday, July 04, 2012 4:22 PM
> To: Caraman Mihai Claudiu-B02008
> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; linuxppc-
> dev@lists.ozlabs.org; qemu-ppc@nongnu.org
> Subject: Re: [RFC PATCH 02/17] KVM: PPC64: booke: Add EPCR support in
> mtspr/mfspr emulation
>=20
>=20
> On 25.06.2012, at 14:26, Mihai Caraman wrote:
>=20
> > Add EPCR support in booke mtspr/mfspr emulation. The EPCR register is
> > defined only for the 64-bit and HV categories, so it should be available
> > only on 64-bit virtual processors. Undefine the support for 32-bit builds.
> > Define a reusable setter function for the vcpu's EPCR.
> >
> > Signed-off-by: Mihai Caraman <mihai.caraman@freescale.com>
> > ---
> > arch/powerpc/kvm/booke.c         |   12 +++++++++++-
> > arch/powerpc/kvm/booke.h         |    6 ++++++
> > arch/powerpc/kvm/booke_emulate.c |   13 ++++++++++++-
> > 3 files changed, 29 insertions(+), 2 deletions(-)
> >
> > diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
> > index 72f13f4..f9fa260 100644
> > --- a/arch/powerpc/kvm/booke.c
> > +++ b/arch/powerpc/kvm/booke.c
> > @@ -13,7 +13,7 @@
> >  * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301,
> USA.
> >  *
> >  * Copyright IBM Corp. 2007
> > - * Copyright 2010-2011 Freescale Semiconductor, Inc.
> > + * Copyright 2010-2012 Freescale Semiconductor, Inc.
> >  *
> >  * Authors: Hollis Blanchard <hollisb@us.ibm.com>
> >  *          Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
> > @@ -1243,6 +1243,16 @@ void kvmppc_core_commit_memory_region(struct kvm
> *kvm,
> > {
> > }
> >
> > +#ifdef CONFIG_64BIT
> > +void kvmppc_set_epcr(struct kvm_vcpu *vcpu, u32 new_epcr)
> > +{
> > +	vcpu->arch.epcr =3D new_epcr;
> > +	vcpu->arch.shadow_epcr &=3D ~SPRN_EPCR_GICM;
> > +	if (vcpu->arch.epcr  & SPRN_EPCR_ICM)
> > +		vcpu->arch.shadow_epcr |=3D SPRN_EPCR_GICM;
>=20
> Why would the setter be #ifdef CONFIG_64BIT? EPCR exists on e500mc too,
> no? Please only #ifdef the GICM bits out.

kvmppc_set_epcr deals with the guest EPCR, and EPCR does not exist on a
virtual e500mc, as detailed in the patch's comment. All callers are also
guarded by #ifdef CONFIG_64BIT; my assumption was that we will not support a
virtual core with the 64-bit category on a 32-bit host.

>=20
>=20
> Alex
>=20
> --
> To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [Qemu-ppc] [RFC PATCH 07/17] KVM: PPC: e500: Mask ea's high 32-bits in 32/64 instr emulation
From: Alexander Graf @ 2012-07-04 14:05 UTC (permalink / raw)
  To: Mihai Caraman; +Cc: linuxppc-dev, qemu-ppc, kvm-ppc, kvm
In-Reply-To: <78A34E01-4FF9-46F2-8D12-BD27AA44BC37@suse.de>


On 04.07.2012, at 16:00, Alexander Graf wrote:

>=20
> On 25.06.2012, at 14:26, Mihai Caraman wrote:
>=20
>> Mask high 32 bits of effective address in emulation layer, for guests =
running
>> in 32-bit mode.
>> MAS2's high-order 32 bits represents the upper 32 bits of the =
effective address
>> of the page. Mask it too for tlbwe instruction emulation.
>=20
> Ah, there is the tlbwe masking :). Please split this into 2 patches.
>=20
>>=20
>> Signed-off-by: Mihai Caraman <mihai.caraman@freescale.com>
>> ---
>> arch/powerpc/kvm/e500_emulate.c |    5 ++++-
>> arch/powerpc/kvm/e500_tlb.c     |    2 ++
>> 2 files changed, 6 insertions(+), 1 deletions(-)
>>=20
>> diff --git a/arch/powerpc/kvm/e500_emulate.c =
b/arch/powerpc/kvm/e500_emulate.c
>> index 81288f7..94305db 100644
>> --- a/arch/powerpc/kvm/e500_emulate.c
>> +++ b/arch/powerpc/kvm/e500_emulate.c
>> @@ -1,5 +1,5 @@
>> /*
>> - * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights =
reserved.
>> + * Copyright (C) 2008-2012 Freescale Semiconductor, Inc. All rights =
reserved.
>> *
>> * Author: Yu Liu, <yu.liu@freescale.com>
>> *
>> @@ -90,6 +90,9 @@ static inline ulong kvmppc_get_ea_indexed(struct =
kvm_vcpu *vcpu, int ra, int rb)
>> 	if (ra)
>> 		ea +=3D kvmppc_get_gpr(vcpu, ra);
>>=20
>> +	if (!(vcpu->arch.shared->msr & MSR_CM))
>> +		ea &=3D 0xffffffffUL;
>=20
> Since this will be in generic code, please guard it with an #ifdef =
CONFIG_BOOKE.

Oh and do the same check for MSR_SF on Book3s :). Maybe something like

ulong msr_64bit =3D 0;

#if defined(CONFIG_PPC_BOOK3E_64)
msr_64bit =3D MSR_CM;
#elif defined(CONFIG_PPC_BOOK3S_64)
msr_64bit =3D MSR_SF;
#endif

if (!(vcpu->arch.shared->msr & msr_64bit))
    ea =3D (uint32_t)ea;


Alex

^ permalink raw reply

* Re: [Qemu-ppc] [RFC PATCH 07/17] KVM: PPC: e500: Mask ea's high 32-bits in 32/64 instr emulation
From: Alexander Graf @ 2012-07-04 14:00 UTC (permalink / raw)
  To: Mihai Caraman; +Cc: qemu-ppc, linuxppc-dev, kvm, kvm-ppc
In-Reply-To: <1340627195-11544-8-git-send-email-mihai.caraman@freescale.com>


On 25.06.2012, at 14:26, Mihai Caraman wrote:

> Mask the high 32 bits of the effective address in the emulation layer, for
> guests running in 32-bit mode.
> MAS2's high-order 32 bits represent the upper 32 bits of the effective
> address of the page. Mask them too for tlbwe instruction emulation.

Ah, there is the tlbwe masking :). Please split this into 2 patches.

>=20
> Signed-off-by: Mihai Caraman <mihai.caraman@freescale.com>
> ---
> arch/powerpc/kvm/e500_emulate.c |    5 ++++-
> arch/powerpc/kvm/e500_tlb.c     |    2 ++
> 2 files changed, 6 insertions(+), 1 deletions(-)
>=20
> diff --git a/arch/powerpc/kvm/e500_emulate.c =
b/arch/powerpc/kvm/e500_emulate.c
> index 81288f7..94305db 100644
> --- a/arch/powerpc/kvm/e500_emulate.c
> +++ b/arch/powerpc/kvm/e500_emulate.c
> @@ -1,5 +1,5 @@
> /*
> - * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights =
reserved.
> + * Copyright (C) 2008-2012 Freescale Semiconductor, Inc. All rights =
reserved.
>  *
>  * Author: Yu Liu, <yu.liu@freescale.com>
>  *
> @@ -90,6 +90,9 @@ static inline ulong kvmppc_get_ea_indexed(struct =
kvm_vcpu *vcpu, int ra, int rb)
> 	if (ra)
> 		ea +=3D kvmppc_get_gpr(vcpu, ra);
>=20
> +	if (!(vcpu->arch.shared->msr & MSR_CM))
> +		ea &=3D 0xffffffffUL;

Since this will be in generic code, please guard it with an #ifdef
CONFIG_BOOKE.


Alex

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox