LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 2/4] KVM: PPC: Rename current DAWR macros and variables
From: Ravi Bangoria @ 2020-11-24 10:59 UTC (permalink / raw)
  To: mpe, paulus
  Cc: christophe.leroy, ravi.bangoria, mikey, kvm, leobras.c, jniethe5,
	linux-kernel, npiggin, kvm-ppc, pbonzini, linuxppc-dev
In-Reply-To: <20201124105953.39325-1-ravi.bangoria@linux.ibm.com>

Power10 is introducing second DAWR. Use real register names (with
suffix 0) from ISA for current macros and variables used by kvm.
One exception is KVM_REG_PPC_DAWR. Keep it as it is because it's
uapi so changing it will break userspace.

Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
---
 arch/powerpc/include/asm/kvm_host.h     |  4 ++--
 arch/powerpc/kernel/asm-offsets.c       |  4 ++--
 arch/powerpc/kvm/book3s_hv.c            | 24 ++++++++++++------------
 arch/powerpc/kvm/book3s_hv_nested.c     |  8 ++++----
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 20 ++++++++++----------
 5 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index d67a470e95a3..62cadf1a596e 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -584,8 +584,8 @@ struct kvm_vcpu_arch {
 	u32 ctrl;
 	u32 dabrx;
 	ulong dabr;
-	ulong dawr;
-	ulong dawrx;
+	ulong dawr0;
+	ulong dawrx0;
 	ulong ciabr;
 	ulong cfar;
 	ulong ppr;
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 8711c2164b45..e4256f5b4602 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -547,8 +547,8 @@ int main(void)
 	OFFSET(VCPU_CTRL, kvm_vcpu, arch.ctrl);
 	OFFSET(VCPU_DABR, kvm_vcpu, arch.dabr);
 	OFFSET(VCPU_DABRX, kvm_vcpu, arch.dabrx);
-	OFFSET(VCPU_DAWR, kvm_vcpu, arch.dawr);
-	OFFSET(VCPU_DAWRX, kvm_vcpu, arch.dawrx);
+	OFFSET(VCPU_DAWR0, kvm_vcpu, arch.dawr0);
+	OFFSET(VCPU_DAWRX0, kvm_vcpu, arch.dawrx0);
 	OFFSET(VCPU_CIABR, kvm_vcpu, arch.ciabr);
 	OFFSET(VCPU_HFLAGS, kvm_vcpu, arch.hflags);
 	OFFSET(VCPU_DEC, kvm_vcpu, arch.dec);
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 490a0f6a7285..d5c6efc8a76e 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -782,8 +782,8 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags,
 			return H_UNSUPPORTED_FLAG_START;
 		if (value2 & DABRX_HYP)
 			return H_P4;
-		vcpu->arch.dawr  = value1;
-		vcpu->arch.dawrx = value2;
+		vcpu->arch.dawr0  = value1;
+		vcpu->arch.dawrx0 = value2;
 		return H_SUCCESS;
 	case H_SET_MODE_RESOURCE_ADDR_TRANS_MODE:
 		/* KVM does not support mflags=2 (AIL=2) */
@@ -1747,10 +1747,10 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
 		*val = get_reg_val(id, vcpu->arch.vcore->vtb);
 		break;
 	case KVM_REG_PPC_DAWR:
-		*val = get_reg_val(id, vcpu->arch.dawr);
+		*val = get_reg_val(id, vcpu->arch.dawr0);
 		break;
 	case KVM_REG_PPC_DAWRX:
-		*val = get_reg_val(id, vcpu->arch.dawrx);
+		*val = get_reg_val(id, vcpu->arch.dawrx0);
 		break;
 	case KVM_REG_PPC_CIABR:
 		*val = get_reg_val(id, vcpu->arch.ciabr);
@@ -1979,10 +1979,10 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
 		vcpu->arch.vcore->vtb = set_reg_val(id, *val);
 		break;
 	case KVM_REG_PPC_DAWR:
-		vcpu->arch.dawr = set_reg_val(id, *val);
+		vcpu->arch.dawr0 = set_reg_val(id, *val);
 		break;
 	case KVM_REG_PPC_DAWRX:
-		vcpu->arch.dawrx = set_reg_val(id, *val) & ~DAWRX_HYP;
+		vcpu->arch.dawrx0 = set_reg_val(id, *val) & ~DAWRX_HYP;
 		break;
 	case KVM_REG_PPC_CIABR:
 		vcpu->arch.ciabr = set_reg_val(id, *val);
@@ -3437,8 +3437,8 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
 	int trap;
 	unsigned long host_hfscr = mfspr(SPRN_HFSCR);
 	unsigned long host_ciabr = mfspr(SPRN_CIABR);
-	unsigned long host_dawr = mfspr(SPRN_DAWR0);
-	unsigned long host_dawrx = mfspr(SPRN_DAWRX0);
+	unsigned long host_dawr0 = mfspr(SPRN_DAWR0);
+	unsigned long host_dawrx0 = mfspr(SPRN_DAWRX0);
 	unsigned long host_psscr = mfspr(SPRN_PSSCR);
 	unsigned long host_pidr = mfspr(SPRN_PID);
 
@@ -3477,8 +3477,8 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
 	mtspr(SPRN_SPURR, vcpu->arch.spurr);
 
 	if (dawr_enabled()) {
-		mtspr(SPRN_DAWR0, vcpu->arch.dawr);
-		mtspr(SPRN_DAWRX0, vcpu->arch.dawrx);
+		mtspr(SPRN_DAWR0, vcpu->arch.dawr0);
+		mtspr(SPRN_DAWRX0, vcpu->arch.dawrx0);
 	}
 	mtspr(SPRN_CIABR, vcpu->arch.ciabr);
 	mtspr(SPRN_IC, vcpu->arch.ic);
@@ -3530,8 +3530,8 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
 	      (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
 	mtspr(SPRN_HFSCR, host_hfscr);
 	mtspr(SPRN_CIABR, host_ciabr);
-	mtspr(SPRN_DAWR0, host_dawr);
-	mtspr(SPRN_DAWRX0, host_dawrx);
+	mtspr(SPRN_DAWR0, host_dawr0);
+	mtspr(SPRN_DAWRX0, host_dawrx0);
 	mtspr(SPRN_PID, host_pidr);
 
 	/*
diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
index 2b433c3bacea..1d4b335485d0 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -33,8 +33,8 @@ void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
 	hr->dpdes = vc->dpdes;
 	hr->hfscr = vcpu->arch.hfscr;
 	hr->tb_offset = vc->tb_offset;
-	hr->dawr0 = vcpu->arch.dawr;
-	hr->dawrx0 = vcpu->arch.dawrx;
+	hr->dawr0 = vcpu->arch.dawr0;
+	hr->dawrx0 = vcpu->arch.dawrx0;
 	hr->ciabr = vcpu->arch.ciabr;
 	hr->purr = vcpu->arch.purr;
 	hr->spurr = vcpu->arch.spurr;
@@ -151,8 +151,8 @@ static void restore_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
 	vc->pcr = hr->pcr | PCR_MASK;
 	vc->dpdes = hr->dpdes;
 	vcpu->arch.hfscr = hr->hfscr;
-	vcpu->arch.dawr = hr->dawr0;
-	vcpu->arch.dawrx = hr->dawrx0;
+	vcpu->arch.dawr0 = hr->dawr0;
+	vcpu->arch.dawrx0 = hr->dawrx0;
 	vcpu->arch.ciabr = hr->ciabr;
 	vcpu->arch.purr = hr->purr;
 	vcpu->arch.spurr = hr->spurr;
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 799d6d0f4ead..8e3fb393a980 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -52,8 +52,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
 #define STACK_SLOT_PID		(SFS-32)
 #define STACK_SLOT_IAMR		(SFS-40)
 #define STACK_SLOT_CIABR	(SFS-48)
-#define STACK_SLOT_DAWR		(SFS-56)
-#define STACK_SLOT_DAWRX	(SFS-64)
+#define STACK_SLOT_DAWR0	(SFS-56)
+#define STACK_SLOT_DAWRX0	(SFS-64)
 #define STACK_SLOT_HFSCR	(SFS-72)
 #define STACK_SLOT_AMR		(SFS-80)
 #define STACK_SLOT_UAMOR	(SFS-88)
@@ -711,8 +711,8 @@ BEGIN_FTR_SECTION
 	mfspr	r7, SPRN_DAWRX0
 	mfspr	r8, SPRN_IAMR
 	std	r5, STACK_SLOT_CIABR(r1)
-	std	r6, STACK_SLOT_DAWR(r1)
-	std	r7, STACK_SLOT_DAWRX(r1)
+	std	r6, STACK_SLOT_DAWR0(r1)
+	std	r7, STACK_SLOT_DAWRX0(r1)
 	std	r8, STACK_SLOT_IAMR(r1)
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 
@@ -801,8 +801,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
 	lbz	r5, 0(r5)
 	cmpdi	r5, 0
 	beq	1f
-	ld	r5, VCPU_DAWR(r4)
-	ld	r6, VCPU_DAWRX(r4)
+	ld	r5, VCPU_DAWR0(r4)
+	ld	r6, VCPU_DAWRX0(r4)
 	mtspr	SPRN_DAWR0, r5
 	mtspr	SPRN_DAWRX0, r6
 1:
@@ -1759,8 +1759,8 @@ END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
 	/* Restore host values of some registers */
 BEGIN_FTR_SECTION
 	ld	r5, STACK_SLOT_CIABR(r1)
-	ld	r6, STACK_SLOT_DAWR(r1)
-	ld	r7, STACK_SLOT_DAWRX(r1)
+	ld	r6, STACK_SLOT_DAWR0(r1)
+	ld	r7, STACK_SLOT_DAWRX0(r1)
 	mtspr	SPRN_CIABR, r5
 	/*
 	 * If the DAWR doesn't work, it's ok to write these here as
@@ -2566,8 +2566,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	rlwimi	r5, r4, 5, DAWRX_DR | DAWRX_DW
 	rlwimi	r5, r4, 2, DAWRX_WT
 	clrrdi	r4, r4, 3
-	std	r4, VCPU_DAWR(r3)
-	std	r5, VCPU_DAWRX(r3)
+	std	r4, VCPU_DAWR0(r3)
+	std	r5, VCPU_DAWRX0(r3)
 	/*
 	 * If came in through the real mode hcall handler then it is necessary
 	 * to write the registers since the return path won't. Otherwise it is
-- 
2.26.2


^ permalink raw reply related

* [PATCH v2 3/4] KVM: PPC: Add infrastructure to support 2nd DAWR
From: Ravi Bangoria @ 2020-11-24 10:59 UTC (permalink / raw)
  To: mpe, paulus
  Cc: christophe.leroy, ravi.bangoria, mikey, kvm, leobras.c, jniethe5,
	linux-kernel, npiggin, kvm-ppc, pbonzini, linuxppc-dev
In-Reply-To: <20201124105953.39325-1-ravi.bangoria@linux.ibm.com>

kvm code assumes single DAWR everywhere. Add code to support 2nd DAWR.
DAWR is a hypervisor resource and thus H_SET_MODE hcall is used to set/
unset it. Introduce new case H_SET_MODE_RESOURCE_SET_DAWR1 for 2nd DAWR.
Also, kvm will support 2nd DAWR only if CPU_FTR_DAWR1 is set.

Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
---
 Documentation/virt/kvm/api.rst            |  2 ++
 arch/powerpc/include/asm/hvcall.h         |  8 ++++-
 arch/powerpc/include/asm/kvm_host.h       |  2 ++
 arch/powerpc/include/uapi/asm/kvm.h       |  2 ++
 arch/powerpc/kernel/asm-offsets.c         |  2 ++
 arch/powerpc/kvm/book3s_hv.c              | 41 +++++++++++++++++++++++
 arch/powerpc/kvm/book3s_hv_nested.c       |  7 ++++
 arch/powerpc/kvm/book3s_hv_rmhandlers.S   | 23 +++++++++++++
 tools/arch/powerpc/include/uapi/asm/kvm.h |  2 ++
 9 files changed, 88 insertions(+), 1 deletion(-)

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index eb3a1316f03e..72c98735aa52 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -2249,6 +2249,8 @@ registers, find a list below:
   PPC     KVM_REG_PPC_PSSCR               64
   PPC     KVM_REG_PPC_DEC_EXPIRY          64
   PPC     KVM_REG_PPC_PTCR                64
+  PPC     KVM_REG_PPC_DAWR1               64
+  PPC     KVM_REG_PPC_DAWRX1              64
   PPC     KVM_REG_PPC_TM_GPR0             64
   ...
   PPC     KVM_REG_PPC_TM_GPR31            64
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index a7073fddb657..4bacd27a348b 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -558,16 +558,22 @@ struct hv_guest_state {
 	u64 pidr;
 	u64 cfar;
 	u64 ppr;
+	/* Version 1 ends here */
+	u64 dawr1;
+	u64 dawrx1;
+	/* Version 2 ends here */
 };
 
 /* Latest version of hv_guest_state structure */
-#define HV_GUEST_STATE_VERSION	1
+#define HV_GUEST_STATE_VERSION	2
 
 static inline int hv_guest_state_size(unsigned int version)
 {
 	switch (version) {
 	case 1:
 		return offsetofend(struct hv_guest_state, ppr);
+	case 2:
+		return offsetofend(struct hv_guest_state, dawrx1);
 	default:
 		return -1;
 	}
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 62cadf1a596e..9804afdf8578 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -586,6 +586,8 @@ struct kvm_vcpu_arch {
 	ulong dabr;
 	ulong dawr0;
 	ulong dawrx0;
+	ulong dawr1;
+	ulong dawrx1;
 	ulong ciabr;
 	ulong cfar;
 	ulong ppr;
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index c3af3f324c5a..9f18fa090f1f 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -644,6 +644,8 @@ struct kvm_ppc_cpu_char {
 #define KVM_REG_PPC_MMCR3	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc1)
 #define KVM_REG_PPC_SIER2	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc2)
 #define KVM_REG_PPC_SIER3	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc3)
+#define KVM_REG_PPC_DAWR1	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc4)
+#define KVM_REG_PPC_DAWRX1	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc5)
 
 /* Transactional Memory checkpointed state:
  * This is all GPRs, all VSX regs and a subset of SPRs
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index e4256f5b4602..26d4fa8fe51e 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -549,6 +549,8 @@ int main(void)
 	OFFSET(VCPU_DABRX, kvm_vcpu, arch.dabrx);
 	OFFSET(VCPU_DAWR0, kvm_vcpu, arch.dawr0);
 	OFFSET(VCPU_DAWRX0, kvm_vcpu, arch.dawrx0);
+	OFFSET(VCPU_DAWR1, kvm_vcpu, arch.dawr1);
+	OFFSET(VCPU_DAWRX1, kvm_vcpu, arch.dawrx1);
 	OFFSET(VCPU_CIABR, kvm_vcpu, arch.ciabr);
 	OFFSET(VCPU_HFLAGS, kvm_vcpu, arch.hflags);
 	OFFSET(VCPU_DEC, kvm_vcpu, arch.dec);
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index d5c6efc8a76e..2ff645789e9e 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -785,6 +785,20 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags,
 		vcpu->arch.dawr0  = value1;
 		vcpu->arch.dawrx0 = value2;
 		return H_SUCCESS;
+	case H_SET_MODE_RESOURCE_SET_DAWR1:
+		if (!kvmppc_power8_compatible(vcpu))
+			return H_P2;
+		if (!ppc_breakpoint_available())
+			return H_P2;
+		if (!cpu_has_feature(CPU_FTR_DAWR1))
+			return H_P2;
+		if (mflags)
+			return H_UNSUPPORTED_FLAG_START;
+		if (value2 & DABRX_HYP)
+			return H_P4;
+		vcpu->arch.dawr1  = value1;
+		vcpu->arch.dawrx1 = value2;
+		return H_SUCCESS;
 	case H_SET_MODE_RESOURCE_ADDR_TRANS_MODE:
 		/* KVM does not support mflags=2 (AIL=2) */
 		if (mflags != 0 && mflags != 3)
@@ -1752,6 +1766,12 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
 	case KVM_REG_PPC_DAWRX:
 		*val = get_reg_val(id, vcpu->arch.dawrx0);
 		break;
+	case KVM_REG_PPC_DAWR1:
+		*val = get_reg_val(id, vcpu->arch.dawr1);
+		break;
+	case KVM_REG_PPC_DAWRX1:
+		*val = get_reg_val(id, vcpu->arch.dawrx1);
+		break;
 	case KVM_REG_PPC_CIABR:
 		*val = get_reg_val(id, vcpu->arch.ciabr);
 		break;
@@ -1984,6 +2004,12 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
 	case KVM_REG_PPC_DAWRX:
 		vcpu->arch.dawrx0 = set_reg_val(id, *val) & ~DAWRX_HYP;
 		break;
+	case KVM_REG_PPC_DAWR1:
+		vcpu->arch.dawr1 = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_DAWRX1:
+		vcpu->arch.dawrx1 = set_reg_val(id, *val) & ~DAWRX_HYP;
+		break;
 	case KVM_REG_PPC_CIABR:
 		vcpu->arch.ciabr = set_reg_val(id, *val);
 		/* Don't allow setting breakpoints in hypervisor code */
@@ -3441,6 +3467,13 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
 	unsigned long host_dawrx0 = mfspr(SPRN_DAWRX0);
 	unsigned long host_psscr = mfspr(SPRN_PSSCR);
 	unsigned long host_pidr = mfspr(SPRN_PID);
+	unsigned long host_dawr1 = 0;
+	unsigned long host_dawrx1 = 0;
+
+	if (cpu_has_feature(CPU_FTR_DAWR1)) {
+		host_dawr1 = mfspr(SPRN_DAWR1);
+		host_dawrx1 = mfspr(SPRN_DAWRX1);
+	}
 
 	/*
 	 * P8 and P9 suppress the HDEC exception when LPCR[HDICE] = 0,
@@ -3479,6 +3512,10 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
 	if (dawr_enabled()) {
 		mtspr(SPRN_DAWR0, vcpu->arch.dawr0);
 		mtspr(SPRN_DAWRX0, vcpu->arch.dawrx0);
+		if (cpu_has_feature(CPU_FTR_DAWR1)) {
+			mtspr(SPRN_DAWR1, vcpu->arch.dawr1);
+			mtspr(SPRN_DAWRX1, vcpu->arch.dawrx1);
+		}
 	}
 	mtspr(SPRN_CIABR, vcpu->arch.ciabr);
 	mtspr(SPRN_IC, vcpu->arch.ic);
@@ -3532,6 +3569,10 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
 	mtspr(SPRN_CIABR, host_ciabr);
 	mtspr(SPRN_DAWR0, host_dawr0);
 	mtspr(SPRN_DAWRX0, host_dawrx0);
+	if (cpu_has_feature(CPU_FTR_DAWR1)) {
+		mtspr(SPRN_DAWR1, host_dawr1);
+		mtspr(SPRN_DAWRX1, host_dawrx1);
+	}
 	mtspr(SPRN_PID, host_pidr);
 
 	/*
diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
index 1d4b335485d0..aec86013f7de 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -49,6 +49,8 @@ void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
 	hr->pidr = vcpu->arch.pid;
 	hr->cfar = vcpu->arch.cfar;
 	hr->ppr = vcpu->arch.ppr;
+	hr->dawr1 = vcpu->arch.dawr1;
+	hr->dawrx1 = vcpu->arch.dawrx1;
 }
 
 static void byteswap_pt_regs(struct pt_regs *regs)
@@ -91,6 +93,8 @@ static void byteswap_hv_regs(struct hv_guest_state *hr)
 	hr->pidr = swab64(hr->pidr);
 	hr->cfar = swab64(hr->cfar);
 	hr->ppr = swab64(hr->ppr);
+	hr->dawr1 = swab64(hr->dawr1);
+	hr->dawrx1 = swab64(hr->dawrx1);
 }
 
 static void save_hv_return_state(struct kvm_vcpu *vcpu, int trap,
@@ -138,6 +142,7 @@ static void sanitise_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
 
 	/* Don't let data address watchpoint match in hypervisor state */
 	hr->dawrx0 &= ~DAWRX_HYP;
+	hr->dawrx1 &= ~DAWRX_HYP;
 
 	/* Don't let completed instruction address breakpt match in HV state */
 	if ((hr->ciabr & CIABR_PRIV) == CIABR_PRIV_HYPER)
@@ -167,6 +172,8 @@ static void restore_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
 	vcpu->arch.pid = hr->pidr;
 	vcpu->arch.cfar = hr->cfar;
 	vcpu->arch.ppr = hr->ppr;
+	vcpu->arch.dawr1 = hr->dawr1;
+	vcpu->arch.dawrx1 = hr->dawrx1;
 }
 
 void kvmhv_restore_hv_return_state(struct kvm_vcpu *vcpu,
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 8e3fb393a980..fe31c5338fdf 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -57,6 +57,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
 #define STACK_SLOT_HFSCR	(SFS-72)
 #define STACK_SLOT_AMR		(SFS-80)
 #define STACK_SLOT_UAMOR	(SFS-88)
+#define STACK_SLOT_DAWR1	(SFS-96)
+#define STACK_SLOT_DAWRX1	(SFS-104)
 /* the following is used by the P9 short path */
 #define STACK_SLOT_NVGPRS	(SFS-152)	/* 18 gprs */
 
@@ -715,6 +717,12 @@ BEGIN_FTR_SECTION
 	std	r7, STACK_SLOT_DAWRX0(r1)
 	std	r8, STACK_SLOT_IAMR(r1)
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+BEGIN_FTR_SECTION
+	mfspr	r6, SPRN_DAWR1
+	mfspr	r7, SPRN_DAWRX1
+	std	r6, STACK_SLOT_DAWR1(r1)
+	std	r7, STACK_SLOT_DAWRX1(r1)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S | CPU_FTR_DAWR1)
 
 	mfspr	r5, SPRN_AMR
 	std	r5, STACK_SLOT_AMR(r1)
@@ -805,6 +813,12 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
 	ld	r6, VCPU_DAWRX0(r4)
 	mtspr	SPRN_DAWR0, r5
 	mtspr	SPRN_DAWRX0, r6
+BEGIN_FTR_SECTION
+	ld	r5, VCPU_DAWR1(r4)
+	ld	r6, VCPU_DAWRX1(r4)
+	mtspr	SPRN_DAWR1, r5
+	mtspr	SPRN_DAWRX1, r6
+END_FTR_SECTION_IFSET(CPU_FTR_DAWR1)
 1:
 	ld	r7, VCPU_CIABR(r4)
 	ld	r8, VCPU_TAR(r4)
@@ -1769,6 +1783,12 @@ BEGIN_FTR_SECTION
 	mtspr	SPRN_DAWR0, r6
 	mtspr	SPRN_DAWRX0, r7
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+BEGIN_FTR_SECTION
+	ld	r6, STACK_SLOT_DAWR1(r1)
+	ld	r7, STACK_SLOT_DAWRX1(r1)
+	mtspr	SPRN_DAWR1, r6
+	mtspr	SPRN_DAWRX1, r7
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S | CPU_FTR_DAWR1)
 BEGIN_FTR_SECTION
 	ld	r5, STACK_SLOT_TID(r1)
 	ld	r6, STACK_SLOT_PSSCR(r1)
@@ -3335,6 +3355,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
 	mtspr	SPRN_IAMR, r0
 	mtspr	SPRN_CIABR, r0
 	mtspr	SPRN_DAWRX0, r0
+BEGIN_FTR_SECTION
+	mtspr	SPRN_DAWRX1, r0
+END_FTR_SECTION_IFSET(CPU_FTR_DAWR1)
 
 BEGIN_MMU_FTR_SECTION
 	b	4f
diff --git a/tools/arch/powerpc/include/uapi/asm/kvm.h b/tools/arch/powerpc/include/uapi/asm/kvm.h
index c3af3f324c5a..9f18fa090f1f 100644
--- a/tools/arch/powerpc/include/uapi/asm/kvm.h
+++ b/tools/arch/powerpc/include/uapi/asm/kvm.h
@@ -644,6 +644,8 @@ struct kvm_ppc_cpu_char {
 #define KVM_REG_PPC_MMCR3	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc1)
 #define KVM_REG_PPC_SIER2	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc2)
 #define KVM_REG_PPC_SIER3	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc3)
+#define KVM_REG_PPC_DAWR1	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc4)
+#define KVM_REG_PPC_DAWRX1	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc5)
 
 /* Transactional Memory checkpointed state:
  * This is all GPRs, all VSX regs and a subset of SPRs
-- 
2.26.2


^ permalink raw reply related

* [PATCH v2 1/4] KVM: PPC: Allow nested guest creation when L0 hv_guest_state > L1
From: Ravi Bangoria @ 2020-11-24 10:59 UTC (permalink / raw)
  To: mpe, paulus
  Cc: christophe.leroy, ravi.bangoria, mikey, kvm, leobras.c, jniethe5,
	linux-kernel, npiggin, kvm-ppc, pbonzini, linuxppc-dev
In-Reply-To: <20201124105953.39325-1-ravi.bangoria@linux.ibm.com>

On powerpc, L1 hypervisor takes help of L0 using H_ENTER_NESTED
hcall to load L2 guest state in cpu. L1 hypervisor prepares the
L2 state in struct hv_guest_state and passes a pointer to it via
hcall. Using that pointer, L0 reads/writes that state directly
from/to L1 memory. Thus L0 must be aware of hv_guest_state layout
of L1. Currently it uses version field to achieve this. i.e. If
L0 hv_guest_state.version != L1 hv_guest_state.version, L0 won't
allow nested kvm guest.

This restriction can be loosen up a bit. L0 can be taught to
understand older layout of hv_guest_state, if we restrict the
new member to be added only at the end. i.e. we can allow
nested guest even when L0 hv_guest_state.version > L1
hv_guest_state.version. Though, the other way around is not
possible.

Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
---
 arch/powerpc/include/asm/hvcall.h   | 17 +++++++--
 arch/powerpc/kvm/book3s_hv_nested.c | 53 ++++++++++++++++++++++++-----
 2 files changed, 59 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index fbb377055471..a7073fddb657 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -524,9 +524,12 @@ struct h_cpu_char_result {
 	u64 behaviour;
 };
 
-/* Register state for entering a nested guest with H_ENTER_NESTED */
+/*
+ * Register state for entering a nested guest with H_ENTER_NESTED.
+ * New member must be added at the end.
+ */
 struct hv_guest_state {
-	u64 version;		/* version of this structure layout */
+	u64 version;		/* version of this structure layout, must be first */
 	u32 lpid;
 	u32 vcpu_token;
 	/* These registers are hypervisor privileged (at least for writing) */
@@ -560,6 +563,16 @@ struct hv_guest_state {
 /* Latest version of hv_guest_state structure */
 #define HV_GUEST_STATE_VERSION	1
 
+static inline int hv_guest_state_size(unsigned int version)
+{
+	switch (version) {
+	case 1:
+		return offsetofend(struct hv_guest_state, ppr);
+	default:
+		return -1;
+	}
+}
+
 #endif /* __ASSEMBLY__ */
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_HVCALL_H */
diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
index 33b58549a9aa..2b433c3bacea 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -215,6 +215,45 @@ static void kvmhv_nested_mmio_needed(struct kvm_vcpu *vcpu, u64 regs_ptr)
 	}
 }
 
+static int kvmhv_read_guest_state_and_regs(struct kvm_vcpu *vcpu,
+					   struct hv_guest_state *l2_hv,
+					   struct pt_regs *l2_regs,
+					   u64 hv_ptr, u64 regs_ptr)
+{
+	int size;
+
+	if (kvm_vcpu_read_guest(vcpu, hv_ptr, &(l2_hv->version),
+				sizeof(l2_hv->version)))
+		return -1;
+
+	if (kvmppc_need_byteswap(vcpu))
+		l2_hv->version = swab64(l2_hv->version);
+
+	size = hv_guest_state_size(l2_hv->version);
+	if (size < 0)
+		return -1;
+
+	return kvm_vcpu_read_guest(vcpu, hv_ptr, l2_hv, size) ||
+		kvm_vcpu_read_guest(vcpu, regs_ptr, l2_regs,
+				    sizeof(struct pt_regs));
+}
+
+static int kvmhv_write_guest_state_and_regs(struct kvm_vcpu *vcpu,
+					    struct hv_guest_state *l2_hv,
+					    struct pt_regs *l2_regs,
+					    u64 hv_ptr, u64 regs_ptr)
+{
+	int size;
+
+	size = hv_guest_state_size(l2_hv->version);
+	if (size < 0)
+		return -1;
+
+	return kvm_vcpu_write_guest(vcpu, hv_ptr, l2_hv, size) ||
+		kvm_vcpu_write_guest(vcpu, regs_ptr, l2_regs,
+				     sizeof(struct pt_regs));
+}
+
 long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
 {
 	long int err, r;
@@ -235,17 +274,15 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
 	hv_ptr = kvmppc_get_gpr(vcpu, 4);
 	regs_ptr = kvmppc_get_gpr(vcpu, 5);
 	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
-	err = kvm_vcpu_read_guest(vcpu, hv_ptr, &l2_hv,
-				  sizeof(struct hv_guest_state)) ||
-		kvm_vcpu_read_guest(vcpu, regs_ptr, &l2_regs,
-				    sizeof(struct pt_regs));
+	err = kvmhv_read_guest_state_and_regs(vcpu, &l2_hv, &l2_regs,
+					      hv_ptr, regs_ptr);
 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
 	if (err)
 		return H_PARAMETER;
 
 	if (kvmppc_need_byteswap(vcpu))
 		byteswap_hv_regs(&l2_hv);
-	if (l2_hv.version != HV_GUEST_STATE_VERSION)
+	if (l2_hv.version > HV_GUEST_STATE_VERSION)
 		return H_P2;
 
 	if (kvmppc_need_byteswap(vcpu))
@@ -325,10 +362,8 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
 		byteswap_pt_regs(&l2_regs);
 	}
 	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
-	err = kvm_vcpu_write_guest(vcpu, hv_ptr, &l2_hv,
-				   sizeof(struct hv_guest_state)) ||
-		kvm_vcpu_write_guest(vcpu, regs_ptr, &l2_regs,
-				   sizeof(struct pt_regs));
+	err = kvmhv_write_guest_state_and_regs(vcpu, &l2_hv, &l2_regs,
+					       hv_ptr, regs_ptr);
 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
 	if (err)
 		return H_AUTHORITY;
-- 
2.26.2


^ permalink raw reply related

* [PATCH v2 0/4] KVM: PPC: Power10 2nd DAWR enablement
From: Ravi Bangoria @ 2020-11-24 10:59 UTC (permalink / raw)
  To: mpe, paulus
  Cc: christophe.leroy, ravi.bangoria, mikey, kvm, leobras.c, jniethe5,
	linux-kernel, npiggin, kvm-ppc, pbonzini, linuxppc-dev

Enable p10 2nd DAWR feature for Book3S kvm guest. DAWR is a hypervisor
resource and thus H_SET_MODE hcall is used to set/unset it. A new case
H_SET_MODE_RESOURCE_SET_DAWR1 is introduced in H_SET_MODE hcall for
setting/unsetting 2nd DAWR. Also, new capability KVM_CAP_PPC_DAWR1 has
been added to query 2nd DAWR support via kvm ioctl.

This feature also needs to be enabled in Qemu to really use it. I'll
post Qemu patches once kvm patches get accepted.

v1: https://lore.kernel.org/r/20200723102058.312282-1-ravi.bangoria@linux.ibm.com

v1->v2:
 - patch #1: New patch
 - patch #2: Don't rename KVM_REG_PPC_DAWR, it's an uapi macro
 - patch #3: Increment HV_GUEST_STATE_VERSION
 - Split kvm and selftests patches into different series
 - Patches rebased to paulus/kvm-ppc-next (cf59eb13e151) + few
   other watchpoint patches which are yet to be merged in
   paulus/kvm-ppc-next.

Ravi Bangoria (4):
  KVM: PPC: Allow nested guest creation when L0 hv_guest_state > L1
  KVM: PPC: Rename current DAWR macros and variables
  KVM: PPC: Add infrastructure to support 2nd DAWR
  KVM: PPC: Introduce new capability for 2nd DAWR

 Documentation/virt/kvm/api.rst            |  2 +
 arch/powerpc/include/asm/hvcall.h         | 25 ++++++++-
 arch/powerpc/include/asm/kvm_host.h       |  6 +-
 arch/powerpc/include/uapi/asm/kvm.h       |  2 +
 arch/powerpc/kernel/asm-offsets.c         |  6 +-
 arch/powerpc/kvm/book3s_hv.c              | 65 ++++++++++++++++++----
 arch/powerpc/kvm/book3s_hv_nested.c       | 68 ++++++++++++++++++-----
 arch/powerpc/kvm/book3s_hv_rmhandlers.S   | 43 ++++++++++----
 arch/powerpc/kvm/powerpc.c                |  3 +
 include/uapi/linux/kvm.h                  |  1 +
 tools/arch/powerpc/include/uapi/asm/kvm.h |  2 +
 11 files changed, 181 insertions(+), 42 deletions(-)

-- 
2.26.2


^ permalink raw reply

* [PATCH v2 4/4] KVM: PPC: Introduce new capability for 2nd DAWR
From: Ravi Bangoria @ 2020-11-24 10:59 UTC (permalink / raw)
  To: mpe, paulus
  Cc: christophe.leroy, ravi.bangoria, mikey, kvm, leobras.c, jniethe5,
	linux-kernel, npiggin, kvm-ppc, pbonzini, linuxppc-dev
In-Reply-To: <20201124105953.39325-1-ravi.bangoria@linux.ibm.com>

Introduce KVM_CAP_PPC_DAWR1 which can be used by Qemu to query whether
kvm supports 2nd DAWR or not.

Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
---
 arch/powerpc/kvm/powerpc.c | 3 +++
 include/uapi/linux/kvm.h   | 1 +
 2 files changed, 4 insertions(+)

diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 13999123b735..48763fe59fc5 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -679,6 +679,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 			!kvmppc_hv_ops->enable_svm(NULL);
 		break;
 #endif
+	case KVM_CAP_PPC_DAWR1:
+		r = cpu_has_feature(CPU_FTR_DAWR1);
+		break;
 	default:
 		r = 0;
 		break;
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index f6d86033c4fa..0f32d6cbabc2 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1035,6 +1035,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_LAST_CPU 184
 #define KVM_CAP_SMALLER_MAXPHYADDR 185
 #define KVM_CAP_S390_DIAG318 186
+#define KVM_CAP_PPC_DAWR1 187
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
-- 
2.26.2


^ permalink raw reply related

* Re: [PATCH 3/3] selftests/powerpc: Add VF recovery tests
From: Oliver O'Halloran @ 2020-11-24 11:17 UTC (permalink / raw)
  To: Frederic Barrat; +Cc: linuxppc-dev
In-Reply-To: <d192485d-c8e0-7798-4a7f-85efe68a41fa@linux.ibm.com>

On Tue, Nov 24, 2020 at 9:14 PM Frederic Barrat <fbarrat@linux.ibm.com> wrote:
>
> Is it possible to run those tests on pseries? I haven't managed to set
> up a LPAR with a physical function which would let me enable a virtual
> function. All I could do is assign a virtual function to a LPAR. When
> assigning a physical function to the LPAR, enabling a virtual function
> fails because of missing properties in the device tree, so it looks like
> the hypervisor doesn't support it (?).
>
> Same story on qemu.
>
>    Fred

IIRC having the guest manage SR-IOV was a half-baked feature that
never made it into a production phyp build. I never managed to get any
real documentation from the phyp folks about how it worked either. As
far as I can tell it's pretty similar to what we do on PowerNV with
the PE configuration being handled by h-call rather than OPAL call.
The main difference would be in how EEH freezes are handled and I know
there's *something* going on there, but I never really understood it
due to the lack of documentation.

I've been tempted to rip out all that crap a few times, but never
really got around to it. There was also some noises about implementing
support for guest managed SRIOV in pseries qemu, but I'm not sure what
ever happened to that.

Oliver

^ permalink raw reply

* [PATCH 1/3] powerpc: Make NUMA depend on SMP
From: Michael Ellerman @ 2020-11-24 12:05 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: rdunlap, srikar

Our Kconfig allows NUMA to be enabled without SMP, but none of
our defconfigs use that combination. This means it can easily be
broken inadvertently by code changes, which has happened recently.

Although it's theoretically possible to have a machine with a single
CPU and multiple memory nodes, I can't think of any real systems where
that's the case. Even so if such a system exists, it can just run an
SMP kernel anyway.

So to avoid the need to add extra #ifdefs and/or build breaks, make
NUMA depend on SMP.

Reported-by: kernel test robot <lkp@intel.com>
Reported-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index e9f13fe08492..a22db3db6b96 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -660,7 +660,7 @@ config IRQ_ALL_CPUS
 
 config NUMA
 	bool "NUMA support"
-	depends on PPC64
+	depends on PPC64 && SMP
 	default y if SMP && PPC_PSERIES
 
 config NODES_SHIFT
-- 
2.25.1


^ permalink raw reply related

* [PATCH 3/3] powerpc: Update NUMA Kconfig description & help text
From: Michael Ellerman @ 2020-11-24 12:05 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: rdunlap, srikar
In-Reply-To: <20201124120547.1940635-1-mpe@ellerman.id.au>

Update the NUMA Kconfig description to match other architectures, and
add some help text. Shamelessly borrowed from x86/arm64.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/Kconfig | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 4d688b426353..7f4995b245a3 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -659,9 +659,15 @@ config IRQ_ALL_CPUS
 	  reported with SMP Power Macintoshes with this option enabled.
 
 config NUMA
-	bool "NUMA support"
+	bool "NUMA Memory Allocation and Scheduler Support"
 	depends on PPC64 && SMP
 	default y if PPC_PSERIES || PPC_POWERNV
+	help
+	  Enable NUMA (Non-Uniform Memory Access) support.
+
+	  The kernel will try to allocate memory used by a CPU on the
+	  local memory controller of the CPU and add some more
+	  NUMA awareness to the kernel.
 
 config NODES_SHIFT
 	int
-- 
2.25.1


^ permalink raw reply related

* [PATCH 2/3] powerpc: Make NUMA default y for powernv
From: Michael Ellerman @ 2020-11-24 12:05 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: rdunlap, srikar
In-Reply-To: <20201124120547.1940635-1-mpe@ellerman.id.au>

Our NUMA option is default y for pseries, but not powernv. The bulk of
powernv systems are NUMA, so make NUMA default y for powernv also.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index a22db3db6b96..4d688b426353 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -661,7 +661,7 @@ config IRQ_ALL_CPUS
 config NUMA
 	bool "NUMA support"
 	depends on PPC64 && SMP
-	default y if SMP && PPC_PSERIES
+	default y if PPC_PSERIES || PPC_POWERNV
 
 config NODES_SHIFT
 	int
-- 
2.25.1


^ permalink raw reply related

* Re: [PATCH V2 4/5] ocxl: Add mmu notifier
From: Jason Gunthorpe @ 2020-11-24 13:45 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linuxppc-dev, Christophe Lombard, fbarrat, ajd
In-Reply-To: <20201124091738.GA26078@infradead.org>

On Tue, Nov 24, 2020 at 09:17:38AM +0000, Christoph Hellwig wrote:

> > @@ -470,6 +487,26 @@ void ocxl_link_release(struct pci_dev *dev, void *link_handle)
> >  }
> >  EXPORT_SYMBOL_GPL(ocxl_link_release);
> >  
> > +static void invalidate_range(struct mmu_notifier *mn,
> > +			     struct mm_struct *mm,
> > +			     unsigned long start, unsigned long end)
> > +{
> > +	struct pe_data *pe_data = container_of(mn, struct pe_data, mmu_notifier);
> > +	struct ocxl_link *link = pe_data->link;
> > +	unsigned long addr, pid, page_size = PAGE_SIZE;

The page_size variable seems unnecessary

> > +
> > +	pid = mm->context.id;
> > +
> > +	spin_lock(&link->atsd_lock);
> > +	for (addr = start; addr < end; addr += page_size)
> > +		pnv_ocxl_tlb_invalidate(&link->arva, pid, addr);
> > +	spin_unlock(&link->atsd_lock);
> > +}
> > +
> > +static const struct mmu_notifier_ops ocxl_mmu_notifier_ops = {
> > +	.invalidate_range = invalidate_range,
> > +};
> > +
> >  static u64 calculate_cfg_state(bool kernel)
> >  {
> >  	u64 state;
> > @@ -526,6 +563,8 @@ int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr,
> >  	pe_data->mm = mm;
> >  	pe_data->xsl_err_cb = xsl_err_cb;
> >  	pe_data->xsl_err_data = xsl_err_data;
> > +	pe_data->link = link;
> > +	pe_data->mmu_notifier.ops = &ocxl_mmu_notifier_ops;
> >  
> >  	memset(pe, 0, sizeof(struct ocxl_process_element));
> >  	pe->config_state = cpu_to_be64(calculate_cfg_state(pidr == 0));
> > @@ -542,8 +581,16 @@ int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr,
> >  	 * by the nest MMU. If we have a kernel context, TLBIs are
> >  	 * already global.
> >  	 */
> > -	if (mm)
> > +	if (mm) {
> >  		mm_context_add_copro(mm);
> > +		if (link->arva) {
> > +			/* Use MMIO registers for the TLB Invalidate
> > +			 * operations.
> > +			 */
> > +			mmu_notifier_register(&pe_data->mmu_notifier, mm);

Every other place doing stuff like this is de-duplicating the
notifier. If you have multiple clients this will do multiple redundant
invalidations?

The notifier get/put API is designed to solve that problem, you'd get
a single notifier for the mm and then add the impacted arva's to some
list at the notifier.

Jason

^ permalink raw reply

* [PATCH] tpm: ibmvtpm: fix error return code in tpm_ibmvtpm_probe()
From: Wang Hai @ 2020-11-24 13:52 UTC (permalink / raw)
  To: mpe, benh, paulus, peterhuewe, jarkko, jgg, stefanb, nayna
  Cc: linux-integrity, linuxppc-dev, linux-kernel

Fix to return a negative error code from the error handling
case instead of 0, as done elsewhere in this function.

Fixes: d8d74ea3c002 ("tpm: ibmvtpm: Wait for buffer to be set before proceeding")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Wang Hai <wanghai38@huawei.com>
---
 drivers/char/tpm/tpm_ibmvtpm.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/char/tpm/tpm_ibmvtpm.c b/drivers/char/tpm/tpm_ibmvtpm.c
index 994385bf37c0..813eb2cac0ce 100644
--- a/drivers/char/tpm/tpm_ibmvtpm.c
+++ b/drivers/char/tpm/tpm_ibmvtpm.c
@@ -687,6 +687,7 @@ static int tpm_ibmvtpm_probe(struct vio_dev *vio_dev,
 				ibmvtpm->rtce_buf != NULL,
 				HZ)) {
 		dev_err(dev, "CRQ response timed out\n");
+		rc = -ETIMEDOUT;
 		goto init_irq_cleanup;
 	}
 
-- 
2.17.1


^ permalink raw reply related

* [PATCH] ASoC: fsl_xcvr: fix potential resource leak
From: Viorel Suman (OSS) @ 2020-11-24 14:19 UTC (permalink / raw)
  To: Timur Tabi, Nicolin Chen, Xiubo Li, Fabio Estevam, Shengjiu Wang,
	Liam Girdwood, Mark Brown, Jaroslav Kysela, Takashi Iwai,
	alsa-devel, linuxppc-dev, linux-kernel
  Cc: Viorel Suman

From: Viorel Suman <viorel.suman@nxp.com>

"fw" variable must be relased before return.

Signed-off-by: Viorel Suman <viorel.suman@nxp.com>
---
 sound/soc/fsl/fsl_xcvr.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/soc/fsl/fsl_xcvr.c b/sound/soc/fsl/fsl_xcvr.c
index 2a28810d0e29..3d58c88ea603 100644
--- a/sound/soc/fsl/fsl_xcvr.c
+++ b/sound/soc/fsl/fsl_xcvr.c
@@ -706,6 +706,7 @@ static int fsl_xcvr_load_firmware(struct fsl_xcvr *xcvr)
 	/* RAM is 20KiB = 16KiB code + 4KiB data => max 10 pages 2KiB each */
 	if (rem > 16384) {
 		dev_err(dev, "FW size %d is bigger than 16KiB.\n", rem);
+		release_firmware(fw);
 		return -ENOMEM;
 	}
 
-- 
2.26.2


^ permalink raw reply related

* Re: [PATCH] tpm: ibmvtpm: fix error return code in tpm_ibmvtpm_probe()
From: Stefan Berger @ 2020-11-24 14:28 UTC (permalink / raw)
  To: Wang Hai, mpe, benh, paulus, peterhuewe, jarkko, jgg, nayna
  Cc: linux-integrity, linuxppc-dev, linux-kernel
In-Reply-To: <20201124135244.31932-1-wanghai38@huawei.com>

On 11/24/20 8:52 AM, Wang Hai wrote:
> Fix to return a negative error code from the error handling
> case instead of 0, as done elsewhere in this function.
>
> Fixes: d8d74ea3c002 ("tpm: ibmvtpm: Wait for buffer to be set before proceeding")
> Reported-by: Hulk Robot <hulkci@huawei.com>
> Signed-off-by: Wang Hai <wanghai38@huawei.com>
> ---
>   drivers/char/tpm/tpm_ibmvtpm.c | 1 +
>   1 file changed, 1 insertion(+)
>
> diff --git a/drivers/char/tpm/tpm_ibmvtpm.c b/drivers/char/tpm/tpm_ibmvtpm.c
> index 994385bf37c0..813eb2cac0ce 100644
> --- a/drivers/char/tpm/tpm_ibmvtpm.c
> +++ b/drivers/char/tpm/tpm_ibmvtpm.c
> @@ -687,6 +687,7 @@ static int tpm_ibmvtpm_probe(struct vio_dev *vio_dev,
>   				ibmvtpm->rtce_buf != NULL,
>   				HZ)) {
>   		dev_err(dev, "CRQ response timed out\n");
> +		rc = -ETIMEDOUT;
>   		goto init_irq_cleanup;
>   	}
>   

Reviewed-by: Stefan Berger <stefanb@linux.ibm.com>


^ permalink raw reply

* eBPF on powerpc
From: Christophe Leroy @ 2020-11-24 15:00 UTC (permalink / raw)
  To: Naveen N. Rao, linuxppc-dev@lists.ozlabs.org

Hi Naveen,

Few years ago, you implemented eBPF on PPC64.

Is there any reason for implementing it for PPC64 only ? Is there something that makes it impossible 
to have eBPF for PPC32 as well ?

Thanks
Christophe

^ permalink raw reply

* [PATCH v1 3/6] powerpc/8xx: Simplify INVALIDATE_ADJACENT_PAGES_CPU15
From: Christophe Leroy @ 2020-11-24 15:24 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <e796c5fcb5898de827c803cf1ab8ba1d7a5d4b76.1606231483.git.christophe.leroy@csgroup.eu>

We now have r11 available as a scratch register so
INVALIDATE_ADJACENT_PAGES_CPU15() can be simplified.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/kernel/head_8xx.S | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 775b4f4d011e..558c8e615ef9 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -180,14 +180,13 @@ SystemCall:
  */
 
 #ifdef CONFIG_8xx_CPU15
-#define INVALIDATE_ADJACENT_PAGES_CPU15(addr)	\
-	addi	addr, addr, PAGE_SIZE;	\
-	tlbie	addr;			\
-	addi	addr, addr, -(PAGE_SIZE << 1);	\
-	tlbie	addr;			\
-	addi	addr, addr, PAGE_SIZE
+#define INVALIDATE_ADJACENT_PAGES_CPU15(addr, tmp)	\
+	addi	tmp, addr, PAGE_SIZE;	\
+	tlbie	tmp;			\
+	addi	tmp, addr, -PAGE_SIZE;	\
+	tlbie	tmp
 #else
-#define INVALIDATE_ADJACENT_PAGES_CPU15(addr)
+#define INVALIDATE_ADJACENT_PAGES_CPU15(addr, tmp)
 #endif
 
 InstructionTLBMiss:
@@ -198,7 +197,7 @@ InstructionTLBMiss:
 	 * kernel page tables.
 	 */
 	mfspr	r10, SPRN_SRR0	/* Get effective address of fault */
-	INVALIDATE_ADJACENT_PAGES_CPU15(r10)
+	INVALIDATE_ADJACENT_PAGES_CPU15(r10, r11)
 	mtspr	SPRN_MD_EPN, r10
 #ifdef CONFIG_MODULES
 	mfcr	r11
-- 
2.25.0


^ permalink raw reply related

* [PATCH v1 1/6] powerpc/8xx: DEBUG_PAGEALLOC doesn't require an ITLB miss exception handler
From: Christophe Leroy @ 2020-11-24 15:24 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linuxppc-dev, linux-kernel

Since commit e611939fc8ec ("powerpc/mm: Ensure change_page_attr()
doesn't invalidate pinned TLBs"), pinned TLBs are not anymore
invalidated by __kernel_map_pages() when CONFIG_DEBUG_PAGEALLOC is
selected.

Remove the dependency on CONFIG_DEBUG_PAGEALLOC.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/kernel/head_8xx.S | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index ee0bfebc375f..66ee62f30d36 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -47,8 +47,7 @@
  * - Either we have modules
  * - Or we have not pinned the first 8M
  */
-#if defined(CONFIG_MODULES) || !defined(CONFIG_PIN_TLB_TEXT) || \
-    defined(CONFIG_DEBUG_PAGEALLOC)
+#if defined(CONFIG_MODULES) || !defined(CONFIG_PIN_TLB_TEXT)
 #define ITLB_MISS_KERNEL	1
 #endif
 
-- 
2.25.0


^ permalink raw reply related

* [PATCH v1 4/6] powerpc/8xx: Use SPRN_SPRG_SCRATCH2 in ITLB miss exception
From: Christophe Leroy @ 2020-11-24 15:24 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <e796c5fcb5898de827c803cf1ab8ba1d7a5d4b76.1606231483.git.christophe.leroy@csgroup.eu>

In order to re-enable MMU earlier, ensure ITLB miss exception
cannot clobber SPRN_SPRG_SCRATCH0 and SPRN_SPRG_SCRATCH1.
Do so by using SPRN_SPRG_SCRATCH2 and SPRN_M_TW instead, like
the DTLB miss exception.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/kernel/head_8xx.S | 12 ++++++------
 arch/powerpc/perf/8xx-pmu.c    |  4 ++--
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 558c8e615ef9..45239b06b6ce 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -190,8 +190,8 @@ SystemCall:
 #endif
 
 InstructionTLBMiss:
-	mtspr	SPRN_SPRG_SCRATCH0, r10
-	mtspr	SPRN_SPRG_SCRATCH1, r11
+	mtspr	SPRN_SPRG_SCRATCH2, r10
+	mtspr	SPRN_M_TW, r11
 
 	/* If we are faulting a kernel address, we have to use the
 	 * kernel page tables.
@@ -230,8 +230,8 @@ InstructionTLBMiss:
 	mtspr	SPRN_MI_RPN, r10	/* Update TLB entry */
 
 	/* Restore registers */
-0:	mfspr	r10, SPRN_SPRG_SCRATCH0
-	mfspr	r11, SPRN_SPRG_SCRATCH1
+0:	mfspr	r10, SPRN_SPRG_SCRATCH2
+	mfspr	r11, SPRN_M_TW
 	rfi
 	patch_site	0b, patch__itlbmiss_exit_1
 
@@ -240,8 +240,8 @@ InstructionTLBMiss:
 0:	lwz	r10, (itlb_miss_counter - PAGE_OFFSET)@l(0)
 	addi	r10, r10, 1
 	stw	r10, (itlb_miss_counter - PAGE_OFFSET)@l(0)
-	mfspr	r10, SPRN_SPRG_SCRATCH0
-	mfspr	r11, SPRN_SPRG_SCRATCH1
+	mfspr	r10, SPRN_SPRG_SCRATCH2
+	mfspr	r11, SPRN_M_TW
 	rfi
 #endif
 
diff --git a/arch/powerpc/perf/8xx-pmu.c b/arch/powerpc/perf/8xx-pmu.c
index e53c3c161257..02db58c7427a 100644
--- a/arch/powerpc/perf/8xx-pmu.c
+++ b/arch/powerpc/perf/8xx-pmu.c
@@ -165,9 +165,9 @@ static void mpc8xx_pmu_del(struct perf_event *event, int flags)
 		break;
 	case PERF_8xx_ID_ITLB_LOAD_MISS:
 		if (atomic_dec_return(&itlb_miss_ref) == 0) {
-			/* mfspr r10, SPRN_SPRG_SCRATCH0 */
+			/* mfspr r10, SPRN_SPRG_SCRATCH2 */
 			struct ppc_inst insn = ppc_inst(PPC_INST_MFSPR | __PPC_RS(R10) |
-					    __PPC_SPR(SPRN_SPRG_SCRATCH0));
+					    __PPC_SPR(SPRN_SPRG_SCRATCH2));
 
 			patch_instruction_site(&patch__itlbmiss_exit_1, insn);
 		}
-- 
2.25.0


^ permalink raw reply related

* [PATCH v1 2/6] powerpc/8xx: Always pin kernel text TLB
From: Christophe Leroy @ 2020-11-24 15:24 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <e796c5fcb5898de827c803cf1ab8ba1d7a5d4b76.1606231483.git.christophe.leroy@csgroup.eu>

There is no big poing in not pinning kernel text anymore, as now
we can keep pinned TLB even with things like DEBUG_PAGEALLOC.

Remove CONFIG_PIN_TLB_TEXT, making it always right.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/Kconfig               |  3 +--
 arch/powerpc/kernel/head_8xx.S     | 20 +++-----------------
 arch/powerpc/mm/nohash/8xx.c       |  3 +--
 arch/powerpc/platforms/8xx/Kconfig |  7 -------
 4 files changed, 5 insertions(+), 28 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index e9f13fe08492..bf088b5b0a89 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -795,8 +795,7 @@ config DATA_SHIFT_BOOL
 	bool "Set custom data alignment"
 	depends on ADVANCED_OPTIONS
 	depends on STRICT_KERNEL_RWX || DEBUG_PAGEALLOC
-	depends on PPC_BOOK3S_32 || (PPC_8xx && !PIN_TLB_DATA && \
-				     (!PIN_TLB_TEXT || !STRICT_KERNEL_RWX))
+	depends on PPC_BOOK3S_32 || (PPC_8xx && !PIN_TLB_DATA && !STRICT_KERNEL_RWX)
 	help
 	  This option allows you to set the kernel data alignment. When
 	  RAM is mapped by blocks, the alignment needs to fit the size and
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 66ee62f30d36..775b4f4d011e 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -42,15 +42,6 @@
 #endif
 .endm
 
-/*
- * We need an ITLB miss handler for kernel addresses if:
- * - Either we have modules
- * - Or we have not pinned the first 8M
- */
-#if defined(CONFIG_MODULES) || !defined(CONFIG_PIN_TLB_TEXT)
-#define ITLB_MISS_KERNEL	1
-#endif
-
 /*
  * Value for the bits that have fixed value in RPN entries.
  * Also used for tagging DAR for DTLBerror.
@@ -209,12 +200,12 @@ InstructionTLBMiss:
 	mfspr	r10, SPRN_SRR0	/* Get effective address of fault */
 	INVALIDATE_ADJACENT_PAGES_CPU15(r10)
 	mtspr	SPRN_MD_EPN, r10
-#ifdef ITLB_MISS_KERNEL
+#ifdef CONFIG_MODULES
 	mfcr	r11
 	compare_to_kernel_boundary r10, r10
 #endif
 	mfspr	r10, SPRN_M_TWB	/* Get level 1 table */
-#ifdef ITLB_MISS_KERNEL
+#ifdef CONFIG_MODULES
 	blt+	3f
 	rlwinm	r10, r10, 0, 20, 31
 	oris	r10, r10, (swapper_pg_dir - PAGE_OFFSET)@ha
@@ -618,10 +609,6 @@ start_here:
 	lis	r0, (MD_TWAM | MD_RSV4I)@h
 	mtspr	SPRN_MD_CTR, r0
 #endif
-#ifndef CONFIG_PIN_TLB_TEXT
-	li	r0, 0
-	mtspr	SPRN_MI_CTR, r0
-#endif
 #if !defined(CONFIG_PIN_TLB_DATA) && !defined(CONFIG_PIN_TLB_IMMR)
 	lis	r0, MD_TWAM@h
 	mtspr	SPRN_MD_CTR, r0
@@ -739,7 +726,6 @@ _GLOBAL(mmu_pin_tlb)
 	mtspr	SPRN_MD_CTR, r6
 	tlbia
 
-#ifdef CONFIG_PIN_TLB_TEXT
 	LOAD_REG_IMMEDIATE(r5, 28 << 8)
 	LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET)
 	LOAD_REG_IMMEDIATE(r7, MI_SVALID | MI_PS8MEG | _PMD_ACCESSED)
@@ -760,7 +746,7 @@ _GLOBAL(mmu_pin_tlb)
 	bdnzt	lt, 2b
 	lis	r0, MI_RSV4I@h
 	mtspr	SPRN_MI_CTR, r0
-#endif
+
 	LOAD_REG_IMMEDIATE(r5, 28 << 8 | MD_TWAM)
 #ifdef CONFIG_PIN_TLB_DATA
 	LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET)
diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c
index 231ca95f9ffb..19a3eec1d8c5 100644
--- a/arch/powerpc/mm/nohash/8xx.c
+++ b/arch/powerpc/mm/nohash/8xx.c
@@ -186,8 +186,7 @@ void mmu_mark_initmem_nx(void)
 	mmu_mapin_ram_chunk(0, boundary, PAGE_KERNEL_TEXT, false);
 	mmu_mapin_ram_chunk(boundary, einittext8, PAGE_KERNEL, false);
 
-	if (IS_ENABLED(CONFIG_PIN_TLB_TEXT))
-		mmu_pin_tlb(block_mapped_ram, false);
+	mmu_pin_tlb(block_mapped_ram, false);
 }
 
 #ifdef CONFIG_STRICT_KERNEL_RWX
diff --git a/arch/powerpc/platforms/8xx/Kconfig b/arch/powerpc/platforms/8xx/Kconfig
index cdda034733ff..1a8400bfbe82 100644
--- a/arch/powerpc/platforms/8xx/Kconfig
+++ b/arch/powerpc/platforms/8xx/Kconfig
@@ -202,13 +202,6 @@ config PIN_TLB_IMMR
 	  CONFIG_PIN_TLB_DATA is also selected, it will reduce
 	  CONFIG_PIN_TLB_DATA to 24 Mbytes.
 
-config PIN_TLB_TEXT
-	bool "Pinned TLB for TEXT"
-	depends on PIN_TLB
-	default y
-	help
-	  This pins kernel text with 8M pages.
-
 endmenu
 
 endmenu
-- 
2.25.0


^ permalink raw reply related

* [PATCH v1 5/6] powerpc/8xx: Use SPRN_SPRG_SCRATCH2 in DTLB miss exception
From: Christophe Leroy @ 2020-11-24 15:24 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <e796c5fcb5898de827c803cf1ab8ba1d7a5d4b76.1606231483.git.christophe.leroy@csgroup.eu>

Use SPRN_SPRG_SCRATCH2 in DTLB miss exception instead of DAR
in order to be similar to ITLB miss exception.

This also simplifies mpc8xx_pmu_del()

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/kernel/head_8xx.S |  9 ++++-----
 arch/powerpc/perf/8xx-pmu.c    | 19 +++++++------------
 2 files changed, 11 insertions(+), 17 deletions(-)

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 45239b06b6ce..35707e86c5f3 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -247,7 +247,7 @@ InstructionTLBMiss:
 
 	. = 0x1200
 DataStoreTLBMiss:
-	mtspr	SPRN_DAR, r10
+	mtspr	SPRN_SPRG_SCRATCH2, r10
 	mtspr	SPRN_M_TW, r11
 	mfcr	r11
 
@@ -286,11 +286,11 @@ DataStoreTLBMiss:
 	li	r11, RPN_PATTERN
 	rlwimi	r10, r11, 0, 24, 27	/* Set 24-27 */
 	mtspr	SPRN_MD_RPN, r10	/* Update TLB entry */
+	mtspr	SPRN_DAR, r11		/* Tag DAR */
 
 	/* Restore registers */
 
-0:	mfspr	r10, SPRN_DAR
-	mtspr	SPRN_DAR, r11	/* Tag DAR */
+0:	mfspr	r10, SPRN_SPRG_SCRATCH2
 	mfspr	r11, SPRN_M_TW
 	rfi
 	patch_site	0b, patch__dtlbmiss_exit_1
@@ -300,8 +300,7 @@ DataStoreTLBMiss:
 0:	lwz	r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0)
 	addi	r10, r10, 1
 	stw	r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0)
-	mfspr	r10, SPRN_DAR
-	mtspr	SPRN_DAR, r11	/* Tag DAR */
+	mfspr	r10, SPRN_SPRG_SCRATCH2
 	mfspr	r11, SPRN_M_TW
 	rfi
 #endif
diff --git a/arch/powerpc/perf/8xx-pmu.c b/arch/powerpc/perf/8xx-pmu.c
index 02db58c7427a..93004ee586a1 100644
--- a/arch/powerpc/perf/8xx-pmu.c
+++ b/arch/powerpc/perf/8xx-pmu.c
@@ -153,6 +153,11 @@ static void mpc8xx_pmu_read(struct perf_event *event)
 
 static void mpc8xx_pmu_del(struct perf_event *event, int flags)
 {
+	struct ppc_inst insn;
+
+	/* mfspr r10, SPRN_SPRG_SCRATCH2 */
+	insn = ppc_inst(PPC_INST_MFSPR | __PPC_RS(R10) | __PPC_SPR(SPRN_SPRG_SCRATCH2));
+
 	mpc8xx_pmu_read(event);
 
 	/* If it was the last user, stop counting to avoid useles overhead */
@@ -164,22 +169,12 @@ static void mpc8xx_pmu_del(struct perf_event *event, int flags)
 			mtspr(SPRN_ICTRL, 7);
 		break;
 	case PERF_8xx_ID_ITLB_LOAD_MISS:
-		if (atomic_dec_return(&itlb_miss_ref) == 0) {
-			/* mfspr r10, SPRN_SPRG_SCRATCH2 */
-			struct ppc_inst insn = ppc_inst(PPC_INST_MFSPR | __PPC_RS(R10) |
-					    __PPC_SPR(SPRN_SPRG_SCRATCH2));
-
+		if (atomic_dec_return(&itlb_miss_ref) == 0)
 			patch_instruction_site(&patch__itlbmiss_exit_1, insn);
-		}
 		break;
 	case PERF_8xx_ID_DTLB_LOAD_MISS:
-		if (atomic_dec_return(&dtlb_miss_ref) == 0) {
-			/* mfspr r10, SPRN_DAR */
-			struct ppc_inst insn = ppc_inst(PPC_INST_MFSPR | __PPC_RS(R10) |
-					    __PPC_SPR(SPRN_DAR));
-
+		if (atomic_dec_return(&dtlb_miss_ref) == 0)
 			patch_instruction_site(&patch__dtlbmiss_exit_1, insn);
-		}
 		break;
 	}
 }
-- 
2.25.0


^ permalink raw reply related

* [PATCH v1 6/6] powerpc/ppc-opcode: Add PPC_RAW_MFSPR()
From: Christophe Leroy @ 2020-11-24 15:24 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <e796c5fcb5898de827c803cf1ab8ba1d7a5d4b76.1606231483.git.christophe.leroy@csgroup.eu>

Add PPC_RAW_MFSPR() to replace open coding done in 8xx-pmu.c

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/include/asm/ppc-opcode.h | 3 ++-
 arch/powerpc/perf/8xx-pmu.c           | 5 +----
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index a6e3700c4566..da6f300e9788 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -230,7 +230,6 @@
 #define PPC_INST_POPCNTB_MASK		0xfc0007fe
 #define PPC_INST_RFEBB			0x4c000124
 #define PPC_INST_RFID			0x4c000024
-#define PPC_INST_MFSPR			0x7c0002a6
 #define PPC_INST_MFSPR_DSCR		0x7c1102a6
 #define PPC_INST_MFSPR_DSCR_MASK	0xfc1ffffe
 #define PPC_INST_MTSPR_DSCR		0x7c1103a6
@@ -507,6 +506,8 @@
 
 #define PPC_RAW_NEG(d, a)		(0x7c0000d0 | ___PPC_RT(d) | ___PPC_RA(a))
 
+#define PPC_RAW_MFSPR(d, spr)		(0x7c0002a6 | ___PPC_RT(d) | __PPC_SPR(spr))
+
 /* Deal with instructions that older assemblers aren't aware of */
 #define	PPC_BCCTR_FLUSH		stringify_in_c(.long PPC_INST_BCCTR_FLUSH)
 #define	PPC_CP_ABORT		stringify_in_c(.long PPC_RAW_CP_ABORT)
diff --git a/arch/powerpc/perf/8xx-pmu.c b/arch/powerpc/perf/8xx-pmu.c
index 93004ee586a1..f970d1510d3d 100644
--- a/arch/powerpc/perf/8xx-pmu.c
+++ b/arch/powerpc/perf/8xx-pmu.c
@@ -153,10 +153,7 @@ static void mpc8xx_pmu_read(struct perf_event *event)
 
 static void mpc8xx_pmu_del(struct perf_event *event, int flags)
 {
-	struct ppc_inst insn;
-
-	/* mfspr r10, SPRN_SPRG_SCRATCH2 */
-	insn = ppc_inst(PPC_INST_MFSPR | __PPC_RS(R10) | __PPC_SPR(SPRN_SPRG_SCRATCH2));
+	struct ppc_inst insn = ppc_inst(PPC_RAW_MFSPR(10, SPRN_SPRG_SCRATCH2));
 
 	mpc8xx_pmu_read(event);
 
-- 
2.25.0


^ permalink raw reply related

* Re: [PATCH 1/3] perf/core: Flush PMU internal buffers for per-CPU events
From: Liang, Kan @ 2020-11-24 16:04 UTC (permalink / raw)
  To: Madhavan Srinivasan, Namhyung Kim, Michael Ellerman
  Cc: Ian Rogers, Andi Kleen, Peter Zijlstra, Jiri Olsa, linux-kernel,
	Stephane Eranian, Paul Mackerras, Arnaldo Carvalho de Melo,
	linuxppc-dev, Ingo Molnar, Gabriel Marin
In-Reply-To: <9657dc9f-e1a9-eb7e-8ac2-a108416d5a10@linux.ibm.com>



On 11/24/2020 12:42 AM, Madhavan Srinivasan wrote:
> 
> On 11/24/20 10:21 AM, Namhyung Kim wrote:
>> Hello,
>>
>> On Mon, Nov 23, 2020 at 8:00 PM Michael Ellerman <mpe@ellerman.id.au> 
>> wrote:
>>> Namhyung Kim <namhyung@kernel.org> writes:
>>>> Hi Peter and Kan,
>>>>
>>>> (Adding PPC folks)
>>>>
>>>> On Tue, Nov 17, 2020 at 2:01 PM Namhyung Kim <namhyung@kernel.org> 
>>>> wrote:
>>>>> Hello,
>>>>>
>>>>> On Thu, Nov 12, 2020 at 4:54 AM Liang, Kan 
>>>>> <kan.liang@linux.intel.com> wrote:
>>>>>>
>>>>>>
>>>>>> On 11/11/2020 11:25 AM, Peter Zijlstra wrote:
>>>>>>> On Mon, Nov 09, 2020 at 09:49:31AM -0500, Liang, Kan wrote:
>>>>>>>
>>>>>>>> - When the large PEBS was introduced (9c964efa4330), the 
>>>>>>>> sched_task() should
>>>>>>>> be invoked to flush the PEBS buffer in each context switch. 
>>>>>>>> However, The
>>>>>>>> perf_sched_events in account_event() is not updated accordingly. 
>>>>>>>> The
>>>>>>>> perf_event_task_sched_* never be invoked for a pure per-CPU 
>>>>>>>> context. Only
>>>>>>>> per-task event works.
>>>>>>>>      At that time, the perf_pmu_sched_task() is outside of
>>>>>>>> perf_event_context_sched_in/out. It means that perf has to double
>>>>>>>> perf_pmu_disable() for per-task event.
>>>>>>>> - The patch 1 tries to fix broken per-CPU events. The CPU 
>>>>>>>> context cannot be
>>>>>>>> retrieved from the task->perf_event_ctxp. So it has to be 
>>>>>>>> tracked in the
>>>>>>>> sched_cb_list. Yes, the code is very similar to the original 
>>>>>>>> codes, but it
>>>>>>>> is actually the new code for per-CPU events. The optimization 
>>>>>>>> for per-task
>>>>>>>> events is still kept.
>>>>>>>>     For the case, which has both a CPU context and a task 
>>>>>>>> context, yes, the
>>>>>>>> __perf_pmu_sched_task() in this patch is not invoked. Because the
>>>>>>>> sched_task() only need to be invoked once in a context switch. The
>>>>>>>> sched_task() will be eventually invoked in the task context.
>>>>>>> The thing is; your first two patches rely on PERF_ATTACH_SCHED_CB 
>>>>>>> and
>>>>>>> only set that for large pebs. Are you sure the other users (Intel 
>>>>>>> LBR
>>>>>>> and PowerPC BHRB) don't need it?
>>>>>> I didn't set it for LBR, because the perf_sched_events is always 
>>>>>> enabled
>>>>>> for LBR. But, yes, we should explicitly set the PERF_ATTACH_SCHED_CB
>>>>>> for LBR.
>>>>>>
>>>>>>          if (has_branch_stack(event))
>>>>>>                  inc = true;
>>>>>>
>>>>>>> If they indeed do not require the pmu::sched_task() callback for CPU
>>>>>>> events, then I still think the whole perf_sched_cb_{inc,dec}() 
>>>>>>> interface
>>>>>> No, LBR requires the pmu::sched_task() callback for CPU events.
>>>>>>
>>>>>> Now, The LBR registers have to be reset in sched in even for CPU 
>>>>>> events.
>>>>>>
>>>>>> To fix the shorter LBR callstack issue for CPU events, we also 
>>>>>> need to
>>>>>> save/restore LBRs in pmu::sched_task().
>>>>>> https://lore.kernel.org/lkml/1578495789-95006-4-git-send-email-kan.liang@linux.intel.com/ 
>>>>>>
>>>>>>
>>>>>>> is confusing at best.
>>>>>>>
>>>>>>> Can't we do something like this instead?
>>>>>>>
>>>>>> I think the below patch may have two issues.
>>>>>> - PERF_ATTACH_SCHED_CB is required for LBR (maybe PowerPC BHRB as 
>>>>>> well) now.
>>>>>> - We may disable the large PEBS later if not all PEBS events support
>>>>>> large PEBS. The PMU need a way to notify the generic code to decrease
>>>>>> the nr_sched_task.
>>>>> Any updates on this?  I've reviewed and tested Kan's patches
>>>>> and they all look good.
>>>>>
>>>>> Maybe we can talk to PPC folks to confirm the BHRB case?
>>>> Can we move this forward?  I saw patch 3/3 also adds 
>>>> PERF_ATTACH_SCHED_CB
>>>> for PowerPC too.  But it'd be nice if ppc folks can confirm the change.
>>> Sorry I've read the whole thread, but I'm still not entirely sure I
>>> understand the question.
>> Thanks for your time and sorry about not being clear enough.
>>
>> We found per-cpu events are not calling pmu::sched_task()
>> on context switches.  So PERF_ATTACH_SCHED_CB was
>> added to indicate the core logic that it needs to invoke the
>> callback.
>>
>> The patch 3/3 added the flag to PPC (for BHRB) with other
>> changes (I think it should be split like in the patch 2/3) and
>> want to get ACKs from the PPC folks.
> 
> Sorry for delay.
> 
> I guess first it will be better to split the ppc change to a separate 
> patch,

Both PPC and X86 invokes the perf_sched_cb_inc() directly. The patch 
changes the parameters of the perf_sched_cb_inc(). I think we have to 
update the PPC and X86 codes together. Otherwise, there will be a 
compile error, if someone may only applies the change for the 
perf_sched_cb_inc() but forget to applies the changes in PPC or X86 
specific codes.

> 
> secondly, we are missing the changes needed in the power_pmu_bhrb_disable()
> 
> where perf_sched_cb_dec() needs the "state" to be included.
> 

Ah, right. The below patch should fix the issue.

diff --git a/arch/powerpc/perf/core-book3s.c 
b/arch/powerpc/perf/core-book3s.c
index bced502f64a1..6756d1602a67 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -391,13 +391,18 @@ static void power_pmu_bhrb_enable(struct 
perf_event *event)
  static void power_pmu_bhrb_disable(struct perf_event *event)
  {
  	struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
+	int state = PERF_SCHED_CB_SW_IN;

  	if (!ppmu->bhrb_nr)
  		return;

  	WARN_ON_ONCE(!cpuhw->bhrb_users);
  	cpuhw->bhrb_users--;
-	perf_sched_cb_dec(event->ctx->pmu);
+
+	if (!(event->attach_state & PERF_ATTACH_TASK))
+		state |= PERF_SCHED_CB_CPU;
+
+	perf_sched_cb_dec(event->ctx->pmu, state);

  	if (!cpuhw->disabled && !cpuhw->bhrb_users) {
  		/* BHRB cannot be turned off when other



Thanks,
Kan

^ permalink raw reply related

* eBPF on powerpc
From: Naveen N. Rao @ 2020-11-24 16:35 UTC (permalink / raw)
  To: Christophe Leroy, linuxppc-dev@lists.ozlabs.org
In-Reply-To: <d69650b0-4024-5759-3ccb-ede5c0394500@csgroup.eu>

Hi Christophe,

Christophe Leroy wrote:
> Hi Naveen,
> 
> Few years ago, you implemented eBPF on PPC64.
> 
> Is there any reason for implementing it for PPC64 only ?

I focused on ppc64 since eBPF is a 64-bit VM and it was more 
straight-forward to target.

> Is there something that makes it impossible to have eBPF for PPC32 as 
> well ?

No, I just wasn't sure if it would be performant enough to warrant it.  
Since then however, there have been arm32 and riscv 32-bit JIT 
implementations and atleast the arm32 JIT seems to be showing ~50% 
better performance compared to the interpreter (*). So, it would be 
worthwhile to add support for ppc32.

Note that there might be a few instructions which would be difficult to 
support on 32-bit, but those can fallback to the interpreter, while 
allowing other programs to be JIT'ed.


- Naveen

(*) 
http://lkml.kernel.org/r/CAGXu5jLYunVCJGCfHPebKDaoQ71hdMGq4HhdDxTYpBQw_HXUYQ@mail.gmail.com
(*) http://lkml.kernel.org/r/b63fae4b-cb74-1928-b210-80914f3c8995@fb.com
(*) http://lkml.kernel.org/r/20200305050207.4159-1-luke.r.nels@gmail.com

^ permalink raw reply

* [PATCH net 0/2] ibmvnic: Bug fixes for queue descriptor processing
From: Thomas Falcon @ 2020-11-24 17:26 UTC (permalink / raw)
  To: netdev
  Cc: cforno12, ljp, ricklind, dnbanerg, tlfalcon, drt, brking, sukadev,
	linuxppc-dev

This series resolves a few issues in the ibmvnic driver's
RX buffer and TX completion processing. The first patch
includes memory barriers to synchronize queue descriptor
reads. The second patch fixes a memory leak that could
occur if the device returns a TX completion with an error
code in the descriptor, in which case the respective socket
buffer and other relevant data structures may not be freed
or updated properly.

Thomas Falcon (2):
  ibmvnic: Ensure that SCRQ entry reads are correctly ordered
  ibmvnic: Fix TX completion error handling

 drivers/net/ethernet/ibm/ibmvnic.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

-- 
1.8.3.1


^ permalink raw reply

* [PATCH net 1/2] ibmvnic: Ensure that SCRQ entry reads are correctly ordered
From: Thomas Falcon @ 2020-11-24 17:26 UTC (permalink / raw)
  To: netdev
  Cc: cforno12, ljp, ricklind, dnbanerg, tlfalcon, drt, brking, sukadev,
	linuxppc-dev
In-Reply-To: <1606238776-30259-1-git-send-email-tlfalcon@linux.ibm.com>

Ensure that received Subordinate Command-Response Queue (SCRQ)
entries are properly read in order by the driver. These queues
are used in the ibmvnic device to process RX buffer and TX completion
descriptors. dma_rmb barriers have been added after checking for a
pending descriptor to ensure the correct descriptor entry is checked
and after reading the SCRQ descriptor to ensure the entire
descriptor is read before processing.

Fixes: 032c5e828 ("Driver for IBM System i/p VNIC protocol")
Signed-off-by: Thomas Falcon <tlfalcon@linux.ibm.com>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 2aa40b2..489ed5e 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -2403,6 +2403,8 @@ static int ibmvnic_poll(struct napi_struct *napi, int budget)
 
 		if (!pending_scrq(adapter, adapter->rx_scrq[scrq_num]))
 			break;
+		/* ensure that we do not prematurely exit the polling loop */
+		dma_rmb();
 		next = ibmvnic_next_scrq(adapter, adapter->rx_scrq[scrq_num]);
 		rx_buff =
 		    (struct ibmvnic_rx_buff *)be64_to_cpu(next->
@@ -3098,6 +3100,9 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter *adapter,
 		unsigned int pool = scrq->pool_index;
 		int num_entries = 0;
 
+		/* ensure that the correct descriptor entry is read */
+		dma_rmb();
+
 		next = ibmvnic_next_scrq(adapter, scrq);
 		for (i = 0; i < next->tx_comp.num_comps; i++) {
 			if (next->tx_comp.rcs[i]) {
@@ -3498,6 +3503,9 @@ static union sub_crq *ibmvnic_next_scrq(struct ibmvnic_adapter *adapter,
 	}
 	spin_unlock_irqrestore(&scrq->lock, flags);
 
+	/* ensure that the entire SCRQ descriptor is read */
+	dma_rmb();
+
 	return entry;
 }
 
-- 
1.8.3.1


^ permalink raw reply related

* [PATCH net 2/2] ibmvnic: Fix TX completion error handling
From: Thomas Falcon @ 2020-11-24 17:26 UTC (permalink / raw)
  To: netdev
  Cc: cforno12, ljp, ricklind, dnbanerg, tlfalcon, drt, brking, sukadev,
	linuxppc-dev
In-Reply-To: <1606238776-30259-1-git-send-email-tlfalcon@linux.ibm.com>

TX completions received with an error return code are not
being processed properly. When an error code is seen, do not
proceed to the next completion before cleaning up the existing
entry's data structures.

Fixes: 032c5e828 ("Driver for IBM System i/p VNIC protocol")
Signed-off-by: Thomas Falcon <tlfalcon@linux.ibm.com>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 489ed5e..7097bcb 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -3105,11 +3105,9 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter *adapter,
 
 		next = ibmvnic_next_scrq(adapter, scrq);
 		for (i = 0; i < next->tx_comp.num_comps; i++) {
-			if (next->tx_comp.rcs[i]) {
+			if (next->tx_comp.rcs[i])
 				dev_err(dev, "tx error %x\n",
 					next->tx_comp.rcs[i]);
-				continue;
-			}
 			index = be32_to_cpu(next->tx_comp.correlators[i]);
 			if (index & IBMVNIC_TSO_POOL_MASK) {
 				tx_pool = &adapter->tso_pool[pool];
-- 
1.8.3.1


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox