LinuxPPC-Dev Archive on lore.kernel.org

LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH 4/4] KVM: PPC: Book3S HV: Use POWER9 SLBIA IH=6 variant to clear SLB
From: Nicholas Piggin @ 2021-01-18  6:28 UTC (permalink / raw)
  To: kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin
In-Reply-To: <20210118062809.1430920-1-npiggin@gmail.com>

IH=6 may preserve hypervisor real-mode ERAT entries and is the
recommended SLBIA hint for switching partitions.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 9f0fdbae4b44..8cf1f69f442e 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -898,7 +898,7 @@ BEGIN_MMU_FTR_SECTION
 	/* Radix host won't have populated the SLB, so no need to clear */
 	li	r6, 0
 	slbmte	r6, r6
-	slbia
+	PPC_SLBIA(6)
 	ptesync
 END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
 
@@ -1506,7 +1506,7 @@ guest_exit_cont:		/* r9 = vcpu, r12 = trap, r13 = paca */
 	/* Finally clear out the SLB */
 	li	r0,0
 	slbmte	r0,r0
-	slbia
+	PPC_SLBIA(6)
 	ptesync
 	stw	r5,VCPU_SLB_MAX(r9)
 
@@ -3329,7 +3329,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
 
 	/* Clear hash and radix guest SLB, see guest_exit_short_path comment. */
 	slbmte	r0, r0
-	slbia
+	PPC_SLBIA(6)
 
 BEGIN_MMU_FTR_SECTION
 	b	4f
-- 
2.23.0


^ permalink raw reply related

* [PATCH 3/4] KVM: PPC: Book3S HV: No need to clear radix host SLB before loading guest
From: Nicholas Piggin @ 2021-01-18  6:28 UTC (permalink / raw)
  To: kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin
In-Reply-To: <20210118062809.1430920-1-npiggin@gmail.com>

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 0e1f5bf168a1..9f0fdbae4b44 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -888,15 +888,19 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
 	cmpdi	r3, 512		/* 1 microsecond */
 	blt	hdec_soon
 
-	/* For hash guest, clear out and reload the SLB */
 	ld	r6, VCPU_KVM(r4)
 	lbz	r0, KVM_RADIX(r6)
 	cmpwi	r0, 0
 	bne	9f
+
+	/* For hash guest, clear out and reload the SLB */
+BEGIN_MMU_FTR_SECTION
+	/* Radix host won't have populated the SLB, so no need to clear */
 	li	r6, 0
 	slbmte	r6, r6
 	slbia
 	ptesync
+END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
 
 	/* Load up guest SLB entries (N.B. slb_max will be 0 for radix) */
 	lwz	r5,VCPU_SLB_MAX(r4)
-- 
2.23.0


^ permalink raw reply related

* [PATCH 2/4] KVM: PPC: Book3S HV: Fix radix guest SLB side channel
From: Nicholas Piggin @ 2021-01-18  6:28 UTC (permalink / raw)
  To: kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin
In-Reply-To: <20210118062809.1430920-1-npiggin@gmail.com>

The slbmte instruction is legal in radix mode, including radix guest
mode. This means radix guests can load the SLB with arbitrary data.

KVM host does not clear the SLB when exiting a guest if it was a
radix guest, which would allow a rogue radix guest to use the SLB as
a side channel to communicate with other guests.

Fix this by ensuring the SLB is cleared when coming out of a radix
guest. Only the first 4 entries are a concern, because radix guests
always run with LPCR[UPRT]=1, which limits the reach of slbmte. slbia
is not used (except in a non-performance-critical path) because it
can clear cached translations.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 39 ++++++++++++++++++++-----
 1 file changed, 31 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index d5a9b57ec129..0e1f5bf168a1 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -1157,6 +1157,20 @@ EXPORT_SYMBOL_GPL(__kvmhv_vcpu_entry_p9)
 	mr	r4, r3
 	b	fast_guest_entry_c
 guest_exit_short_path:
+	/*
+	 * Malicious or buggy radix guests may have inserted SLB entries
+	 * (only 0..3 because radix always runs with UPRT=1), so these must
+	 * be cleared here to avoid side-channels. slbmte is used rather
+	 * than slbia, as it won't clear cached translations.
+	 */
+	li	r0,0
+	slbmte	r0,r0
+	li	r4,1
+	slbmte	r0,r4
+	li	r4,2
+	slbmte	r0,r4
+	li	r4,3
+	slbmte	r0,r4
 
 	li	r0, KVM_GUEST_MODE_NONE
 	stb	r0, HSTATE_IN_GUEST(r13)
@@ -1469,7 +1483,7 @@ guest_exit_cont:		/* r9 = vcpu, r12 = trap, r13 = paca */
 	lbz	r0, KVM_RADIX(r5)
 	li	r5, 0
 	cmpwi	r0, 0
-	bne	3f			/* for radix, save 0 entries */
+	bne	0f			/* for radix, save 0 entries */
 	lwz	r0,VCPU_SLB_NR(r9)	/* number of entries in SLB */
 	mtctr	r0
 	li	r6,0
@@ -1490,12 +1504,9 @@ guest_exit_cont:		/* r9 = vcpu, r12 = trap, r13 = paca */
 	slbmte	r0,r0
 	slbia
 	ptesync
-3:	stw	r5,VCPU_SLB_MAX(r9)
+	stw	r5,VCPU_SLB_MAX(r9)
 
 	/* load host SLB entries */
-BEGIN_MMU_FTR_SECTION
-	b	0f
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
 	ld	r8,PACA_SLBSHADOWPTR(r13)
 
 	.rept	SLB_NUM_BOLTED
@@ -1508,7 +1519,17 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
 	slbmte	r6,r5
 1:	addi	r8,r8,16
 	.endr
-0:
+	b	guest_bypass
+
+0:	/* Sanitise radix guest SLB, see guest_exit_short_path comment. */
+	li	r0,0
+	slbmte	r0,r0
+	li	r4,1
+	slbmte	r0,r4
+	li	r4,2
+	slbmte	r0,r4
+	li	r4,3
+	slbmte	r0,r4
 
 guest_bypass:
 	stw	r12, STACK_SLOT_TRAP(r1)
@@ -3302,12 +3323,14 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
 	mtspr	SPRN_CIABR, r0
 	mtspr	SPRN_DAWRX0, r0
 
+	/* Clear hash and radix guest SLB, see guest_exit_short_path comment. */
+	slbmte	r0, r0
+	slbia
+
 BEGIN_MMU_FTR_SECTION
 	b	4f
 END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
 
-	slbmte	r0, r0
-	slbia
 	ptesync
 	ld	r8, PACA_SLBSHADOWPTR(r13)
 	.rept	SLB_NUM_BOLTED
-- 
2.23.0


^ permalink raw reply related

* [PATCH 0/4] a few KVM patches
From: Nicholas Piggin @ 2021-01-18  6:28 UTC (permalink / raw)
  To: kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin

These patches are unrelated except touching some of the same code.
The first two fix actual guest exploitable issues, the other two try
to tidy up SLB management slightly.

Thanks,
Nick

Nicholas Piggin (4):
  KVM: PPC: Book3S HV: Remove support for running HPT guest on RPT host
    without mixed mode support
  KVM: PPC: Book3S HV: Fix radix guest SLB side channel
  KVM: PPC: Book3S HV: No need to clear radix host SLB before loading
    guest
  KVM: PPC: Book3S HV: Use POWER9 SLBIA IH=6 variant to clear SLB

 arch/powerpc/include/asm/kvm_book3s_asm.h |  11 --
 arch/powerpc/kernel/asm-offsets.c         |   3 -
 arch/powerpc/kvm/book3s_hv.c              |  56 ++--------
 arch/powerpc/kvm/book3s_hv_builtin.c      | 108 +-----------------
 arch/powerpc/kvm/book3s_hv_rmhandlers.S   | 129 ++++++++++------------
 5 files changed, 70 insertions(+), 237 deletions(-)

-- 
2.23.0


^ permalink raw reply

* [PATCH 1/4] KVM: PPC: Book3S HV: Remove support for running HPT guest on RPT host without mixed mode support
From: Nicholas Piggin @ 2021-01-18  6:28 UTC (permalink / raw)
  To: kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin
In-Reply-To: <20210118062809.1430920-1-npiggin@gmail.com>

This reverts much of commit c01015091a770 ("KVM: PPC: Book3S HV: Run HPT
guests on POWER9 radix hosts"), which was required to run HPT guests on
RPT hosts on early POWER9 CPUs without support for "mixed mode", which
meant the host could not run with MMU on while guests were running.

This code has some corner case bugs, e.g., when the guest hits a machine
check or HMI the primary locks up waiting for secondaries to switch LPCR
to host, which they never do. This could all be fixed in software, but
most CPUs in production have mixed mode support, and those that don't
are believed to be all in installations that don't use this capability.
So simplify things and remove support.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/include/asm/kvm_book3s_asm.h |  11 ---
 arch/powerpc/kernel/asm-offsets.c         |   3 -
 arch/powerpc/kvm/book3s_hv.c              |  56 +++--------
 arch/powerpc/kvm/book3s_hv_builtin.c      | 108 +---------------------
 arch/powerpc/kvm/book3s_hv_rmhandlers.S   |  80 ++++------------
 5 files changed, 32 insertions(+), 226 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h
index 078f4648ea27..b6d31bff5209 100644
--- a/arch/powerpc/include/asm/kvm_book3s_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -74,16 +74,6 @@ struct kvm_split_mode {
 	u8		do_nap;
 	u8		napped[MAX_SMT_THREADS];
 	struct kvmppc_vcore *vc[MAX_SUBCORES];
-	/* Bits for changing lpcr on P9 */
-	unsigned long	lpcr_req;
-	unsigned long	lpidr_req;
-	unsigned long	host_lpcr;
-	u32		do_set;
-	u32		do_restore;
-	union {
-		u32	allphases;
-		u8	phase[4];
-	} lpcr_sync;
 };
 
 /*
@@ -110,7 +100,6 @@ struct kvmppc_host_state {
 	u8 hwthread_state;
 	u8 host_ipi;
 	u8 ptid;		/* thread number within subcore when split */
-	u8 tid;			/* thread number within whole core */
 	u8 fake_suspend;
 	struct kvm_vcpu *kvm_vcpu;
 	struct kvmppc_vcore *kvm_vcore;
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index b12d7c049bfe..489a22cf1a92 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -668,7 +668,6 @@ int main(void)
 	HSTATE_FIELD(HSTATE_SAVED_XIRR, saved_xirr);
 	HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi);
 	HSTATE_FIELD(HSTATE_PTID, ptid);
-	HSTATE_FIELD(HSTATE_TID, tid);
 	HSTATE_FIELD(HSTATE_FAKE_SUSPEND, fake_suspend);
 	HSTATE_FIELD(HSTATE_MMCR0, host_mmcr[0]);
 	HSTATE_FIELD(HSTATE_MMCR1, host_mmcr[1]);
@@ -698,8 +697,6 @@ int main(void)
 	OFFSET(KVM_SPLIT_LDBAR, kvm_split_mode, ldbar);
 	OFFSET(KVM_SPLIT_DO_NAP, kvm_split_mode, do_nap);
 	OFFSET(KVM_SPLIT_NAPPED, kvm_split_mode, napped);
-	OFFSET(KVM_SPLIT_DO_SET, kvm_split_mode, do_set);
-	OFFSET(KVM_SPLIT_DO_RESTORE, kvm_split_mode, do_restore);
 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
 
 #ifdef CONFIG_PPC_BOOK3S_64
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 6f612d240392..2d8627dbd9f6 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -134,7 +134,7 @@ static inline bool nesting_enabled(struct kvm *kvm)
 }
 
 /* If set, the threads on each CPU core have to be in the same MMU mode */
-static bool no_mixing_hpt_and_radix;
+static bool no_mixing_hpt_and_radix __read_mostly;
 
 static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
 
@@ -2862,11 +2862,6 @@ static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip)
 	if (one_vm_per_core && vc->kvm != cip->vc[0]->kvm)
 		return false;
 
-	/* Some POWER9 chips require all threads to be in the same MMU mode */
-	if (no_mixing_hpt_and_radix &&
-	    kvm_is_radix(vc->kvm) != kvm_is_radix(cip->vc[0]->kvm))
-		return false;
-
 	if (n_threads < cip->max_subcore_threads)
 		n_threads = cip->max_subcore_threads;
 	if (!subcore_config_ok(cip->n_subcores + 1, n_threads))
@@ -2905,6 +2900,9 @@ static void prepare_threads(struct kvmppc_vcore *vc)
 	for_each_runnable_thread(i, vcpu, vc) {
 		if (signal_pending(vcpu->arch.run_task))
 			vcpu->arch.ret = -EINTR;
+		else if (no_mixing_hpt_and_radix &&
+			 kvm_is_radix(vc->kvm) != radix_enabled())
+			vcpu->arch.ret = -EINVAL;
 		else if (vcpu->arch.vpa.update_pending ||
 			 vcpu->arch.slb_shadow.update_pending ||
 			 vcpu->arch.dtl.update_pending)
@@ -3110,7 +3108,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	int controlled_threads;
 	int trap;
 	bool is_power8;
-	bool hpt_on_radix;
 
 	/*
 	 * Remove from the list any threads that have a signal pending
@@ -3143,11 +3140,8 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	 * this is a HPT guest on a radix host machine where the
 	 * CPU threads may not be in different MMU modes.
 	 */
-	hpt_on_radix = no_mixing_hpt_and_radix && radix_enabled() &&
-		!kvm_is_radix(vc->kvm);
-	if (((controlled_threads > 1) &&
-	     ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) ||
-	    (hpt_on_radix && vc->kvm->arch.threads_indep)) {
+	if ((controlled_threads > 1) &&
+	    ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) {
 		for_each_runnable_thread(i, vcpu, vc) {
 			vcpu->arch.ret = -EBUSY;
 			kvmppc_remove_runnable(vc, vcpu);
@@ -3215,7 +3209,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	is_power8 = cpu_has_feature(CPU_FTR_ARCH_207S)
 		&& !cpu_has_feature(CPU_FTR_ARCH_300);
 
-	if (split > 1 || hpt_on_radix) {
+	if (split > 1) {
 		sip = &split_info;
 		memset(&split_info, 0, sizeof(split_info));
 		for (sub = 0; sub < core_info.n_subcores; ++sub)
@@ -3237,13 +3231,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 			split_info.subcore_size = subcore_size;
 		} else {
 			split_info.subcore_size = 1;
-			if (hpt_on_radix) {
-				/* Use the split_info for LPCR/LPIDR changes */
-				split_info.lpcr_req = vc->lpcr;
-				split_info.lpidr_req = vc->kvm->arch.lpid;
-				split_info.host_lpcr = vc->kvm->arch.host_lpcr;
-				split_info.do_set = 1;
-			}
 		}
 
 		/* order writes to split_info before kvm_split_mode pointer */
@@ -3253,7 +3240,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	for (thr = 0; thr < controlled_threads; ++thr) {
 		struct paca_struct *paca = paca_ptrs[pcpu + thr];
 
-		paca->kvm_hstate.tid = thr;
 		paca->kvm_hstate.napping = 0;
 		paca->kvm_hstate.kvm_split_mode = sip;
 	}
@@ -3327,10 +3313,8 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	 * When doing micro-threading, poke the inactive threads as well.
 	 * This gets them to the nap instruction after kvm_do_nap,
 	 * which reduces the time taken to unsplit later.
-	 * For POWER9 HPT guest on radix host, we need all the secondary
-	 * threads woken up so they can do the LPCR/LPIDR change.
 	 */
-	if (cmd_bit || hpt_on_radix) {
+	if (cmd_bit) {
 		split_info.do_nap = 1;	/* ask secondaries to nap when done */
 		for (thr = 1; thr < threads_per_subcore; ++thr)
 			if (!(active & (1 << thr)))
@@ -3391,19 +3375,8 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 			cpu_relax();
 			++loops;
 		}
-	} else if (hpt_on_radix) {
-		/* Wait for all threads to have seen final sync */
-		for (thr = 1; thr < controlled_threads; ++thr) {
-			struct paca_struct *paca = paca_ptrs[pcpu + thr];
-
-			while (paca->kvm_hstate.kvm_split_mode) {
-				HMT_low();
-				barrier();
-			}
-			HMT_medium();
-		}
+		split_info.do_nap = 0;
 	}
-	split_info.do_nap = 0;
 
 	kvmppc_set_host_core(pcpu);
 
@@ -4173,7 +4146,6 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
 
 	kvmppc_clear_host_core(pcpu);
 
-	local_paca->kvm_hstate.tid = 0;
 	local_paca->kvm_hstate.napping = 0;
 	local_paca->kvm_hstate.kvm_split_mode = NULL;
 	kvmppc_start_thread(vcpu, vc);
@@ -4358,15 +4330,11 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
 
 	do {
 		/*
-		 * The early POWER9 chips that can't mix radix and HPT threads
-		 * on the same core also need the workaround for the problem
-		 * where the TLB would prefetch entries in the guest exit path
-		 * for radix guests using the guest PIDR value and LPID 0.
-		 * The workaround is in the old path (kvmppc_run_vcpu())
-		 * but not the new path (kvmhv_run_single_vcpu()).
+		 * The TLB prefetch bug fixup is only in the kvmppc_run_vcpu
+		 * path, which also handles hash and dependent threads mode.
 		 */
 		if (kvm->arch.threads_indep && kvm_is_radix(kvm) &&
-		    !no_mixing_hpt_and_radix)
+		    !cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
 			r = kvmhv_run_single_vcpu(vcpu, ~(u64)0,
 						  vcpu->arch.vcore->lpcr);
 		else
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 8053efdf7ea7..f3d3183249fe 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -277,8 +277,7 @@ void kvmhv_commence_exit(int trap)
 	struct kvmppc_vcore *vc = local_paca->kvm_hstate.kvm_vcore;
 	int ptid = local_paca->kvm_hstate.ptid;
 	struct kvm_split_mode *sip = local_paca->kvm_hstate.kvm_split_mode;
-	int me, ee, i, t;
-	int cpu0;
+	int me, ee, i;
 
 	/* Set our bit in the threads-exiting-guest map in the 0xff00
 	   bits of vcore->entry_exit_map */
@@ -320,22 +319,6 @@ void kvmhv_commence_exit(int trap)
 		if ((ee >> 8) == 0)
 			kvmhv_interrupt_vcore(vc, ee);
 	}
-
-	/*
-	 * On POWER9 when running a HPT guest on a radix host (sip != NULL),
-	 * we have to interrupt inactive CPU threads to get them to
-	 * restore the host LPCR value.
-	 */
-	if (sip->lpcr_req) {
-		if (cmpxchg(&sip->do_restore, 0, 1) == 0) {
-			vc = local_paca->kvm_hstate.kvm_vcore;
-			cpu0 = vc->pcpu + ptid - local_paca->kvm_hstate.tid;
-			for (t = 1; t < threads_per_core; ++t) {
-				if (sip->napped[t])
-					kvmhv_rm_send_ipi(cpu0 + t);
-			}
-		}
-	}
 }
 
 struct kvmppc_host_rm_ops *kvmppc_host_rm_ops_hv;
@@ -667,95 +650,6 @@ void kvmppc_bad_interrupt(struct pt_regs *regs)
 	panic("Bad KVM trap");
 }
 
-/*
- * Functions used to switch LPCR HR and UPRT bits on all threads
- * when entering and exiting HPT guests on a radix host.
- */
-
-#define PHASE_REALMODE		1	/* in real mode */
-#define PHASE_SET_LPCR		2	/* have set LPCR */
-#define PHASE_OUT_OF_GUEST	4	/* have finished executing in guest */
-#define PHASE_RESET_LPCR	8	/* have reset LPCR to host value */
-
-#define ALL(p)		(((p) << 24) | ((p) << 16) | ((p) << 8) | (p))
-
-static void wait_for_sync(struct kvm_split_mode *sip, int phase)
-{
-	int thr = local_paca->kvm_hstate.tid;
-
-	sip->lpcr_sync.phase[thr] |= phase;
-	phase = ALL(phase);
-	while ((sip->lpcr_sync.allphases & phase) != phase) {
-		HMT_low();
-		barrier();
-	}
-	HMT_medium();
-}
-
-void kvmhv_p9_set_lpcr(struct kvm_split_mode *sip)
-{
-	int num_sets;
-	unsigned long rb, set;
-
-	/* wait for every other thread to get to real mode */
-	wait_for_sync(sip, PHASE_REALMODE);
-
-	/* Set LPCR and LPIDR */
-	mtspr(SPRN_LPCR, sip->lpcr_req);
-	mtspr(SPRN_LPID, sip->lpidr_req);
-	isync();
-
-	/*
-	 * P10 will flush all the congruence class with a single tlbiel
-	 */
-	if (cpu_has_feature(CPU_FTR_ARCH_31))
-		num_sets =  1;
-	else
-		num_sets = POWER9_TLB_SETS_RADIX;
-
-	/* Invalidate the TLB on thread 0 */
-	if (local_paca->kvm_hstate.tid == 0) {
-		sip->do_set = 0;
-		asm volatile("ptesync" : : : "memory");
-		for (set = 0; set < num_sets; ++set) {
-			rb = TLBIEL_INVAL_SET_LPID +
-				(set << TLBIEL_INVAL_SET_SHIFT);
-			asm volatile(PPC_TLBIEL(%0, %1, 0, 0, 0) : :
-				     "r" (rb), "r" (0));
-		}
-		asm volatile("ptesync" : : : "memory");
-	}
-
-	/* indicate that we have done so and wait for others */
-	wait_for_sync(sip, PHASE_SET_LPCR);
-	/* order read of sip->lpcr_sync.allphases vs. sip->do_set */
-	smp_rmb();
-}
-
-/*
- * Called when a thread that has been in the guest needs
- * to reload the host LPCR value - but only on POWER9 when
- * running a HPT guest on a radix host.
- */
-void kvmhv_p9_restore_lpcr(struct kvm_split_mode *sip)
-{
-	/* we're out of the guest... */
-	wait_for_sync(sip, PHASE_OUT_OF_GUEST);
-
-	mtspr(SPRN_LPID, 0);
-	mtspr(SPRN_LPCR, sip->host_lpcr);
-	isync();
-
-	if (local_paca->kvm_hstate.tid == 0) {
-		sip->do_restore = 0;
-		smp_wmb();	/* order store of do_restore vs. phase */
-	}
-
-	wait_for_sync(sip, PHASE_RESET_LPCR);
-	smp_mb();
-	local_paca->kvm_hstate.kvm_split_mode = NULL;
-}
-
 static void kvmppc_end_cede(struct kvm_vcpu *vcpu)
 {
 	vcpu->arch.ceded = 0;
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index cd9995ee8441..d5a9b57ec129 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -85,19 +85,6 @@ _GLOBAL_TOC(kvmppc_hv_entry_trampoline)
 	RFI_TO_KERNEL
 
 kvmppc_call_hv_entry:
-BEGIN_FTR_SECTION
-	/* On P9, do LPCR setting, if necessary */
-	ld	r3, HSTATE_SPLIT_MODE(r13)
-	cmpdi	r3, 0
-	beq	46f
-	lwz	r4, KVM_SPLIT_DO_SET(r3)
-	cmpwi	r4, 0
-	beq	46f
-	bl	kvmhv_p9_set_lpcr
-	nop
-46:
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
-
 	ld	r4, HSTATE_KVM_VCPU(r13)
 	bl	kvmppc_hv_entry
 
@@ -361,11 +348,11 @@ kvm_secondary_got_guest:
 	LOAD_REG_ADDR(r6, decrementer_max)
 	ld	r6, 0(r6)
 	mtspr	SPRN_HDEC, r6
+BEGIN_FTR_SECTION
 	/* and set per-LPAR registers, if doing dynamic micro-threading */
 	ld	r6, HSTATE_SPLIT_MODE(r13)
 	cmpdi	r6, 0
 	beq	63f
-BEGIN_FTR_SECTION
 	ld	r0, KVM_SPLIT_RPR(r6)
 	mtspr	SPRN_RPR, r0
 	ld	r0, KVM_SPLIT_PMMAR(r6)
@@ -373,16 +360,7 @@ BEGIN_FTR_SECTION
 	ld	r0, KVM_SPLIT_LDBAR(r6)
 	mtspr	SPRN_LDBAR, r0
 	isync
-FTR_SECTION_ELSE
-	/* On P9 we use the split_info for coordinating LPCR changes */
-	lwz	r4, KVM_SPLIT_DO_SET(r6)
-	cmpwi	r4, 0
-	beq	1f
-	mr	r3, r6
-	bl	kvmhv_p9_set_lpcr
-	nop
-1:
-ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
 63:
 	/* Order load of vcpu after load of vcore */
 	lwsync
@@ -452,19 +430,15 @@ kvm_no_guest:
 	mtcr	r5
 	blr
 
-53:	HMT_LOW
+53:
+BEGIN_FTR_SECTION
+	HMT_LOW
 	ld	r5, HSTATE_KVM_VCORE(r13)
 	cmpdi	r5, 0
 	bne	60f
 	ld	r3, HSTATE_SPLIT_MODE(r13)
 	cmpdi	r3, 0
 	beq	kvm_no_guest
-	lwz	r0, KVM_SPLIT_DO_SET(r3)
-	cmpwi	r0, 0
-	bne	kvmhv_do_set
-	lwz	r0, KVM_SPLIT_DO_RESTORE(r3)
-	cmpwi	r0, 0
-	bne	kvmhv_do_restore
 	lbz	r0, KVM_SPLIT_DO_NAP(r3)
 	cmpwi	r0, 0
 	beq	kvm_no_guest
@@ -472,24 +446,19 @@ kvm_no_guest:
 	b	kvm_unsplit_nap
 60:	HMT_MEDIUM
 	b	kvm_secondary_got_guest
+FTR_SECTION_ELSE
+	HMT_LOW
+	ld	r5, HSTATE_KVM_VCORE(r13)
+	cmpdi	r5, 0
+	beq	kvm_no_guest
+	HMT_MEDIUM
+	b	kvm_secondary_got_guest
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
 
 54:	li	r0, KVM_HWTHREAD_IN_KVM
 	stb	r0, HSTATE_HWTHREAD_STATE(r13)
 	b	kvm_no_guest
 
-kvmhv_do_set:
-	/* Set LPCR, LPIDR etc. on P9 */
-	HMT_MEDIUM
-	bl	kvmhv_p9_set_lpcr
-	nop
-	b	kvm_no_guest
-
-kvmhv_do_restore:
-	HMT_MEDIUM
-	bl	kvmhv_p9_restore_lpcr
-	nop
-	b	kvm_no_guest
-
 /*
  * Here the primary thread is trying to return the core to
  * whole-core mode, so we need to nap.
@@ -527,7 +496,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	/* Set kvm_split_mode.napped[tid] = 1 */
 	ld	r3, HSTATE_SPLIT_MODE(r13)
 	li	r0, 1
-	lbz	r4, HSTATE_TID(r13)
+	lhz	r4, PACAPACAINDEX(r13)
+	clrldi	r4, r4, 61	/* micro-threading => P8 => 8 threads/core */
 	addi	r4, r4, KVM_SPLIT_NAPPED
 	stbx	r0, r3, r4
 	/* Check the do_nap flag again after setting napped[] */
@@ -1938,24 +1908,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 19:	lis	r8,0x7fff		/* MAX_INT@h */
 	mtspr	SPRN_HDEC,r8
 
-16:
-BEGIN_FTR_SECTION
-	/* On POWER9 with HPT-on-radix we need to wait for all other threads */
-	ld	r3, HSTATE_SPLIT_MODE(r13)
-	cmpdi	r3, 0
-	beq	47f
-	lwz	r8, KVM_SPLIT_DO_RESTORE(r3)
-	cmpwi	r8, 0
-	beq	47f
-	bl	kvmhv_p9_restore_lpcr
-	nop
-	b	48f
-47:
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
-	ld	r8,KVM_HOST_LPCR(r4)
+16:	ld	r8,KVM_HOST_LPCR(r4)
 	mtspr	SPRN_LPCR,r8
 	isync
-48:
+
 #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
 	/* Finish timing, if we have a vcpu */
 	ld	r4, HSTATE_KVM_VCPU(r13)
@@ -2779,8 +2735,10 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
 	beq	kvm_end_cede
 	cmpwi	r0, NAPPING_NOVCPU
 	beq	kvm_novcpu_wakeup
+BEGIN_FTR_SECTION
 	cmpwi	r0, NAPPING_UNSPLIT
 	beq	kvm_unsplit_wakeup
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
 	twi	31,0,0 /* Nap state must not be zero */
 
 33:	mr	r4, r3
-- 
2.23.0


^ permalink raw reply related

* Re: [PATCH 10/18] arch: powerpc: Stop building and using oprofile
From: Viresh Kumar @ 2021-01-18  4:47 UTC (permalink / raw)
  To: Linus Torvalds, Robert Richter, Michael Ellerman,
	Benjamin Herrenschmidt, Paul Mackerras, Arnd Bergmann,
	Jeremy Kerr
  Cc: Arnd Bergmann, Vincent Guittot, Christoph Hellwig, linux-kernel,
	anmar.oueja, oprofile-list, Alexander Viro, William Cohen,
	linuxppc-dev
In-Reply-To: <fd155a0a1e52650ddc9ec57a1d211fdc777beddb.1610622251.git.viresh.kumar@linaro.org>

On 14-01-21, 17:05, Viresh Kumar wrote:
> The "oprofile" user-space tools don't use the kernel OPROFILE support
> any more, and haven't in a long time. User-space has been converted to
> the perf interfaces.
> 
> This commits stops building oprofile for powerpc and removes any
> reference to it from directories in arch/powerpc/ apart from
> arch/powerpc/oprofile, which will be removed in the next commit (this is
> broken into two commits as the size of the commit became very big, ~5k
> lines).
> 
> Note that the member "oprofile_cpu_type" in "struct cpu_spec" isn't
> removed as it was also used by other parts of the code.
> 
> Suggested-by: Christoph Hellwig <hch@infradead.org>
> Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
> Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
> ---
>  arch/powerpc/Kconfig                          |   1 -
>  arch/powerpc/Makefile                         |   2 -
>  arch/powerpc/configs/44x/akebono_defconfig    |   1 -
>  arch/powerpc/configs/44x/currituck_defconfig  |   1 -
>  arch/powerpc/configs/44x/fsp2_defconfig       |   1 -
>  arch/powerpc/configs/44x/iss476-smp_defconfig |   1 -
>  arch/powerpc/configs/cell_defconfig           |   1 -
>  arch/powerpc/configs/g5_defconfig             |   1 -
>  arch/powerpc/configs/maple_defconfig          |   1 -
>  arch/powerpc/configs/pasemi_defconfig         |   1 -
>  arch/powerpc/configs/pmac32_defconfig         |   1 -
>  arch/powerpc/configs/powernv_defconfig        |   1 -
>  arch/powerpc/configs/ppc64_defconfig          |   1 -
>  arch/powerpc/configs/ppc64e_defconfig         |   1 -
>  arch/powerpc/configs/ppc6xx_defconfig         |   1 -
>  arch/powerpc/configs/ps3_defconfig            |   1 -
>  arch/powerpc/configs/pseries_defconfig        |   1 -
>  arch/powerpc/include/asm/cputable.h           |  20 ---
>  arch/powerpc/include/asm/oprofile_impl.h      | 135 ------------------
>  arch/powerpc/include/asm/spu.h                |  33 -----
>  arch/powerpc/kernel/cputable.c                |  67 ---------
>  arch/powerpc/kernel/dt_cpu_ftrs.c             |   2 -
>  arch/powerpc/platforms/cell/Kconfig           |   5 -
>  arch/powerpc/platforms/cell/spu_notify.c      |  55 -------

+ this..

diff --git a/arch/powerpc/platforms/cell/Makefile b/arch/powerpc/platforms/cell/Makefile
index 10064a33ca96..7ea6692f67e2 100644
--- a/arch/powerpc/platforms/cell/Makefile
+++ b/arch/powerpc/platforms/cell/Makefile
@@ -19,7 +19,6 @@ spu-priv1-$(CONFIG_PPC_CELL_COMMON)   += spu_priv1_mmio.o
 spu-manage-$(CONFIG_PPC_CELL_COMMON)   += spu_manage.o
 
 obj-$(CONFIG_SPU_BASE)                 += spu_callbacks.o spu_base.o \
-                                          spu_notify.o \
                                           spu_syscalls.o \
                                           $(spu-priv1-y) \
                                           $(spu-manage-y) \

-- 
viresh

^ permalink raw reply related

* Re: [PATCH 5/6] powerpc/rtas: rename RTAS_RMOBUF_MAX to RTAS_USER_REGION_SIZE
From: Alexey Kardashevskiy @ 2021-01-18  4:15 UTC (permalink / raw)
  To: Nathan Lynch, linuxppc-dev; +Cc: tyreld, brking, ajd, aneesh.kumar
In-Reply-To: <87a6taxkgf.fsf@linux.ibm.com>



On 16/01/2021 02:56, Nathan Lynch wrote:
> Alexey Kardashevskiy <aik@ozlabs.ru> writes:
>> On 15/01/2021 09:00, Nathan Lynch wrote:
>>> diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
>>> index 332e1000ca0f..1aa7ab1cbc84 100644
>>> --- a/arch/powerpc/include/asm/rtas.h
>>> +++ b/arch/powerpc/include/asm/rtas.h
>>> @@ -19,8 +19,11 @@
>>>    #define RTAS_UNKNOWN_SERVICE (-1)
>>>    #define RTAS_INSTANTIATE_MAX (1ULL<<30) /* Don't instantiate rtas at/above this value */
>>>    
>>> -/* Buffer size for ppc_rtas system call. */
>>> -#define RTAS_RMOBUF_MAX (64 * 1024)
>>> +/* Work areas shared with RTAS must be 4K, naturally aligned. */
>>
>> Why exactly 4K and not (for example) PAGE_SIZE?
> 
> 4K is a platform requirement and isn't related to Linux's configured
> page size. See the PAPR specification for RTAS functions such as
> ibm,configure-connector, ibm,update-nodes, ibm,update-properties.

Good, since we are documenting things here - add to the comment ("per 
PAPR")?


> There are other calls with work area parameters where alignment isn't
> specified (e.g. ibm,get-system-parameter) but 4KB alignment is a safe
> choice for those.
> 
>>> +#define RTAS_WORK_AREA_SIZE   4096
>>> +
>>> +/* Work areas allocated for user space access. */
>>> +#define RTAS_USER_REGION_SIZE (RTAS_WORK_AREA_SIZE * 16)
>>
>> This is still 64K but no clarity why. There is 16 of something, what
>> is it?
> 
> There are 16 4KB work areas in the region. I can name it
> RTAS_NR_USER_WORK_AREAS or similar.


Why 16? PAPR (then add "per PAPR") or we just like 16 ("should be enough")?


-- 
Alexey

^ permalink raw reply

* Re: [PATCH 6/6] powerpc/rtas: constrain user region allocation to RMA
From: Alexey Kardashevskiy @ 2021-01-18  4:12 UTC (permalink / raw)
  To: Nathan Lynch, linuxppc-dev; +Cc: tyreld, brking, ajd, aneesh.kumar
In-Reply-To: <87czy6xlap.fsf@linux.ibm.com>



On 16/01/2021 02:38, Nathan Lynch wrote:
> Alexey Kardashevskiy <aik@ozlabs.ru> writes:
>> On 15/01/2021 09:00, Nathan Lynch wrote:
>>> Memory locations passed as arguments from the OS to RTAS usually need
>>> to be addressable in 32-bit mode and must reside in the Real Mode
>>> Area. On PAPR guests, the RMA starts at logical address 0 and is the
>>> first logical memory block reported in the LPAR’s device tree.
>>>
>>> On powerpc targets with RTAS, Linux makes available to user space a
>>> region of memory suitable for arguments to be passed to RTAS via
>>> sys_rtas(). This region (rtas_rmo_buf) is allocated via the memblock
>>> API during boot in order to ensure that it satisfies the requirements
>>> described above.
>>>
>>> With radix MMU, the upper limit supplied to the memblock allocation
>>> can exceed the bounds of the first logical memory block, since
>>> ppc64_rma_size is ULONG_MAX and RTAS_INSTANTIATE_MAX is 1GB. (512MB is
>>> a common size of the first memory block according to a small sample of
>>> LPARs I have checked.) This leads to failures when user space invokes
>>> an RTAS function that uses a work area, such as
>>> ibm,configure-connector.
>>>
>>> Alter the determination of the upper limit for rtas_rmo_buf's
>>> allocation to consult the device tree directly, ensuring placement
>>> within the RMA regardless of the MMU in use.
>>
>> Can we tie this with RTAS (which also needs to be in RMA) and simply add
>> extra 64K in prom_instantiate_rtas() and advertise this address
>> (ALIGH_UP(rtas-base + rtas-size, PAGE_SIZE)) to the user space? We do
>> not need this RMO area before that point.
> 
> Can you explain more about what advantage that would bring? I'm not
> seeing it. It's a more significant change than what I've written
> here.


We already allocate space for RTAS and (like RMO) it needs to be in RMA, 
and RMO is useless without RTAS. We can reuse RTAS allocation code for 
RMO like this:

===
diff --git a/arch/powerpc/kernel/prom_init.c 
b/arch/powerpc/kernel/prom_init.c
index e9d4eb6144e1..d9527d3e01d2 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -1821,7 +1821,8 @@ static void __init prom_instantiate_rtas(void)
         if (size == 0)
                 return;

-       base = alloc_down(size, PAGE_SIZE, 0);
+       /* One page for RTAS, one for RMO */
+       base = alloc_down(size, PAGE_SIZE + PAGE_SIZE, 0);
         if (base == 0)
                 prom_panic("Could not allocate memory for RTAS\n");

diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index d126d71ea5bd..885d95cf4ed3 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -1186,6 +1186,7 @@ void __init rtas_initialize(void)
         rtas.size = size;
         no_entry = of_property_read_u32(rtas.dev, "linux,rtas-entry", 
&entry);
         rtas.entry = no_entry ? rtas.base : entry;
+       rtas_rmo_buf = rtas.base + PAGE_SIZE;

         /* If RTAS was found, allocate the RMO buffer for it and look for
          * the stop-self token if any
@@ -1196,11 +1197,6 @@ void __init rtas_initialize(void)
                 ibm_suspend_me_token = rtas_token("ibm,suspend-me");
         }
  #endif
-       rtas_rmo_buf = memblock_phys_alloc_range(RTAS_RMOBUF_MAX, PAGE_SIZE,
-                                                0, rtas_region);
-       if (!rtas_rmo_buf)
-               panic("ERROR: RTAS: Failed to allocate %lx bytes below 
%pa\n",
-                     PAGE_SIZE, &rtas_region);
===


May be store in the FDT as "linux,rmo-base" next to "linux,rtas-base", 
for clarity, as sharing symbols between prom and main kernel is a bit 
tricky.

The benefit is that we do not do the same thing   (== find 64K in RMA) 
in 2 different ways and if the RMO allocated my way is broken - we'll 
know it much sooner as RTAS itself will break too.


> Would it interact well with kexec?


Good point. For this, the easiest will be setting rtas-size in the FDT 
to the allocated RTAS space (PAGE_SIZE*2 with the hunk above applied). 
Probably.


>> And probably do the same with per-cpu RTAS argument structures mentioned
>> in the cover letter?
> 
> I don't think so, since those need to be allocated with the pacas and
> limited to the maximum possible CPUs, which is discovered by the kernel
> much later.


The first cell of /proc/device-tree/cpus/ibm,drc-indexes is the number 
of cores, it is there when RTAS is instantiated, we know SMT after 
"ibm,client-architecture-support" (if I remember correctly).


> But maybe I misunderstand what you're suggesting.


Usually it is me missing the bigger picture :)


-- 
Alexey

^ permalink raw reply related

* [powerpc:merge] BUILD SUCCESS 41d8cb7ece7c81e4eb897ed7ec7d3c3d72fd0af4
From: kernel test robot @ 2021-01-17 23:05 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: linuxppc-dev

tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git merge
branch HEAD: 41d8cb7ece7c81e4eb897ed7ec7d3c3d72fd0af4  Automatic merge of 'master' into merge (2021-01-17 21:16)

elapsed time: 758m

configs tested: 112
configs skipped: 2

The following configs have been built successfully.
More configs may be tested in the coming days.

gcc tested configs:
arm                                 defconfig
arm64                            allyesconfig
arm64                               defconfig
arm                              allyesconfig
arm                              allmodconfig
arc                          axs101_defconfig
arc                      axs103_smp_defconfig
mips                     loongson1b_defconfig
c6x                         dsk6455_defconfig
mips                       rbtx49xx_defconfig
sh                           se7343_defconfig
powerpc                    adder875_defconfig
powerpc                 linkstation_defconfig
mips                       capcella_defconfig
powerpc                      katmai_defconfig
h8300                            allyesconfig
powerpc                       holly_defconfig
powerpc                     stx_gp3_defconfig
arm                      integrator_defconfig
mips                  maltasmvp_eva_defconfig
parisc                generic-32bit_defconfig
sh                           sh2007_defconfig
m68k                        m5272c3_defconfig
sh                   rts7751r2dplus_defconfig
powerpc                         ps3_defconfig
arm                        clps711x_defconfig
arc                        vdk_hs38_defconfig
arm                           sama5_defconfig
powerpc                  iss476-smp_defconfig
nds32                               defconfig
sh                         ap325rxa_defconfig
parisc                              defconfig
powerpc                  storcenter_defconfig
xtensa                       common_defconfig
powerpc                  mpc885_ads_defconfig
alpha                            alldefconfig
mips                           rs90_defconfig
arm                          pxa910_defconfig
sh                         ecovec24_defconfig
mips                malta_kvm_guest_defconfig
nios2                            alldefconfig
powerpc                    klondike_defconfig
mips                        nlm_xlr_defconfig
arm                        multi_v7_defconfig
ia64                             allmodconfig
ia64                                defconfig
ia64                             allyesconfig
m68k                             allmodconfig
m68k                                defconfig
m68k                             allyesconfig
nios2                               defconfig
arc                              allyesconfig
nds32                             allnoconfig
c6x                              allyesconfig
nios2                            allyesconfig
csky                                defconfig
alpha                               defconfig
alpha                            allyesconfig
xtensa                           allyesconfig
arc                                 defconfig
sh                               allmodconfig
s390                             allyesconfig
parisc                           allyesconfig
s390                                defconfig
i386                             allyesconfig
sparc                            allyesconfig
sparc                               defconfig
i386                               tinyconfig
i386                                defconfig
mips                             allyesconfig
mips                             allmodconfig
powerpc                          allyesconfig
powerpc                          allmodconfig
powerpc                           allnoconfig
x86_64               randconfig-a006-20210117
x86_64               randconfig-a004-20210117
x86_64               randconfig-a001-20210117
x86_64               randconfig-a005-20210117
x86_64               randconfig-a003-20210117
x86_64               randconfig-a002-20210117
i386                 randconfig-a005-20210117
i386                 randconfig-a006-20210117
i386                 randconfig-a004-20210117
i386                 randconfig-a002-20210117
i386                 randconfig-a003-20210117
i386                 randconfig-a001-20210117
i386                 randconfig-a012-20210117
i386                 randconfig-a011-20210117
i386                 randconfig-a016-20210117
i386                 randconfig-a013-20210117
i386                 randconfig-a015-20210117
i386                 randconfig-a014-20210117
riscv                    nommu_k210_defconfig
riscv                            allyesconfig
riscv                    nommu_virt_defconfig
riscv                             allnoconfig
riscv                               defconfig
riscv                          rv32_defconfig
riscv                            allmodconfig
x86_64                                   rhel
x86_64                           allyesconfig
x86_64                    rhel-7.6-kselftests
x86_64                              defconfig
x86_64                               rhel-8.3
x86_64                      rhel-8.3-kbuiltin
x86_64                                  kexec

clang tested configs:
x86_64               randconfig-a015-20210117
x86_64               randconfig-a016-20210117
x86_64               randconfig-a014-20210117
x86_64               randconfig-a012-20210117
x86_64               randconfig-a013-20210117
x86_64               randconfig-a011-20210117

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

^ permalink raw reply

* Re: [PATCH 1/2] dt-bindings: powerpc: Add a schema for the 'sleep' property
From: Johan Jonker @ 2021-01-17 16:10 UTC (permalink / raw)
  To: Rob Herring, devicetree
  Cc: Heiko Stuebner, linux-kernel, open list:ARM/Rockchip SoC...,
	Paul Mackerras, linuxppc-dev
In-Reply-To: <20201008142420.2083861-1-robh@kernel.org>

Hi Rob,

This patch generates notifications in the Rockchip ARM and arm64 tree.
Could you limit the scope to PowerPC only.

Kind regards,

Johan Jonker

make ARCH=arm dtbs_check
DT_SCHEMA_FILES=Documentation/devicetree/bindings/powerpc/sleep.yaml

make ARCH=arm64 dtbs_check
DT_SCHEMA_FILES=Documentation/devicetree/bindings/powerpc/sleep.yaml

Example:

/arch/arm64/boot/dts/rockchip/rk3399pro-rock-pi-n10.dt.yaml: pinctrl:
sleep: {'ddrio-pwroff': {'rockchip,pins': [[0, 1, 1, 168]]},
'ap-pwroff': {'rockchip,pins': [[1, 5, 1, 168]]}} is not of type 'array'
	From schema: /Documentation/devicetree/bindings/powerpc/sleep.yaml

On 10/8/20 4:24 PM, Rob Herring wrote:
> Document the PowerPC specific 'sleep' property as a schema. It is
> currently only documented in booting-without-of.rst which is getting
> removed.
> 
> Cc: Michael Ellerman <mpe@ellerman.id.au>
> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> Cc: Paul Mackerras <paulus@samba.org>
> Cc: linuxppc-dev@lists.ozlabs.org
> Signed-off-by: Rob Herring <robh@kernel.org>
> ---
>  .../devicetree/bindings/powerpc/sleep.yaml    | 47 +++++++++++++++++++
>  1 file changed, 47 insertions(+)
>  create mode 100644 Documentation/devicetree/bindings/powerpc/sleep.yaml
> 
> diff --git a/Documentation/devicetree/bindings/powerpc/sleep.yaml b/Documentation/devicetree/bindings/powerpc/sleep.yaml
> new file mode 100644
> index 000000000000..6494c7d08b93
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/powerpc/sleep.yaml
> @@ -0,0 +1,47 @@
> +# SPDX-License-Identifier: GPL-2.0-only
> +%YAML 1.2
> +---
> +$id: http://devicetree.org/schemas/powerpc/sleep.yaml#
> +$schema: http://devicetree.org/meta-schemas/core.yaml#
> +
> +title: PowerPC sleep property
> +
> +maintainers:
> +  - Rob Herring <robh@kernel.org>
> +
> +description: |
> +  Devices on SOCs often have mechanisms for placing devices into low-power
> +  states that are decoupled from the devices' own register blocks.  Sometimes,
> +  this information is more complicated than a cell-index property can
> +  reasonably describe.  Thus, each device controlled in such a manner
> +  may contain a "sleep" property which describes these connections.
> +
> +  The sleep property consists of one or more sleep resources, each of
> +  which consists of a phandle to a sleep controller, followed by a
> +  controller-specific sleep specifier of zero or more cells.
> +
> +  The semantics of what type of low power modes are possible are defined
> +  by the sleep controller.  Some examples of the types of low power modes
> +  that may be supported are:
> +
> +   - Dynamic: The device may be disabled or enabled at any time.
> +   - System Suspend: The device may request to be disabled or remain
> +     awake during system suspend, but will not be disabled until then.
> +   - Permanent: The device is disabled permanently (until the next hard
> +     reset).
> +
> +  Some devices may share a clock domain with each other, such that they should
> +  only be suspended when none of the devices are in use.  Where reasonable,
> +  such nodes should be placed on a virtual bus, where the bus has the sleep
> +  property.  If the clock domain is shared among devices that cannot be
> +  reasonably grouped in this manner, then create a virtual sleep controller
> +  (similar to an interrupt nexus, except that defining a standardized
> +  sleep-map should wait until its necessity is demonstrated).
> +
> +select: true
> +
> +properties:
> +  sleep:
> +    $ref: /schemas/types.yaml#definitions/phandle-array
> +
> +additionalProperties: true
> 


^ permalink raw reply

* Re: [GIT PULL] Please pull powerpc/linux.git powerpc-5.11-4 tag
From: pr-tracker-bot @ 2021-01-17 20:31 UTC (permalink / raw)
  To: Michael Ellerman
  Cc: arielmarcovitch, schwab, Linus Torvalds, linuxppc-dev,
	linux-kernel
In-Reply-To: <87o8hnn9of.fsf@mpe.ellerman.id.au>

The pull request you sent on Sun, 17 Jan 2021 21:24:00 +1100:

> https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git tags/powerpc-5.11-4

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/a1339d6355ac42e1bf4fcdfce8bfce61172f8891

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/prtracker.html

^ permalink raw reply

* Re: [PATCH v3 1/8] powerpc/uaccess: Add unsafe_copy_from_user
From: Christopher M. Riedl @ 2021-01-17 17:19 UTC (permalink / raw)
  To: Christophe Leroy, linuxppc-dev
In-Reply-To: <eb4f68f4-f606-007d-6552-8a46cb9912b4@csgroup.eu>

On Mon Jan 11, 2021 at 7:22 AM CST, Christophe Leroy wrote:
>
>
> Le 09/01/2021 à 04:25, Christopher M. Riedl a écrit :
> > Implement raw_copy_from_user_allowed() which assumes that userspace read
> > access is open. Use this new function to implement raw_copy_from_user().
> > Finally, wrap the new function to follow the usual "unsafe_" convention
> > of taking a label argument.
>
> I think there is no point implementing raw_copy_from_user_allowed(), see
> https://github.com/linuxppc/linux/commit/4b842e4e25b1 and
> https://patchwork.ozlabs.org/project/linuxppc-dev/patch/8c74fc9ce8131cabb10b3e95dc0e430f396ee83e.1610369143.git.christophe.leroy@csgroup.eu/
>
> You should simply do:
>
> #define unsafe_copy_from_user(d, s, l, e) \
> unsafe_op_wrap(__copy_tofrom_user((__force void __user *)d, s, l), e)
>

I gave this a try and the signal ops decreased by ~8K. Now, to be
honest, I am not sure what an "acceptable" benchmark number here
actually is - so maybe this is ok? Same loss with both radix and hash:

	|                                      | hash   | radix  |
	| ------------------------------------ | ------ | ------ |
	| linuxppc/next                        | 118693 | 133296 |
	| linuxppc/next w/o KUAP+KUEP          | 228911 | 228654 |
	| unsafe-signal64                      | 200480 | 234067 |
	| unsafe-signal64 (__copy_tofrom_user) | 192467 | 225119 |

To put this into perspective, prior to KUAP and uaccess flush, signal
performance in this benchmark was ~290K on hash.

>
> Christophe
>
> > 
> > The new raw_copy_from_user_allowed() calls non-inline __copy_tofrom_user()
> > internally. This is still safe to call inside user access blocks formed
> > with user_*_access_begin()/user_*_access_end() since asm functions are not
> > instrumented for tracing.
> > 
> > Signed-off-by: Christopher M. Riedl <cmr@codefail.de>
> > ---
> >   arch/powerpc/include/asm/uaccess.h | 28 +++++++++++++++++++---------
> >   1 file changed, 19 insertions(+), 9 deletions(-)
> > 
> > diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
> > index 501c9a79038c..698f3a6d6ae5 100644
> > --- a/arch/powerpc/include/asm/uaccess.h
> > +++ b/arch/powerpc/include/asm/uaccess.h
> > @@ -403,38 +403,45 @@ raw_copy_in_user(void __user *to, const void __user *from, unsigned long n)
> >   }
> >   #endif /* __powerpc64__ */
> >   
> > -static inline unsigned long raw_copy_from_user(void *to,
> > -		const void __user *from, unsigned long n)
> > +static inline unsigned long
> > +raw_copy_from_user_allowed(void *to, const void __user *from, unsigned long n)
> >   {
> > -	unsigned long ret;
> >   	if (__builtin_constant_p(n) && (n <= 8)) {
> > -		ret = 1;
> > +		unsigned long ret = 1;
> >   
> >   		switch (n) {
> >   		case 1:
> >   			barrier_nospec();
> > -			__get_user_size(*(u8 *)to, from, 1, ret);
> > +			__get_user_size_allowed(*(u8 *)to, from, 1, ret);
> >   			break;
> >   		case 2:
> >   			barrier_nospec();
> > -			__get_user_size(*(u16 *)to, from, 2, ret);
> > +			__get_user_size_allowed(*(u16 *)to, from, 2, ret);
> >   			break;
> >   		case 4:
> >   			barrier_nospec();
> > -			__get_user_size(*(u32 *)to, from, 4, ret);
> > +			__get_user_size_allowed(*(u32 *)to, from, 4, ret);
> >   			break;
> >   		case 8:
> >   			barrier_nospec();
> > -			__get_user_size(*(u64 *)to, from, 8, ret);
> > +			__get_user_size_allowed(*(u64 *)to, from, 8, ret);
> >   			break;
> >   		}
> >   		if (ret == 0)
> >   			return 0;
> >   	}
> >   
> > +	return __copy_tofrom_user((__force void __user *)to, from, n);
> > +}
> > +
> > +static inline unsigned long
> > +raw_copy_from_user(void *to, const void __user *from, unsigned long n)
> > +{
> > +	unsigned long ret;
> > +
> >   	barrier_nospec();
> >   	allow_read_from_user(from, n);
> > -	ret = __copy_tofrom_user((__force void __user *)to, from, n);
> > +	ret = raw_copy_from_user_allowed(to, from, n);
> >   	prevent_read_from_user(from, n);
> >   	return ret;
> >   }
> > @@ -542,6 +549,9 @@ user_write_access_begin(const void __user *ptr, size_t len)
> >   #define unsafe_get_user(x, p, e) unsafe_op_wrap(__get_user_allowed(x, p), e)
> >   #define unsafe_put_user(x, p, e) __put_user_goto(x, p, e)
> >   
> > +#define unsafe_copy_from_user(d, s, l, e) \
> > +	unsafe_op_wrap(raw_copy_from_user_allowed(d, s, l), e)
> > +
> >   #define unsafe_copy_to_user(d, s, l, e) \
> >   do {									\
> >   	u8 __user *_dst = (u8 __user *)(d);				\
> > 


^ permalink raw reply

* Re: [PATCH v3 4/8] powerpc/signal64: Remove TM ifdefery in middle of if/else block
From: Christopher M. Riedl @ 2021-01-17 17:16 UTC (permalink / raw)
  To: Christophe Leroy, linuxppc-dev
In-Reply-To: <80d49969-880f-5161-c829-cf319f94351d@csgroup.eu>

On Mon Jan 11, 2021 at 7:29 AM CST, Christophe Leroy wrote:
>
>
> Le 09/01/2021 à 04:25, Christopher M. Riedl a écrit :
> > Rework the messy ifdef breaking up the if-else for TM similar to
> > commit f1cf4f93de2f ("powerpc/signal32: Remove ifdefery in middle of if/else").
> > 
> > Unlike that commit for ppc32, the ifdef can't be removed entirely since
> > uc_transact in sigframe depends on CONFIG_PPC_TRANSACTIONAL_MEM.
> > 
> > Signed-off-by: Christopher M. Riedl <cmr@codefail.de>
> > ---
> >   arch/powerpc/kernel/signal_64.c | 17 +++++++----------
> >   1 file changed, 7 insertions(+), 10 deletions(-)
> > 
> > diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
> > index b211a8ea4f6e..dd3787f67a78 100644
> > --- a/arch/powerpc/kernel/signal_64.c
> > +++ b/arch/powerpc/kernel/signal_64.c
> > @@ -710,9 +710,7 @@ SYSCALL_DEFINE0(rt_sigreturn)
> >   	struct pt_regs *regs = current_pt_regs();
> >   	struct ucontext __user *uc = (struct ucontext __user *)regs->gpr[1];
> >   	sigset_t set;
> > -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
> >   	unsigned long msr;
> > -#endif
> >   
> >   	/* Always make any pending restarted system calls return -EINTR */
> >   	current->restart_block.fn = do_no_restart_syscall;
> > @@ -762,10 +760,12 @@ SYSCALL_DEFINE0(rt_sigreturn)
> >   	 * restore_tm_sigcontexts.
> >   	 */
> >   	regs->msr &= ~MSR_TS_MASK;
> > +#endif
> >   
> >   	if (__get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR]))
> >   		goto badframe;
>
> This means you are doing that __get_user() even when msr is not used.
> That should be avoided.
>

Thanks, I moved it into the #ifdef block right above it instead for the
next spin.

> >   	if (MSR_TM_ACTIVE(msr)) {
> > +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
> >   		/* We recheckpoint on return. */
> >   		struct ucontext __user *uc_transact;
> >   
> > @@ -778,9 +778,8 @@ SYSCALL_DEFINE0(rt_sigreturn)
> >   		if (restore_tm_sigcontexts(current, &uc->uc_mcontext,
> >   					   &uc_transact->uc_mcontext))
> >   			goto badframe;
> > -	} else
> >   #endif
> > -	{
> > +	} else {
> >   		/*
> >   		 * Fall through, for non-TM restore
> >   		 *
> > @@ -818,10 +817,8 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
> >   	unsigned long newsp = 0;
> >   	long err = 0;
> >   	struct pt_regs *regs = tsk->thread.regs;
> > -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
> >   	/* Save the thread's msr before get_tm_stackpointer() changes it */
> > -	unsigned long msr = regs->msr;
> > -#endif
> > +	unsigned long msr __maybe_unused = regs->msr;
>
> I don't thing __maybe_unused() is the right solution.
>
> I think MSR_TM_ACTIVE() should be fixed instead, either by changing it
> into a static inline
> function, or doing something similar to
> https://github.com/linuxppc/linux/commit/05a4ab823983d9136a460b7b5e0d49ee709a6f86
>

Agreed, I'll change MSR_TM_ACTIVE() to reference its argument in the
macro. This keeps it consistent with all the other MSR_TM_* macros in
reg.h. Probably better than changing it to static inline since that
would mean changing all the macros too which seems unecessary.

> >   
> >   	frame = get_sigframe(ksig, tsk, sizeof(*frame), 0);
> >   	if (!access_ok(frame, sizeof(*frame)))
> > @@ -836,8 +833,9 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
> >   	/* Create the ucontext.  */
> >   	err |= __put_user(0, &frame->uc.uc_flags);
> >   	err |= __save_altstack(&frame->uc.uc_stack, regs->gpr[1]);
> > -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
> > +
> >   	if (MSR_TM_ACTIVE(msr)) {
> > +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
> >   		/* The ucontext_t passed to userland points to the second
> >   		 * ucontext_t (for transactional state) with its uc_link ptr.
> >   		 */
> > @@ -847,9 +845,8 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
> >   					    tsk, ksig->sig, NULL,
> >   					    (unsigned long)ksig->ka.sa.sa_handler,
> >   					    msr);
> > -	} else
> >   #endif
> > -	{
> > +	} else {
> >   		err |= __put_user(0, &frame->uc.uc_link);
> >   		prepare_setup_sigcontext(tsk, 1);
> >   		err |= setup_sigcontext(&frame->uc.uc_mcontext, tsk, ksig->sig,
> > 
>
> Christophe


^ permalink raw reply

* Re: [PATCH v15 09/10] arm64: Call kmalloc() to allocate DTB buffer
From: Mimi Zohar @ 2021-01-17 13:23 UTC (permalink / raw)
  To: Lakshmi Ramasubramanian, bauerman, robh, takahiro.akashi, gregkh,
	will, catalin.marinas, mpe, Arnd Bergmann
  Cc: mark.rutland, bhsharma, tao.li, paulus, vincenzo.frascino,
	frowand.list, sashal, masahiroy, jmorris, linux-arm-kernel, serge,
	devicetree, pasha.tatashin, prsriva, hsinyi, allison,
	christophe.leroy, mbrugger, balajib, dmitry.kasatkin,
	linux-kernel, james.morse, linux-integrity, linuxppc-dev
In-Reply-To: <20210115173017.30617-10-nramas@linux.microsoft.com>

Hi Ard,

On Fri, 2021-01-15 at 09:30 -0800, Lakshmi Ramasubramanian wrote:
> create_dtb() function allocates kernel virtual memory for
> the device tree blob (DTB).  This is not consistent with other
> architectures, such as powerpc, which calls kmalloc() for allocating
> memory for the DTB.
> 
> Call kmalloc() to allocate memory for the DTB, and kfree() to free
> the allocated memory.

The vmalloc() function description says, "vmalloc - allocate virtually
contiguous memory".  I'd appreciate your reviewing this patch, in
particular, which replaces vmalloc() with kmalloc().

thanks,

Mimi

> 
> Co-developed-by: Prakhar Srivastava <prsriva@linux.microsoft.com>
> Signed-off-by: Prakhar Srivastava <prsriva@linux.microsoft.com>
> Signed-off-by: Lakshmi Ramasubramanian <nramas@linux.microsoft.com>
> ---
>  arch/arm64/kernel/machine_kexec_file.c | 12 +++++++-----
>  1 file changed, 7 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c
> index 7de9c47dee7c..51c40143d6fa 100644
> --- a/arch/arm64/kernel/machine_kexec_file.c
> +++ b/arch/arm64/kernel/machine_kexec_file.c
> @@ -29,7 +29,7 @@ const struct kexec_file_ops * const kexec_file_loaders[] = {
>  
>  int arch_kimage_file_post_load_cleanup(struct kimage *image)
>  {
> -	vfree(image->arch.dtb);
> +	kfree(image->arch.dtb);
>  	image->arch.dtb = NULL;
>  
>  	vfree(image->arch.elf_headers);
> @@ -59,19 +59,21 @@ static int create_dtb(struct kimage *image,
>  			+ cmdline_len + DTB_EXTRA_SPACE;
>  
>  	for (;;) {
> -		buf = vmalloc(buf_size);
> +		buf = kmalloc(buf_size, GFP_KERNEL);
>  		if (!buf)
>  			return -ENOMEM;
>  
>  		/* duplicate a device tree blob */
>  		ret = fdt_open_into(initial_boot_params, buf, buf_size);
> -		if (ret)
> +		if (ret) {
> +			kfree(buf);
>  			return -EINVAL;
> +		}
>  
>  		ret = of_kexec_setup_new_fdt(image, buf, initrd_load_addr,
>  					     initrd_len, cmdline);
>  		if (ret) {
> -			vfree(buf);
> +			kfree(buf);
>  			if (ret == -ENOMEM) {
>  				/* unlikely, but just in case */
>  				buf_size += DTB_EXTRA_SPACE;
> @@ -217,6 +219,6 @@ int load_other_segments(struct kimage *image,
>  	return 0;
>  
>  out_err:
> -	vfree(dtb);
> +	kfree(dtb);
>  	return ret;
>  }



^ permalink raw reply

* [GIT PULL] Please pull powerpc/linux.git powerpc-5.11-4 tag
From: Michael Ellerman @ 2021-01-17 10:24 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: arielmarcovitch, schwab, linuxppc-dev, linux-kernel

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA256

Hi Linus,

Please pull some more powerpc fixes for 5.11:

The following changes since commit 3ce47d95b7346dcafd9bed3556a8d072cb2b8571:

  powerpc: Handle .text.{hot,unlikely}.* in linker script (2021-01-06 21:59:04 +1100)

are available in the git repository at:

  https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git tags/powerpc-5.11-4

for you to fetch changes up to 41131a5e54ae7ba5a2bb8d7b30d1818b3f5b13d2:

  powerpc/vdso: Fix clock_gettime_fallback for vdso32 (2021-01-14 15:56:44 +1100)

- ------------------------------------------------------------------
powerpc fixes for 5.11 #4

One fix for a lack of alignment in our linker script, that can lead to crashes
depending on configuration etc.

One fix for the 32-bit VDSO after the C VDSO conversion.

Thanks to:
  Andreas Schwab, Ariel Marcovitch, Christophe Leroy.

- ------------------------------------------------------------------
Andreas Schwab (1):
      powerpc/vdso: Fix clock_gettime_fallback for vdso32

Ariel Marcovitch (1):
      powerpc: Fix alignment bug within the init sections


 arch/powerpc/include/asm/vdso/gettimeofday.h | 16 +++++++++++++++-
 arch/powerpc/kernel/vmlinux.lds.S            |  8 ++++++++
 2 files changed, 23 insertions(+), 1 deletion(-)
-----BEGIN PGP SIGNATURE-----

iQIzBAEBCAAdFiEEJFGtCPCthwEv2Y/bUevqPMjhpYAFAmAED74ACgkQUevqPMjh
pYAyFRAAr2/3tLnaJgu8LIkr64AUuNAXsAhTaVb3MEemJtR7FSEK8pJjw/AtQmHh
aTB9Y4qp2kOSQq6C6j4cMitGUYOiDEayCHPP1SserIRVJXmC453jqKb/iHrpaVWo
zBdrqrMzgFhlfT6/IDVw/+e5rjycwp9QicQZ0DRX15ZXlqlMSr1b6VH3opku4DyV
a9OP/LlR6PAgZQn+qTfeB/z7HnwOdy9R5i/UnrALqrzKGOneQXd+jv7THbudMs/D
aVTapfuoon1SPSLWy7xSVKIjFxwV4KUMi0R5kjWlWkXFqdLA2r8XRE3sKcLW1IN1
0Yibv1DRddsnluqe5lclQgzWPfRLdjPhgoIwIq3Ze50aSMuLXU5TatPzXVVKFDWT
emVMyQ/SOzWdI7mwsbN1GK85x7cvWW7wMLtEnvJ82vQJpJtAJEyWEZ9UozfpBrPq
/H2rrisWMFyZhl3eDdcJCwV7YeOxdCnmqmJnnkTMypRRXyWlfJDHs0CP7fWiKu+j
XMsPhxM1hyfrueOW7iPBEt/ZkB17Eq1V0Z2OQU+chXqJmmh9gwSBb/F8iJ48Iphi
4L2ynxJTAHwFY27xE1CQIF0VKycIc7djkDhYoJaL8PaVXQkUo/NWy4zOVNzJpeen
HbeLjHKGeeGetWxOniBCgD0PxoOQH8ThQauz+NwzeACGgyPzkM0=
=fJl5
-----END PGP SIGNATURE-----

^ permalink raw reply

* ibmvnic: Race condition in remove callback
From: Uwe Kleine-König @ 2021-01-17 10:12 UTC (permalink / raw)
  To: Dany Madden, Lijun Pan, Sukadev Bhattiprolu, Michael Ellerman,
	Juliet Kim
  Cc: Greg Kroah-Hartman, Paul Mackerras, kernel, netdev,
	Jakub Kicinski, linuxppc-dev, David S. Miller

[-- Attachment #1: Type: text/plain, Size: 1952 bytes --]

Hello,

while working on some cleanup I stumbled over a problem in the ibmvnic's
remove callback. Since commit

        7d7195a026ba ("ibmvnic: Do not process device remove during device reset")

there is the following code in the remove callback:

        static int ibmvnic_remove(struct vio_dev *dev)
        {
                ...
                spin_lock_irqsave(&adapter->state_lock, flags);
                if (test_bit(0, &adapter->resetting)) {
                        spin_unlock_irqrestore(&adapter->state_lock, flags);
                        return -EBUSY;
                }

                adapter->state = VNIC_REMOVING;
                spin_unlock_irqrestore(&adapter->state_lock, flags);

                flush_work(&adapter->ibmvnic_reset);
                flush_delayed_work(&adapter->ibmvnic_delayed_reset);
                ...
        }

Unfortunately returning -EBUSY doesn't work as intended. That's because
the return value of this function is ignored[1] and the device is
considered unbound by the device core (shortly) after ibmvnic_remove()
returns.

While looking into fixing that I noticed a worse problem:

If ibmvnic_reset() (e.g. called by the tx_timeout callback) calls
schedule_work(&adapter->ibmvnic_reset); just after the work queue is
flushed above the problem that 7d7195a026ba intends to fix will trigger
resulting in a use-after-free.

Also ibmvnic_reset() checks for adapter->state without holding the lock
which might be racy, too.

Best regards
Uwe

[1] vio_bus_remove (in arch/powerpc/platforms/pseries/vio.c) records the
    return value and passes it on. But the driver core doesn't care for
    the return value (see __device_release_driver() in drivers/base/dd.c
    calling dev->bus->remove()).

-- 
Pengutronix e.K.                           | Uwe Kleine-König            |
Industrial Linux Solutions                 | https://www.pengutronix.de/ |

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply

* Re: [RESEND v2 0/7] Rid W=1 warnings in Ethernet
From: patchwork-bot+netdevbpf @ 2021-01-17  2:20 UTC (permalink / raw)
  To: Lee Jones
  Cc: paul, kurt, ast, gustavoars, pc, paulus, sukadev, wei.liu, daniel,
	john.fastabend, santi_leon, kuba, grygorii.strashko, tlfalcon,
	hawk, Jens.Osterkamp, rusty, dnevil, ljp, xen-devel,
	ivan.khoronzhuk, nico, geoff, netdev, linux-kernel, erik, jallen,
	utz.bacher, drt, bpf, linuxppc-dev, davem, rmk
In-Reply-To: <20210115200905.3470941-1-lee.jones@linaro.org>

Hello:

This series was applied to netdev/net-next.git (refs/heads/master):

On Fri, 15 Jan 2021 20:08:58 +0000 you wrote:
> Resending the stragglers again.
> This set is part of a larger effort attempting to clean-up W=1
> kernel builds, which are currently overwhelmingly riddled with
> niggly little warnings.
> 
> No changes since v2, just a rebase onto net-next.
> 
> [...]

Here is the summary with links:
  - [1/7] net: ethernet: smsc: smc91x: Fix function name in kernel-doc header
    https://git.kernel.org/netdev/net-next/c/7d2a92445e3f
  - [2/7] net: xen-netback: xenbus: Demote nonconformant kernel-doc headers
    https://git.kernel.org/netdev/net-next/c/090c7ae8e0d0
  - [3/7] net: ethernet: ti: am65-cpsw-qos: Demote non-conformant function header
    https://git.kernel.org/netdev/net-next/c/935888cda820
  - [4/7] net: ethernet: ti: am65-cpts: Document am65_cpts_rx_enable()'s 'en' parameter
    https://git.kernel.org/netdev/net-next/c/e49e4647f3e2
  - [5/7] net: ethernet: ibm: ibmvnic: Fix some kernel-doc misdemeanours
    https://git.kernel.org/netdev/net-next/c/807086021bf5
  - [6/7] net: ethernet: toshiba: ps3_gelic_net: Fix some kernel-doc misdemeanours
    https://git.kernel.org/netdev/net-next/c/b51036321461
  - [7/7] net: ethernet: toshiba: spider_net: Document a whole bunch of function parameters
    https://git.kernel.org/netdev/net-next/c/e242d5989965

You are awesome, thank you!
--
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html



^ permalink raw reply

* Re: [PATCH] Adds a new ioctl32 syscall for backwards compatibility layers
From: Rich Felker @ 2021-01-15 22:23 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: linux-xtensa@linux-xtensa.org, Jan Kara, Miklos Szeredi,
	Dominik Brodowski, linux-kernel@vger.kernel.org,
	James E.J. Bottomley, Max Filippov, Paul Mackerras,
	H. Peter Anvin, sparclinux@vger.kernel.org,
	linux-ia64@vger.kernel.org, Christian Brauner, Vincenzo Frascino,
	Will Deacon, linux-arch@vger.kernel.org, Stephen Rothwell,
	Arnd Bergmann, Yoshinori Sato, linux-sh@vger.kernel.org,
	Helge Deller, x86@kernel.org, YueHaibing, Russell King,
	Christian Borntraeger, Ingo Molnar, Geert Uytterhoeven,
	linux-arm-kernel@lists.infradead.org, Catalin Marinas,
	Matt Turner, linux-mips@vger.kernel.org, Fenghua Yu,
	Arnaldo Carvalho de Melo, Vasily Gorbik, sonicadvance1@gmail.com,
	Brian Gerst, Heiko Carstens, David Rientjes, Willem de Bruijn,
	Nicholas Piggin, Suren Baghdasaryan, Aleksa Sarai,
	Thomas Bogendoerfer, Ivan Kokshaysky, Alexander Viro,
	Andy Lutomirski, Thomas Gleixner, Xiaoming Ni, Vlastimil Babka,
	Richard Henderson, Chris Zankel, Michal Simek, Tony Luck,
	linux-parisc@vger.kernel.org, linux-m68k@lists.linux-m68k.org,
	linux-s390@vger.kernel.org, Oleg Nesterov, Minchan Kim,
	David Laight, Eric W. Biederman, linux-alpha@vger.kernel.org,
	linux-fsdevel@vger.kernel.org, Borislav Petkov, Andrew Morton,
	linux-api@vger.kernel.org, linuxppc-dev@lists.ozlabs.org,
	David S. Miller
In-Reply-To: <CAK8P3a1gqt-gBCPTdNeY+8SaG8eUGN4zkCrNKSjA=aEL-TkaUQ@mail.gmail.com>

On Fri, Jan 15, 2021 at 11:17:09PM +0100, Arnd Bergmann wrote:
> On Fri, Jan 15, 2021 at 9:01 PM David Laight <David.Laight@aculab.com> wrote:
> >
> > From: sonicadvance1@gmail.com
> > > Sent: 15 January 2021 07:03
> > > Problem presented:
> > > A backwards compatibility layer that allows running x86-64 and x86
> > > processes inside of an AArch64 process.
> > >   - CPU is emulated
> > >   - Syscall interface is mostly passthrough
> > >   - Some syscalls require patching or emulation depending on behaviour
> > >   - Not viable from the emulator design to use an AArch32 host process
> > >
> >
> > You are going to need to add all the x86 compatibility code into
> > your arm64 kernel.
> > This is likely to be different from the 32bit arm compatibility
> > because 64bit items are only aligned on 32bit boundaries.
> > The x86 x32 compatibility will be more like the 32bit arm 'compat'
> > code - I'm pretty sure arm32 64bit aligned 64bit data.
> 
> All other architectures that have both 32-bit and 64-bit variants
> use the same alignment for all types, except for x86.
> 
> There are additional differences though, especially if one
> were to try to generalize the interface to all architectures.
> A subset of the issues includes
> 
> - x32 has 64-bit types in places of some types that are
>   32 bit everywhere else (time_t, ino_t, off_t, clock_t, ...)
> 
> - m68k aligns struct members to at most 16 bits
> 
> - uid_t/gid_t/ino_t/dev_t/... are
> 
> > You'll then need to remember how the process entered the kernel
> > to work out which compatibility code to invoke.
> > This is what x86 does.
> > It allows a single process to do all three types of system call.
> >
> > Trying to 'patch up' structures outside the kernel, or in the
> > syscall interface code will always cause grief somewhere.
> > The only sane place is in the code that uses the structures.
> > Which, for ioctls, means inside the driver that parses them.
> 
> He's already doing the system call emulation for all the system
> calls other than ioctl in user space though. In my experience,
> there are actually fairly few ioctl commands that are different
> between architectures -- most of them have no misaligned
> or architecture-defined struct members at all.
> 
> Once you have conversion functions to deal with the 32/64-bit
> interface differences and architecture specifics of sockets,
> sysvipc, signals, stat, and input_event, handling the
> x86-32 specific ioctl commands is comparably easy.

Indeed, all of this should just be done in userspace. Note (as you of
course know, but others on CC probably don't) that we did this in musl
libc for the sake of being able to run a time64 userspace on a
pre-time64 kernel, with translation from the new time64 ioctl
structures to the versions needed by the old ioctls and back using a
fairly simple table:

https://git.musl-libc.org/cgit/musl/tree/src/misc/ioctl.c?id=v1.2.2

I imagine there's a fair bit more to be done for 32-/64-bit mismatch
in size/long/pointer types and different alignments, but the problem
is almost certainly tractable, and much easier than what they already
have to be doing for syscalls.

Rich

^ permalink raw reply

* RE: [PATCH] Adds a new ioctl32 syscall for backwards compatibility layers
From: David Laight @ 2021-01-15 22:39 UTC (permalink / raw)
  To: 'Arnd Bergmann'
  Cc: linux-xtensa@linux-xtensa.org, Rich Felker, Jan Kara,
	Miklos Szeredi, Dominik Brodowski, linux-kernel@vger.kernel.org,
	James E.J. Bottomley, Max Filippov, Paul Mackerras,
	H. Peter Anvin, sparclinux@vger.kernel.org,
	linux-ia64@vger.kernel.org, Christian Brauner, Vincenzo Frascino,
	Will Deacon, linux-arch@vger.kernel.org, Stephen Rothwell,
	Arnd Bergmann, Yoshinori Sato, linux-sh@vger.kernel.org,
	Helge Deller, x86@kernel.org, YueHaibing, Russell King,
	Christian Borntraeger, Ingo Molnar, Geert Uytterhoeven,
	linux-arm-kernel@lists.infradead.org, Catalin Marinas,
	Matt Turner, linux-mips@vger.kernel.org, Fenghua Yu,
	Arnaldo Carvalho de Melo, Vasily Gorbik, sonicadvance1@gmail.com,
	Brian Gerst, Heiko Carstens, David Rientjes, Willem de Bruijn,
	Nicholas Piggin, Suren Baghdasaryan, Aleksa Sarai,
	Thomas Bogendoerfer, Ivan Kokshaysky, Alexander Viro,
	Andy Lutomirski, Thomas Gleixner, Xiaoming Ni, Vlastimil Babka,
	Richard Henderson, Chris Zankel, Michal Simek, Tony Luck,
	linux-parisc@vger.kernel.org, linux-m68k@lists.linux-m68k.org,
	linux-s390@vger.kernel.org, Oleg Nesterov, Minchan Kim,
	Eric W. Biederman, linux-alpha@vger.kernel.org,
	linux-fsdevel@vger.kernel.org, Borislav Petkov, Andrew Morton,
	linux-api@vger.kernel.org, linuxppc-dev@lists.ozlabs.org,
	David S. Miller
In-Reply-To: <CAK8P3a1gqt-gBCPTdNeY+8SaG8eUGN4zkCrNKSjA=aEL-TkaUQ@mail.gmail.com>

...
> He's already doing the system call emulation for all the system
> calls other than ioctl in user space though. In my experience,
> there are actually fairly few ioctl commands that are different
> between architectures -- most of them have no misaligned
> or architecture-defined struct members at all.

Aren't there also some intractable issues with socket options?
IIRC the kernel code that tried to change them to 64bit was
horribly broken in some obscure cases.

Pushing the conversion down the stack not only identified the
issues, it also made them easier to fix.

If you change the kernel so a 64bit process can execute 32bit
system calls then a lot of the problems do go away.
This is probably easiest done by setting a high bit on the
system call number - as x86_64 does for x32 calls.

You still have to solve the different alignment of 64bit data
on i386.

Of course the system call numbers are different - but that is
just a lookup.

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)

^ permalink raw reply

* Re: [PATCH] Adds a new ioctl32 syscall for backwards compatibility layers
From: Arnd Bergmann @ 2021-01-15 22:17 UTC (permalink / raw)
  To: David Laight
  Cc: linux-xtensa@linux-xtensa.org, Rich Felker, Jan Kara,
	Miklos Szeredi, Dominik Brodowski, linux-kernel@vger.kernel.org,
	James E.J. Bottomley, Max Filippov, Paul Mackerras,
	H. Peter Anvin, sparclinux@vger.kernel.org,
	linux-ia64@vger.kernel.org, Christian Brauner, Vincenzo Frascino,
	Will Deacon, linux-arch@vger.kernel.org, Stephen Rothwell,
	Arnd Bergmann, Yoshinori Sato, linux-sh@vger.kernel.org,
	Helge Deller, x86@kernel.org, YueHaibing, Russell King,
	Christian Borntraeger, Ingo Molnar, Geert Uytterhoeven,
	linux-arm-kernel@lists.infradead.org, Catalin Marinas,
	Matt Turner, linux-mips@vger.kernel.org, Fenghua Yu,
	Arnaldo Carvalho de Melo, Vasily Gorbik, sonicadvance1@gmail.com,
	Brian Gerst, Heiko Carstens, David Rientjes, Willem de Bruijn,
	Nicholas Piggin, Suren Baghdasaryan, Aleksa Sarai,
	Thomas Bogendoerfer, Ivan Kokshaysky, Alexander Viro,
	Andy Lutomirski, Thomas Gleixner, Xiaoming Ni, Vlastimil Babka,
	Richard Henderson, Chris Zankel, Michal Simek, Tony Luck,
	linux-parisc@vger.kernel.org, linux-m68k@lists.linux-m68k.org,
	linux-s390@vger.kernel.org, Oleg Nesterov, Minchan Kim,
	Eric W. Biederman, linux-alpha@vger.kernel.org,
	linux-fsdevel@vger.kernel.org, Borislav Petkov, Andrew Morton,
	linux-api@vger.kernel.org, linuxppc-dev@lists.ozlabs.org,
	David S. Miller
In-Reply-To: <b15672b1caec4cf980f2753d06b03596@AcuMS.aculab.com>

On Fri, Jan 15, 2021 at 9:01 PM David Laight <David.Laight@aculab.com> wrote:
>
> From: sonicadvance1@gmail.com
> > Sent: 15 January 2021 07:03
> > Problem presented:
> > A backwards compatibility layer that allows running x86-64 and x86
> > processes inside of an AArch64 process.
> >   - CPU is emulated
> >   - Syscall interface is mostly passthrough
> >   - Some syscalls require patching or emulation depending on behaviour
> >   - Not viable from the emulator design to use an AArch32 host process
> >
>
> You are going to need to add all the x86 compatibility code into
> your arm64 kernel.
> This is likely to be different from the 32bit arm compatibility
> because 64bit items are only aligned on 32bit boundaries.
> The x86 x32 compatibility will be more like the 32bit arm 'compat'
> code - I'm pretty sure arm32 64bit aligned 64bit data.

All other architectures that have both 32-bit and 64-bit variants
use the same alignment for all types, except for x86.

There are additional differences though, especially if one
were to try to generalize the interface to all architectures.
A subset of the issues includes

- x32 has 64-bit types in places of some types that are
  32 bit everywhere else (time_t, ino_t, off_t, clock_t, ...)

- m68k aligns struct members to at most 16 bits

- uid_t/gid_t/ino_t/dev_t/... are

> You'll then need to remember how the process entered the kernel
> to work out which compatibility code to invoke.
> This is what x86 does.
> It allows a single process to do all three types of system call.
>
> Trying to 'patch up' structures outside the kernel, or in the
> syscall interface code will always cause grief somewhere.
> The only sane place is in the code that uses the structures.
> Which, for ioctls, means inside the driver that parses them.

He's already doing the system call emulation for all the system
calls other than ioctl in user space though. In my experience,
there are actually fairly few ioctl commands that are different
between architectures -- most of them have no misaligned
or architecture-defined struct members at all.

Once you have conversion functions to deal with the 32/64-bit
interface differences and architecture specifics of sockets,
sysvipc, signals, stat, and input_event, handling the
x86-32 specific ioctl commands is comparably easy.

         Arnd

^ permalink raw reply

* RE: [PATCH] Adds a new ioctl32 syscall for backwards compatibility layers
From: David Laight @ 2021-01-15 20:01 UTC (permalink / raw)
  To: 'sonicadvance1@gmail.com'
  Cc: linux-xtensa@linux-xtensa.org, Rich Felker, Jan Kara,
	Miklos Szeredi, Dominik Brodowski, linux-kernel@vger.kernel.org,
	James E.J. Bottomley, Max Filippov, Paul Mackerras,
	H. Peter Anvin, sparclinux@vger.kernel.org,
	linux-ia64@vger.kernel.org, Christian Brauner, Vincenzo Frascino,
	Will Deacon, linux-arch@vger.kernel.org, Stephen Rothwell,
	Arnd Bergmann, Yoshinori Sato, linux-sh@vger.kernel.org,
	Helge Deller, x86@kernel.org, YueHaibing, Russell King,
	Christian Borntraeger, Ingo Molnar, Geert Uytterhoeven,
	linux-arm-kernel@lists.infradead.org, Catalin Marinas,
	Matt Turner, linux-mips@vger.kernel.org, Fenghua Yu,
	Arnaldo Carvalho de Melo, Vasily Gorbik,
	linux-s390@vger.kernel.org, Brian Gerst, Heiko Carstens,
	David Rientjes, Willem de Bruijn, Nicholas Piggin,
	Suren Baghdasaryan, Aleksa Sarai, Thomas Bogendoerfer,
	Ivan Kokshaysky, Alexander Viro, Andy Lutomirski, Thomas Gleixner,
	Xiaoming Ni, Vlastimil Babka, Richard Henderson, Chris Zankel,
	Michal Simek, Tony Luck, linux-parisc@vger.kernel.org,
	linux-m68k@lists.linux-m68k.org, linux-api@vger.kernel.org,
	Oleg Nesterov, Minchan Kim, Eric W. Biederman,
	linux-alpha@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	Borislav Petkov, Andrew Morton, linuxppc-dev@lists.ozlabs.org,
	David S. Miller
In-Reply-To: <20210115070326.294332-1-Sonicadvance1@gmail.com>

From: sonicadvance1@gmail.com
> Sent: 15 January 2021 07:03
> Problem presented:
> A backwards compatibility layer that allows running x86-64 and x86
> processes inside of an AArch64 process.
>   - CPU is emulated
>   - Syscall interface is mostly passthrough
>   - Some syscalls require patching or emulation depending on behaviour
>   - Not viable from the emulator design to use an AArch32 host process
> 

You are going to need to add all the x86 compatibility code into
your arm64 kernel.
This is likely to be different from the 32bit arm compatibility
because 64bit items are only aligned on 32bit boundaries.
The x86 x32 compatibility will be more like the 32bit arm 'compat'
code - I'm pretty sure arm32 64bit aligned 64bit data.

You'll then need to remember how the process entered the kernel
to work out which compatibility code to invoke.
This is what x86 does.
It allows a single process to do all three types of system call.

Trying to 'patch up' structures outside the kernel, or in the
syscall interface code will always cause grief somewhere.
The only sane place is in the code that uses the structures.
Which, for ioctls, means inside the driver that parses them.

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)


^ permalink raw reply

* Re: CONFIG_PPC_VAS depends on 64k pages...?
From: Carlos Eduardo de Paula @ 2021-01-15 13:54 UTC (permalink / raw)
  To: Will Springer
  Cc: Tulio Magno Quites Machado Filho, daniel, haren, Bulent Abali,
	Sukadev Bhattiprolu, linuxppc-dev, Raphael M Zinsly
In-Reply-To: <7176054.EvYhyI6sBW@sheen>

[-- Attachment #1: Type: text/plain, Size: 6228 bytes --]

Hi all, any news on this matter?

Can a patch be submitted for evaluation?

Thanks,
Carlos

On Wed, Dec 2, 2020 at 4:19 AM Will Springer <skirmisher@protonmail.com>
wrote:

> On Tuesday, December 1, 2020 5:16:51 AM PST Bulent Abali wrote:
> > I don't know anything about VAS page size requirements in the kernel.  I
> > checked the user compression library and saw that we do a sysconf to
> > get the page size; so the library should be immune to page size by
> > design. But it wouldn't surprise me if a 64KB constant is inadvertently
> > hardcoded somewhere else in the library.  Giving heads up to Tulio and
> > Raphael who are owners of the github repo.
> >
> > https://github.com/libnxz/power-gzip/blob/master/lib/nx_zlib.c#L922
> >
> > If we got this wrong in the library it might manifest itself as an error
> > message of the sort "excessive page faults".  The library must touch
> > pages ahead to make them present in the memory; occasional page faults
> > is acceptable. It will retry.
>
> Hm, good to know. As I said I haven't noticed any problems so far, over a
> few different days of testing. My change is now in the Void Linux kernel
> package, and is working for others as well (including the Void maintainer
> Daniel/q66 who I CC'd initially).
>
> >
> > Bulent
> >
> >
> >
> >
> > From:        "Sukadev Bhattiprolu" <sukadev@linux.ibm.com>
> > To:        "Christophe Leroy" <christophe.leroy@csgroup.eu>
> > Cc:        "Will Springer" <skirmisher@protonmail.com>,
> > linuxppc-dev@lists.ozlabs.org, daniel@octaforge.org, Bulent
> > Abali/Watson/IBM@IBM, haren@linux.ibm.com Date:        12/01/2020 12:53
> > AM
> > Subject:        Re: CONFIG_PPC_VAS depends on 64k pages...?
> >
> > Christophe Leroy [christophe.leroy@csgroup.eu] wrote:
> > > Hi,
> > >
> > > Le 19/11/2020 à 11:58, Will Springer a écrit :
> > > > I learned about the POWER9 gzip accelerator a few months ago when
> > > > the
> > > > support hit upstream Linux 5.8. However, for some reason the Kconfig
> > > > dictates that VAS depends on a 64k page size, which is problematic
> > > > as I
> > > > run Void Linux, which uses a 4k-page kernel.
> > > >
> > > > Some early poking by others indicated there wasn't an obvious page
> > > > size
> > > > dependency in the code, and suggested I try modifying the config to
> > > > switch it on. I did so, but was stopped by a minor complaint of an
> > > > "unexpected DT configuration" by the VAS code. I wasn't equipped to
> > > > figure out exactly what this meant, even after finding the
> > > > offending condition, so after writing a very drawn-out forum post
> > > > asking for help, I dropped the subject.
> > > >
> > > > Fast forward to today, when I was reminded of the whole thing again,
> > > > and decided to debug a bit further. Apparently the VAS platform
> > > > device (derived from the DT node) has 5 resources on my 4k kernel,
> > > > instead of 4 (which evidently works for others who have had success
> > > > on 64k kernels). I have no idea what this means in practice (I
> > > > don't know how to introspect it), but after making a tiny patch[1],
> > > > everything came up smoothly and I was doing blazing-fast gzip
> > > > (de)compression in no time.
> > > >
> > > > Everything seems to work fine on 4k pages. So, what's up? Are there
> > > > pitfalls lurking around that I've yet to stumble over? More
> > > > reasonably,
> > > > I'm curious as to why the feature supposedly depends on 64k pages,
> > > > or if there's anything else I should be concerned about.
> >
> > Will,
> >
> > The reason I put in that config check is because we were only able to
> > test 64K pages at that point.
> >
> > It is interesting that it is working for you. Following code in skiboot
> > https://github.com/open-power/skiboot/blob/master/hw/vas.cshould
> > restrict it to 64K pages. IIRC there is also a corresponding change in
> > some NX registers that should also be configured to allow 4K pages.
>
> Huh, that is interesting indeed. As far as the kernel code, the only thing
> specific to 64k pages I could find was in [1], where
> VAS_XLATE_LPCR_PAGE_SIZE is set. There is also NX_PAGE_SIZE in drivers/
> crypto/nx/nx.h, which is set to 4096, but I don't know if that's related
> to
> kernel page size at all. Without a better idea of the code base, I didn't
> examine more thoroughly.
>
> [1]:
> https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/tree/arch/powerpc/platforms/powernv/vas-window.c#n293
>
> >                 static int init_north_ctl(struct proc_chip *chip)
> >                 {
> >                                  uint64_t val = 0ULL;
> >
> >                                  val = SETFIELD(VAS_64K_MODE_MASK, val,
> > true); val = SETFIELD(VAS_ACCEPT_PASTE_MASK, val, true); val =
> > SETFIELD(VAS_ENABLE_WC_MMIO_BAR, val, true); val =
> > SETFIELD(VAS_ENABLE_UWC_MMIO_BAR, val, true); val =
> > SETFIELD(VAS_ENABLE_RMA_MMIO_BAR, val, true);
> >
> >                                  return vas_scom_write(chip,
> > VAS_MISC_N_CTL, val); }
> >
> > I am copying Bulent Albali and Haren Myneni who have been working with
> > VAS/NX for their thoughts/experience.
>
> Thanks for this and for your input, by the way.
>
> >
> > > Maybe ask Sukadev who did the implementation and is maintaining it ?
> > >
> > > > I do have to say I'm quite satisfied with the results of the NX
> > > > accelerator, though. Being able to shuffle data to a RaptorCS box
> > > > over gigE and get compressed data back faster than most software
> > > > gzip could ever hope to achieve is no small feat, let alone the
> > > > instantaneous results locally.> >
> > > > :)
> > > >
> > > > Cheers,
> > > > Will Springer [she/her]
> > > >
> > > > [1]:
> > > >
> https://github.com/Skirmisher/void-packages/blob/vas-4k-pages/srcpkgs/linux5.9/patches/ppc-vas-on-4k.patch
> > > Christophe
>
> Will [she/her]
>
>
>
>
>

-- 
________________________________________
Carlos Eduardo de Paula
me@carlosedp.com
http://carlosedp.com
https://twitter.com/carlosedp
https://www.linkedin.com/in/carlosedp/
________________________________________

[-- Attachment #2: Type: text/html, Size: 9450 bytes --]

^ permalink raw reply

* Re: [PATCH v2] powerpc: Handle .text.{hot,unlikely}.* in linker script
From: Nathan Chancellor @ 2021-01-16 18:52 UTC (permalink / raw)
  To: Segher Boessenkool
  Cc: Nick Desaulniers, linux-kernel, stable, clang-built-linux,
	Paul Mackerras, linuxppc-dev
In-Reply-To: <20210116184438.GE30983@gate.crashing.org>

On Sat, Jan 16, 2021 at 12:44:38PM -0600, Segher Boessenkool wrote:
> Hi!
> 
> Very late of course, and the patch is fine, but:
> 
> On Mon, Jan 04, 2021 at 01:59:53PM -0700, Nathan Chancellor wrote:
> > Commit eff8728fe698 ("vmlinux.lds.h: Add PGO and AutoFDO input
> > sections") added ".text.unlikely.*" and ".text.hot.*" due to an LLVM
> > change [1].
> > 
> > After another LLVM change [2], these sections are seen in some PowerPC
> > builds, where there is a orphan section warning then build failure:
> > 
> > $ make -skj"$(nproc)" \
> >        ARCH=powerpc CROSS_COMPILE=powerpc64le-linux-gnu- LLVM=1 O=out \
> >        distclean powernv_defconfig zImage.epapr
> > ld.lld: warning: kernel/built-in.a(panic.o):(.text.unlikely.) is being placed in '.text.unlikely.'
> 
> Is the section really called ".text.unlikely.", i.e. the name ending in
> a dot?  How very unusual, is there some bug elsewhere?

No, this was an intention change done by LLVM:
https://reviews.llvm.org/D79600

Cheers,
Nathan

^ permalink raw reply

* Re: [PATCH v2] powerpc: Handle .text.{hot, unlikely}.* in linker script
From: Segher Boessenkool @ 2021-01-16 18:44 UTC (permalink / raw)
  To: Nathan Chancellor
  Cc: Nick Desaulniers, linux-kernel, stable, clang-built-linux,
	Paul Mackerras, linuxppc-dev
In-Reply-To: <20210104205952.1399409-1-natechancellor@gmail.com>

Hi!

Very late of course, and the patch is fine, but:

On Mon, Jan 04, 2021 at 01:59:53PM -0700, Nathan Chancellor wrote:
> Commit eff8728fe698 ("vmlinux.lds.h: Add PGO and AutoFDO input
> sections") added ".text.unlikely.*" and ".text.hot.*" due to an LLVM
> change [1].
> 
> After another LLVM change [2], these sections are seen in some PowerPC
> builds, where there is a orphan section warning then build failure:
> 
> $ make -skj"$(nproc)" \
>        ARCH=powerpc CROSS_COMPILE=powerpc64le-linux-gnu- LLVM=1 O=out \
>        distclean powernv_defconfig zImage.epapr
> ld.lld: warning: kernel/built-in.a(panic.o):(.text.unlikely.) is being placed in '.text.unlikely.'

Is the section really called ".text.unlikely.", i.e. the name ending in
a dot?  How very unusual, is there some bug elsewhere?


Segher

^ permalink raw reply

* Re: [PATCH v6 01/39] KVM: PPC: Book3S HV: Context tracking exit guest context before enabling irqs
From: Nicholas Piggin @ 2021-01-16 10:38 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: kvm-ppc
In-Reply-To: <20210115165012.1260253-2-npiggin@gmail.com>

Excerpts from Nicholas Piggin's message of January 16, 2021 2:49 am:
> Interrupts that occur in kernel mode expect that context tracking
> is set to kernel. Enabling local irqs before context tracking
> switches from guest to host means interrupts can come in and trigger
> warnings about wrong context, and possibly worse.

I think this is not actually a fix per se with context as it is today
because the interrupt handlers will save and update the state. It only 
starts throwing warnings when moving to the more precise tracking
where kernel interrupts always expect context to be in kernel mode.

The patch stands on its own just fine, but I'll reword slightly and
move it in the series to where it's needed.

Thanks,
Nick

> 
> Cc: kvm-ppc@vger.kernel.org
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
>  arch/powerpc/kvm/book3s_hv.c | 6 ++++--
>  1 file changed, 4 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index 6f612d240392..d348e77cee20 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -3407,8 +3407,9 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
>  
>  	kvmppc_set_host_core(pcpu);
>  
> +	guest_exit_irqoff();
> +
>  	local_irq_enable();
> -	guest_exit();
>  
>  	/* Let secondaries go back to the offline loop */
>  	for (i = 0; i < controlled_threads; ++i) {
> @@ -4217,8 +4218,9 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
>  
>  	kvmppc_set_host_core(pcpu);
>  
> +	guest_exit_irqoff();
> +
>  	local_irq_enable();
> -	guest_exit();
>  
>  	cpumask_clear_cpu(pcpu, &kvm->arch.cpu_in_guest);
>  
> -- 
> 2.23.0
> 
> 

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox