LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 06/16] powerpc/pseries: lparcfg don't include slb_size line in radix mode
From: Nicholas Piggin @ 2021-10-21  3:54 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20211021035417.2157804-1-npiggin@gmail.com>

This avoids a change in behaviour in the later patch making hash
support configurable. This is possibly a user interface change, so
the alternative would be a hard-coded slb_size=0 here.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/platforms/pseries/lparcfg.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c
index f71eac74ea92..3354c00914fa 100644
--- a/arch/powerpc/platforms/pseries/lparcfg.c
+++ b/arch/powerpc/platforms/pseries/lparcfg.c
@@ -532,7 +532,8 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v)
 		   lppaca_shared_proc(get_lppaca()));
 
 #ifdef CONFIG_PPC_BOOK3S_64
-	seq_printf(m, "slb_size=%d\n", mmu_slb_size);
+	if (!radix_enabled())
+		seq_printf(m, "slb_size=%d\n", mmu_slb_size);
 #endif
 	parse_em_data(m);
 	maxmem_data(m);
-- 
2.23.0


^ permalink raw reply related

* [PATCH v2 05/16] powerpc/pseries: move pseries_lpar_register_process_table() out from hash specific code
From: Nicholas Piggin @ 2021-10-21  3:54 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20211021035417.2157804-1-npiggin@gmail.com>

This reduces ifdefs in a later change making hash support configurable.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/platforms/pseries/lpar.c | 56 +++++++++++++--------------
 1 file changed, 28 insertions(+), 28 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 3df6bdfea475..06d6a824c0dc 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -712,6 +712,34 @@ void vpa_init(int cpu)
 
 #ifdef CONFIG_PPC_BOOK3S_64
 
+static int pseries_lpar_register_process_table(unsigned long base,
+			unsigned long page_size, unsigned long table_size)
+{
+	long rc;
+	unsigned long flags = 0;
+
+	if (table_size)
+		flags |= PROC_TABLE_NEW;
+	if (radix_enabled()) {
+		flags |= PROC_TABLE_RADIX;
+		if (mmu_has_feature(MMU_FTR_GTSE))
+			flags |= PROC_TABLE_GTSE;
+	} else
+		flags |= PROC_TABLE_HPT_SLB;
+	for (;;) {
+		rc = plpar_hcall_norets(H_REGISTER_PROC_TBL, flags, base,
+					page_size, table_size);
+		if (!H_IS_LONG_BUSY(rc))
+			break;
+		mdelay(get_longbusy_msecs(rc));
+	}
+	if (rc != H_SUCCESS) {
+		pr_err("Failed to register process table (rc=%ld)\n", rc);
+		BUG();
+	}
+	return rc;
+}
+
 static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
 				     unsigned long vpn, unsigned long pa,
 				     unsigned long rflags, unsigned long vflags,
@@ -1680,34 +1708,6 @@ static int pseries_lpar_resize_hpt(unsigned long shift)
 	return 0;
 }
 
-static int pseries_lpar_register_process_table(unsigned long base,
-			unsigned long page_size, unsigned long table_size)
-{
-	long rc;
-	unsigned long flags = 0;
-
-	if (table_size)
-		flags |= PROC_TABLE_NEW;
-	if (radix_enabled()) {
-		flags |= PROC_TABLE_RADIX;
-		if (mmu_has_feature(MMU_FTR_GTSE))
-			flags |= PROC_TABLE_GTSE;
-	} else
-		flags |= PROC_TABLE_HPT_SLB;
-	for (;;) {
-		rc = plpar_hcall_norets(H_REGISTER_PROC_TBL, flags, base,
-					page_size, table_size);
-		if (!H_IS_LONG_BUSY(rc))
-			break;
-		mdelay(get_longbusy_msecs(rc));
-	}
-	if (rc != H_SUCCESS) {
-		pr_err("Failed to register process table (rc=%ld)\n", rc);
-		BUG();
-	}
-	return rc;
-}
-
 void __init hpte_init_pseries(void)
 {
 	mmu_hash_ops.hpte_invalidate	 = pSeries_lpar_hpte_invalidate;
-- 
2.23.0


^ permalink raw reply related

* [PATCH v2 04/16] powerpc/64s: Move and rename do_bad_slb_fault as it is not hash specific
From: Nicholas Piggin @ 2021-10-21  3:54 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20211021035417.2157804-1-npiggin@gmail.com>

slb.c is hash-specific SLB management, but do_bad_slb_fault deals with
segment interrupts that occur with radix MMU as well.
---
 arch/powerpc/include/asm/interrupt.h |  2 +-
 arch/powerpc/kernel/exceptions-64s.S |  4 ++--
 arch/powerpc/mm/book3s64/slb.c       | 16 ----------------
 arch/powerpc/mm/fault.c              | 17 +++++++++++++++++
 4 files changed, 20 insertions(+), 19 deletions(-)

diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index a1d238255f07..3487aab12229 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -564,7 +564,7 @@ DECLARE_INTERRUPT_HANDLER(kernel_bad_stack);
 
 /* slb.c */
 DECLARE_INTERRUPT_HANDLER_RAW(do_slb_fault);
-DECLARE_INTERRUPT_HANDLER(do_bad_slb_fault);
+DECLARE_INTERRUPT_HANDLER(do_bad_segment_interrupt);
 
 /* hash_utils.c */
 DECLARE_INTERRUPT_HANDLER_RAW(do_hash_fault);
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index eaf1f72131a1..046c99e31d01 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1430,7 +1430,7 @@ MMU_FTR_SECTION_ELSE
 ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
 	std	r3,RESULT(r1)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	do_bad_slb_fault
+	bl	do_bad_segment_interrupt
 	b	interrupt_return_srr
 
 
@@ -1510,7 +1510,7 @@ MMU_FTR_SECTION_ELSE
 ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
 	std	r3,RESULT(r1)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	do_bad_slb_fault
+	bl	do_bad_segment_interrupt
 	b	interrupt_return_srr
 
 
diff --git a/arch/powerpc/mm/book3s64/slb.c b/arch/powerpc/mm/book3s64/slb.c
index f0037bcc47a0..31f4cef3adac 100644
--- a/arch/powerpc/mm/book3s64/slb.c
+++ b/arch/powerpc/mm/book3s64/slb.c
@@ -868,19 +868,3 @@ DEFINE_INTERRUPT_HANDLER_RAW(do_slb_fault)
 		return err;
 	}
 }
-
-DEFINE_INTERRUPT_HANDLER(do_bad_slb_fault)
-{
-	int err = regs->result;
-
-	if (err == -EFAULT) {
-		if (user_mode(regs))
-			_exception(SIGSEGV, regs, SEGV_BNDERR, regs->dar);
-		else
-			bad_page_fault(regs, SIGSEGV);
-	} else if (err == -EINVAL) {
-		unrecoverable_exception(regs);
-	} else {
-		BUG();
-	}
-}
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index a8d0ce85d39a..53ddcae0ac9e 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -35,6 +35,7 @@
 #include <linux/kfence.h>
 #include <linux/pkeys.h>
 
+#include <asm/asm-prototypes.h>
 #include <asm/firmware.h>
 #include <asm/interrupt.h>
 #include <asm/page.h>
@@ -620,4 +621,20 @@ DEFINE_INTERRUPT_HANDLER(do_bad_page_fault_segv)
 {
 	bad_page_fault(regs, SIGSEGV);
 }
+
+DEFINE_INTERRUPT_HANDLER(do_bad_segment_interrupt)
+{
+	int err = regs->result;
+
+	if (err == -EFAULT) {
+		if (user_mode(regs))
+			_exception(SIGSEGV, regs, SEGV_BNDERR, regs->dar);
+		else
+			bad_page_fault(regs, SIGSEGV);
+	} else if (err == -EINVAL) {
+		unrecoverable_exception(regs);
+	} else {
+		BUG();
+	}
+}
 #endif
-- 
2.23.0


^ permalink raw reply related

* [PATCH v2 03/16] powerpc/pseries: Stop selecting PPC_HASH_MMU_NATIVE
From: Nicholas Piggin @ 2021-10-21  3:54 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20211021035417.2157804-1-npiggin@gmail.com>

The pseries platform does not use the native hash code but the PAPR
virtualised hash interfaces, so remove PPC_HASH_MMU_NATIVE.

This requires moving tlbiel code from hash_native.c to hash_utils.c.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/include/asm/book3s/64/tlbflush.h |   4 -
 arch/powerpc/mm/book3s64/hash_native.c        | 104 ------------------
 arch/powerpc/mm/book3s64/hash_utils.c         | 104 ++++++++++++++++++
 arch/powerpc/platforms/pseries/Kconfig        |   1 -
 4 files changed, 104 insertions(+), 109 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h
index 215973b4cb26..d2e80f178b6d 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h
@@ -14,7 +14,6 @@ enum {
 	TLB_INVAL_SCOPE_LPID = 1,	/* invalidate TLBs for current LPID */
 };
 
-#ifdef CONFIG_PPC_NATIVE
 static inline void tlbiel_all(void)
 {
 	/*
@@ -30,9 +29,6 @@ static inline void tlbiel_all(void)
 	else
 		hash__tlbiel_all(TLB_INVAL_SCOPE_GLOBAL);
 }
-#else
-static inline void tlbiel_all(void) { BUG(); }
-#endif
 
 static inline void tlbiel_all_lpid(bool radix)
 {
diff --git a/arch/powerpc/mm/book3s64/hash_native.c b/arch/powerpc/mm/book3s64/hash_native.c
index d8279bfe68ea..d2a320828c0b 100644
--- a/arch/powerpc/mm/book3s64/hash_native.c
+++ b/arch/powerpc/mm/book3s64/hash_native.c
@@ -43,110 +43,6 @@
 
 static DEFINE_RAW_SPINLOCK(native_tlbie_lock);
 
-static inline void tlbiel_hash_set_isa206(unsigned int set, unsigned int is)
-{
-	unsigned long rb;
-
-	rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
-
-	asm volatile("tlbiel %0" : : "r" (rb));
-}
-
-/*
- * tlbiel instruction for hash, set invalidation
- * i.e., r=1 and is=01 or is=10 or is=11
- */
-static __always_inline void tlbiel_hash_set_isa300(unsigned int set, unsigned int is,
-					unsigned int pid,
-					unsigned int ric, unsigned int prs)
-{
-	unsigned long rb;
-	unsigned long rs;
-	unsigned int r = 0; /* hash format */
-
-	rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
-	rs = ((unsigned long)pid << PPC_BITLSHIFT(31));
-
-	asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4)
-		     : : "r"(rb), "r"(rs), "i"(ric), "i"(prs), "i"(r)
-		     : "memory");
-}
-
-
-static void tlbiel_all_isa206(unsigned int num_sets, unsigned int is)
-{
-	unsigned int set;
-
-	asm volatile("ptesync": : :"memory");
-
-	for (set = 0; set < num_sets; set++)
-		tlbiel_hash_set_isa206(set, is);
-
-	ppc_after_tlbiel_barrier();
-}
-
-static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
-{
-	unsigned int set;
-
-	asm volatile("ptesync": : :"memory");
-
-	/*
-	 * Flush the partition table cache if this is HV mode.
-	 */
-	if (early_cpu_has_feature(CPU_FTR_HVMODE))
-		tlbiel_hash_set_isa300(0, is, 0, 2, 0);
-
-	/*
-	 * Now invalidate the process table cache. UPRT=0 HPT modes (what
-	 * current hardware implements) do not use the process table, but
-	 * add the flushes anyway.
-	 *
-	 * From ISA v3.0B p. 1078:
-	 *     The following forms are invalid.
-	 *      * PRS=1, R=0, and RIC!=2 (The only process-scoped
-	 *        HPT caching is of the Process Table.)
-	 */
-	tlbiel_hash_set_isa300(0, is, 0, 2, 1);
-
-	/*
-	 * Then flush the sets of the TLB proper. Hash mode uses
-	 * partition scoped TLB translations, which may be flushed
-	 * in !HV mode.
-	 */
-	for (set = 0; set < num_sets; set++)
-		tlbiel_hash_set_isa300(set, is, 0, 0, 0);
-
-	ppc_after_tlbiel_barrier();
-
-	asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory");
-}
-
-void hash__tlbiel_all(unsigned int action)
-{
-	unsigned int is;
-
-	switch (action) {
-	case TLB_INVAL_SCOPE_GLOBAL:
-		is = 3;
-		break;
-	case TLB_INVAL_SCOPE_LPID:
-		is = 2;
-		break;
-	default:
-		BUG();
-	}
-
-	if (early_cpu_has_feature(CPU_FTR_ARCH_300))
-		tlbiel_all_isa300(POWER9_TLB_SETS_HASH, is);
-	else if (early_cpu_has_feature(CPU_FTR_ARCH_207S))
-		tlbiel_all_isa206(POWER8_TLB_SETS, is);
-	else if (early_cpu_has_feature(CPU_FTR_ARCH_206))
-		tlbiel_all_isa206(POWER7_TLB_SETS, is);
-	else
-		WARN(1, "%s called on pre-POWER7 CPU\n", __func__);
-}
-
 static inline unsigned long  ___tlbie(unsigned long vpn, int psize,
 						int apsize, int ssize)
 {
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index ebe3044711ce..ffc52ff0b3f0 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -175,6 +175,110 @@ static struct mmu_psize_def mmu_psize_defaults_gp[] = {
 	},
 };
 
+static inline void tlbiel_hash_set_isa206(unsigned int set, unsigned int is)
+{
+	unsigned long rb;
+
+	rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
+
+	asm volatile("tlbiel %0" : : "r" (rb));
+}
+
+/*
+ * tlbiel instruction for hash, set invalidation
+ * i.e., r=1 and is=01 or is=10 or is=11
+ */
+static __always_inline void tlbiel_hash_set_isa300(unsigned int set, unsigned int is,
+					unsigned int pid,
+					unsigned int ric, unsigned int prs)
+{
+	unsigned long rb;
+	unsigned long rs;
+	unsigned int r = 0; /* hash format */
+
+	rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
+	rs = ((unsigned long)pid << PPC_BITLSHIFT(31));
+
+	asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4)
+		     : : "r"(rb), "r"(rs), "i"(ric), "i"(prs), "i"(r)
+		     : "memory");
+}
+
+
+static void tlbiel_all_isa206(unsigned int num_sets, unsigned int is)
+{
+	unsigned int set;
+
+	asm volatile("ptesync": : :"memory");
+
+	for (set = 0; set < num_sets; set++)
+		tlbiel_hash_set_isa206(set, is);
+
+	ppc_after_tlbiel_barrier();
+}
+
+static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
+{
+	unsigned int set;
+
+	asm volatile("ptesync": : :"memory");
+
+	/*
+	 * Flush the partition table cache if this is HV mode.
+	 */
+	if (early_cpu_has_feature(CPU_FTR_HVMODE))
+		tlbiel_hash_set_isa300(0, is, 0, 2, 0);
+
+	/*
+	 * Now invalidate the process table cache. UPRT=0 HPT modes (what
+	 * current hardware implements) do not use the process table, but
+	 * add the flushes anyway.
+	 *
+	 * From ISA v3.0B p. 1078:
+	 *     The following forms are invalid.
+	 *      * PRS=1, R=0, and RIC!=2 (The only process-scoped
+	 *        HPT caching is of the Process Table.)
+	 */
+	tlbiel_hash_set_isa300(0, is, 0, 2, 1);
+
+	/*
+	 * Then flush the sets of the TLB proper. Hash mode uses
+	 * partition scoped TLB translations, which may be flushed
+	 * in !HV mode.
+	 */
+	for (set = 0; set < num_sets; set++)
+		tlbiel_hash_set_isa300(set, is, 0, 0, 0);
+
+	ppc_after_tlbiel_barrier();
+
+	asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory");
+}
+
+void hash__tlbiel_all(unsigned int action)
+{
+	unsigned int is;
+
+	switch (action) {
+	case TLB_INVAL_SCOPE_GLOBAL:
+		is = 3;
+		break;
+	case TLB_INVAL_SCOPE_LPID:
+		is = 2;
+		break;
+	default:
+		BUG();
+	}
+
+	if (early_cpu_has_feature(CPU_FTR_ARCH_300))
+		tlbiel_all_isa300(POWER9_TLB_SETS_HASH, is);
+	else if (early_cpu_has_feature(CPU_FTR_ARCH_207S))
+		tlbiel_all_isa206(POWER8_TLB_SETS, is);
+	else if (early_cpu_has_feature(CPU_FTR_ARCH_206))
+		tlbiel_all_isa206(POWER7_TLB_SETS, is);
+	else
+		WARN(1, "%s called on pre-POWER7 CPU\n", __func__);
+}
+
 /*
  * 'R' and 'C' update notes:
  *  - Under pHyp or KVM, the updatepp path will not set C, thus it *will*
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index 69a1ff8c079b..98ab9697ab5e 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -17,7 +17,6 @@ config PPC_PSERIES
 	select PPC_RTAS_DAEMON
 	select RTAS_ERROR_LOGGING
 	select PPC_UDBG_16550
-	select PPC_HASH_MMU_NATIVE
 	select PPC_DOORBELL
 	select HOTPLUG_CPU
 	select ARCH_RANDOM
-- 
2.23.0


^ permalink raw reply related

* [PATCH v2 02/16] powerpc: Rename PPC_NATIVE to PPC_HASH_MMU_NATIVE
From: Nicholas Piggin @ 2021-10-21  3:54 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20211021035417.2157804-1-npiggin@gmail.com>

PPC_NATIVE now only controls the native HPT code, so rename it to be
more descriptive. Restrict it to Book3S only.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/mm/book3s64/Makefile          | 2 +-
 arch/powerpc/mm/book3s64/hash_utils.c      | 2 +-
 arch/powerpc/platforms/52xx/Kconfig        | 2 +-
 arch/powerpc/platforms/Kconfig             | 4 ++--
 arch/powerpc/platforms/cell/Kconfig        | 2 +-
 arch/powerpc/platforms/chrp/Kconfig        | 2 +-
 arch/powerpc/platforms/embedded6xx/Kconfig | 2 +-
 arch/powerpc/platforms/maple/Kconfig       | 2 +-
 arch/powerpc/platforms/microwatt/Kconfig   | 2 +-
 arch/powerpc/platforms/pasemi/Kconfig      | 2 +-
 arch/powerpc/platforms/powermac/Kconfig    | 2 +-
 arch/powerpc/platforms/powernv/Kconfig     | 2 +-
 arch/powerpc/platforms/pseries/Kconfig     | 2 +-
 13 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/arch/powerpc/mm/book3s64/Makefile b/arch/powerpc/mm/book3s64/Makefile
index 1b56d3af47d4..319f4b7f3357 100644
--- a/arch/powerpc/mm/book3s64/Makefile
+++ b/arch/powerpc/mm/book3s64/Makefile
@@ -6,7 +6,7 @@ CFLAGS_REMOVE_slb.o = $(CC_FLAGS_FTRACE)
 
 obj-y				+= hash_pgtable.o hash_utils.o slb.o \
 				   mmu_context.o pgtable.o hash_tlb.o
-obj-$(CONFIG_PPC_NATIVE)	+= hash_native.o
+obj-$(CONFIG_PPC_HASH_MMU_NATIVE)	+= hash_native.o
 obj-$(CONFIG_PPC_RADIX_MMU)	+= radix_pgtable.o radix_tlb.o
 obj-$(CONFIG_PPC_4K_PAGES)	+= hash_4k.o
 obj-$(CONFIG_PPC_64K_PAGES)	+= hash_64k.o
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index c145776d3ae5..ebe3044711ce 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -1091,7 +1091,7 @@ void __init hash__early_init_mmu(void)
 		ps3_early_mm_init();
 	else if (firmware_has_feature(FW_FEATURE_LPAR))
 		hpte_init_pseries();
-	else if (IS_ENABLED(CONFIG_PPC_NATIVE))
+	else if (IS_ENABLED(CONFIG_PPC_HASH_MMU_NATIVE))
 		hpte_init_native();
 
 	if (!mmu_hash_ops.hpte_insert)
diff --git a/arch/powerpc/platforms/52xx/Kconfig b/arch/powerpc/platforms/52xx/Kconfig
index 99d60acc20c8..b72ed2950ca8 100644
--- a/arch/powerpc/platforms/52xx/Kconfig
+++ b/arch/powerpc/platforms/52xx/Kconfig
@@ -34,7 +34,7 @@ config PPC_EFIKA
 	bool "bPlan Efika 5k2. MPC5200B based computer"
 	depends on PPC_MPC52xx
 	select PPC_RTAS
-	select PPC_NATIVE
+	select PPC_HASH_MMU_NATIVE
 
 config PPC_LITE5200
 	bool "Freescale Lite5200 Eval Board"
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index e02d29a9d12f..d41dad227de8 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -40,9 +40,9 @@ config EPAPR_PARAVIRT
 
 	  In case of doubt, say Y
 
-config PPC_NATIVE
+config PPC_HASH_MMU_NATIVE
 	bool
-	depends on PPC_BOOK3S_32 || PPC64
+	depends on PPC_BOOK3S
 	help
 	  Support for running natively on the hardware, i.e. without
 	  a hypervisor. This option is not user-selectable but should
diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig
index cb70c5f25bc6..db4465c51b56 100644
--- a/arch/powerpc/platforms/cell/Kconfig
+++ b/arch/powerpc/platforms/cell/Kconfig
@@ -8,7 +8,7 @@ config PPC_CELL_COMMON
 	select PPC_DCR_MMIO
 	select PPC_INDIRECT_PIO
 	select PPC_INDIRECT_MMIO
-	select PPC_NATIVE
+	select PPC_HASH_MMU_NATIVE
 	select PPC_RTAS
 	select IRQ_EDGE_EOI_HANDLER
 
diff --git a/arch/powerpc/platforms/chrp/Kconfig b/arch/powerpc/platforms/chrp/Kconfig
index 9b5c5505718a..ff30ed579a39 100644
--- a/arch/powerpc/platforms/chrp/Kconfig
+++ b/arch/powerpc/platforms/chrp/Kconfig
@@ -11,6 +11,6 @@ config PPC_CHRP
 	select RTAS_ERROR_LOGGING
 	select PPC_MPC106
 	select PPC_UDBG_16550
-	select PPC_NATIVE
+	select PPC_HASH_MMU_NATIVE
 	select FORCE_PCI
 	default y
diff --git a/arch/powerpc/platforms/embedded6xx/Kconfig b/arch/powerpc/platforms/embedded6xx/Kconfig
index 4c6d703a4284..c54786f8461e 100644
--- a/arch/powerpc/platforms/embedded6xx/Kconfig
+++ b/arch/powerpc/platforms/embedded6xx/Kconfig
@@ -55,7 +55,7 @@ config MVME5100
 	select FORCE_PCI
 	select PPC_INDIRECT_PCI
 	select PPC_I8259
-	select PPC_NATIVE
+	select PPC_HASH_MMU_NATIVE
 	select PPC_UDBG_16550
 	help
 	  This option enables support for the Motorola (now Emerson) MVME5100
diff --git a/arch/powerpc/platforms/maple/Kconfig b/arch/powerpc/platforms/maple/Kconfig
index 86ae210bee9a..7fd84311ade5 100644
--- a/arch/powerpc/platforms/maple/Kconfig
+++ b/arch/powerpc/platforms/maple/Kconfig
@@ -9,7 +9,7 @@ config PPC_MAPLE
 	select GENERIC_TBSYNC
 	select PPC_UDBG_16550
 	select PPC_970_NAP
-	select PPC_NATIVE
+	select PPC_HASH_MMU_NATIVE
 	select PPC_RTAS
 	select MMIO_NVRAM
 	select ATA_NONSTANDARD if ATA
diff --git a/arch/powerpc/platforms/microwatt/Kconfig b/arch/powerpc/platforms/microwatt/Kconfig
index 8f6a81978461..62b51e37fc05 100644
--- a/arch/powerpc/platforms/microwatt/Kconfig
+++ b/arch/powerpc/platforms/microwatt/Kconfig
@@ -5,7 +5,7 @@ config PPC_MICROWATT
 	select PPC_XICS
 	select PPC_ICS_NATIVE
 	select PPC_ICP_NATIVE
-	select PPC_NATIVE
+	select PPC_HASH_MMU_NATIVE
 	select PPC_UDBG_16550
 	select ARCH_RANDOM
 	help
diff --git a/arch/powerpc/platforms/pasemi/Kconfig b/arch/powerpc/platforms/pasemi/Kconfig
index c52731a7773f..bc7137353a7f 100644
--- a/arch/powerpc/platforms/pasemi/Kconfig
+++ b/arch/powerpc/platforms/pasemi/Kconfig
@@ -5,7 +5,7 @@ config PPC_PASEMI
 	select MPIC
 	select FORCE_PCI
 	select PPC_UDBG_16550
-	select PPC_NATIVE
+	select PPC_HASH_MMU_NATIVE
 	select MPIC_BROKEN_REGREAD
 	help
 	  This option enables support for PA Semi's PWRficient line
diff --git a/arch/powerpc/platforms/powermac/Kconfig b/arch/powerpc/platforms/powermac/Kconfig
index b97bf12801eb..2b56df145b82 100644
--- a/arch/powerpc/platforms/powermac/Kconfig
+++ b/arch/powerpc/platforms/powermac/Kconfig
@@ -6,7 +6,7 @@ config PPC_PMAC
 	select FORCE_PCI
 	select PPC_INDIRECT_PCI if PPC32
 	select PPC_MPC106 if PPC32
-	select PPC_NATIVE
+	select PPC_HASH_MMU_NATIVE
 	select ZONE_DMA if PPC32
 	default y
 
diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig
index 043eefbbdd28..cd754e116184 100644
--- a/arch/powerpc/platforms/powernv/Kconfig
+++ b/arch/powerpc/platforms/powernv/Kconfig
@@ -2,7 +2,7 @@
 config PPC_POWERNV
 	depends on PPC64 && PPC_BOOK3S
 	bool "IBM PowerNV (Non-Virtualized) platform support"
-	select PPC_NATIVE
+	select PPC_HASH_MMU_NATIVE
 	select PPC_XICS
 	select PPC_ICP_NATIVE
 	select PPC_XIVE_NATIVE
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index 5e037df2a3a1..69a1ff8c079b 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -17,7 +17,7 @@ config PPC_PSERIES
 	select PPC_RTAS_DAEMON
 	select RTAS_ERROR_LOGGING
 	select PPC_UDBG_16550
-	select PPC_NATIVE
+	select PPC_HASH_MMU_NATIVE
 	select PPC_DOORBELL
 	select HOTPLUG_CPU
 	select ARCH_RANDOM
-- 
2.23.0


^ permalink raw reply related

* [PATCH v2 01/16] powerpc: Remove unused FW_FEATURE_NATIVE references
From: Nicholas Piggin @ 2021-10-21  3:54 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20211021035417.2157804-1-npiggin@gmail.com>

FW_FEATURE_NATIVE_ALWAYS and FW_FEATURE_NATIVE_POSSIBLE are always
zero and never do anything. Remove them.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/include/asm/firmware.h | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h
index 97a3bd9ffeb9..9b702d2b80fb 100644
--- a/arch/powerpc/include/asm/firmware.h
+++ b/arch/powerpc/include/asm/firmware.h
@@ -80,8 +80,6 @@ enum {
 	FW_FEATURE_POWERNV_ALWAYS = 0,
 	FW_FEATURE_PS3_POSSIBLE = FW_FEATURE_LPAR | FW_FEATURE_PS3_LV1,
 	FW_FEATURE_PS3_ALWAYS = FW_FEATURE_LPAR | FW_FEATURE_PS3_LV1,
-	FW_FEATURE_NATIVE_POSSIBLE = 0,
-	FW_FEATURE_NATIVE_ALWAYS = 0,
 	FW_FEATURE_POSSIBLE =
 #ifdef CONFIG_PPC_PSERIES
 		FW_FEATURE_PSERIES_POSSIBLE |
@@ -91,9 +89,6 @@ enum {
 #endif
 #ifdef CONFIG_PPC_PS3
 		FW_FEATURE_PS3_POSSIBLE |
-#endif
-#ifdef CONFIG_PPC_NATIVE
-		FW_FEATURE_NATIVE_ALWAYS |
 #endif
 		0,
 	FW_FEATURE_ALWAYS =
@@ -105,9 +100,6 @@ enum {
 #endif
 #ifdef CONFIG_PPC_PS3
 		FW_FEATURE_PS3_ALWAYS &
-#endif
-#ifdef CONFIG_PPC_NATIVE
-		FW_FEATURE_NATIVE_ALWAYS &
 #endif
 		FW_FEATURE_POSSIBLE,
 
-- 
2.23.0


^ permalink raw reply related

* [PATCH v2 00/16] powerpc: Make hash MMU code build configurable
From: Nicholas Piggin @ 2021-10-21  3:54 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin

Now that there's a platform that can make good use of it, here's
a series that can prevent the hash MMU code being built for 64s
platforms that don't need it.

Since v1:
- Split out most of the Kconfig change from the conditional compilation
  changes.
- Split out several more changes into preparatory patches.
- Reduced some ifdefs.
- Caught a few missing hash bits: pgtable dump, lkdtm,
  memremap_compat_align.

Since RFC:
- Split out large code movement from other changes.
- Used mmu ftr test constant folding rather than adding new constant
  true/false for radix_enabled().
- Restore tlbie trace point that had to be commented out in the
  previous.
- Avoid minor (probably unreachable) behaviour change in machine check
  handler when hash was not compiled.
- Fix microwatt updates so !HASH is not enforced.
- Rebase, build fixes.

Thanks,
Nick

Nicholas Piggin (16):
  powerpc: Remove unused FW_FEATURE_NATIVE references
  powerpc: Rename PPC_NATIVE to PPC_HASH_MMU_NATIVE
  powerpc/pseries: Stop selecting PPC_HASH_MMU_NATIVE
  powerpc/64s: Move and rename do_bad_slb_fault as it is not hash
    specific
  powerpc/pseries: move pseries_lpar_register_process_table() out from
    hash specific code
  powerpc/pseries: lparcfg don't include slb_size line in radix mode
  powerpc/64s: move THP trace point creation out of hash specific file
  powerpc/64s: Make flush_and_reload_slb a no-op when radix is enabled
  powerpc/64s: move page size definitions from hash specific file
  powerpc/64s: Rename hash_hugetlbpage.c to hugetlbpage.c
  powerpc/64: pcpu setup avoid reading mmu_linear_psize on 64e or radix
  powerpc/64e: remove mmu_linear_psize
  powerpc/64s: Move hash MMU code under a new Kconfig name
  powerpc/64s: Make hash MMU support configurable
  powerpc/configs/microwatt: add POWER9_CPU
  powerpc/microwatt: Don't select the hash MMU code

 arch/powerpc/Kconfig                          |   3 +-
 arch/powerpc/configs/microwatt_defconfig      |   2 +-
 arch/powerpc/include/asm/book3s/64/mmu.h      |  19 ++-
 .../include/asm/book3s/64/tlbflush-hash.h     |   7 ++
 arch/powerpc/include/asm/book3s/64/tlbflush.h |   4 -
 arch/powerpc/include/asm/book3s/pgtable.h     |   4 +
 arch/powerpc/include/asm/firmware.h           |   8 --
 arch/powerpc/include/asm/interrupt.h          |   2 +-
 arch/powerpc/include/asm/mmu.h                |  16 ++-
 arch/powerpc/include/asm/mmu_context.h        |   2 +
 arch/powerpc/include/asm/paca.h               |   8 ++
 arch/powerpc/kernel/asm-offsets.c             |   2 +
 arch/powerpc/kernel/dt_cpu_ftrs.c             |  14 ++-
 arch/powerpc/kernel/entry_64.S                |   4 +-
 arch/powerpc/kernel/exceptions-64s.S          |  20 +++-
 arch/powerpc/kernel/mce.c                     |   2 +-
 arch/powerpc/kernel/mce_power.c               |  16 ++-
 arch/powerpc/kernel/paca.c                    |  18 ++-
 arch/powerpc/kernel/process.c                 |  13 +-
 arch/powerpc/kernel/prom.c                    |   2 +
 arch/powerpc/kernel/setup_64.c                |  26 +++-
 arch/powerpc/kexec/core_64.c                  |   4 +-
 arch/powerpc/kexec/ranges.c                   |   4 +
 arch/powerpc/kvm/Kconfig                      |   1 +
 arch/powerpc/mm/book3s64/Makefile             |  19 +--
 arch/powerpc/mm/book3s64/hash_native.c        | 104 ----------------
 arch/powerpc/mm/book3s64/hash_pgtable.c       |   1 -
 arch/powerpc/mm/book3s64/hash_utils.c         | 111 +++++++++++++++++-
 .../{hash_hugetlbpage.c => hugetlbpage.c}     |   2 +
 arch/powerpc/mm/book3s64/mmu_context.c        |  34 +++++-
 arch/powerpc/mm/book3s64/pgtable.c            |   8 ++
 arch/powerpc/mm/book3s64/radix_pgtable.c      |   4 +
 arch/powerpc/mm/book3s64/slb.c                |  16 ---
 arch/powerpc/mm/book3s64/trace.c              |   8 ++
 arch/powerpc/mm/copro_fault.c                 |   2 +
 arch/powerpc/mm/fault.c                       |  17 +++
 arch/powerpc/mm/ioremap.c                     |  19 ++-
 arch/powerpc/mm/nohash/tlb.c                  |   9 --
 arch/powerpc/mm/pgtable.c                     |  10 +-
 arch/powerpc/mm/ptdump/Makefile               |   2 +-
 arch/powerpc/platforms/52xx/Kconfig           |   2 +-
 arch/powerpc/platforms/Kconfig                |   4 +-
 arch/powerpc/platforms/Kconfig.cputype        |  21 +++-
 arch/powerpc/platforms/cell/Kconfig           |   3 +-
 arch/powerpc/platforms/chrp/Kconfig           |   2 +-
 arch/powerpc/platforms/embedded6xx/Kconfig    |   2 +-
 arch/powerpc/platforms/maple/Kconfig          |   3 +-
 arch/powerpc/platforms/microwatt/Kconfig      |   1 -
 arch/powerpc/platforms/pasemi/Kconfig         |   3 +-
 arch/powerpc/platforms/powermac/Kconfig       |   3 +-
 arch/powerpc/platforms/powernv/Kconfig        |   2 +-
 arch/powerpc/platforms/powernv/idle.c         |   2 +
 arch/powerpc/platforms/powernv/setup.c        |   2 +
 arch/powerpc/platforms/pseries/Kconfig        |   1 -
 arch/powerpc/platforms/pseries/lpar.c         |  67 ++++++-----
 arch/powerpc/platforms/pseries/lparcfg.c      |   5 +-
 arch/powerpc/platforms/pseries/mobility.c     |   6 +
 arch/powerpc/platforms/pseries/ras.c          |   2 +
 arch/powerpc/platforms/pseries/reconfig.c     |   2 +
 arch/powerpc/platforms/pseries/setup.c        |   6 +-
 arch/powerpc/xmon/xmon.c                      |   8 +-
 drivers/misc/lkdtm/Makefile                   |   2 +-
 drivers/misc/lkdtm/core.c                     |   2 +-
 63 files changed, 448 insertions(+), 270 deletions(-)
 rename arch/powerpc/mm/book3s64/{hash_hugetlbpage.c => hugetlbpage.c} (99%)
 create mode 100644 arch/powerpc/mm/book3s64/trace.c

-- 
2.23.0


^ permalink raw reply

* Re: [PATCH] ibmvscsi: use GFP_KERNEL with dma_alloc_coherent in initialize_event_pool
From: Martin K. Petersen @ 2021-10-21  3:42 UTC (permalink / raw)
  To: james.bottomley, Tyrel Datwyler
  Cc: brking, linuxppc-dev, linux-scsi, Martin K . Petersen
In-Reply-To: <1547089149-20577-1-git-send-email-tyreld@linux.vnet.ibm.com>

On Wed, 9 Jan 2019 18:59:09 -0800, Tyrel Datwyler wrote:

> During driver probe we allocate a dma region for our event pool.
> Currently, zero is passed for the gfp_flags parameter. Driver probe
> callbacks run in process context and we hold no locks so we can sleep
> here if necessary.
> 
> Fix by passing GFP_KERNEL explicitly to dma_alloc_coherent().
> 
> [...]

Applied to 5.16/scsi-queue, thanks!

[1/1] ibmvscsi: use GFP_KERNEL with dma_alloc_coherent in initialize_event_pool
      https://git.kernel.org/mkp/scsi/c/3319a8ba82b9

-- 
Martin K. Petersen	Oracle Linux Engineering

^ permalink raw reply

* Re: [V4 0/2] tools/perf: Add instruction and data address registers to extended regs in powerpc
From: Athira Rajeev @ 2021-10-21  2:52 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: maddy, rnsastry, linux-perf-users, Jiri Olsa, kjain, linuxppc-dev
In-Reply-To: <YW7yjZE8LKvgjapw@kernel.org>



> On 19-Oct-2021, at 10:00 PM, Arnaldo Carvalho de Melo <acme@kernel.org> wrote:
> 
> Em Mon, Oct 18, 2021 at 05:19:46PM +0530, Athira Rajeev escreveu:
>> Patch set adds PMU registers namely Sampled Instruction Address Register
>> (SIAR) and Sampled Data Address Register (SDAR) as part of extended regs
>> in PowerPC. These registers provides the instruction/data address and
>> adding these to extended regs helps in debug purposes.
>> 
>> Patch 1/2 refactors the existing macro definition of
>> PERF_REG_PMU_MASK_300 and PERF_REG_PMU_MASK_31 to make it more
>> readable.
>> Patch 2/2 includes perf tools side changes to add the SPRs to
>> sample_reg_mask to use with -I? option.
> 
> Thanks, applied.
> 
> - Arnaldo

Thanks Arnaldo.

Athira
> 
> 
>> Changelog:
>> Change from v3 -> v4:
>> - Spilt tools side patches separately since kernel side
>>  changes are in powerpc/next. There is no code wise changes
>>  from v3.
>>  Link to previous version:
>>  https://patchwork.ozlabs.org/project/linuxppc-dev/list/?series=265811&state=*
>> 
>>  Kernel patches are taken to powerpc/next:
>>  [1/4] powerpc/perf: Refactor the code definition of perf reg extended mask
>>  https://git.kernel.org/powerpc/c/02b182e67482d9167a13a0ff19b55037b70b21ad
>>  [3/4] powerpc/perf: Expose instruction and data address registers as part of extended regs
>>  https://git.kernel.org/powerpc/c/29908bbf7b8960d261dfdd428bbaa656275e80f3
>> 
>> Change from v2 -> v3:
>> Addressed review comments from Michael Ellerman
>> - Fixed the macro definition to use "unsigned long long"
>>  which otherwise will cause build error with perf on
>>  32-bit.
>> - Added Reviewed-by from Daniel Axtens for patch3.
>> 
>> Change from v1 -> v2:
>> Addressed review comments from Michael Ellerman
>> - Refactored the perf reg extended mask value macros for
>>  PERF_REG_PMU_MASK_300 and PERF_REG_PMU_MASK_31 to
>>  make it more readable. Also moved PERF_REG_EXTENDED_MAX
>>  along with enum definition similar to PERF_REG_POWERPC_MAX.
>> 
>> Athira Rajeev (2):
>>  tools/perf: Refactor the code definition of perf reg extended mask in
>>    tools side header file
>>  tools/perf: Add perf tools support to expose instruction and data
>>    address registers as part of extended regs
>> 
>> .../arch/powerpc/include/uapi/asm/perf_regs.h | 28 ++++++++++++-------
>> tools/perf/arch/powerpc/include/perf_regs.h   |  2 ++
>> tools/perf/arch/powerpc/util/perf_regs.c      |  2 ++
>> 3 files changed, 22 insertions(+), 10 deletions(-)
>> 
>> -- 
>> 2.33.0
> 
> -- 
> 
> - Arnaldo


^ permalink raw reply

* Re: [PATCH v4 5/8] PCI/DPC: Converge EDR and DPC Path of clearing AER registers
From: Bjorn Helgaas @ 2021-10-21  2:09 UTC (permalink / raw)
  To: Naveen Naidu
  Cc: Oza Pawandeep, linux-pci, Sinan Kaya, linux-kernel, Keith Busch,
	oohall, bhelgaas, linuxppc-dev, linux-kernel-mentees
In-Reply-To: <0a443323ab64ba8c0fc6caa03ca56ecd4d038ea3.1633453452.git.naveennaidu479@gmail.com>

[+cc Keith, Sinan, Oza]

On Tue, Oct 05, 2021 at 10:48:12PM +0530, Naveen Naidu wrote:
> In the EDR path, AER registers are cleared *after* DPC error event is
> processed. The process stack in EDR is:
> 
>   edr_handle_event()
>     dpc_process_error()
>     pci_aer_raw_clear_status()
>     pcie_do_recovery()
> 
> But in DPC path, AER status registers are cleared *while* processing
> the error. The process stack in DPC is:
> 
>   dpc_handler()
>     dpc_process_error()
>       pci_aer_clear_status()
>     pcie_do_recovery()

These are accurate but they both include dpc_process_error(), so we
need a hint to show why the one here is different from the one in the
EDR path, e.g.,

  dpc_handler
    dpc_process_error
      if (reason == 0)
        pci_aer_clear_status    # uncorrectable errors only
    pcie_do_recovery

> In EDR path, AER status registers are cleared irrespective of whether
> the error was an RP PIO or unmasked uncorrectable error. But in DPC, the
> AER status registers are cleared only when it's an unmasked uncorrectable
> error.
> 
> This leads to two different behaviours for the same task (handling of
> DPC errors) in FFS systems and when native OS has control.

FFS?

I'd really like to have a specific example of how a user would observe
this difference.  I know you probably don't have two systems to
compare like that, but maybe we can work it out manually.

I guess you're saying the problem is in the native DPC handling, and
we don't clear the AER status registers for ERR_NONFATAL,
ERR_NONFATAL, etc., right?

I think the current behavior is from 8aefa9b0d910 ("PCI/DPC: Print AER
status in DPC event handling"), where Keith explicitly mentions those
cases.  The commit log here should connect back to that and explain
whether something has changed.

I cc'd Keith and the reviewers of that change in case any of them have
time to dig into this again.

> Bring the same semantics for clearing the AER status register in EDR
> path and DPC path.
> 
> Signed-off-by: Naveen Naidu <naveennaidu479@gmail.com>
> ---
>  drivers/pci/pcie/dpc.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c
> index faf4a1e77fab..68899a3db126 100644
> --- a/drivers/pci/pcie/dpc.c
> +++ b/drivers/pci/pcie/dpc.c
> @@ -288,7 +288,6 @@ void dpc_process_error(struct pci_dev *pdev)
>  		 dpc_get_aer_uncorrect_severity(pdev, &info) &&
>  		 aer_get_device_error_info(pdev, &info)) {
>  		aer_print_error(pdev, &info);
> -		pci_aer_clear_status(pdev);
>  	}
>  }
>  
> @@ -297,6 +296,7 @@ static irqreturn_t dpc_handler(int irq, void *context)
>  	struct pci_dev *pdev = context;
>  
>  	dpc_process_error(pdev);
> +	pci_aer_clear_status(pdev);
>  
>  	/* We configure DPC so it only triggers on ERR_FATAL */
>  	pcie_do_recovery(pdev, pci_channel_io_frozen, dpc_reset_link);
> -- 
> 2.25.1
> 
> _______________________________________________
> Linux-kernel-mentees mailing list
> Linux-kernel-mentees@lists.linuxfoundation.org
> https://lists.linuxfoundation.org/mailman/listinfo/linux-kernel-mentees

^ permalink raw reply

* Re: [PATCH v4 4/8] PCI/DPC: Use pci_aer_clear_status() in dpc_process_error()
From: Bjorn Helgaas @ 2021-10-21  1:40 UTC (permalink / raw)
  To: Naveen Naidu
  Cc: linux-pci, linux-kernel, oohall, bhelgaas, linuxppc-dev,
	linux-kernel-mentees
In-Reply-To: <f0d301cb1245a8e2fd9565426b87c22a97aa6de7.1633453452.git.naveennaidu479@gmail.com>

On Tue, Oct 05, 2021 at 10:48:11PM +0530, Naveen Naidu wrote:
> dpc_process_error() clears both AER fatal and non fatal status
> registers. Instead of clearing each status registers via a different
> function call use pci_aer_clear_status().
> 
> This helps clean up the code a bit.
> 
> Signed-off-by: Naveen Naidu <naveennaidu479@gmail.com>
> ---
>  drivers/pci/pcie/dpc.c | 3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)
> 
> diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c
> index df3f3a10f8bc..faf4a1e77fab 100644
> --- a/drivers/pci/pcie/dpc.c
> +++ b/drivers/pci/pcie/dpc.c
> @@ -288,8 +288,7 @@ void dpc_process_error(struct pci_dev *pdev)
>  		 dpc_get_aer_uncorrect_severity(pdev, &info) &&
>  		 aer_get_device_error_info(pdev, &info)) {
>  		aer_print_error(pdev, &info);
> -		pci_aer_clear_nonfatal_status(pdev);
> -		pci_aer_clear_fatal_status(pdev);
> +		pci_aer_clear_status(pdev);

The commit log suggests that this is a simple cleanup that doesn't
change any behavior, but that's not quite true:

  - The new code would clear PCI_ERR_ROOT_STATUS, but the old code
    does not.

  - The old code masks the status bits with the severity bits before
    clearing, but the new code does not.

The commit log needs to show why these changes are what we want.

>  	}
>  }
>  
> -- 
> 2.25.1
> 
> _______________________________________________
> Linux-kernel-mentees mailing list
> Linux-kernel-mentees@lists.linuxfoundation.org
> https://lists.linuxfoundation.org/mailman/listinfo/linux-kernel-mentees

^ permalink raw reply

* [powerpc:fixes-test] BUILD SUCCESS 787252a10d9422f3058df9a4821f389e5326c440
From: kernel test robot @ 2021-10-21  1:30 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: linuxppc-dev

tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git fixes-test
branch HEAD: 787252a10d9422f3058df9a4821f389e5326c440  powerpc/smp: do not decrement idle task preempt count in CPU offline

elapsed time: 729m

configs tested: 137
configs skipped: 97

The following configs have been built successfully.
More configs may be tested in the coming days.

gcc tested configs:
arm                                 defconfig
arm64                            allyesconfig
i386                 randconfig-c001-20211019
nios2                         3c120_defconfig
sparc                            alldefconfig
m68k                         amcore_defconfig
arm                      pxa255-idp_defconfig
powerpc                     taishan_defconfig
arc                          axs103_defconfig
powerpc                     mpc5200_defconfig
m68k                        mvme16x_defconfig
openrisc                            defconfig
h8300                     edosk2674_defconfig
powerpc                 mpc834x_itx_defconfig
mips                     decstation_defconfig
sh                             espt_defconfig
arc                           tb10x_defconfig
arm                         palmz72_defconfig
arm                             ezx_defconfig
h8300                            alldefconfig
sh                            hp6xx_defconfig
powerpc                  mpc885_ads_defconfig
mips                           ip27_defconfig
powerpc                      mgcoge_defconfig
powerpc                      obs600_defconfig
powerpc                  iss476-smp_defconfig
mips                      maltaaprp_defconfig
nds32                             allnoconfig
arm                         s3c6400_defconfig
powerpc                   lite5200b_defconfig
sh                           se7712_defconfig
arm                        vexpress_defconfig
riscv                    nommu_k210_defconfig
sh                 kfr2r09-romimage_defconfig
arm64                               defconfig
m68k                       bvme6000_defconfig
arm                        keystone_defconfig
h8300                               defconfig
mips                         rt305x_defconfig
mips                        workpad_defconfig
arm                          gemini_defconfig
arm                             mxs_defconfig
arm                        mini2440_defconfig
mips                      maltasmvp_defconfig
mips                             allyesconfig
arm                            mps2_defconfig
powerpc                     ppa8548_defconfig
arm                  randconfig-c002-20211019
x86_64               randconfig-c001-20211019
ia64                             allmodconfig
ia64                                defconfig
ia64                             allyesconfig
m68k                                defconfig
m68k                             allmodconfig
nios2                               defconfig
nds32                               defconfig
nios2                            allyesconfig
csky                                defconfig
alpha                               defconfig
alpha                            allyesconfig
xtensa                           allyesconfig
h8300                            allyesconfig
arc                                 defconfig
sh                               allmodconfig
parisc                              defconfig
s390                             allmodconfig
s390                                defconfig
i386                             allyesconfig
sparc                            allyesconfig
sparc                               defconfig
i386                                defconfig
i386                              debian-10.3
arc                              allyesconfig
mips                             allmodconfig
powerpc                          allyesconfig
powerpc                          allmodconfig
powerpc                           allnoconfig
i386                 randconfig-a004-20211020
i386                 randconfig-a003-20211020
i386                 randconfig-a002-20211020
i386                 randconfig-a005-20211020
i386                 randconfig-a006-20211020
i386                 randconfig-a001-20211020
x86_64               randconfig-a015-20211019
x86_64               randconfig-a012-20211019
x86_64               randconfig-a016-20211019
x86_64               randconfig-a014-20211019
x86_64               randconfig-a013-20211019
x86_64               randconfig-a011-20211019
i386                 randconfig-a014-20211019
i386                 randconfig-a016-20211019
i386                 randconfig-a011-20211019
i386                 randconfig-a015-20211019
i386                 randconfig-a012-20211019
i386                 randconfig-a013-20211019
riscv                            allyesconfig
riscv                    nommu_virt_defconfig
riscv                             allnoconfig
riscv                               defconfig
riscv                          rv32_defconfig
riscv                            allmodconfig
x86_64                    rhel-8.3-kselftests
um                           x86_64_defconfig
um                             i386_defconfig
x86_64                           allyesconfig
x86_64                              defconfig
x86_64                               rhel-8.3
x86_64                                  kexec

clang tested configs:
arm                  randconfig-c002-20211019
mips                 randconfig-c004-20211019
i386                 randconfig-c001-20211019
s390                 randconfig-c005-20211019
x86_64               randconfig-c007-20211019
riscv                randconfig-c006-20211019
powerpc              randconfig-c003-20211019
x86_64               randconfig-a004-20211019
x86_64               randconfig-a006-20211019
x86_64               randconfig-a005-20211019
x86_64               randconfig-a001-20211019
x86_64               randconfig-a002-20211019
x86_64               randconfig-a003-20211019
i386                 randconfig-a001-20211019
i386                 randconfig-a003-20211019
i386                 randconfig-a004-20211019
i386                 randconfig-a005-20211019
i386                 randconfig-a002-20211019
i386                 randconfig-a006-20211019
x86_64               randconfig-a013-20211020
x86_64               randconfig-a015-20211020
x86_64               randconfig-a011-20211020
x86_64               randconfig-a014-20211020
x86_64               randconfig-a016-20211020
x86_64               randconfig-a012-20211020
riscv                randconfig-r042-20211020
s390                 randconfig-r044-20211020
hexagon              randconfig-r045-20211020
hexagon              randconfig-r041-20211020
hexagon              randconfig-r041-20211019
hexagon              randconfig-r045-20211019

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

^ permalink raw reply

* Re: [PATCH v4 1/8] PCI/AER: Remove ID from aer_agent_string[]
From: Bjorn Helgaas @ 2021-10-21  1:28 UTC (permalink / raw)
  To: Naveen Naidu
  Cc: linux-pci, linux-kernel, oohall, bhelgaas, linuxppc-dev,
	linux-kernel-mentees
In-Reply-To: <22b2dae2a6ac340d9d45c28481d746ec1064cd6c.1633453452.git.naveennaidu479@gmail.com>

On Tue, Oct 05, 2021 at 10:48:08PM +0530, Naveen Naidu wrote:
> Currently, we do not print the "id" field in the AER error logs. Yet the
> aer_agent_string[] has the word "id" in it. The AER error log looks
> like:
> 
>   pcieport 0000:00:03.0: PCIe Bus Error: severity=Corrected, type=Data Link Layer, (Receiver ID)
> 
> Without the "id" field in the error log, The aer_agent_string[]
> (eg: "Receiver ID") does not make sense. A user reading the
> aer_agent_string[] in the log, might inadvertently look for an "id"
> field and not finding it might lead to confusion.
> 
> Remove the "ID" from the aer_agent_string[].
> 
> The following are sample dummy errors inject via aer-inject.

I like this, and the problem it fixes was my fault because
these "ID" strings should have been removed by 010caed4ccb6.

If it's straightforward enough, it would be nice to have the
aer-inject command line here in the commit log to make it easier
for people to play with this.

> Before
> =======
> 
> In 010caed4ccb6 ("PCI/AER: Decode Error Source Requester ID"),
> the "id" field was removed from the AER error logs, so currently AER
> logs look like:
> 
>   pcieport 0000:00:03.0: AER: Corrected error received: 0000:00:03:0
>   pcieport 0000:00:03.0: PCIe Bus Error: severity=Corrected, type=Data Link Layer, (Receiver ID) <--- no id field
>   pcieport 0000:00:03.0:   device [1b36:000c] error status/mask=00000040/0000e000
>   pcieport 0000:00:03.0:    [ 6] BadTLP
> 
> After
> ======
> 
>   pcieport 0000:00:03.0: AER: Corrected error received: 0000:00:03.0
>   pcieport 0000:00:03.0: PCIe Bus Error: severity=Corrected, type=Data Link Layer, (Receiver)
>   pcieport 0000:00:03.0:   device [1b36:000c] error status/mask=00000040/0000e000
>   pcieport 0000:00:03.0:    [ 6] BadTLP
> 
> Signed-off-by: Naveen Naidu <naveennaidu479@gmail.com>
> ---
>  drivers/pci/pcie/aer.c | 10 +++++-----
>  1 file changed, 5 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
> index 9784fdcf3006..241ff361b43c 100644
> --- a/drivers/pci/pcie/aer.c
> +++ b/drivers/pci/pcie/aer.c
> @@ -516,10 +516,10 @@ static const char *aer_uncorrectable_error_string[] = {
>  };
>  
>  static const char *aer_agent_string[] = {
> -	"Receiver ID",
> -	"Requester ID",
> -	"Completer ID",
> -	"Transmitter ID"
> +	"Receiver",
> +	"Requester",
> +	"Completer",
> +	"Transmitter"
>  };
>  
>  #define aer_stats_dev_attr(name, stats_array, strings_array,		\
> @@ -703,7 +703,7 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
>  	const char *level;
>  
>  	if (!info->status) {
> -		pci_err(dev, "PCIe Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent ID)\n",
> +		pci_err(dev, "PCIe Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent)\n",
>  			aer_error_severity_string[info->severity]);
>  		goto out;
>  	}
> -- 
> 2.25.1
> 
> _______________________________________________
> Linux-kernel-mentees mailing list
> Linux-kernel-mentees@lists.linuxfoundation.org
> https://lists.linuxfoundation.org/mailman/listinfo/linux-kernel-mentees

^ permalink raw reply

* Re: [PATCH v4 3/8] PCI/DPC: Initialize info->id in dpc_process_error()
From: Bjorn Helgaas @ 2021-10-21  1:28 UTC (permalink / raw)
  To: Naveen Naidu
  Cc: linux-pci, linux-kernel, oohall, bhelgaas, linuxppc-dev,
	linux-kernel-mentees
In-Reply-To: <5ebe87f18339d7567c1d91203e7c5d31f4e65c52.1633453452.git.naveennaidu479@gmail.com>

On Tue, Oct 05, 2021 at 10:48:10PM +0530, Naveen Naidu wrote:
> In the dpc_process_error() path, info->id isn't initialized before being
> passed to aer_print_error(). In the corresponding AER path, it is
> initialized in aer_isr_one_error().
> 
> The error message shown during Coverity Scan is:
> 
>   Coverity #1461602
>   CID 1461602 (#1 of 1): Uninitialized scalar variable (UNINIT)
>   8. uninit_use_in_call: Using uninitialized value info.id when calling aer_print_error.
> 
> Initialize the "info->id" before passing it to aer_print_error()
> 
> Fixes: 8aefa9b0d910 ("PCI/DPC: Print AER status in DPC event handling")
> Signed-off-by: Naveen Naidu <naveennaidu479@gmail.com>
> ---
>  drivers/pci/pcie/dpc.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c
> index c556e7beafe3..df3f3a10f8bc 100644
> --- a/drivers/pci/pcie/dpc.c
> +++ b/drivers/pci/pcie/dpc.c
> @@ -262,14 +262,14 @@ static int dpc_get_aer_uncorrect_severity(struct pci_dev *dev,
>  
>  void dpc_process_error(struct pci_dev *pdev)
>  {
> -	u16 cap = pdev->dpc_cap, status, source, reason, ext_reason;
> +	u16 cap = pdev->dpc_cap, status, reason, ext_reason;
>  	struct aer_err_info info;
>  
>  	pci_read_config_word(pdev, cap + PCI_EXP_DPC_STATUS, &status);
> -	pci_read_config_word(pdev, cap + PCI_EXP_DPC_SOURCE_ID, &source);
> +	pci_read_config_word(pdev, cap + PCI_EXP_DPC_SOURCE_ID, &info.id);
>  
>  	pci_info(pdev, "containment event, status:%#06x source:%#06x\n",
> -		 status, source);
> +		 status, info.id);
>  
>  	reason = (status & PCI_EXP_DPC_STATUS_TRIGGER_RSN) >> 1;

Per PCIe r5.0, sec 7.9.15.5, the Source ID is defined only when the
Trigger Reason indicates ERR_NONFATAL or ERR_FATAL.  So I think we
need to extract this reason before reading PCI_EXP_DPC_SOURCE_ID,
e.g.,

  reason = (status & PCI_EXP_DPC_STATUS_TRIGGER_RSN) >> 1;
  if (reason == 1 || reason == 2)
    pci_read_config_word(pdev, cap + PCI_EXP_DPC_SOURCE_ID, &info.id);
  else
    info.id = 0;

>  	ext_reason = (status & PCI_EXP_DPC_STATUS_TRIGGER_RSN_EXT) >> 5;
> -- 
> 2.25.1
> 
> _______________________________________________
> Linux-kernel-mentees mailing list
> Linux-kernel-mentees@lists.linuxfoundation.org
> https://lists.linuxfoundation.org/mailman/listinfo/linux-kernel-mentees

^ permalink raw reply

* Re: [PATCH] KVM: PPC: Replace zero-length array with flexible array member
From: Gustavo A. R. Silva @ 2021-10-20 23:28 UTC (permalink / raw)
  To: Gustavo A. R. Silva
  Cc: Kees Cook, linux-kernel, kvm-ppc, Len Baker, linux-hardening,
	linuxppc-dev
In-Reply-To: <bb4faf3a-9fe9-280b-cb4c-e4904b0b2a8f@embeddedor.com>

On Mon, Sep 20, 2021 at 07:05:07PM -0500, Gustavo A. R. Silva wrote:
> 
> 
> On 9/18/21 09:21, Len Baker wrote:
> > There is a regular need in the kernel to provide a way to declare having
> > a dynamically sized set of trailing elements in a structure. Kernel code
> > should always use "flexible array members" [1] for these cases. The
> > older style of one-element or zero-length arrays should no longer be
> > used[2].
> > 
> > Also, make use of the struct_size() helper in kzalloc().
> > 
> > [1] https://en.wikipedia.org/wiki/Flexible_array_member
> > [2] https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-length-and-one-element-arrays
> > 
> > Signed-off-by: Len Baker <len.baker@gmx.com>
> 
> Reviewed-by: Gustavo A. R. Silva <gustavoars@kernel.org>

I'm taking this in my -next tree.

Thanks
--
Gustavo

^ permalink raw reply

* [PATCH 21/20] signal: Replace force_sigsegv(SIGSEGV) with force_fatal_sig(SIGSEGV)
From: Eric W. Biederman @ 2021-10-20 21:51 UTC (permalink / raw)
  To: linux-kernel
  Cc: Rich Felker, linux-xtensa, linux-mips, Max Filippov,
	Paul Mackerras, H Peter Anvin, sparclinux, Vincent Chen,
	Thomas Gleixner, linux-arch, linux-s390, Yoshinori Sato, linux-sh,
	Christian Borntraeger, Ingo Molnar, Jonas Bonn, Kees Cook,
	Vasily Gorbik, Heiko Carstens, Stefan Kristiansson, openrisc,
	Borislav Petkov, Al Viro, Andy Lutomirski, Stafford Horne,
	Chris Zankel, Thomas Bogendoerfer, Nick Hu, linuxppc-dev,
	Oleg Nesterov, Greg Kroah-Hartman, Maciej Rozycki, Linus Torvalds,
	David Miller, Greentime Hu
In-Reply-To: <87y26nmwkb.fsf@disp2133>


Now that force_fatal_sig exists it is unnecessary and a bit confusing
to use force_sigsegv in cases where the simpler force_fatal_sig is
wanted.  So change every instance we can to make the code clearer.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 arch/arc/kernel/process.c       | 2 +-
 arch/m68k/kernel/traps.c        | 2 +-
 arch/powerpc/kernel/signal_32.c | 2 +-
 arch/powerpc/kernel/signal_64.c | 4 ++--
 arch/s390/kernel/traps.c        | 2 +-
 arch/um/kernel/trap.c           | 2 +-
 arch/x86/kernel/vm86_32.c       | 2 +-
 fs/exec.c                       | 2 +-
 8 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c
index 3793876f42d9..8e90052f6f05 100644
--- a/arch/arc/kernel/process.c
+++ b/arch/arc/kernel/process.c
@@ -294,7 +294,7 @@ int elf_check_arch(const struct elf32_hdr *x)
 	eflags = x->e_flags;
 	if ((eflags & EF_ARC_OSABI_MSK) != EF_ARC_OSABI_CURRENT) {
 		pr_err("ABI mismatch - you need newer toolchain\n");
-		force_sigsegv(SIGSEGV);
+		force_fatal_sig(SIGSEGV);
 		return 0;
 	}
 
diff --git a/arch/m68k/kernel/traps.c b/arch/m68k/kernel/traps.c
index 5b19fcdcd69e..74045d164ddb 100644
--- a/arch/m68k/kernel/traps.c
+++ b/arch/m68k/kernel/traps.c
@@ -1150,7 +1150,7 @@ asmlinkage void set_esp0(unsigned long ssp)
  */
 asmlinkage void fpsp040_die(void)
 {
-	force_sigsegv(SIGSEGV);
+	force_fatal_sig(SIGSEGV);
 }
 
 #ifdef CONFIG_M68KFPU_EMU
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 666f3da41232..933ab95805a6 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -1063,7 +1063,7 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
 	 * We kill the task with a SIGSEGV in this situation.
 	 */
 	if (do_setcontext(new_ctx, regs, 0)) {
-		force_sigsegv(SIGSEGV);
+		force_fatal_sig(SIGSEGV);
 		return -EFAULT;
 	}
 
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index d8de622c9e4a..8ead9b3f47c6 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -704,7 +704,7 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
 	 */
 
 	if (__get_user_sigset(&set, &new_ctx->uc_sigmask)) {
-		force_sigsegv(SIGSEGV);
+		force_fatal_sig(SIGSEGV);
 		return -EFAULT;
 	}
 	set_current_blocked(&set);
@@ -713,7 +713,7 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
 		return -EFAULT;
 	if (__unsafe_restore_sigcontext(current, NULL, 0, &new_ctx->uc_mcontext)) {
 		user_read_access_end();
-		force_sigsegv(SIGSEGV);
+		force_fatal_sig(SIGSEGV);
 		return -EFAULT;
 	}
 	user_read_access_end();
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index 51729ea2cf8e..01a7c68dcfb6 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -84,7 +84,7 @@ static void default_trap_handler(struct pt_regs *regs)
 {
 	if (user_mode(regs)) {
 		report_user_fault(regs, SIGSEGV, 0);
-		force_sigsegv(SIGSEGV);
+		force_fatal_sig(SIGSEGV);
 	} else
 		die(regs, "Unknown program exception");
 }
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index 3198c4767387..c32efb09db21 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -158,7 +158,7 @@ static void bad_segv(struct faultinfo fi, unsigned long ip)
 
 void fatal_sigsegv(void)
 {
-	force_sigsegv(SIGSEGV);
+	force_fatal_sig(SIGSEGV);
 	do_signal(&current->thread.regs);
 	/*
 	 * This is to tell gcc that we're not returning - do_signal
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 040fd01be8b3..7ff0f622abd4 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -159,7 +159,7 @@ void save_v86_state(struct kernel_vm86_regs *regs, int retval)
 	user_access_end();
 Efault:
 	pr_alert("could not access userspace vm86 info\n");
-	force_sigsegv(SIGSEGV);
+	force_fatal_sig(SIGSEGV);
 }
 
 static int do_vm86_irq_handling(int subfunction, int irqnumber);
diff --git a/fs/exec.c b/fs/exec.c
index a098c133d8d7..ac7b51b51f38 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1852,7 +1852,7 @@ static int bprm_execve(struct linux_binprm *bprm,
 	 * SIGSEGV.
 	 */
 	if (bprm->point_of_no_return && !fatal_signal_pending(current))
-		force_sigsegv(SIGSEGV);
+		force_fatal_sig(SIGSEGV);
 
 out_unmark:
 	current->fs->in_exec = 0;
-- 
2.20.1


^ permalink raw reply related

* [PATCH v2] powerpc/pseries/mobility: ignore ibm, platform-facilities updates
From: Nathan Lynch @ 2021-10-20 19:47 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: tyreld, cheloha, ldufour

On VMs with NX encryption, compression, and/or RNG offload, these
capabilities are described by nodes in the ibm,platform-facilities device
tree hierarchy:

  $ tree -d /sys/firmware/devicetree/base/ibm,platform-facilities/
  /sys/firmware/devicetree/base/ibm,platform-facilities/
  ├── ibm,compression-v1
  ├── ibm,random-v1
  └── ibm,sym-encryption-v1

  3 directories

The acceleration functions that these nodes describe are not disrupted by
live migration, not even temporarily.

But the post-migration ibm,update-nodes sequence firmware always sends
"delete" messages for this hierarchy, followed by an "add" directive to
reconstruct it via ibm,configure-connector (log with debugging statements
enabled in mobility.c):

  mobility: removing node /ibm,platform-facilities/ibm,random-v1:4294967285
  mobility: removing node /ibm,platform-facilities/ibm,compression-v1:4294967284
  mobility: removing node /ibm,platform-facilities/ibm,sym-encryption-v1:4294967283
  mobility: removing node /ibm,platform-facilities:4294967286
  ...
  mobility: added node /ibm,platform-facilities:4294967286

Note we receive a single "add" message for the entire hierarchy, and what
we receive from the ibm,configure-connector sequence is the top-level
platform-facilities node along with its three children. The debug message
simply reports the parent node and not the whole subtree.

Also, significantly, the nodes added are almost completely equivalent to
the ones removed; even phandles are unchanged. ibm,shared-interrupt-pool in
the leaf nodes is the only property I've observed to differ, and Linux does
not use that. So in practice, the sum of update messages Linux receives for
this hierarchy is equivalent to minor property updates.

We succeed in removing the original hierarchy from the device tree. But the
vio bus code is ignorant of this, and does not unbind or relinquish its
references. The leaf nodes, still reachable through sysfs, of course still
refer to the now-freed ibm,platform-facilities parent node, which makes
use-after-free possible:

  refcount_t: addition on 0; use-after-free.
  WARNING: CPU: 3 PID: 1706 at lib/refcount.c:25 refcount_warn_saturate+0x164/0x1f0
  refcount_warn_saturate+0x160/0x1f0 (unreliable)
  kobject_get+0xf0/0x100
  of_node_get+0x30/0x50
  of_get_parent+0x50/0xb0
  of_fwnode_get_parent+0x54/0x90
  fwnode_count_parents+0x50/0x150
  fwnode_full_name_string+0x30/0x110
  device_node_string+0x49c/0x790
  vsnprintf+0x1c0/0x4c0
  sprintf+0x44/0x60
  devspec_show+0x34/0x50
  dev_attr_show+0x40/0xa0
  sysfs_kf_seq_show+0xbc/0x200
  kernfs_seq_show+0x44/0x60
  seq_read_iter+0x2a4/0x740
  kernfs_fop_read_iter+0x254/0x2e0
  new_sync_read+0x120/0x190
  vfs_read+0x1d0/0x240

Moreover, the "new" replacement subtree is not correctly added to the
device tree, resulting in ibm,platform-facilities parent node without the
appropriate leaf nodes, and broken symlinks in the sysfs device hierarchy:

  $ tree -d /sys/firmware/devicetree/base/ibm,platform-facilities/
  /sys/firmware/devicetree/base/ibm,platform-facilities/

  0 directories

  $ cd /sys/devices/vio ; find . -xtype l -exec file {} +
  ./ibm,sym-encryption-v1/of_node: broken symbolic link to
    ../../../firmware/devicetree/base/ibm,platform-facilities/ibm,sym-encryption-v1
  ./ibm,random-v1/of_node:         broken symbolic link to
    ../../../firmware/devicetree/base/ibm,platform-facilities/ibm,random-v1
  ./ibm,compression-v1/of_node:    broken symbolic link to
    ../../../firmware/devicetree/base/ibm,platform-facilities/ibm,compression-v1

This is because add_dt_node() -> dlpar_attach_node() attaches only the
parent node returned from configure-connector, ignoring any children. This
should be corrected for the general case, but fixing that won't help with
the stale OF node references, which is the more urgent problem.

One way to address that would be to make the drivers respond to node
removal notifications, so that node references can be dropped
appropriately. But this would likely force the drivers to disrupt active
clients for no useful purpose: equivalent nodes are immediately re-added.
And recall that the acceleration capabilities described by the nodes remain
available throughout the whole process.

The solution I believe to be robust for this situation is to convert
remove+add of a node with an unchanged phandle to an update of the node's
properties in the Linux device tree structure. That would involve changing
and adding a fair amount of code, and may take several iterations to land.

Until that can be realized we have a confirmed use-after-free and the
possibility of memory corruption. So add a limited workaround that
discriminates on the node type, ignoring adds and removes. This should be
amenable to backporting in the meantime.

Signed-off-by: Nathan Lynch <nathanl@linux.ibm.com>
Fixes: 410bccf97881 ("powerpc/pseries: Partition migration in the kernel")
Cc: stable@vger.kernel.org
---

Notes:
    Changes since v1:
    
    * Clarify that the vio bus code maintains references to removed nodes, per
      Tyrel.

 arch/powerpc/platforms/pseries/mobility.c | 34 +++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
index e83e0891272d..210a37a065fb 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -63,6 +63,27 @@ static int mobility_rtas_call(int token, char *buf, s32 scope)
 
 static int delete_dt_node(struct device_node *dn)
 {
+	struct device_node *pdn;
+	bool is_platfac;
+
+	pdn = of_get_parent(dn);
+	is_platfac = of_node_is_type(dn, "ibm,platform-facilities") ||
+		     of_node_is_type(pdn, "ibm,platform-facilities");
+	of_node_put(pdn);
+
+	/*
+	 * The drivers that bind to nodes in the platform-facilities
+	 * hierarchy don't support node removal, and the removal directive
+	 * from firmware is always followed by an add of an equivalent
+	 * node. The capability (e.g. RNG, encryption, compression)
+	 * represented by the node is never interrupted by the migration.
+	 * So ignore changes to this part of the tree.
+	 */
+	if (is_platfac) {
+		pr_notice("ignoring remove operation for %pOFfp\n", dn);
+		return 0;
+	}
+
 	pr_debug("removing node %pOFfp\n", dn);
 	dlpar_detach_node(dn);
 	return 0;
@@ -222,6 +243,19 @@ static int add_dt_node(struct device_node *parent_dn, __be32 drc_index)
 	if (!dn)
 		return -ENOENT;
 
+	/*
+	 * Since delete_dt_node() ignores this node type, this is the
+	 * necessary counterpart. We also know that a platform-facilities
+	 * node returned from dlpar_configure_connector() has children
+	 * attached, and dlpar_attach_node() only adds the parent, leaking
+	 * the children. So ignore these on the add side for now.
+	 */
+	if (of_node_is_type(dn, "ibm,platform-facilities")) {
+		pr_notice("ignoring add operation for %pOF\n", dn);
+		dlpar_free_cc_nodes(dn);
+		return 0;
+	}
+
 	rc = dlpar_attach_node(dn, parent_dn);
 	if (rc)
 		dlpar_free_cc_nodes(dn);
-- 
2.31.1


^ permalink raw reply related

* Re: [PATCH 00/12] DT: CPU h/w id parsing clean-ups and cacheinfo id support
From: Rob Herring @ 2021-10-20 18:47 UTC (permalink / raw)
  To: Russell King, James Morse, Catalin Marinas, Will Deacon, Guo Ren,
	Jonas Bonn, Stefan Kristiansson, Stafford Horne, Michael Ellerman,
	Paul Walmsley, Palmer Dabbelt, Albert Ou, Yoshinori Sato,
	Rich Felker, X86 ML, Greg Kroah-Hartman
  Cc: devicetree, Florian Fainelli, Scott Branden, Rafael J. Wysocki,
	SH-Linux, Ray Jui, H. Peter Anvin, linux-kernel@vger.kernel.org,
	linux-csky, Openrisc, linuxppc-dev, Ingo Molnar, Paul Mackerras,
	Borislav Petkov, maintainer:BROADCOM BCM7XXX ARM ARCHITECTURE,
	Thomas Gleixner, Frank Rowand, linux-riscv, linux-arm-kernel
In-Reply-To: <20211006164332.1981454-1-robh@kernel.org>

On Wed, Oct 6, 2021 at 11:43 AM Rob Herring <robh@kernel.org> wrote:
>
> The first 10 patches add a new function, of_get_cpu_hwid(), which parses
> CPU DT node 'reg' property, and then use it to replace all the open
> coded versions of parsing CPU node 'reg' properties.
>
> The last 2 patches add support for populating the cacheinfo 'id' on DT
> platforms. The minimum associated CPU hwid is used for the id. The id is
> optional, but necessary for resctrl which is being adapted for Arm MPAM.
>
> Tested on arm64. Compile tested on arm, x86 and powerpc.
>
> Rob
>
> Rob Herring (12):
>   of: Add of_get_cpu_hwid() to read hardware ID from CPU nodes
>   ARM: Use of_get_cpu_hwid()
>   ARM: broadcom: Use of_get_cpu_hwid()
>   arm64: Use of_get_cpu_hwid()
>   csky: Use of_get_cpu_hwid()
>   openrisc: Use of_get_cpu_hwid()
>   powerpc: Use of_get_cpu_hwid()
>   riscv: Use of_get_cpu_hwid()
>   sh: Use of_get_cpu_hwid()
>   x86: dt: Use of_get_cpu_hwid()
>   cacheinfo: Allow for >32-bit cache 'id'
>   cacheinfo: Set cache 'id' based on DT data

I've fixed up the openrisc error and applied 1-10 to the DT tree.

The cacheinfo part is going to need some more work. I've found I will
need the cache affinity (of possible cpus) as well, so I plan to also
store the affinity instead of looping thru caches and cpus again.

Rob

^ permalink raw reply

* [PATCH 00/20] exit cleanups
From: Eric W. Biederman @ 2021-10-20 17:32 UTC (permalink / raw)
  To: linux-kernel
  Cc: Rich Felker, linux-xtensa, linux-mips, Max Filippov,
	Paul Mackerras, H Peter Anvin, sparclinux, Vincent Chen,
	Thomas Gleixner, linux-arch, linux-s390, Yoshinori Sato, linux-sh,
	Christian Borntraeger, Ingo Molnar, Jonas Bonn, Kees Cook,
	Vasily Gorbik, Heiko Carstens, Stefan Kristiansson, openrisc,
	Borislav Petkov, Al Viro, Andy Lutomirski, Stafford Horne,
	Chris Zankel, Thomas Bogendoerfer, Nick Hu, linuxppc-dev,
	Oleg Nesterov, Greg Kroah-Hartman, Maciej Rozycki, Linus Torvalds,
	David Miller, Greentime Hu


While looking at some issues related to the exit path in the kernel I
found several instances where the code is not using the existing
abstractions properly.

This set of changes introduces force_fatal_sig a way of sending
a signal and not allowing it to be caught, and corrects the
misuse of the existing abstractions that I found.

A lot of the misuse of the existing abstractions are silly things such
as doing something after calling a no return function, rolling BUG by
hand, doing more work than necessary to terminate a kernel thread, or
calling do_exit(SIGKILL) instead of calling force_sig(SIGKILL).

It is my plan after sending all of these changes out for review to place
them in a topic branch for sending Linus.  Especially for the changes
that depend upon the new helper force_fatal_sig this is important.

Eric W. Biederman (20):
      exit/doublefault: Remove apparently bogus comment about rewind_stack_do_exit
      exit: Remove calls of do_exit after noreturn versions of die
      reboot: Remove the unreachable panic after do_exit in reboot(2)
      signal/sparc32: Remove unreachable do_exit in do_sparc_fault
      signal/mips: Update (_save|_restore)_fp_context to fail with -EFAULT
      signal/sh: Use force_sig(SIGKILL) instead of do_group_exit(SIGKILL)
      signal/powerpc: On swapcontext failure force SIGSEGV
      signal/sparc: In setup_tsb_params convert open coded BUG into BUG
      signal/vm86_32: Replace open coded BUG_ON with an actual BUG_ON
      signal/vm86_32: Properly send SIGSEGV when the vm86 state cannot be saved.
      signal/s390: Use force_sigsegv in default_trap_handler
      exit/kthread: Have kernel threads return instead of calling do_exit
      signal: Implement force_fatal_sig
      exit/syscall_user_dispatch: Send ordinary signals on failure
      signal/sparc32: Exit with a fatal signal when try_to_clear_window_buffer fails
      signal/sparc32: In setup_rt_frame and setup_fram use force_fatal_sig
      signal/x86: In emulate_vsyscall force a signal instead of calling do_exit
      exit/rtl8723bs: Replace the macro thread_exit with a simple return 0
      exit/rtl8712: Replace the macro thread_exit with a simple return 0
      exit/r8188eu: Replace the macro thread_exit with a simple return 0

 arch/mips/kernel/r2300_fpu.S                       |  4 ++--
 arch/mips/kernel/syscall.c                         |  9 --------
 arch/nds32/kernel/traps.c                          |  2 +-
 arch/nds32/mm/fault.c                              |  6 +----
 arch/openrisc/kernel/traps.c                       |  2 +-
 arch/openrisc/mm/fault.c                           |  4 +---
 arch/powerpc/kernel/signal_32.c                    |  6 +++--
 arch/powerpc/kernel/signal_64.c                    |  9 +++++---
 arch/s390/include/asm/kdebug.h                     |  2 +-
 arch/s390/kernel/dumpstack.c                       |  2 +-
 arch/s390/kernel/traps.c                           |  2 +-
 arch/s390/mm/fault.c                               |  2 --
 arch/sh/kernel/cpu/fpu.c                           | 10 +++++----
 arch/sh/kernel/traps.c                             |  2 +-
 arch/sh/mm/fault.c                                 |  2 --
 arch/sparc/kernel/signal_32.c                      |  4 ++--
 arch/sparc/kernel/windows.c                        |  6 +++--
 arch/sparc/mm/fault_32.c                           |  1 -
 arch/sparc/mm/tsb.c                                |  2 +-
 arch/x86/entry/vsyscall/vsyscall_64.c              |  3 ++-
 arch/x86/kernel/doublefault_32.c                   |  3 ---
 arch/x86/kernel/signal.c                           |  6 ++++-
 arch/x86/kernel/vm86_32.c                          |  8 +++----
 arch/xtensa/kernel/traps.c                         |  2 +-
 arch/xtensa/mm/fault.c                             |  3 +--
 drivers/firmware/stratix10-svc.c                   |  4 ++--
 drivers/soc/ti/wkup_m3_ipc.c                       |  2 +-
 drivers/staging/r8188eu/core/rtw_cmd.c             |  2 +-
 drivers/staging/r8188eu/core/rtw_mp.c              |  2 +-
 drivers/staging/r8188eu/include/osdep_service.h    |  2 --
 drivers/staging/rtl8712/osdep_service.h            |  1 -
 drivers/staging/rtl8712/rtl8712_cmd.c              |  2 +-
 drivers/staging/rtl8723bs/core/rtw_cmd.c           |  2 +-
 drivers/staging/rtl8723bs/core/rtw_xmit.c          |  2 +-
 drivers/staging/rtl8723bs/hal/rtl8723bs_xmit.c     |  2 +-
 .../rtl8723bs/include/osdep_service_linux.h        |  2 --
 fs/ocfs2/journal.c                                 |  5 +----
 include/linux/sched/signal.h                       |  1 +
 kernel/entry/syscall_user_dispatch.c               | 12 ++++++----
 kernel/kthread.c                                   |  2 +-
 kernel/reboot.c                                    |  1 -
 kernel/signal.c                                    | 26 ++++++++++++++--------
 net/batman-adv/tp_meter.c                          |  2 +-
 43 files changed, 83 insertions(+), 91 deletions(-)

Eric

^ permalink raw reply

* [PATCH 07/20] signal/powerpc: On swapcontext failure force SIGSEGV
From: Eric W. Biederman @ 2021-10-20 17:43 UTC (permalink / raw)
  To: linux-kernel
  Cc: linux-arch, Kees Cook, linuxppc-dev, Oleg Nesterov,
	Paul Mackerras, Eric W. Biederman, Linus Torvalds, Al Viro
In-Reply-To: <87y26nmwkb.fsf@disp2133>

If the register state may be partial and corrupted instead of calling
do_exit, call force_sigsegv(SIGSEGV).  Which properly kills the
process with SIGSEGV and does not let any more userspace code execute,
instead of just killing one thread of the process and potentially
confusing everything.

Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: linuxppc-dev@lists.ozlabs.org
History-tree: git://git.kernel.org/pub/scm/linux/kernel/git/tglx/history.git
Fixes: 756f1ae8a44e ("PPC32: Rework signal code and add a swapcontext system call.")
Fixes: 04879b04bf50 ("[PATCH] ppc64: VMX (Altivec) support & signal32 rework, from Ben Herrenschmidt")
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 arch/powerpc/kernel/signal_32.c | 6 ++++--
 arch/powerpc/kernel/signal_64.c | 9 ++++++---
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 0608581967f0..666f3da41232 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -1062,8 +1062,10 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
 	 * or if another thread unmaps the region containing the context.
 	 * We kill the task with a SIGSEGV in this situation.
 	 */
-	if (do_setcontext(new_ctx, regs, 0))
-		do_exit(SIGSEGV);
+	if (do_setcontext(new_ctx, regs, 0)) {
+		force_sigsegv(SIGSEGV);
+		return -EFAULT;
+	}
 
 	set_thread_flag(TIF_RESTOREALL);
 	return 0;
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 1831bba0582e..d8de622c9e4a 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -703,15 +703,18 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
 	 * We kill the task with a SIGSEGV in this situation.
 	 */
 
-	if (__get_user_sigset(&set, &new_ctx->uc_sigmask))
-		do_exit(SIGSEGV);
+	if (__get_user_sigset(&set, &new_ctx->uc_sigmask)) {
+		force_sigsegv(SIGSEGV);
+		return -EFAULT;
+	}
 	set_current_blocked(&set);
 
 	if (!user_read_access_begin(new_ctx, ctx_size))
 		return -EFAULT;
 	if (__unsafe_restore_sigcontext(current, NULL, 0, &new_ctx->uc_mcontext)) {
 		user_read_access_end();
-		do_exit(SIGSEGV);
+		force_sigsegv(SIGSEGV);
+		return -EFAULT;
 	}
 	user_read_access_end();
 
-- 
2.20.1


^ permalink raw reply related

* Re: [PATCH] powerpc/pseries/mobility: ignore ibm, platform-facilities updates
From: Tyrel Datwyler @ 2021-10-20 17:34 UTC (permalink / raw)
  To: Nathan Lynch; +Cc: cheloha, ldufour, linuxppc-dev
In-Reply-To: <87r1cfy9m8.fsf@linux.ibm.com>

On 10/20/21 8:54 AM, Nathan Lynch wrote:
> Tyrel Datwyler <tyreld@linux.ibm.com> writes:
>> On 10/19/21 2:36 PM, Nathan Lynch wrote:
>>> Tyrel Datwyler <tyreld@linux.ibm.com> writes:
>>>> On 10/18/21 9:34 AM, Nathan Lynch wrote:
>>>>> On VMs with NX encryption, compression, and/or RNG offload, these
>>>>> capabilities are described by nodes in the ibm,platform-facilities device
>>>>> tree hierarchy:
>>>>>
>>>>>   $ tree -d /sys/firmware/devicetree/base/ibm,platform-facilities/
>>>>>   /sys/firmware/devicetree/base/ibm,platform-facilities/
>>>>>   ├── ibm,compression-v1
>>>>>   ├── ibm,random-v1
>>>>>   └── ibm,sym-encryption-v1
>>>>>
>>>>>   3 directories
>>>>>
>>>>> The acceleration functions that these nodes describe are not disrupted by
>>>>> live migration, not even temporarily.
>>>>>
>>>>> But the post-migration ibm,update-nodes sequence firmware always sends
>>>>> "delete" messages for this hierarchy, followed by an "add" directive to
>>>>> reconstruct it via ibm,configure-connector (log with debugging statements
>>>>> enabled in mobility.c):
>>>>>
>>>>>   mobility: removing node /ibm,platform-facilities/ibm,random-v1:4294967285
>>>>>   mobility: removing node /ibm,platform-facilities/ibm,compression-v1:4294967284
>>>>>   mobility: removing node /ibm,platform-facilities/ibm,sym-encryption-v1:4294967283
>>>>>   mobility: removing node /ibm,platform-facilities:4294967286
>>>>>   ...
>>>>>   mobility: added node /ibm,platform-facilities:4294967286
>>>>
>>>> It always bothered me that the update from firmware here was an delete/add at
>>>> the entire '/ibm,platform-facilities' node level instead of update properties
>>>> calls. When I asked about it years ago the claim was it was just easier to pass
>>>> an entire sub-tree as a single node add.
>>>
>>> Yes, I believe this is for ease of implementation on their part.
>>
>> I'd still argue that a full node removal or addition is a platform
>> reconfiguration on par with a hotplug operation.
> 
> Well... I think we might be better served by a slightly more flexible
> view of things :-) These are just a series of messages from firmware
> that should be considered as a whole, and they don't dictate exactly
> what the OS must do to its own data structures. Nothing mandates that
> the OS immediately and literally apply the changes as they are
> individually reported by the update-nodes sequence.

Agree to disagree. Not saying we can do much about it here, but I think being
flexible leads to scope creep down the road that may eventually complicate
things worse. A node removal doesn't guarantee an immediate node addition. So we
are just papering over the issue with a plan to paper over it some more in a
more complicated fashion.

> 
> Anyway, whether the firmware behavior is reasonable is sort of beside
> the point since we're stuck with it.

I'm aware. Moaning for the sake of moaning about it.

> 
> 
>>>>> This is because add_dt_node() -> dlpar_attach_node() attaches only the
>>>>> parent node returned from configure-connector, ignoring any children. This
>>>>> should be corrected for the general case, but fixing that won't help with
>>>>> the stale OF node references, which is the more urgent problem.
>>>>
>>>> I don't follow. If the code path you mention is truly broken in the way you say
>>>> then DLPAR operations involving nodes with child nodes should also be
>>>> broken.
>>>
>>> Hmm I don't know of any kernel-based DLPAR workflow that attaches
>>> sub-trees i.e. parent + children. Processor addition attaches CPU nodes
>>> and possibly cache nodes, but they have sibling relationships. If you're
>>> thinking of I/O adapters, the DT updates are (still!) driven from user
>>> space. As undesirable as that is, that use case actually works correctly
>>> AFAIK.
>>>
>>> What happens for the platfac LPM scenario is that
>>> dlpar_configure_connector() returns the node pointer for the root
>>> ibm,platform-facilities node, with children attached. That node pointer
>>> is passed from add_dt_node() -> dlpar_attach_node() -> of_attach_node()
>>> -> __of_attach_node(), where its child and sibling fields are
>>> overwritten in the process of attaching it to the tree.
>>
>> Well it worked back in 2013 when I got all the in kernel device tree update
>> stuff working. Looks like I missed this one which now explains one area where
>> platform-facilities update originally went to shit.
>>
>> commit 6162dbe49a451f96431a23b4821f05e3bd925bc1
>> Author: Grant Likely <grant.likely@linaro.org>
>> Date:   Wed Jul 16 08:48:46 2014 -0600
>>
>>     of: Make sure attached nodes don't carry along extra children
>>
>>     The child pointer does not get cleared when attaching new nodes which
>>     could cause the tree to be inconsistent. Clear the child pointer in
>>     __of_attach_node() to be absolutely sure that the structure remains in a
>>     consistent layout.
>>
>>     Signed-off-by: Grant Likely <grant.likely@linaro.org>
>>
>> diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c
>> index c875787fa394..b96d83100987 100644
>> --- a/drivers/of/dynamic.c
>> +++ b/drivers/of/dynamic.c
>> @@ -98,6 +98,7 @@ int of_property_notify(int action, struct device_node *np,
>>
>>  void __of_attach_node(struct device_node *np)
>>  {
>> +       np->child = NULL;
>>         np->sibling = np->parent->child;
>>         np->allnext = np->parent->allnext;
>>         np->parent->allnext = np;
>>
>> Not sure what the reasoning was here since this prevents attaching a node that
>> is a sub tree. Either need to revert that, rewrite dlpar_attach_node to iterate
>> over the sub-tree, or probably best rewrite dlpar_configure_connector to use a
>> of_changeset instead.
> 
> Good find. I'd guess that adding a subtree used to sort of work, except
> that children of np were not added to the allnext list, which would
> effectively hide them from some lookups.

Pretty sure allnodes/allnext list is a relic of the past. It guaranteed depth
first traversal after boot, but dynamic nodes broke that guarantee. It was
removed because you can make the same traversal via the parent<->child lists.

> 
> Regardless, yes, the pseries code needs to change to attach and detach
> nodes individually. I don't think the OF core supports more complex
> changes.
> 

Indeed. I clearly capitalized on the behavior at the time while missing the
intended usage.

-Tyrel




^ permalink raw reply

* Re: [PATCH] PCI/hotplug: Remove unneeded of_node_put() in pnv_php
From: Tyrel Datwyler @ 2021-10-20 16:57 UTC (permalink / raw)
  To: Nathan Lynch, Wan Jiabing
  Cc: ajd, linux-pci, linux-kernel, Paul Mackerras, Bjorn Helgaas,
	kael_w, linuxppc-dev
In-Reply-To: <87tuhcx6v6.fsf@linux.ibm.com>

On 10/20/21 4:39 AM, Nathan Lynch wrote:
> Wan Jiabing <wanjiabing@vivo.com> writes:
>> Fix following coccicheck warning:
>> ./drivers/pci/hotplug/pnv_php.c:161:2-13: ERROR: probable double put.
>>
>> Device node iterators put the previous value of the index variable, so
>> an explicit put causes a double put.
> 
> I suppose Coccinelle doesn't take into account that this code is
> detaching and freeing the nodes.
> 
> 
>> diff --git a/drivers/pci/hotplug/pnv_php.c b/drivers/pci/hotplug/pnv_php.c
>> index f4c2e6e01be0..f3da4f95d73f 100644
>> --- a/drivers/pci/hotplug/pnv_php.c
>> +++ b/drivers/pci/hotplug/pnv_php.c
>> @@ -158,7 +158,6 @@ static void pnv_php_detach_device_nodes(struct device_node *parent)
>>  	for_each_child_of_node(parent, dn) {
>>  		pnv_php_detach_device_nodes(dn);
>>  
>> -		of_node_put(dn);
>>  		of_detach_node(dn);
>>  	}
> 
> The code might be improved by comments explaining how the bare
> of_node_put() corresponds to a "get" somewhere else in the driver, and
> how it doesn't render the ongoing traversal unsafe. It looks suspicious
> on first review, but I believe it's intentional and probably correct as
> written.
> 

This is a common usage pattern which if we put a comment about the pattern here
we need to do it every where. I suppose a better solution is to wrap this put in
a more descriptive function name like of_node_long_put() or something of the
sort the makes it obvious we are dropping a long held global scope reference.

-Tyrel

^ permalink raw reply

* Re: [PATCH] PCI/hotplug: Remove unneeded of_node_put() in pnv_php
From: Tyrel Datwyler @ 2021-10-20 16:53 UTC (permalink / raw)
  To: Wan Jiabing, Michael Ellerman, Benjamin Herrenschmidt,
	Paul Mackerras, Bjorn Helgaas, linuxppc-dev, linux-pci,
	linux-kernel
  Cc: kael_w
In-Reply-To: <20211020094604.2106-1-wanjiabing@vivo.com>

On 10/20/21 2:46 AM, Wan Jiabing wrote:
> Fix following coccicheck warning:
> ./drivers/pci/hotplug/pnv_php.c:161:2-13: ERROR: probable double put.
> 
> Device node iterators put the previous value of the index variable, so
> an explicit put causes a double put.
> 
> Signed-off-by: Wan Jiabing <wanjiabing@vivo.com>

NACK

This is a false positive from coccicheck. This is a case were a node is being
dynamically removed and the long reference needs to be dropped. Otherwise, the
reference count doesn't go to zero and trigger cleanup. This would result in us
ending up in a leaked device node.

-Tyrel

> ---
>  drivers/pci/hotplug/pnv_php.c | 1 -
>  1 file changed, 1 deletion(-)
> 
> diff --git a/drivers/pci/hotplug/pnv_php.c b/drivers/pci/hotplug/pnv_php.c
> index f4c2e6e01be0..f3da4f95d73f 100644
> --- a/drivers/pci/hotplug/pnv_php.c
> +++ b/drivers/pci/hotplug/pnv_php.c
> @@ -158,7 +158,6 @@ static void pnv_php_detach_device_nodes(struct device_node *parent)
>  	for_each_child_of_node(parent, dn) {
>  		pnv_php_detach_device_nodes(dn);
> 
> -		of_node_put(dn);
>  		of_detach_node(dn);
>  	}
>  }
> 


^ permalink raw reply

* Re: [PATCH] powerpc/pseries/mobility: ignore ibm,platform-facilities updates
From: Nathan Lynch @ 2021-10-20 15:54 UTC (permalink / raw)
  To: Tyrel Datwyler; +Cc: cheloha, ldufour, linuxppc-dev
In-Reply-To: <669cb321-7e7f-e45b-a7a1-7002d8371080@linux.ibm.com>

Tyrel Datwyler <tyreld@linux.ibm.com> writes:
> On 10/19/21 2:36 PM, Nathan Lynch wrote:
>> Tyrel Datwyler <tyreld@linux.ibm.com> writes:
>>> On 10/18/21 9:34 AM, Nathan Lynch wrote:
>>>> On VMs with NX encryption, compression, and/or RNG offload, these
>>>> capabilities are described by nodes in the ibm,platform-facilities device
>>>> tree hierarchy:
>>>>
>>>>   $ tree -d /sys/firmware/devicetree/base/ibm,platform-facilities/
>>>>   /sys/firmware/devicetree/base/ibm,platform-facilities/
>>>>   ├── ibm,compression-v1
>>>>   ├── ibm,random-v1
>>>>   └── ibm,sym-encryption-v1
>>>>
>>>>   3 directories
>>>>
>>>> The acceleration functions that these nodes describe are not disrupted by
>>>> live migration, not even temporarily.
>>>>
>>>> But the post-migration ibm,update-nodes sequence firmware always sends
>>>> "delete" messages for this hierarchy, followed by an "add" directive to
>>>> reconstruct it via ibm,configure-connector (log with debugging statements
>>>> enabled in mobility.c):
>>>>
>>>>   mobility: removing node /ibm,platform-facilities/ibm,random-v1:4294967285
>>>>   mobility: removing node /ibm,platform-facilities/ibm,compression-v1:4294967284
>>>>   mobility: removing node /ibm,platform-facilities/ibm,sym-encryption-v1:4294967283
>>>>   mobility: removing node /ibm,platform-facilities:4294967286
>>>>   ...
>>>>   mobility: added node /ibm,platform-facilities:4294967286
>>>
>>> It always bothered me that the update from firmware here was an delete/add at
>>> the entire '/ibm,platform-facilities' node level instead of update properties
>>> calls. When I asked about it years ago the claim was it was just easier to pass
>>> an entire sub-tree as a single node add.
>> 
>> Yes, I believe this is for ease of implementation on their part.
>
> I'd still argue that a full node removal or addition is a platform
> reconfiguration on par with a hotplug operation.

Well... I think we might be better served by a slightly more flexible
view of things :-) These are just a series of messages from firmware
that should be considered as a whole, and they don't dictate exactly
what the OS must do to its own data structures. Nothing mandates that
the OS immediately and literally apply the changes as they are
individually reported by the update-nodes sequence.

Anyway, whether the firmware behavior is reasonable is sort of beside
the point since we're stuck with it.


>>>> This is because add_dt_node() -> dlpar_attach_node() attaches only the
>>>> parent node returned from configure-connector, ignoring any children. This
>>>> should be corrected for the general case, but fixing that won't help with
>>>> the stale OF node references, which is the more urgent problem.
>>>
>>> I don't follow. If the code path you mention is truly broken in the way you say
>>> then DLPAR operations involving nodes with child nodes should also be
>>> broken.
>> 
>> Hmm I don't know of any kernel-based DLPAR workflow that attaches
>> sub-trees i.e. parent + children. Processor addition attaches CPU nodes
>> and possibly cache nodes, but they have sibling relationships. If you're
>> thinking of I/O adapters, the DT updates are (still!) driven from user
>> space. As undesirable as that is, that use case actually works correctly
>> AFAIK.
>> 
>> What happens for the platfac LPM scenario is that
>> dlpar_configure_connector() returns the node pointer for the root
>> ibm,platform-facilities node, with children attached. That node pointer
>> is passed from add_dt_node() -> dlpar_attach_node() -> of_attach_node()
>> -> __of_attach_node(), where its child and sibling fields are
>> overwritten in the process of attaching it to the tree.
>
> Well it worked back in 2013 when I got all the in kernel device tree update
> stuff working. Looks like I missed this one which now explains one area where
> platform-facilities update originally went to shit.
>
> commit 6162dbe49a451f96431a23b4821f05e3bd925bc1
> Author: Grant Likely <grant.likely@linaro.org>
> Date:   Wed Jul 16 08:48:46 2014 -0600
>
>     of: Make sure attached nodes don't carry along extra children
>
>     The child pointer does not get cleared when attaching new nodes which
>     could cause the tree to be inconsistent. Clear the child pointer in
>     __of_attach_node() to be absolutely sure that the structure remains in a
>     consistent layout.
>
>     Signed-off-by: Grant Likely <grant.likely@linaro.org>
>
> diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c
> index c875787fa394..b96d83100987 100644
> --- a/drivers/of/dynamic.c
> +++ b/drivers/of/dynamic.c
> @@ -98,6 +98,7 @@ int of_property_notify(int action, struct device_node *np,
>
>  void __of_attach_node(struct device_node *np)
>  {
> +       np->child = NULL;
>         np->sibling = np->parent->child;
>         np->allnext = np->parent->allnext;
>         np->parent->allnext = np;
>
> Not sure what the reasoning was here since this prevents attaching a node that
> is a sub tree. Either need to revert that, rewrite dlpar_attach_node to iterate
> over the sub-tree, or probably best rewrite dlpar_configure_connector to use a
> of_changeset instead.

Good find. I'd guess that adding a subtree used to sort of work, except
that children of np were not added to the allnext list, which would
effectively hide them from some lookups.

Regardless, yes, the pseries code needs to change to attach and detach
nodes individually. I don't think the OF core supports more complex
changes.

^ permalink raw reply

* Re: [PATCH v2][net-next] soc: fsl: dpio: Unsigned compared against 0 in qbman_swp_set_irq_coalescing()
From: patchwork-bot+netdevbpf @ 2021-10-20 13:30 UTC (permalink / raw)
  To: Tim Gardner
  Cc: netdev, Roy.Pledge, linux-kernel, leoyang.li, ioana.ciornei,
	linuxppc-dev, linux-arm-kernel
In-Reply-To: <20211019121925.8910-1-tim.gardner@canonical.com>

Hello:

This patch was applied to netdev/net-next.git (master)
by David S. Miller <davem@davemloft.net>:

On Tue, 19 Oct 2021 06:19:25 -0600 you wrote:
> Coverity complains of unsigned compare against 0. There are 2 cases in
> this function:
> 
> 1821        itp = (irq_holdoff * 1000) / p->desc->qman_256_cycles_per_ns;
> 
> CID 121131 (#1 of 1): Unsigned compared against 0 (NO_EFFECT)
> unsigned_compare: This less-than-zero comparison of an unsigned value is never true. itp < 0U.
> 1822        if (itp < 0 || itp > 4096) {
> 1823                max_holdoff = (p->desc->qman_256_cycles_per_ns * 4096) / 1000;
> 1824                pr_err("irq_holdoff must be between 0..%dus\n", max_holdoff);
> 1825                return -EINVAL;
> 1826        }
> 1827
>     	unsigned_compare: This less-than-zero comparison of an unsigned value is never true. irq_threshold < 0U.
> 1828        if (irq_threshold >= p->dqrr.dqrr_size || irq_threshold < 0) {
> 1829                pr_err("irq_threshold must be between 0..%d\n",
> 1830                       p->dqrr.dqrr_size - 1);
> 1831                return -EINVAL;
> 1832        }
> 
> [...]

Here is the summary with links:
  - [v2,net-next] soc: fsl: dpio: Unsigned compared against 0 in qbman_swp_set_irq_coalescing()
    https://git.kernel.org/netdev/net-next/c/818a76a55d6e

You are awesome, thank you!
--
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html



^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox