LinuxPPC-Dev Archive on lore.kernel.org

LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH v2 18/19] powerpc/64s: runlatch interrupt handling in C
From: Nicholas Piggin @ 2020-11-11  9:44 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20201111094410.3038123-1-npiggin@gmail.com>

There is no need for this to be in asm, use the new intrrupt entry wrapper.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/include/asm/interrupt.h |  7 +++++++
 arch/powerpc/kernel/exceptions-64s.S | 18 ------------------
 2 files changed, 7 insertions(+), 18 deletions(-)

diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index 65aa31353794..2648da187edf 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -6,6 +6,7 @@
 #include <linux/hardirq.h>
 #include <asm/cputime.h>
 #include <asm/ftrace.h>
+#include <asm/runlatch.h>
 
 struct interrupt_state {
 #ifdef CONFIG_PPC_BOOK3E_64
@@ -84,6 +85,12 @@ static inline void interrupt_exit_prepare(struct pt_regs *regs, struct interrupt
 
 static inline void interrupt_async_enter_prepare(struct pt_regs *regs, struct interrupt_state *state)
 {
+#ifdef CONFIG_PPC_BOOK3S_64
+	if (cpu_has_feature(CPU_FTR_CTRL) &&
+	    !test_thread_local_flags(_TLF_RUNLATCH))
+		__ppc64_runlatch_on();
+#endif
+
 	interrupt_enter_prepare(regs, state);
 	irq_enter();
 }
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 0949dd47be59..227bad3a586d 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -692,14 +692,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
 	ld	r1,GPR1(r1)
 .endm
 
-#define RUNLATCH_ON				\
-BEGIN_FTR_SECTION				\
-	ld	r3, PACA_THREAD_INFO(r13);	\
-	ld	r4,TI_LOCAL_FLAGS(r3);		\
-	andi.	r0,r4,_TLF_RUNLATCH;		\
-	beql	ppc64_runlatch_on_trampoline;	\
-END_FTR_SECTION_IFSET(CPU_FTR_CTRL)
-
 /*
  * When the idle code in power4_idle puts the CPU into NAP mode,
  * it has to do so in a loop, and relies on the external interrupt
@@ -1581,7 +1573,6 @@ EXC_VIRT_END(hardware_interrupt, 0x4500, 0x100)
 EXC_COMMON_BEGIN(hardware_interrupt_common)
 	GEN_COMMON hardware_interrupt
 	FINISH_NAP
-	RUNLATCH_ON
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	do_IRQ
 	b	interrupt_return
@@ -1767,7 +1758,6 @@ EXC_VIRT_END(decrementer, 0x4900, 0x80)
 EXC_COMMON_BEGIN(decrementer_common)
 	GEN_COMMON decrementer
 	FINISH_NAP
-	RUNLATCH_ON
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	timer_interrupt
 	b	interrupt_return
@@ -1853,7 +1843,6 @@ EXC_VIRT_END(doorbell_super, 0x4a00, 0x100)
 EXC_COMMON_BEGIN(doorbell_super_common)
 	GEN_COMMON doorbell_super
 	FINISH_NAP
-	RUNLATCH_ON
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 #ifdef CONFIG_PPC_DOORBELL
 	bl	doorbell_exception
@@ -2208,7 +2197,6 @@ EXC_COMMON_BEGIN(hmi_exception_early_common)
 EXC_COMMON_BEGIN(hmi_exception_common)
 	GEN_COMMON hmi_exception
 	FINISH_NAP
-	RUNLATCH_ON
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	handle_hmi_exception
 	b	interrupt_return
@@ -2238,7 +2226,6 @@ EXC_VIRT_END(h_doorbell, 0x4e80, 0x20)
 EXC_COMMON_BEGIN(h_doorbell_common)
 	GEN_COMMON h_doorbell
 	FINISH_NAP
-	RUNLATCH_ON
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 #ifdef CONFIG_PPC_DOORBELL
 	bl	doorbell_exception
@@ -2272,7 +2259,6 @@ EXC_VIRT_END(h_virt_irq, 0x4ea0, 0x20)
 EXC_COMMON_BEGIN(h_virt_irq_common)
 	GEN_COMMON h_virt_irq
 	FINISH_NAP
-	RUNLATCH_ON
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	do_IRQ
 	b	interrupt_return
@@ -2319,7 +2305,6 @@ EXC_VIRT_END(performance_monitor, 0x4f00, 0x20)
 EXC_COMMON_BEGIN(performance_monitor_common)
 	GEN_COMMON performance_monitor
 	FINISH_NAP
-	RUNLATCH_ON
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	performance_monitor_exception
 	b	interrupt_return
@@ -3030,9 +3015,6 @@ kvmppc_skip_Hinterrupt:
 	 * come here.
 	 */
 
-EXC_COMMON_BEGIN(ppc64_runlatch_on_trampoline)
-	b	__ppc64_runlatch_on
-
 USE_FIXED_SECTION(virt_trampolines)
 	/*
 	 * All code below __end_interrupts is treated as soft-masked. If
-- 
2.23.0


^ permalink raw reply related

* [PATCH v2 19/19] powerpc/64s: power4 nap fixup in C
From: Nicholas Piggin @ 2020-11-11  9:44 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20201111094410.3038123-1-npiggin@gmail.com>

There is no need for this to be in asm, use the new intrrupt entry wrapper.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/include/asm/interrupt.h   | 15 +++++++++
 arch/powerpc/include/asm/processor.h   |  1 +
 arch/powerpc/include/asm/thread_info.h |  6 ++++
 arch/powerpc/kernel/exceptions-64s.S   | 45 --------------------------
 arch/powerpc/kernel/idle_book3s.S      |  4 +++
 5 files changed, 26 insertions(+), 45 deletions(-)

diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index 2648da187edf..54fb55a1bc6c 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -8,6 +8,16 @@
 #include <asm/ftrace.h>
 #include <asm/runlatch.h>
 
+static inline void nap_adjust_return(struct pt_regs *regs)
+{
+#ifdef CONFIG_PPC_970_NAP
+	if (unlikely(test_thread_local_flags(_TLF_NAPPING))) {
+		clear_thread_local_flags(_TLF_NAPPING);
+		regs->nip = (unsigned long)power4_idle_nap_return;
+	}
+#endif
+}
+
 struct interrupt_state {
 #ifdef CONFIG_PPC_BOOK3E_64
 	enum ctx_state ctx_state;
@@ -99,6 +109,9 @@ static inline void interrupt_async_exit_prepare(struct pt_regs *regs, struct int
 {
 	irq_exit();
 	interrupt_exit_prepare(regs, state);
+
+	/* Adjust at exit so the main handler sees the true NIA */
+	nap_adjust_return(regs);
 }
 
 struct interrupt_nmi_state {
@@ -150,6 +163,8 @@ static inline void interrupt_nmi_exit_prepare(struct pt_regs *regs, struct inter
 			radix_enabled() || (mfmsr() & MSR_DR))
 		nmi_exit();
 
+	nap_adjust_return(regs);
+
 #ifdef CONFIG_PPC64
 	this_cpu_set_ftrace_enabled(state->ftrace_enabled);
 
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index c61c859b51a8..d845850f75e2 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -418,6 +418,7 @@ extern unsigned long isa300_idle_stop_mayloss(unsigned long psscr_val);
 extern unsigned long isa206_idle_insn_mayloss(unsigned long type);
 #ifdef CONFIG_PPC_970_NAP
 extern void power4_idle_nap(void);
+void power4_idle_nap_return(void);
 #endif
 
 extern unsigned long cpuidle_disable;
diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
index c9443c16e5fb..e7ee220b88ea 100644
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -151,6 +151,12 @@ void arch_setup_new_exec(void);
 
 #ifndef __ASSEMBLY__
 
+static inline void clear_thread_local_flags(unsigned int flags)
+{
+	struct thread_info *ti = current_thread_info();
+	ti->local_flags &= ~flags;
+}
+
 static inline bool test_thread_local_flags(unsigned int flags)
 {
 	struct thread_info *ti = current_thread_info();
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 227bad3a586d..1db6b3438c88 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -692,25 +692,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
 	ld	r1,GPR1(r1)
 .endm
 
-/*
- * When the idle code in power4_idle puts the CPU into NAP mode,
- * it has to do so in a loop, and relies on the external interrupt
- * and decrementer interrupt entry code to get it out of the loop.
- * It sets the _TLF_NAPPING bit in current_thread_info()->local_flags
- * to signal that it is in the loop and needs help to get out.
- */
-#ifdef CONFIG_PPC_970_NAP
-#define FINISH_NAP				\
-BEGIN_FTR_SECTION				\
-	ld	r11, PACA_THREAD_INFO(r13);	\
-	ld	r9,TI_LOCAL_FLAGS(r11);		\
-	andi.	r10,r9,_TLF_NAPPING;		\
-	bnel	power4_fixup_nap;		\
-END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
-#else
-#define FINISH_NAP
-#endif
-
 /*
  * There are a few constraints to be concerned with.
  * - Real mode exceptions code/data must be located at their physical location.
@@ -1250,7 +1231,6 @@ EXC_COMMON_BEGIN(machine_check_common)
 	 */
 	GEN_COMMON machine_check
 
-	FINISH_NAP
 	/* Enable MSR_RI when finished with PACA_EXMC */
 	li	r10,MSR_RI
 	mtmsrd 	r10,1
@@ -1572,7 +1552,6 @@ EXC_VIRT_BEGIN(hardware_interrupt, 0x4500, 0x100)
 EXC_VIRT_END(hardware_interrupt, 0x4500, 0x100)
 EXC_COMMON_BEGIN(hardware_interrupt_common)
 	GEN_COMMON hardware_interrupt
-	FINISH_NAP
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	do_IRQ
 	b	interrupt_return
@@ -1757,7 +1736,6 @@ EXC_VIRT_BEGIN(decrementer, 0x4900, 0x80)
 EXC_VIRT_END(decrementer, 0x4900, 0x80)
 EXC_COMMON_BEGIN(decrementer_common)
 	GEN_COMMON decrementer
-	FINISH_NAP
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	timer_interrupt
 	b	interrupt_return
@@ -1842,7 +1820,6 @@ EXC_VIRT_BEGIN(doorbell_super, 0x4a00, 0x100)
 EXC_VIRT_END(doorbell_super, 0x4a00, 0x100)
 EXC_COMMON_BEGIN(doorbell_super_common)
 	GEN_COMMON doorbell_super
-	FINISH_NAP
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 #ifdef CONFIG_PPC_DOORBELL
 	bl	doorbell_exception
@@ -2196,7 +2173,6 @@ EXC_COMMON_BEGIN(hmi_exception_early_common)
 
 EXC_COMMON_BEGIN(hmi_exception_common)
 	GEN_COMMON hmi_exception
-	FINISH_NAP
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	handle_hmi_exception
 	b	interrupt_return
@@ -2225,7 +2201,6 @@ EXC_VIRT_BEGIN(h_doorbell, 0x4e80, 0x20)
 EXC_VIRT_END(h_doorbell, 0x4e80, 0x20)
 EXC_COMMON_BEGIN(h_doorbell_common)
 	GEN_COMMON h_doorbell
-	FINISH_NAP
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 #ifdef CONFIG_PPC_DOORBELL
 	bl	doorbell_exception
@@ -2258,7 +2233,6 @@ EXC_VIRT_BEGIN(h_virt_irq, 0x4ea0, 0x20)
 EXC_VIRT_END(h_virt_irq, 0x4ea0, 0x20)
 EXC_COMMON_BEGIN(h_virt_irq_common)
 	GEN_COMMON h_virt_irq
-	FINISH_NAP
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	do_IRQ
 	b	interrupt_return
@@ -2304,7 +2278,6 @@ EXC_VIRT_BEGIN(performance_monitor, 0x4f00, 0x20)
 EXC_VIRT_END(performance_monitor, 0x4f00, 0x20)
 EXC_COMMON_BEGIN(performance_monitor_common)
 	GEN_COMMON performance_monitor
-	FINISH_NAP
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	performance_monitor_exception
 	b	interrupt_return
@@ -3032,24 +3005,6 @@ USE_FIXED_SECTION(virt_trampolines)
 __end_interrupts:
 DEFINE_FIXED_SYMBOL(__end_interrupts)
 
-#ifdef CONFIG_PPC_970_NAP
-	/*
-	 * Called by exception entry code if _TLF_NAPPING was set, this clears
-	 * the NAPPING flag, and redirects the exception exit to
-	 * power4_fixup_nap_return.
-	 */
-	.globl power4_fixup_nap
-EXC_COMMON_BEGIN(power4_fixup_nap)
-	andc	r9,r9,r10
-	std	r9,TI_LOCAL_FLAGS(r11)
-	LOAD_REG_ADDR(r10, power4_idle_nap_return)
-	std	r10,_NIP(r1)
-	blr
-
-power4_idle_nap_return:
-	blr
-#endif
-
 CLOSE_FIXED_SECTION(real_vectors);
 CLOSE_FIXED_SECTION(real_trampolines);
 CLOSE_FIXED_SECTION(virt_vectors);
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index 22f249b6f58d..27d2e6a72ec9 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -201,4 +201,8 @@ _GLOBAL(power4_idle_nap)
 	mtmsrd	r7
 	isync
 	b	1b
+
+	.globl power4_idle_nap_return
+power4_idle_nap_return:
+	blr
 #endif
-- 
2.23.0


^ permalink raw reply related

* [PATCH 0/3] powerpc: convert to use ARCH_ATOMIC
From: Nicholas Piggin @ 2020-11-11 11:07 UTC (permalink / raw)
  To: linuxppc-dev
  Cc: Christophe Leroy, linux-arch, Arnd Bergmann, Peter Zijlstra,
	Boqun Feng, linux-kernel, Nicholas Piggin, Alexey Kardashevskiy,
	Will Deacon

This conversion seems to require generic atomic64 changes, looks
like nothing else uses ARCH_ATOMIC and GENERIC_ATOMIC64 yet.

Thanks,
Nick

Nicholas Piggin (3):
  asm-generic/atomic64: Add support for ARCH_ATOMIC
  powerpc/64s/iommu: don't use atomic_ function on atomic64_t type
  powerpc: rewrite atomics to use ARCH_ATOMIC

 arch/powerpc/include/asm/atomic.h    | 681 ++++++++++-----------------
 arch/powerpc/include/asm/cmpxchg.h   |  62 +--
 arch/powerpc/mm/book3s64/iommu_api.c |   2 +-
 include/asm-generic/atomic64.h       |  70 ++-
 lib/atomic64.c                       |  36 +-
 5 files changed, 324 insertions(+), 527 deletions(-)

-- 
2.23.0


^ permalink raw reply

* [PATCH 1/3] asm-generic/atomic64: Add support for ARCH_ATOMIC
From: Nicholas Piggin @ 2020-11-11 11:07 UTC (permalink / raw)
  To: linuxppc-dev
  Cc: Christophe Leroy, linux-arch, Arnd Bergmann, Peter Zijlstra,
	Boqun Feng, linux-kernel, Nicholas Piggin, Alexey Kardashevskiy,
	Will Deacon
In-Reply-To: <20201111110723.3148665-1-npiggin@gmail.com>

This passes atomic64 selftest on ppc32 on qemu (uniprocessor only)
both before and after powerpc is converted to use ARCH_ATOMIC.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 include/asm-generic/atomic64.h | 70 +++++++++++++++++++++++++++-------
 lib/atomic64.c                 | 36 ++++++++---------
 2 files changed, 75 insertions(+), 31 deletions(-)

diff --git a/include/asm-generic/atomic64.h b/include/asm-generic/atomic64.h
index 370f01d4450f..2b1ecb591bb9 100644
--- a/include/asm-generic/atomic64.h
+++ b/include/asm-generic/atomic64.h
@@ -15,19 +15,17 @@ typedef struct {
 
 #define ATOMIC64_INIT(i)	{ (i) }
 
-extern s64 atomic64_read(const atomic64_t *v);
-extern void atomic64_set(atomic64_t *v, s64 i);
-
-#define atomic64_set_release(v, i)	atomic64_set((v), (i))
+extern s64 __atomic64_read(const atomic64_t *v);
+extern void __atomic64_set(atomic64_t *v, s64 i);
 
 #define ATOMIC64_OP(op)							\
-extern void	 atomic64_##op(s64 a, atomic64_t *v);
+extern void	 __atomic64_##op(s64 a, atomic64_t *v);
 
 #define ATOMIC64_OP_RETURN(op)						\
-extern s64 atomic64_##op##_return(s64 a, atomic64_t *v);
+extern s64 __atomic64_##op##_return(s64 a, atomic64_t *v);
 
 #define ATOMIC64_FETCH_OP(op)						\
-extern s64 atomic64_fetch_##op(s64 a, atomic64_t *v);
+extern s64 __atomic64_fetch_##op(s64 a, atomic64_t *v);
 
 #define ATOMIC64_OPS(op)	ATOMIC64_OP(op) ATOMIC64_OP_RETURN(op) ATOMIC64_FETCH_OP(op)
 
@@ -46,11 +44,57 @@ ATOMIC64_OPS(xor)
 #undef ATOMIC64_OP_RETURN
 #undef ATOMIC64_OP
 
-extern s64 atomic64_dec_if_positive(atomic64_t *v);
-#define atomic64_dec_if_positive atomic64_dec_if_positive
-extern s64 atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n);
-extern s64 atomic64_xchg(atomic64_t *v, s64 new);
-extern s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u);
-#define atomic64_fetch_add_unless atomic64_fetch_add_unless
+extern s64 __atomic64_dec_if_positive(atomic64_t *v);
+extern s64 __atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n);
+extern s64 __atomic64_xchg(atomic64_t *v, s64 new);
+extern s64 __atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u);
+
+#ifdef ARCH_ATOMIC
+#define arch_atomic64_read __atomic64_read
+#define arch_atomic64_set __atomic64_set
+#define arch_atomic64_add __atomic64_add
+#define arch_atomic64_add_return __atomic64_add_return
+#define arch_atomic64_fetch_add __atomic64_fetch_add
+#define arch_atomic64_sub __atomic64_sub
+#define arch_atomic64_sub_return __atomic64_sub_return
+#define arch_atomic64_fetch_sub __atomic64_fetch_sub
+#define arch_atomic64_and __atomic64_and
+#define arch_atomic64_and_return __atomic64_and_return
+#define arch_atomic64_fetch_and __atomic64_fetch_and
+#define arch_atomic64_or __atomic64_or
+#define arch_atomic64_or_return __atomic64_or_return
+#define arch_atomic64_fetch_or __atomic64_fetch_or
+#define arch_atomic64_xor __atomic64_xor
+#define arch_atomic64_xor_return __atomic64_xor_return
+#define arch_atomic64_fetch_xor __atomic64_fetch_xor
+#define arch_atomic64_xchg __atomic64_xchg
+#define arch_atomic64_cmpxchg __atomic64_cmpxchg
+#define arch_atomic64_set_release(v, i)	__atomic64_set((v), (i))
+#define arch_atomic64_dec_if_positive __atomic64_dec_if_positive
+#define arch_atomic64_fetch_add_unless __atomic64_fetch_add_unless
+#else
+#define atomic64_read __atomic64_read
+#define atomic64_set __atomic64_set
+#define atomic64_add __atomic64_add
+#define atomic64_add_return __atomic64_add_return
+#define atomic64_fetch_add __atomic64_fetch_add
+#define atomic64_sub __atomic64_sub
+#define atomic64_sub_return __atomic64_sub_return
+#define atomic64_fetch_sub __atomic64_fetch_sub
+#define atomic64_and __atomic64_and
+#define atomic64_and_return __atomic64_and_return
+#define atomic64_fetch_and __atomic64_fetch_and
+#define atomic64_or __atomic64_or
+#define atomic64_or_return __atomic64_or_return
+#define atomic64_fetch_or __atomic64_fetch_or
+#define atomic64_xor __atomic64_xor
+#define atomic64_xor_return __atomic64_xor_return
+#define atomic64_fetch_xor __atomic64_fetch_xor
+#define atomic64_xchg __atomic64_xchg
+#define atomic64_cmpxchg __atomic64_cmpxchg
+#define atomic64_set_release(v, i)	__atomic64_set((v), (i))
+#define atomic64_dec_if_positive __atomic64_dec_if_positive
+#define atomic64_fetch_add_unless __atomic64_fetch_add_unless
+#endif
 
 #endif  /*  _ASM_GENERIC_ATOMIC64_H  */
diff --git a/lib/atomic64.c b/lib/atomic64.c
index e98c85a99787..05aba5e3268f 100644
--- a/lib/atomic64.c
+++ b/lib/atomic64.c
@@ -42,7 +42,7 @@ static inline raw_spinlock_t *lock_addr(const atomic64_t *v)
 	return &atomic64_lock[addr & (NR_LOCKS - 1)].lock;
 }
 
-s64 atomic64_read(const atomic64_t *v)
+s64 __atomic64_read(const atomic64_t *v)
 {
 	unsigned long flags;
 	raw_spinlock_t *lock = lock_addr(v);
@@ -53,9 +53,9 @@ s64 atomic64_read(const atomic64_t *v)
 	raw_spin_unlock_irqrestore(lock, flags);
 	return val;
 }
-EXPORT_SYMBOL(atomic64_read);
+EXPORT_SYMBOL(__atomic64_read);
 
-void atomic64_set(atomic64_t *v, s64 i)
+void __atomic64_set(atomic64_t *v, s64 i)
 {
 	unsigned long flags;
 	raw_spinlock_t *lock = lock_addr(v);
@@ -64,10 +64,10 @@ void atomic64_set(atomic64_t *v, s64 i)
 	v->counter = i;
 	raw_spin_unlock_irqrestore(lock, flags);
 }
-EXPORT_SYMBOL(atomic64_set);
+EXPORT_SYMBOL(__atomic64_set);
 
 #define ATOMIC64_OP(op, c_op)						\
-void atomic64_##op(s64 a, atomic64_t *v)				\
+void __atomic64_##op(s64 a, atomic64_t *v)				\
 {									\
 	unsigned long flags;						\
 	raw_spinlock_t *lock = lock_addr(v);				\
@@ -76,10 +76,10 @@ void atomic64_##op(s64 a, atomic64_t *v)				\
 	v->counter c_op a;						\
 	raw_spin_unlock_irqrestore(lock, flags);			\
 }									\
-EXPORT_SYMBOL(atomic64_##op);
+EXPORT_SYMBOL(__atomic64_##op);
 
 #define ATOMIC64_OP_RETURN(op, c_op)					\
-s64 atomic64_##op##_return(s64 a, atomic64_t *v)			\
+s64 __atomic64_##op##_return(s64 a, atomic64_t *v)			\
 {									\
 	unsigned long flags;						\
 	raw_spinlock_t *lock = lock_addr(v);				\
@@ -90,10 +90,10 @@ s64 atomic64_##op##_return(s64 a, atomic64_t *v)			\
 	raw_spin_unlock_irqrestore(lock, flags);			\
 	return val;							\
 }									\
-EXPORT_SYMBOL(atomic64_##op##_return);
+EXPORT_SYMBOL(__atomic64_##op##_return);
 
 #define ATOMIC64_FETCH_OP(op, c_op)					\
-s64 atomic64_fetch_##op(s64 a, atomic64_t *v)				\
+s64 __atomic64_fetch_##op(s64 a, atomic64_t *v)				\
 {									\
 	unsigned long flags;						\
 	raw_spinlock_t *lock = lock_addr(v);				\
@@ -105,7 +105,7 @@ s64 atomic64_fetch_##op(s64 a, atomic64_t *v)				\
 	raw_spin_unlock_irqrestore(lock, flags);			\
 	return val;							\
 }									\
-EXPORT_SYMBOL(atomic64_fetch_##op);
+EXPORT_SYMBOL(__atomic64_fetch_##op);
 
 #define ATOMIC64_OPS(op, c_op)						\
 	ATOMIC64_OP(op, c_op)						\
@@ -130,7 +130,7 @@ ATOMIC64_OPS(xor, ^=)
 #undef ATOMIC64_OP_RETURN
 #undef ATOMIC64_OP
 
-s64 atomic64_dec_if_positive(atomic64_t *v)
+s64 __atomic64_dec_if_positive(atomic64_t *v)
 {
 	unsigned long flags;
 	raw_spinlock_t *lock = lock_addr(v);
@@ -143,9 +143,9 @@ s64 atomic64_dec_if_positive(atomic64_t *v)
 	raw_spin_unlock_irqrestore(lock, flags);
 	return val;
 }
-EXPORT_SYMBOL(atomic64_dec_if_positive);
+EXPORT_SYMBOL(__atomic64_dec_if_positive);
 
-s64 atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n)
+s64 __atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n)
 {
 	unsigned long flags;
 	raw_spinlock_t *lock = lock_addr(v);
@@ -158,9 +158,9 @@ s64 atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n)
 	raw_spin_unlock_irqrestore(lock, flags);
 	return val;
 }
-EXPORT_SYMBOL(atomic64_cmpxchg);
+EXPORT_SYMBOL(__atomic64_cmpxchg);
 
-s64 atomic64_xchg(atomic64_t *v, s64 new)
+s64 __atomic64_xchg(atomic64_t *v, s64 new)
 {
 	unsigned long flags;
 	raw_spinlock_t *lock = lock_addr(v);
@@ -172,9 +172,9 @@ s64 atomic64_xchg(atomic64_t *v, s64 new)
 	raw_spin_unlock_irqrestore(lock, flags);
 	return val;
 }
-EXPORT_SYMBOL(atomic64_xchg);
+EXPORT_SYMBOL(__atomic64_xchg);
 
-s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
+s64 __atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
 {
 	unsigned long flags;
 	raw_spinlock_t *lock = lock_addr(v);
@@ -188,4 +188,4 @@ s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
 
 	return val;
 }
-EXPORT_SYMBOL(atomic64_fetch_add_unless);
+EXPORT_SYMBOL(__atomic64_fetch_add_unless);
-- 
2.23.0


^ permalink raw reply related

* [PATCH 2/3] powerpc/64s/iommu: don't use atomic_ function on atomic64_t type
From: Nicholas Piggin @ 2020-11-11 11:07 UTC (permalink / raw)
  To: linuxppc-dev
  Cc: Christophe Leroy, linux-arch, Arnd Bergmann, Peter Zijlstra,
	Boqun Feng, linux-kernel, Nicholas Piggin, Alexey Kardashevskiy,
	Will Deacon
In-Reply-To: <20201111110723.3148665-1-npiggin@gmail.com>

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/mm/book3s64/iommu_api.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/mm/book3s64/iommu_api.c b/arch/powerpc/mm/book3s64/iommu_api.c
index 563faa10bb66..685d7bb3d26f 100644
--- a/arch/powerpc/mm/book3s64/iommu_api.c
+++ b/arch/powerpc/mm/book3s64/iommu_api.c
@@ -263,7 +263,7 @@ long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem)
 		goto unlock_exit;
 
 	/* Are there still mappings? */
-	if (atomic_cmpxchg(&mem->mapped, 1, 0) != 1) {
+	if (atomic64_cmpxchg(&mem->mapped, 1, 0) != 1) {
 		++mem->used;
 		ret = -EBUSY;
 		goto unlock_exit;
-- 
2.23.0


^ permalink raw reply related

* [PATCH 3/3] powerpc: rewrite atomics to use ARCH_ATOMIC
From: Nicholas Piggin @ 2020-11-11 11:07 UTC (permalink / raw)
  To: linuxppc-dev
  Cc: Christophe Leroy, linux-arch, Arnd Bergmann, Peter Zijlstra,
	Boqun Feng, linux-kernel, Nicholas Piggin, Alexey Kardashevskiy,
	Will Deacon
In-Reply-To: <20201111110723.3148665-1-npiggin@gmail.com>

All the cool kids are doing it.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/include/asm/atomic.h  | 681 ++++++++++-------------------
 arch/powerpc/include/asm/cmpxchg.h |  62 +--
 2 files changed, 248 insertions(+), 495 deletions(-)

diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index 8a55eb8cc97b..899aa2403ba7 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -11,185 +11,285 @@
 #include <asm/cmpxchg.h>
 #include <asm/barrier.h>
 
+#define ARCH_ATOMIC
+
+#ifndef CONFIG_64BIT
+#include <asm-generic/atomic64.h>
+#endif
+
 /*
  * Since *_return_relaxed and {cmp}xchg_relaxed are implemented with
  * a "bne-" instruction at the end, so an isync is enough as a acquire barrier
  * on the platform without lwsync.
  */
 #define __atomic_acquire_fence()					\
-	__asm__ __volatile__(PPC_ACQUIRE_BARRIER "" : : : "memory")
+	asm volatile(PPC_ACQUIRE_BARRIER "" : : : "memory")
 
 #define __atomic_release_fence()					\
-	__asm__ __volatile__(PPC_RELEASE_BARRIER "" : : : "memory")
+	asm volatile(PPC_RELEASE_BARRIER "" : : : "memory")
 
-static __inline__ int atomic_read(const atomic_t *v)
-{
-	int t;
+#define __atomic_pre_full_fence		smp_mb
 
-	__asm__ __volatile__("lwz%U1%X1 %0,%1" : "=r"(t) : "m"(v->counter));
+#define __atomic_post_full_fence	smp_mb
 
-	return t;
+#define arch_atomic_read(v)			__READ_ONCE((v)->counter)
+#define arch_atomic_set(v, i)			__WRITE_ONCE(((v)->counter), (i))
+#ifdef CONFIG_64BIT
+#define ATOMIC64_INIT(i)			{ (i) }
+#define arch_atomic64_read(v)			__READ_ONCE((v)->counter)
+#define arch_atomic64_set(v, i)			__WRITE_ONCE(((v)->counter), (i))
+#endif
+
+#define ATOMIC_OP(name, type, dtype, width, asm_op)			\
+static inline void arch_##name(dtype a, type *v)			\
+{									\
+	dtype t;							\
+									\
+	asm volatile(							\
+"1:	l" #width "arx	%0,0,%3		# " #name		"\n"	\
+"\t"	#asm_op " %0,%2,%0					\n"	\
+"	st" #width "cx.	%0,0,%3					\n"	\
+"	bne-	1b						\n"	\
+	: "=&r" (t), "+m" (v->counter)					\
+	: "r" (a), "r" (&v->counter)					\
+	: "cr0", "xer");						\
 }
 
-static __inline__ void atomic_set(atomic_t *v, int i)
-{
-	__asm__ __volatile__("stw%U0%X0 %1,%0" : "=m"(v->counter) : "r"(i));
+#define ATOMIC_OP_IMM(name, type, dtype, width, asm_op, imm)		\
+static inline void arch_##name(type *v)					\
+{									\
+	dtype t;							\
+									\
+	asm volatile(							\
+"1:	l" #width "arx	%0,0,%3		# " #name		"\n"	\
+"\t"	#asm_op " %0,%0,%2					\n"	\
+"	st" #width "cx.	%0,0,%3					\n"	\
+"	bne-	1b						\n"	\
+	: "=&r" (t), "+m" (v->counter)					\
+	: "i" (imm), "r" (&v->counter)					\
+	: "cr0", "xer");						\
 }
 
-#define ATOMIC_OP(op, asm_op)						\
-static __inline__ void atomic_##op(int a, atomic_t *v)			\
+#define ATOMIC_OP_RETURN_RELAXED(name, type, dtype, width, asm_op)	\
+static inline dtype arch_##name##_relaxed(dtype a, type *v)		\
 {									\
-	int t;								\
+	dtype t;							\
 									\
-	__asm__ __volatile__(						\
-"1:	lwarx	%0,0,%3		# atomic_" #op "\n"			\
-	#asm_op " %0,%2,%0\n"						\
-"	stwcx.	%0,0,%3 \n"						\
-"	bne-	1b\n"							\
+	asm volatile(							\
+"1:	l" #width "arx	%0,0,%3		# " #name 		"\n"	\
+"\t"	#asm_op " %0,%2,%0					\n"	\
+"	st" #width "cx.	%0,0,%3					\n"	\
+"	bne-	1b						\n"	\
 	: "=&r" (t), "+m" (v->counter)					\
 	: "r" (a), "r" (&v->counter)					\
-	: "cc");							\
-}									\
+	: "cr0", "xer");						\
+									\
+	return t;							\
+}
 
-#define ATOMIC_OP_RETURN_RELAXED(op, asm_op)				\
-static inline int atomic_##op##_return_relaxed(int a, atomic_t *v)	\
+#define ATOMIC_OP_IMM_RETURN_RELAXED(name, type, dtype, width, asm_op, imm) \
+static inline dtype arch_##name##_relaxed(type *v)			\
 {									\
-	int t;								\
+	dtype t;							\
 									\
-	__asm__ __volatile__(						\
-"1:	lwarx	%0,0,%3		# atomic_" #op "_return_relaxed\n"	\
-	#asm_op " %0,%2,%0\n"						\
-"	stwcx.	%0,0,%3\n"						\
-"	bne-	1b\n"							\
+	asm volatile(							\
+"1:	l" #width "arx	%0,0,%3		# " #name		"\n"	\
+"\t"	#asm_op " %0,%0,%2					\n"	\
+"	st" #width "cx.	%0,0,%3					\n"	\
+"	bne-	1b						\n"	\
 	: "=&r" (t), "+m" (v->counter)					\
-	: "r" (a), "r" (&v->counter)					\
-	: "cc");							\
+	: "i" (imm), "r" (&v->counter)					\
+	: "cr0", "xer");						\
 									\
 	return t;							\
 }
 
-#define ATOMIC_FETCH_OP_RELAXED(op, asm_op)				\
-static inline int atomic_fetch_##op##_relaxed(int a, atomic_t *v)	\
+#define ATOMIC_FETCH_OP_RELAXED(name, type, dtype, width, asm_op)	\
+static inline dtype arch_##name##_relaxed(dtype a, type *v)		\
 {									\
-	int res, t;							\
+	dtype res, t;							\
 									\
-	__asm__ __volatile__(						\
-"1:	lwarx	%0,0,%4		# atomic_fetch_" #op "_relaxed\n"	\
-	#asm_op " %1,%3,%0\n"						\
-"	stwcx.	%1,0,%4\n"						\
-"	bne-	1b\n"							\
+	asm volatile(							\
+"1:	l" #width "arx	%0,0,%4		# " #name		"\n"	\
+"\t"	#asm_op " %1,%3,%0					\n"	\
+"	st" #width "cx.	%1,0,%4					\n"	\
+"	bne-	1b						\n"	\
 	: "=&r" (res), "=&r" (t), "+m" (v->counter)			\
 	: "r" (a), "r" (&v->counter)					\
-	: "cc");							\
+	: "cr0", "xer");						\
 									\
 	return res;							\
 }
 
+#define ATOMIC_FETCH_OP_UNLESS_RELAXED(name, type, dtype, width, asm_op) \
+static inline int arch_##name##_relaxed(type *v, dtype a, dtype u)	\
+{									\
+	dtype res, t;							\
+									\
+	asm volatile(							\
+"1:	l" #width "arx	%0,0,%5		# " #name		"\n"	\
+"	cmp" #width "	0,%0,%3					\n"	\
+"	beq-	2f						\n"	\
+"\t"	#asm_op " %1,%2,%0					\n"	\
+"	st" #width "cx.	%1,0,%5					\n"	\
+"	bne-	1b						\n"	\
+"2:								\n"	\
+	: "=&r" (res), "=&r" (t), "+m" (v->counter)			\
+	: "r" (a), "r" (u), "r" (&v->counter)				\
+	: "cr0", "xer");						\
+									\
+	return res;							\
+}
+
+#define ATOMIC_INC_NOT_ZERO_RELAXED(name, type, dtype, width)		\
+static inline dtype arch_##name##_relaxed(type *v)			\
+{									\
+	dtype t1, t2;							\
+									\
+	asm volatile(							\
+"1:	l" #width "arx	%0,0,%3		# " #name		"\n"	\
+"	cmp" #width "i	0,%0,0					\n"	\
+"	beq-	2f						\n"	\
+"	addic	%1,%2,1						\n"	\
+"	st" #width "cx.	%1,0,%3					\n"	\
+"	bne-	1b						\n"	\
+"2:								\n"	\
+	: "=&r" (t1), "=&r" (t2), "+m" (v->counter)			\
+	: "r" (&v->counter)						\
+	: "cr0", "xer");						\
+									\
+	return t1;							\
+}
+
+#undef ATOMIC_OPS
 #define ATOMIC_OPS(op, asm_op)						\
-	ATOMIC_OP(op, asm_op)						\
-	ATOMIC_OP_RETURN_RELAXED(op, asm_op)				\
-	ATOMIC_FETCH_OP_RELAXED(op, asm_op)
+ATOMIC_OP(atomic_##op, atomic_t, int, w, asm_op)			\
+ATOMIC_OP_RETURN_RELAXED(atomic_##op##_return, atomic_t, int, w, asm_op) \
+ATOMIC_FETCH_OP_RELAXED(atomic_fetch_##op, atomic_t, int, w, asm_op)	\
+ATOMIC_FETCH_OP_UNLESS_RELAXED(atomic_fetch_##op##_unless, atomic_t, int, w, asm_op)
+
+#undef ATOMIC64_OPS
+#define ATOMIC64_OPS(op, asm_op)					\
+ATOMIC_OP(atomic64_##op, atomic64_t, u64, d, asm_op)			\
+ATOMIC_OP_RETURN_RELAXED(atomic64_##op##_return, atomic64_t, u64, d, asm_op) \
+ATOMIC_FETCH_OP_RELAXED(atomic64_fetch_##op, atomic64_t, u64, d, asm_op) \
+ATOMIC_FETCH_OP_UNLESS_RELAXED(atomic64_fetch_##op##_unless, atomic64_t, u64, d, asm_op)
 
 ATOMIC_OPS(add, add)
+#define arch_atomic_add arch_atomic_add
+#define arch_atomic_add_return_relaxed arch_atomic_add_return_relaxed
+#define arch_atomic_fetch_add_relaxed arch_atomic_fetch_add_relaxed
+#define arch_atomic_fetch_add_unless_relaxed arch_atomic_fetch_add_unless_relaxed
+
 ATOMIC_OPS(sub, subf)
+#define arch_atomic_sub arch_atomic_sub
+#define arch_atomic_sub_return_relaxed arch_atomic_sub_return_relaxed
+#define arch_atomic_fetch_sub_relaxed arch_atomic_fetch_sub_relaxed
+/* skip atomic_fetch_sub_unless_relaxed */
 
-#define atomic_add_return_relaxed atomic_add_return_relaxed
-#define atomic_sub_return_relaxed atomic_sub_return_relaxed
+#ifdef CONFIG_64BIT
+ATOMIC64_OPS(add, add)
+#define arch_atomic64_add arch_atomic64_add
+#define arch_atomic64_add_return_relaxed arch_atomic64_add_return_relaxed
+#define arch_atomic64_fetch_add_relaxed arch_atomic64_fetch_add_relaxed
+#define arch_atomic64_fetch_add_unless_relaxed arch_atomic64_fetch_add_unless_relaxed
 
-#define atomic_fetch_add_relaxed atomic_fetch_add_relaxed
-#define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed
+ATOMIC64_OPS(sub, subf)
+#define arch_atomic64_sub arch_atomic64_sub
+#define arch_atomic64_sub_return_relaxed arch_atomic64_sub_return_relaxed
+#define arch_atomic64_fetch_sub_relaxed arch_atomic64_fetch_sub_relaxed
+/* skip atomic64_fetch_sub_unless_relaxed */
+#endif
 
 #undef ATOMIC_OPS
 #define ATOMIC_OPS(op, asm_op)						\
-	ATOMIC_OP(op, asm_op)						\
-	ATOMIC_FETCH_OP_RELAXED(op, asm_op)
+ATOMIC_OP(atomic_##op, atomic_t, int, w, asm_op)			\
+ATOMIC_FETCH_OP_RELAXED(atomic_fetch_##op, atomic_t, int, w, asm_op)
+
+#undef ATOMIC64_OPS
+#define ATOMIC64_OPS(op, asm_op)					\
+ATOMIC_OP(atomic64_##op, atomic64_t, u64, d, asm_op)			\
+ATOMIC_FETCH_OP_RELAXED(atomic64_fetch_##op, atomic64_t, u64, d, asm_op)
 
 ATOMIC_OPS(and, and)
+#define arch_atomic_and arch_atomic_and
+#define arch_atomic_fetch_and_relaxed arch_atomic_fetch_and_relaxed
+
 ATOMIC_OPS(or, or)
+#define arch_atomic_or arch_atomic_or
+#define arch_atomic_fetch_or_relaxed  arch_atomic_fetch_or_relaxed
+
 ATOMIC_OPS(xor, xor)
+#define arch_atomic_xor arch_atomic_xor
+#define arch_atomic_fetch_xor_relaxed arch_atomic_fetch_xor_relaxed
+
+#ifdef CONFIG_64BIT
+ATOMIC64_OPS(and, and)
+#define arch_atomic64_and arch_atomic64_and
+#define arch_atomic64_fetch_and_relaxed arch_atomic64_fetch_and_relaxed
 
-#define atomic_fetch_and_relaxed atomic_fetch_and_relaxed
-#define atomic_fetch_or_relaxed  atomic_fetch_or_relaxed
-#define atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed
+ATOMIC64_OPS(or, or)
+#define arch_atomic64_or arch_atomic64_or
+#define arch_atomic64_fetch_or_relaxed  arch_atomic64_fetch_or_relaxed
+
+ATOMIC64_OPS(xor, xor)
+#define arch_atomic64_xor arch_atomic64_xor
+#define arch_atomic64_fetch_xor_relaxed arch_atomic64_fetch_xor_relaxed
+#endif
 
 #undef ATOMIC_OPS
+#define ATOMIC_OPS(op, asm_op, imm)					\
+ATOMIC_OP_IMM(atomic_##op, atomic_t, int, w, asm_op, imm)		\
+ATOMIC_OP_IMM_RETURN_RELAXED(atomic_##op##_return, atomic_t, int, w, asm_op, imm)
+
+#undef ATOMIC64_OPS
+#define ATOMIC64_OPS(op, asm_op, imm)					\
+ATOMIC_OP_IMM(atomic64_##op, atomic64_t, u64, d, asm_op, imm)		\
+ATOMIC_OP_IMM_RETURN_RELAXED(atomic64_##op##_return, atomic64_t, u64, d, asm_op, imm)
+
+ATOMIC_OPS(inc, addic, 1)
+#define arch_atomic_inc arch_atomic_inc
+#define arch_atomic_inc_return_relaxed arch_atomic_inc_return_relaxed
+
+ATOMIC_OPS(dec, addic, -1)
+#define arch_atomic_dec arch_atomic_dec
+#define arch_atomic_dec_return_relaxed arch_atomic_dec_return_relaxed
+
+#ifdef CONFIG_64BIT
+ATOMIC64_OPS(inc, addic, 1)
+#define arch_atomic64_inc arch_atomic64_inc
+#define arch_atomic64_inc_return_relaxed arch_atomic64_inc_return_relaxed
+
+ATOMIC64_OPS(dec, addic, -1)
+#define arch_atomic64_dec arch_atomic64_dec
+#define arch_atomic64_dec_return_relaxed arch_atomic64_dec_return_relaxed
+#endif
+
+ATOMIC_INC_NOT_ZERO_RELAXED(atomic_inc_not_zero, atomic_t, int, w)
+#define arch_atomic_inc_not_zero_relaxed(v) arch_atomic_inc_not_zero_relaxed(v)
+
+#ifdef CONFIG_64BIT
+ATOMIC_INC_NOT_ZERO_RELAXED(atomic64_inc_not_zero, atomic64_t, u64, d)
+#define arch_atomic64_inc_not_zero_relaxed(v) arch_atomic64_inc_not_zero_relaxed(v)
+#endif
+
+#undef ATOMIC_INC_NOT_ZERO_RELAXED
+#undef ATOMIC_FETCH_OP_UNLESS_RELAXED
 #undef ATOMIC_FETCH_OP_RELAXED
+#undef ATOMIC_OP_IMM_RETURN_RELAXED
 #undef ATOMIC_OP_RETURN_RELAXED
+#undef ATOMIC_OP_IMM
 #undef ATOMIC_OP
+#undef ATOMIC_OPS
+#undef ATOMIC64_OPS
 
-static __inline__ void atomic_inc(atomic_t *v)
-{
-	int t;
-
-	__asm__ __volatile__(
-"1:	lwarx	%0,0,%2		# atomic_inc\n\
-	addic	%0,%0,1\n"
-"	stwcx.	%0,0,%2 \n\
-	bne-	1b"
-	: "=&r" (t), "+m" (v->counter)
-	: "r" (&v->counter)
-	: "cc", "xer");
-}
-#define atomic_inc atomic_inc
-
-static __inline__ int atomic_inc_return_relaxed(atomic_t *v)
-{
-	int t;
-
-	__asm__ __volatile__(
-"1:	lwarx	%0,0,%2		# atomic_inc_return_relaxed\n"
-"	addic	%0,%0,1\n"
-"	stwcx.	%0,0,%2\n"
-"	bne-	1b"
-	: "=&r" (t), "+m" (v->counter)
-	: "r" (&v->counter)
-	: "cc", "xer");
-
-	return t;
-}
-
-static __inline__ void atomic_dec(atomic_t *v)
-{
-	int t;
-
-	__asm__ __volatile__(
-"1:	lwarx	%0,0,%2		# atomic_dec\n\
-	addic	%0,%0,-1\n"
-"	stwcx.	%0,0,%2\n\
-	bne-	1b"
-	: "=&r" (t), "+m" (v->counter)
-	: "r" (&v->counter)
-	: "cc", "xer");
-}
-#define atomic_dec atomic_dec
-
-static __inline__ int atomic_dec_return_relaxed(atomic_t *v)
-{
-	int t;
-
-	__asm__ __volatile__(
-"1:	lwarx	%0,0,%2		# atomic_dec_return_relaxed\n"
-"	addic	%0,%0,-1\n"
-"	stwcx.	%0,0,%2\n"
-"	bne-	1b"
-	: "=&r" (t), "+m" (v->counter)
-	: "r" (&v->counter)
-	: "cc", "xer");
-
-	return t;
-}
-
-#define atomic_inc_return_relaxed atomic_inc_return_relaxed
-#define atomic_dec_return_relaxed atomic_dec_return_relaxed
-
-#define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n)))
-#define atomic_cmpxchg_relaxed(v, o, n) \
-	cmpxchg_relaxed(&((v)->counter), (o), (n))
-#define atomic_cmpxchg_acquire(v, o, n) \
-	cmpxchg_acquire(&((v)->counter), (o), (n))
+#define arch_atomic_cmpxchg_relaxed(v, o, n) arch_cmpxchg_relaxed(&((v)->counter), (o), (n))
+#define arch_atomic_xchg_relaxed(v, new) arch_xchg_relaxed(&((v)->counter), (new))
 
-#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
-#define atomic_xchg_relaxed(v, new) xchg_relaxed(&((v)->counter), (new))
+#ifdef CONFIG_64BIT
+#define arch_atomic64_cmpxchg_relaxed(v, o, n) arch_cmpxchg_relaxed(&((v)->counter), (o), (n))
+#define arch_atomic64_xchg_relaxed(v, new) arch_xchg_relaxed(&((v)->counter), (new))
+#endif
 
 /*
  * Don't want to override the generic atomic_try_cmpxchg_acquire, because
@@ -203,7 +303,7 @@ atomic_try_cmpxchg_lock(atomic_t *v, int *old, int new)
 	int r, o = *old;
 
 	__asm__ __volatile__ (
-"1:\t"	PPC_LWARX(%0,0,%2,1) "	# atomic_try_cmpxchg_acquire	\n"
+"1:\t"	PPC_LWARX(%0,0,%2,1) "	# atomic_try_cmpxchg_lock		\n"
 "	cmpw	0,%0,%3							\n"
 "	bne-	2f							\n"
 "	stwcx.	%4,0,%2							\n"
@@ -219,270 +319,41 @@ atomic_try_cmpxchg_lock(atomic_t *v, int *old, int new)
 	return likely(r == o);
 }
 
-/**
- * atomic_fetch_add_unless - add unless the number is a given value
- * @v: pointer of type atomic_t
- * @a: the amount to add to v...
- * @u: ...unless v is equal to u.
- *
- * Atomically adds @a to @v, so long as it was not @u.
- * Returns the old value of @v.
- */
-static __inline__ int atomic_fetch_add_unless(atomic_t *v, int a, int u)
-{
-	int t;
-
-	__asm__ __volatile__ (
-	PPC_ATOMIC_ENTRY_BARRIER
-"1:	lwarx	%0,0,%1		# atomic_fetch_add_unless\n\
-	cmpw	0,%0,%3 \n\
-	beq	2f \n\
-	add	%0,%2,%0 \n"
-"	stwcx.	%0,0,%1 \n\
-	bne-	1b \n"
-	PPC_ATOMIC_EXIT_BARRIER
-"	subf	%0,%2,%0 \n\
-2:"
-	: "=&r" (t)
-	: "r" (&v->counter), "r" (a), "r" (u)
-	: "cc", "memory");
-
-	return t;
-}
-#define atomic_fetch_add_unless atomic_fetch_add_unless
-
-/**
- * atomic_inc_not_zero - increment unless the number is zero
- * @v: pointer of type atomic_t
- *
- * Atomically increments @v by 1, so long as @v is non-zero.
- * Returns non-zero if @v was non-zero, and zero otherwise.
- */
-static __inline__ int atomic_inc_not_zero(atomic_t *v)
-{
-	int t1, t2;
-
-	__asm__ __volatile__ (
-	PPC_ATOMIC_ENTRY_BARRIER
-"1:	lwarx	%0,0,%2		# atomic_inc_not_zero\n\
-	cmpwi	0,%0,0\n\
-	beq-	2f\n\
-	addic	%1,%0,1\n"
-"	stwcx.	%1,0,%2\n\
-	bne-	1b\n"
-	PPC_ATOMIC_EXIT_BARRIER
-	"\n\
-2:"
-	: "=&r" (t1), "=&r" (t2)
-	: "r" (&v->counter)
-	: "cc", "xer", "memory");
-
-	return t1;
-}
-#define atomic_inc_not_zero(v) atomic_inc_not_zero((v))
-
 /*
  * Atomically test *v and decrement if it is greater than 0.
  * The function returns the old value of *v minus 1, even if
  * the atomic variable, v, was not decremented.
  */
-static __inline__ int atomic_dec_if_positive(atomic_t *v)
+static inline int atomic_dec_if_positive_relaxed(atomic_t *v)
 {
 	int t;
 
-	__asm__ __volatile__(
-	PPC_ATOMIC_ENTRY_BARRIER
-"1:	lwarx	%0,0,%1		# atomic_dec_if_positive\n\
-	cmpwi	%0,1\n\
-	addi	%0,%0,-1\n\
-	blt-	2f\n"
-"	stwcx.	%0,0,%1\n\
-	bne-	1b"
-	PPC_ATOMIC_EXIT_BARRIER
-	"\n\
-2:"	: "=&b" (t)
+	asm volatile(
+"1:	lwarx	%0,0,%1		# atomic_dec_if_positive		\n"
+"	cmpwi	%0,1							\n"
+"	addi	%0,%0,-1						\n"
+"	blt-	2f							\n"
+"	stwcx.	%0,0,%1							\n"
+"	bne-	1b							\n"
+"2:									\n"
+	: "=&b" (t)
 	: "r" (&v->counter)
 	: "cc", "memory");
 
 	return t;
 }
-#define atomic_dec_if_positive atomic_dec_if_positive
-
-#ifdef __powerpc64__
-
-#define ATOMIC64_INIT(i)	{ (i) }
-
-static __inline__ s64 atomic64_read(const atomic64_t *v)
-{
-	s64 t;
-
-	__asm__ __volatile__("ld%U1%X1 %0,%1" : "=r"(t) : "m"(v->counter));
-
-	return t;
-}
-
-static __inline__ void atomic64_set(atomic64_t *v, s64 i)
-{
-	__asm__ __volatile__("std%U0%X0 %1,%0" : "=m"(v->counter) : "r"(i));
-}
-
-#define ATOMIC64_OP(op, asm_op)						\
-static __inline__ void atomic64_##op(s64 a, atomic64_t *v)		\
-{									\
-	s64 t;								\
-									\
-	__asm__ __volatile__(						\
-"1:	ldarx	%0,0,%3		# atomic64_" #op "\n"			\
-	#asm_op " %0,%2,%0\n"						\
-"	stdcx.	%0,0,%3 \n"						\
-"	bne-	1b\n"							\
-	: "=&r" (t), "+m" (v->counter)					\
-	: "r" (a), "r" (&v->counter)					\
-	: "cc");							\
-}
-
-#define ATOMIC64_OP_RETURN_RELAXED(op, asm_op)				\
-static inline s64							\
-atomic64_##op##_return_relaxed(s64 a, atomic64_t *v)			\
-{									\
-	s64 t;								\
-									\
-	__asm__ __volatile__(						\
-"1:	ldarx	%0,0,%3		# atomic64_" #op "_return_relaxed\n"	\
-	#asm_op " %0,%2,%0\n"						\
-"	stdcx.	%0,0,%3\n"						\
-"	bne-	1b\n"							\
-	: "=&r" (t), "+m" (v->counter)					\
-	: "r" (a), "r" (&v->counter)					\
-	: "cc");							\
-									\
-	return t;							\
-}
-
-#define ATOMIC64_FETCH_OP_RELAXED(op, asm_op)				\
-static inline s64							\
-atomic64_fetch_##op##_relaxed(s64 a, atomic64_t *v)			\
-{									\
-	s64 res, t;							\
-									\
-	__asm__ __volatile__(						\
-"1:	ldarx	%0,0,%4		# atomic64_fetch_" #op "_relaxed\n"	\
-	#asm_op " %1,%3,%0\n"						\
-"	stdcx.	%1,0,%4\n"						\
-"	bne-	1b\n"							\
-	: "=&r" (res), "=&r" (t), "+m" (v->counter)			\
-	: "r" (a), "r" (&v->counter)					\
-	: "cc");							\
-									\
-	return res;							\
-}
-
-#define ATOMIC64_OPS(op, asm_op)					\
-	ATOMIC64_OP(op, asm_op)						\
-	ATOMIC64_OP_RETURN_RELAXED(op, asm_op)				\
-	ATOMIC64_FETCH_OP_RELAXED(op, asm_op)
-
-ATOMIC64_OPS(add, add)
-ATOMIC64_OPS(sub, subf)
-
-#define atomic64_add_return_relaxed atomic64_add_return_relaxed
-#define atomic64_sub_return_relaxed atomic64_sub_return_relaxed
-
-#define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed
-#define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed
-
-#undef ATOMIC64_OPS
-#define ATOMIC64_OPS(op, asm_op)					\
-	ATOMIC64_OP(op, asm_op)						\
-	ATOMIC64_FETCH_OP_RELAXED(op, asm_op)
-
-ATOMIC64_OPS(and, and)
-ATOMIC64_OPS(or, or)
-ATOMIC64_OPS(xor, xor)
-
-#define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed
-#define atomic64_fetch_or_relaxed  atomic64_fetch_or_relaxed
-#define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed
-
-#undef ATOPIC64_OPS
-#undef ATOMIC64_FETCH_OP_RELAXED
-#undef ATOMIC64_OP_RETURN_RELAXED
-#undef ATOMIC64_OP
-
-static __inline__ void atomic64_inc(atomic64_t *v)
-{
-	s64 t;
-
-	__asm__ __volatile__(
-"1:	ldarx	%0,0,%2		# atomic64_inc\n\
-	addic	%0,%0,1\n\
-	stdcx.	%0,0,%2 \n\
-	bne-	1b"
-	: "=&r" (t), "+m" (v->counter)
-	: "r" (&v->counter)
-	: "cc", "xer");
-}
-#define atomic64_inc atomic64_inc
-
-static __inline__ s64 atomic64_inc_return_relaxed(atomic64_t *v)
-{
-	s64 t;
-
-	__asm__ __volatile__(
-"1:	ldarx	%0,0,%2		# atomic64_inc_return_relaxed\n"
-"	addic	%0,%0,1\n"
-"	stdcx.	%0,0,%2\n"
-"	bne-	1b"
-	: "=&r" (t), "+m" (v->counter)
-	: "r" (&v->counter)
-	: "cc", "xer");
-
-	return t;
-}
-
-static __inline__ void atomic64_dec(atomic64_t *v)
-{
-	s64 t;
-
-	__asm__ __volatile__(
-"1:	ldarx	%0,0,%2		# atomic64_dec\n\
-	addic	%0,%0,-1\n\
-	stdcx.	%0,0,%2\n\
-	bne-	1b"
-	: "=&r" (t), "+m" (v->counter)
-	: "r" (&v->counter)
-	: "cc", "xer");
-}
-#define atomic64_dec atomic64_dec
-
-static __inline__ s64 atomic64_dec_return_relaxed(atomic64_t *v)
-{
-	s64 t;
-
-	__asm__ __volatile__(
-"1:	ldarx	%0,0,%2		# atomic64_dec_return_relaxed\n"
-"	addic	%0,%0,-1\n"
-"	stdcx.	%0,0,%2\n"
-"	bne-	1b"
-	: "=&r" (t), "+m" (v->counter)
-	: "r" (&v->counter)
-	: "cc", "xer");
-
-	return t;
-}
-
-#define atomic64_inc_return_relaxed atomic64_inc_return_relaxed
-#define atomic64_dec_return_relaxed atomic64_dec_return_relaxed
+#define atomic_dec_if_positive_relaxed atomic_dec_if_positive_relaxed
 
+#ifdef CONFIG_64BIT
 /*
  * Atomically test *v and decrement if it is greater than 0.
  * The function returns the old value of *v minus 1.
  */
-static __inline__ s64 atomic64_dec_if_positive(atomic64_t *v)
+static inline s64 atomic64_dec_if_positive_relaxed(atomic64_t *v)
 {
 	s64 t;
 
-	__asm__ __volatile__(
+	asm volatile(
 	PPC_ATOMIC_ENTRY_BARRIER
 "1:	ldarx	%0,0,%1		# atomic64_dec_if_positive\n\
 	addic.	%0,%0,-1\n\
@@ -497,80 +368,8 @@ static __inline__ s64 atomic64_dec_if_positive(atomic64_t *v)
 
 	return t;
 }
-#define atomic64_dec_if_positive atomic64_dec_if_positive
-
-#define atomic64_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n)))
-#define atomic64_cmpxchg_relaxed(v, o, n) \
-	cmpxchg_relaxed(&((v)->counter), (o), (n))
-#define atomic64_cmpxchg_acquire(v, o, n) \
-	cmpxchg_acquire(&((v)->counter), (o), (n))
-
-#define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
-#define atomic64_xchg_relaxed(v, new) xchg_relaxed(&((v)->counter), (new))
-
-/**
- * atomic64_fetch_add_unless - add unless the number is a given value
- * @v: pointer of type atomic64_t
- * @a: the amount to add to v...
- * @u: ...unless v is equal to u.
- *
- * Atomically adds @a to @v, so long as it was not @u.
- * Returns the old value of @v.
- */
-static __inline__ s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
-{
-	s64 t;
-
-	__asm__ __volatile__ (
-	PPC_ATOMIC_ENTRY_BARRIER
-"1:	ldarx	%0,0,%1		# atomic64_fetch_add_unless\n\
-	cmpd	0,%0,%3 \n\
-	beq	2f \n\
-	add	%0,%2,%0 \n"
-"	stdcx.	%0,0,%1 \n\
-	bne-	1b \n"
-	PPC_ATOMIC_EXIT_BARRIER
-"	subf	%0,%2,%0 \n\
-2:"
-	: "=&r" (t)
-	: "r" (&v->counter), "r" (a), "r" (u)
-	: "cc", "memory");
-
-	return t;
-}
-#define atomic64_fetch_add_unless atomic64_fetch_add_unless
-
-/**
- * atomic_inc64_not_zero - increment unless the number is zero
- * @v: pointer of type atomic64_t
- *
- * Atomically increments @v by 1, so long as @v is non-zero.
- * Returns non-zero if @v was non-zero, and zero otherwise.
- */
-static __inline__ int atomic64_inc_not_zero(atomic64_t *v)
-{
-	s64 t1, t2;
-
-	__asm__ __volatile__ (
-	PPC_ATOMIC_ENTRY_BARRIER
-"1:	ldarx	%0,0,%2		# atomic64_inc_not_zero\n\
-	cmpdi	0,%0,0\n\
-	beq-	2f\n\
-	addic	%1,%0,1\n\
-	stdcx.	%1,0,%2\n\
-	bne-	1b\n"
-	PPC_ATOMIC_EXIT_BARRIER
-	"\n\
-2:"
-	: "=&r" (t1), "=&r" (t2)
-	: "r" (&v->counter)
-	: "cc", "xer", "memory");
-
-	return t1 != 0;
-}
-#define atomic64_inc_not_zero(v) atomic64_inc_not_zero((v))
-
-#endif /* __powerpc64__ */
+#define atomic64_dec_if_positive_relaxed atomic64_dec_if_positive_relaxed
+#endif
 
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_ATOMIC_H_ */
diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h
index cf091c4c22e5..181f7e8b3281 100644
--- a/arch/powerpc/include/asm/cmpxchg.h
+++ b/arch/powerpc/include/asm/cmpxchg.h
@@ -192,7 +192,7 @@ __xchg_relaxed(void *ptr, unsigned long x, unsigned int size)
      		(unsigned long)_x_, sizeof(*(ptr))); 			     \
   })
 
-#define xchg_relaxed(ptr, x)						\
+#define arch_xchg_relaxed(ptr, x)					\
 ({									\
 	__typeof__(*(ptr)) _x_ = (x);					\
 	(__typeof__(*(ptr))) __xchg_relaxed((ptr),			\
@@ -448,35 +448,7 @@ __cmpxchg_relaxed(void *ptr, unsigned long old, unsigned long new,
 	return old;
 }
 
-static __always_inline unsigned long
-__cmpxchg_acquire(void *ptr, unsigned long old, unsigned long new,
-		  unsigned int size)
-{
-	switch (size) {
-	case 1:
-		return __cmpxchg_u8_acquire(ptr, old, new);
-	case 2:
-		return __cmpxchg_u16_acquire(ptr, old, new);
-	case 4:
-		return __cmpxchg_u32_acquire(ptr, old, new);
-#ifdef CONFIG_PPC64
-	case 8:
-		return __cmpxchg_u64_acquire(ptr, old, new);
-#endif
-	}
-	BUILD_BUG_ON_MSG(1, "Unsupported size for __cmpxchg_acquire");
-	return old;
-}
-#define cmpxchg(ptr, o, n)						 \
-  ({									 \
-     __typeof__(*(ptr)) _o_ = (o);					 \
-     __typeof__(*(ptr)) _n_ = (n);					 \
-     (__typeof__(*(ptr))) __cmpxchg((ptr), (unsigned long)_o_,		 \
-				    (unsigned long)_n_, sizeof(*(ptr))); \
-  })
-
-
-#define cmpxchg_local(ptr, o, n)					 \
+#define arch_cmpxchg_local(ptr, o, n)					 \
   ({									 \
      __typeof__(*(ptr)) _o_ = (o);					 \
      __typeof__(*(ptr)) _n_ = (n);					 \
@@ -484,7 +456,7 @@ __cmpxchg_acquire(void *ptr, unsigned long old, unsigned long new,
 				    (unsigned long)_n_, sizeof(*(ptr))); \
   })
 
-#define cmpxchg_relaxed(ptr, o, n)					\
+#define arch_cmpxchg_relaxed(ptr, o, n)					\
 ({									\
 	__typeof__(*(ptr)) _o_ = (o);					\
 	__typeof__(*(ptr)) _n_ = (n);					\
@@ -493,38 +465,20 @@ __cmpxchg_acquire(void *ptr, unsigned long old, unsigned long new,
 			sizeof(*(ptr)));				\
 })
 
-#define cmpxchg_acquire(ptr, o, n)					\
-({									\
-	__typeof__(*(ptr)) _o_ = (o);					\
-	__typeof__(*(ptr)) _n_ = (n);					\
-	(__typeof__(*(ptr))) __cmpxchg_acquire((ptr),			\
-			(unsigned long)_o_, (unsigned long)_n_,		\
-			sizeof(*(ptr)));				\
-})
 #ifdef CONFIG_PPC64
-#define cmpxchg64(ptr, o, n)						\
-  ({									\
-	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
-	cmpxchg((ptr), (o), (n));					\
-  })
-#define cmpxchg64_local(ptr, o, n)					\
+#define arch_cmpxchg64_local(ptr, o, n)					\
   ({									\
 	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
-	cmpxchg_local((ptr), (o), (n));					\
+	arch_cmpxchg_local((ptr), (o), (n));				\
   })
-#define cmpxchg64_relaxed(ptr, o, n)					\
-({									\
-	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
-	cmpxchg_relaxed((ptr), (o), (n));				\
-})
-#define cmpxchg64_acquire(ptr, o, n)					\
+#define arch_cmpxchg64_relaxed(ptr, o, n)				\
 ({									\
 	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
-	cmpxchg_acquire((ptr), (o), (n));				\
+	arch_cmpxchg_relaxed((ptr), (o), (n));				\
 })
 #else
 #include <asm-generic/cmpxchg-local.h>
-#define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n))
+#define arch_cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n))
 #endif
 
 #endif /* __KERNEL__ */
-- 
2.23.0


^ permalink raw reply related

* [PATCH] powerpc/64s/perf: perf interrupt does not have to get_user_pages to access user memory
From: Nicholas Piggin @ 2020-11-11 12:01 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin

read_user_stack_slow that walks user address translation by hand is
only required on hash, because a hash fault can not be serviced from
"NMI" context (to avoid re-entering the hash code) so the user stack
can be mapped into Linux page tables but not accessible by the CPU.

Radix MMU mode does not have this restriction. A page fault failure
would indicate the page is not accessible via get_user_pages either,
so avoid this on radix.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/perf/callchain.h    | 2 +-
 arch/powerpc/perf/callchain_64.c | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/perf/callchain.h b/arch/powerpc/perf/callchain.h
index ae24d4a00da6..d6fa6e25234f 100644
--- a/arch/powerpc/perf/callchain.h
+++ b/arch/powerpc/perf/callchain.h
@@ -33,7 +33,7 @@ static inline int __read_user_stack(const void __user *ptr, void *ret,
 
 	rc = copy_from_user_nofault(ret, ptr, size);
 
-	if (IS_ENABLED(CONFIG_PPC64) && rc)
+	if (IS_ENABLED(CONFIG_PPC64) && !radix_enabled() && rc)
 		return read_user_stack_slow(ptr, ret, size);
 
 	return rc;
diff --git a/arch/powerpc/perf/callchain_64.c b/arch/powerpc/perf/callchain_64.c
index fed90e827f3a..0777b04a0c56 100644
--- a/arch/powerpc/perf/callchain_64.c
+++ b/arch/powerpc/perf/callchain_64.c
@@ -21,7 +21,8 @@
 /*
  * On 64-bit we don't want to invoke hash_page on user addresses from
  * interrupt context, so if the access faults, we read the page tables
- * to find which page (if any) is mapped and access it directly.
+ * to find which page (if any) is mapped and access it directly. Radix
+ * has no need for this so it doesn't use read_user_stack_slow.
  */
 int read_user_stack_slow(const void __user *ptr, void *buf, int nb)
 {
-- 
2.23.0


^ permalink raw reply related

* Re: [PATCH v1 3/4] powerpc/mm: remove linear mapping if __add_pages() fails in arch_add_memory()
From: David Hildenbrand @ 2020-11-11 12:07 UTC (permalink / raw)
  To: Oscar Salvador, Mike Rapoport
  Cc: Michal Hocko, Wei Yang, linux-kernel, linux-mm, Paul Mackerras,
	Rashmica Gupta, linuxppc-dev, Andrew Morton
In-Reply-To: <20201104121109.GA5126@localhost.localdomain>

On 04.11.20 13:11, Oscar Salvador wrote:
> On Wed, Nov 04, 2020 at 02:06:51PM +0200, Mike Rapoport wrote:
>> On Wed, Nov 04, 2020 at 10:50:07AM +0100, osalvador wrote:
>>> On Thu, Oct 29, 2020 at 05:27:17PM +0100, David Hildenbrand wrote:
>>>> Let's revert what we did in case seomthing goes wrong and we return an
>>>> error.
>>>
>>> Dumb question, but should not we do this for other arches as well?
>>
>> It seems arm64 and s390 already do that.
>> x86 could have its arch_add_memory() improved though :)
> 
> Right, I only stared at x86 and see it did not have it.
> I guess we want to have all arches aligned with this.

The ultimate goal would be to get rid of arch-specific arch_add_memory() 
implementations completely, providing arch_create_linear_mapping() / 
arch_remove_linear_mapping() instead (as indicated in patch #1).

The x86 variant certainly needs love, but I'll keep this patch set 
powerpc specific, so it can go via the powerpc tree in one piece. I'll 
add unifying these implementations onto my todo list.

Thanks!

-- 
Thanks,

David / dhildenb


^ permalink raw reply

* Re: [PATCH v1 2/4] powerpc/mm: print warning in arch_remove_linear_mapping()
From: David Hildenbrand @ 2020-11-11 12:10 UTC (permalink / raw)
  To: osalvador
  Cc: Michal Hocko, Wei Yang, linux-kernel, linux-mm, Paul Mackerras,
	Rashmica Gupta, linuxppc-dev, Andrew Morton, Mike Rapoport
In-Reply-To: <20201104094255.GA4981@localhost.localdomain>

On 04.11.20 10:42, osalvador wrote:
> On Thu, Oct 29, 2020 at 05:27:16PM +0100, David Hildenbrand wrote:
>> Let's print a warning similar to in arch_add_linear_mapping() instead of
>> WARN_ON_ONCE() and eventually crashing the kernel.
>>
>> Cc: Michael Ellerman <mpe@ellerman.id.au>
>> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
>> Cc: Paul Mackerras <paulus@samba.org>
>> Cc: Rashmica Gupta <rashmica.g@gmail.com>
>> Cc: Andrew Morton <akpm@linux-foundation.org>
>> Cc: Mike Rapoport <rppt@kernel.org>
>> Cc: Michal Hocko <mhocko@suse.com>
>> Cc: Oscar Salvador <osalvador@suse.de>
>> Cc: Wei Yang <richard.weiyang@linux.alibaba.com>
>> Signed-off-by: David Hildenbrand <david@redhat.com>
>> ---
>>   arch/powerpc/mm/mem.c | 4 +++-
>>   1 file changed, 3 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
>> index 8a86d81f8df0..685028451dd2 100644
>> --- a/arch/powerpc/mm/mem.c
>> +++ b/arch/powerpc/mm/mem.c
>> @@ -145,7 +145,9 @@ void __ref arch_remove_linear_mapping(u64 start, u64 size)
>>   	flush_dcache_range_chunked(start, start + size, FLUSH_CHUNK_SIZE);
>>   
>>   	ret = remove_section_mapping(start, start + size);
>> -	WARN_ON_ONCE(ret);
>> +	if (ret)
>> +		pr_warn("Unable to remove linear mapping for 0x%llx..0x%llx: %d\n",
>> +			start, start + size, ret);
> 
> I guess the fear is to panic on systems that do have panic_on_warn (not
> sure how many productions systems have this out there).

Exactly.

> But anyway, being coherent with that, I think you should remove the WARN_ON
> in hash__remove_section_mapping as well.

Thanks, I'll add a patch doing that.

> 
> Besides that:
> 
> Reviewed-by: Oscar Salvador <osalvador@suse.
> 
> Not sure if the functions below that also have any sort of WARN_ON.
> native_hpte_removebolted has a VM_WARN_ON, but that is on
> CONFIG_DEBUG_VM so does not really matter.

Right. Thanks!

-- 
Thanks,

David / dhildenb


^ permalink raw reply

* [PATCH seccomp v2 1/8] csky: Enable seccomp architecture tracking
From: YiFei Zhu @ 2020-11-11 13:33 UTC (permalink / raw)
  To: containers
  Cc: linux-sh, Tobin Feldman-Fitzthum, Hubertus Franke, Jack Chen,
	linux-riscv, Andrea Arcangeli, linux-s390, YiFei Zhu, linux-csky,
	Tianyin Xu, linux-xtensa, Kees Cook, Jann Horn, Valentin Rothberg,
	Aleksa Sarai, Josep Torrellas, Will Drewry, linux-parisc,
	linux-kernel, Andy Lutomirski, Dimitrios Skarlatos, David Laight,
	Giuseppe Scrivano, linuxppc-dev, Tycho Andersen
In-Reply-To: <cover.1605101222.git.yifeifz2@illinois.edu>

From: YiFei Zhu <yifeifz2@illinois.edu>

To enable seccomp constant action bitmaps, we need to have a static
mapping to the audit architecture and system call table size. Add these
for csky.

Signed-off-by: YiFei Zhu <yifeifz2@illinois.edu>
---
 arch/csky/include/asm/Kbuild    |  1 -
 arch/csky/include/asm/seccomp.h | 11 +++++++++++
 2 files changed, 11 insertions(+), 1 deletion(-)
 create mode 100644 arch/csky/include/asm/seccomp.h

diff --git a/arch/csky/include/asm/Kbuild b/arch/csky/include/asm/Kbuild
index 64876e59e2ef..93372255984d 100644
--- a/arch/csky/include/asm/Kbuild
+++ b/arch/csky/include/asm/Kbuild
@@ -4,6 +4,5 @@ generic-y += gpio.h
 generic-y += kvm_para.h
 generic-y += local64.h
 generic-y += qrwlock.h
-generic-y += seccomp.h
 generic-y += user.h
 generic-y += vmlinux.lds.h
diff --git a/arch/csky/include/asm/seccomp.h b/arch/csky/include/asm/seccomp.h
new file mode 100644
index 000000000000..d33e758126fb
--- /dev/null
+++ b/arch/csky/include/asm/seccomp.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _ASM_SECCOMP_H
+#define _ASM_SECCOMP_H
+
+#include <asm-generic/seccomp.h>
+
+#define SECCOMP_ARCH_NATIVE		AUDIT_ARCH_CSKY
+#define SECCOMP_ARCH_NATIVE_NR		NR_syscalls
+#define SECCOMP_ARCH_NATIVE_NAME	"csky"
+
+#endif /* _ASM_SECCOMP_H */
-- 
2.29.2


^ permalink raw reply related

* [PATCH seccomp v2 0/8] seccomp: add bitmap cache support on remaining arches and report cache in procfs
From: YiFei Zhu @ 2020-11-11 13:33 UTC (permalink / raw)
  To: containers
  Cc: linux-sh, Tobin Feldman-Fitzthum, Hubertus Franke, Jack Chen,
	linux-riscv, Andrea Arcangeli, linux-s390, YiFei Zhu, linux-csky,
	Tianyin Xu, linux-xtensa, Kees Cook, Jann Horn, Valentin Rothberg,
	Aleksa Sarai, Josep Torrellas, Will Drewry, linux-parisc,
	linux-kernel, Andy Lutomirski, Dimitrios Skarlatos, David Laight,
	Giuseppe Scrivano, linuxppc-dev, Tycho Andersen

From: YiFei Zhu <yifeifz2@illinois.edu>

This patch series enables bitmap cache for the remaining arches with
SECCOMP_FILTER, other than MIPS.

I was unable to find any of the arches having subarch-specific NR_syscalls
macros, so generic NR_syscalls is used. SH's syscall_get_arch seems to
only have the 32-bit subarch implementation. I'm not sure if this is
expected.

This series has not been tested; I have not built all the cross compilers
necessary to build test, let alone run the kernel or benchmark the
performance, so help on making sure the bitmap cache works as expected
(selftests/seccomp/{seccomp_benchmark,seccomp_bpf}) would be appreciated.
The series applies on top of Kees's for-next/seccomp branch.

v1 -> v2:
* ppc, sh: s/__SECCOMP_ARCH_LE_BIT/__SECCOMP_ARCH_LE/
* ppc: add "le" suffix to arch name when the arch is little endian.
* ppc: add explanation of why __LITTLE_ENDIAN__ is used to commit message.

YiFei Zhu (8):
  csky: Enable seccomp architecture tracking
  parisc: Enable seccomp architecture tracking
  powerpc: Enable seccomp architecture tracking
  riscv: Enable seccomp architecture tracking
  s390: Enable seccomp architecture tracking
  sh: Enable seccomp architecture tracking
  xtensa: Enable seccomp architecture tracking
  seccomp/cache: Report cache data through /proc/pid/seccomp_cache

 arch/Kconfig                       | 15 ++++++++
 arch/csky/include/asm/Kbuild       |  1 -
 arch/csky/include/asm/seccomp.h    | 11 ++++++
 arch/parisc/include/asm/Kbuild     |  1 -
 arch/parisc/include/asm/seccomp.h  | 22 +++++++++++
 arch/powerpc/include/asm/seccomp.h | 23 ++++++++++++
 arch/riscv/include/asm/seccomp.h   | 10 +++++
 arch/s390/include/asm/seccomp.h    |  9 +++++
 arch/sh/include/asm/seccomp.h      | 10 +++++
 arch/xtensa/include/asm/Kbuild     |  1 -
 arch/xtensa/include/asm/seccomp.h  | 11 ++++++
 fs/proc/base.c                     |  6 +++
 include/linux/seccomp.h            |  7 ++++
 kernel/seccomp.c                   | 59 ++++++++++++++++++++++++++++++
 14 files changed, 183 insertions(+), 3 deletions(-)
 create mode 100644 arch/csky/include/asm/seccomp.h
 create mode 100644 arch/parisc/include/asm/seccomp.h
 create mode 100644 arch/xtensa/include/asm/seccomp.h

base-commit: 38c37e8fd3d2590c4234d8cfbc22158362f0eb04
--
2.29.2

^ permalink raw reply

* [PATCH seccomp v2 2/8] parisc: Enable seccomp architecture tracking
From: YiFei Zhu @ 2020-11-11 13:33 UTC (permalink / raw)
  To: containers
  Cc: linux-sh, Tobin Feldman-Fitzthum, Hubertus Franke, Jack Chen,
	linux-riscv, Andrea Arcangeli, linux-s390, YiFei Zhu, linux-csky,
	Tianyin Xu, linux-xtensa, Kees Cook, Jann Horn, Valentin Rothberg,
	Aleksa Sarai, Josep Torrellas, Will Drewry, linux-parisc,
	linux-kernel, Andy Lutomirski, Dimitrios Skarlatos, David Laight,
	Giuseppe Scrivano, linuxppc-dev, Tycho Andersen
In-Reply-To: <cover.1605101222.git.yifeifz2@illinois.edu>

From: YiFei Zhu <yifeifz2@illinois.edu>

To enable seccomp constant action bitmaps, we need to have a static
mapping to the audit architecture and system call table size. Add these
for parisc.

Signed-off-by: YiFei Zhu <yifeifz2@illinois.edu>
---
 arch/parisc/include/asm/Kbuild    |  1 -
 arch/parisc/include/asm/seccomp.h | 22 ++++++++++++++++++++++
 2 files changed, 22 insertions(+), 1 deletion(-)
 create mode 100644 arch/parisc/include/asm/seccomp.h

diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild
index e3ee5c0bfe80..f16c4db80116 100644
--- a/arch/parisc/include/asm/Kbuild
+++ b/arch/parisc/include/asm/Kbuild
@@ -5,5 +5,4 @@ generated-y += syscall_table_c32.h
 generic-y += kvm_para.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
-generic-y += seccomp.h
 generic-y += user.h
diff --git a/arch/parisc/include/asm/seccomp.h b/arch/parisc/include/asm/seccomp.h
new file mode 100644
index 000000000000..b058b2220322
--- /dev/null
+++ b/arch/parisc/include/asm/seccomp.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _ASM_SECCOMP_H
+#define _ASM_SECCOMP_H
+
+#include <asm-generic/seccomp.h>
+
+#ifdef CONFIG_64BIT
+# define SECCOMP_ARCH_NATIVE		AUDIT_ARCH_PARISC64
+# define SECCOMP_ARCH_NATIVE_NR		NR_syscalls
+# define SECCOMP_ARCH_NATIVE_NAME	"parisc64"
+# ifdef CONFIG_COMPAT
+#  define SECCOMP_ARCH_COMPAT		AUDIT_ARCH_PARISC
+#  define SECCOMP_ARCH_COMPAT_NR	NR_syscalls
+#  define SECCOMP_ARCH_COMPAT_NAME	"parisc"
+# endif
+#else /* !CONFIG_64BIT */
+# define SECCOMP_ARCH_NATIVE		AUDIT_ARCH_PARISC
+# define SECCOMP_ARCH_NATIVE_NR		NR_syscalls
+# define SECCOMP_ARCH_NATIVE_NAME	"parisc"
+#endif
+
+#endif /* _ASM_SECCOMP_H */
-- 
2.29.2


^ permalink raw reply related

* [PATCH seccomp v2 3/8] powerpc: Enable seccomp architecture tracking
From: YiFei Zhu @ 2020-11-11 13:33 UTC (permalink / raw)
  To: containers
  Cc: linux-sh, Tobin Feldman-Fitzthum, Hubertus Franke, Jack Chen,
	linux-riscv, Andrea Arcangeli, linux-s390, YiFei Zhu, linux-csky,
	Tianyin Xu, linux-xtensa, Kees Cook, Jann Horn, Valentin Rothberg,
	Aleksa Sarai, Josep Torrellas, Will Drewry, linux-parisc,
	linux-kernel, Andy Lutomirski, Dimitrios Skarlatos, David Laight,
	Giuseppe Scrivano, linuxppc-dev, Tycho Andersen
In-Reply-To: <cover.1605101222.git.yifeifz2@illinois.edu>

From: YiFei Zhu <yifeifz2@illinois.edu>

To enable seccomp constant action bitmaps, we need to have a static
mapping to the audit architecture and system call table size. Add these
for powerpc.

__LITTLE_ENDIAN__ is used here instead of CONFIG_CPU_LITTLE_ENDIAN
to keep it consistent with asm/syscall.h.

Signed-off-by: YiFei Zhu <yifeifz2@illinois.edu>
---
 arch/powerpc/include/asm/seccomp.h | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/arch/powerpc/include/asm/seccomp.h b/arch/powerpc/include/asm/seccomp.h
index 51209f6071c5..ac2033f134f0 100644
--- a/arch/powerpc/include/asm/seccomp.h
+++ b/arch/powerpc/include/asm/seccomp.h
@@ -8,4 +8,27 @@
 
 #include <asm-generic/seccomp.h>
 
+#ifdef __LITTLE_ENDIAN__
+#define __SECCOMP_ARCH_LE		__AUDIT_ARCH_LE
+#define __SECCOMP_ARCH_LE_NAME		"le"
+#else
+#define __SECCOMP_ARCH_LE		0
+#define __SECCOMP_ARCH_LE_NAME
+#endif
+
+#ifdef CONFIG_PPC64
+# define SECCOMP_ARCH_NATIVE		(AUDIT_ARCH_PPC64 | __SECCOMP_ARCH_LE)
+# define SECCOMP_ARCH_NATIVE_NR		NR_syscalls
+# define SECCOMP_ARCH_NATIVE_NAME	"ppc64" __SECCOMP_ARCH_LE_NAME
+# ifdef CONFIG_COMPAT
+#  define SECCOMP_ARCH_COMPAT		(AUDIT_ARCH_PPC | __SECCOMP_ARCH_LE)
+#  define SECCOMP_ARCH_COMPAT_NR	NR_syscalls
+#  define SECCOMP_ARCH_COMPAT_NAME	"ppc" __SECCOMP_ARCH_LE_NAME
+# endif
+#else /* !CONFIG_PPC64 */
+# define SECCOMP_ARCH_NATIVE		(AUDIT_ARCH_PPC | __SECCOMP_ARCH_LE)
+# define SECCOMP_ARCH_NATIVE_NR		NR_syscalls
+# define SECCOMP_ARCH_NATIVE_NAME	"ppc" __SECCOMP_ARCH_LE_NAME
+#endif
+
 #endif	/* _ASM_POWERPC_SECCOMP_H */
-- 
2.29.2


^ permalink raw reply related

* [PATCH seccomp v2 5/8] s390: Enable seccomp architecture tracking
From: YiFei Zhu @ 2020-11-11 13:33 UTC (permalink / raw)
  To: containers
  Cc: linux-sh, Tobin Feldman-Fitzthum, Hubertus Franke, Jack Chen,
	linux-riscv, Andrea Arcangeli, linux-s390, YiFei Zhu, linux-csky,
	Tianyin Xu, linux-xtensa, Kees Cook, Jann Horn, Valentin Rothberg,
	Aleksa Sarai, Josep Torrellas, Will Drewry, linux-parisc,
	linux-kernel, Andy Lutomirski, Dimitrios Skarlatos, David Laight,
	Giuseppe Scrivano, linuxppc-dev, Tycho Andersen
In-Reply-To: <cover.1605101222.git.yifeifz2@illinois.edu>

From: YiFei Zhu <yifeifz2@illinois.edu>

To enable seccomp constant action bitmaps, we need to have a static
mapping to the audit architecture and system call table size. Add these
for s390.

Signed-off-by: YiFei Zhu <yifeifz2@illinois.edu>
---
 arch/s390/include/asm/seccomp.h | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/arch/s390/include/asm/seccomp.h b/arch/s390/include/asm/seccomp.h
index 795bbe0d7ca6..71d46f0ba97b 100644
--- a/arch/s390/include/asm/seccomp.h
+++ b/arch/s390/include/asm/seccomp.h
@@ -16,4 +16,13 @@
 
 #include <asm-generic/seccomp.h>
 
+#define SECCOMP_ARCH_NATIVE		AUDIT_ARCH_S390X
+#define SECCOMP_ARCH_NATIVE_NR		NR_syscalls
+#define SECCOMP_ARCH_NATIVE_NAME	"s390x"
+#ifdef CONFIG_COMPAT
+# define SECCOMP_ARCH_COMPAT		AUDIT_ARCH_S390
+# define SECCOMP_ARCH_COMPAT_NR		NR_syscalls
+# define SECCOMP_ARCH_COMPAT_NAME	"s390"
+#endif
+
 #endif	/* _ASM_S390_SECCOMP_H */
-- 
2.29.2


^ permalink raw reply related

* [PATCH seccomp v2 4/8] riscv: Enable seccomp architecture tracking
From: YiFei Zhu @ 2020-11-11 13:33 UTC (permalink / raw)
  To: containers
  Cc: linux-sh, Tobin Feldman-Fitzthum, Hubertus Franke, Jack Chen,
	linux-riscv, Andrea Arcangeli, linux-s390, YiFei Zhu, linux-csky,
	Tianyin Xu, linux-xtensa, Kees Cook, Jann Horn, Valentin Rothberg,
	Aleksa Sarai, Josep Torrellas, Will Drewry, linux-parisc,
	linux-kernel, Andy Lutomirski, Dimitrios Skarlatos, David Laight,
	Giuseppe Scrivano, linuxppc-dev, Tycho Andersen
In-Reply-To: <cover.1605101222.git.yifeifz2@illinois.edu>

From: YiFei Zhu <yifeifz2@illinois.edu>

To enable seccomp constant action bitmaps, we need to have a static
mapping to the audit architecture and system call table size. Add these
for riscv.

Signed-off-by: YiFei Zhu <yifeifz2@illinois.edu>
---
 arch/riscv/include/asm/seccomp.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/arch/riscv/include/asm/seccomp.h b/arch/riscv/include/asm/seccomp.h
index bf7744ee3b3d..c7ee6a3507be 100644
--- a/arch/riscv/include/asm/seccomp.h
+++ b/arch/riscv/include/asm/seccomp.h
@@ -7,4 +7,14 @@
 
 #include <asm-generic/seccomp.h>
 
+#ifdef CONFIG_64BIT
+# define SECCOMP_ARCH_NATIVE		AUDIT_ARCH_RISCV64
+# define SECCOMP_ARCH_NATIVE_NR		NR_syscalls
+# define SECCOMP_ARCH_NATIVE_NAME	"riscv64"
+#else /* !CONFIG_64BIT */
+# define SECCOMP_ARCH_NATIVE		AUDIT_ARCH_RISCV32
+# define SECCOMP_ARCH_NATIVE_NR		NR_syscalls
+# define SECCOMP_ARCH_NATIVE_NAME	"riscv32"
+#endif
+
 #endif /* _ASM_SECCOMP_H */
-- 
2.29.2


^ permalink raw reply related

* [PATCH seccomp v2 6/8] sh: Enable seccomp architecture tracking
From: YiFei Zhu @ 2020-11-11 13:33 UTC (permalink / raw)
  To: containers
  Cc: linux-sh, Tobin Feldman-Fitzthum, Hubertus Franke, Jack Chen,
	linux-riscv, Andrea Arcangeli, linux-s390, YiFei Zhu, linux-csky,
	Tianyin Xu, linux-xtensa, Kees Cook, Jann Horn, Valentin Rothberg,
	Aleksa Sarai, Josep Torrellas, Will Drewry, linux-parisc,
	linux-kernel, Andy Lutomirski, Dimitrios Skarlatos, David Laight,
	Giuseppe Scrivano, linuxppc-dev, Tycho Andersen
In-Reply-To: <cover.1605101222.git.yifeifz2@illinois.edu>

From: YiFei Zhu <yifeifz2@illinois.edu>

To enable seccomp constant action bitmaps, we need to have a static
mapping to the audit architecture and system call table size. Add these
for sh.

Signed-off-by: YiFei Zhu <yifeifz2@illinois.edu>
---
 arch/sh/include/asm/seccomp.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/arch/sh/include/asm/seccomp.h b/arch/sh/include/asm/seccomp.h
index 54111e4d32b8..d4578395fd66 100644
--- a/arch/sh/include/asm/seccomp.h
+++ b/arch/sh/include/asm/seccomp.h
@@ -8,4 +8,14 @@
 #define __NR_seccomp_exit __NR_exit
 #define __NR_seccomp_sigreturn __NR_rt_sigreturn
 
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+#define __SECCOMP_ARCH_LE		__AUDIT_ARCH_LE
+#else
+#define __SECCOMP_ARCH_LE		0
+#endif
+
+#define SECCOMP_ARCH_NATIVE		(AUDIT_ARCH_SH | __SECCOMP_ARCH_LE)
+#define SECCOMP_ARCH_NATIVE_NR		NR_syscalls
+#define SECCOMP_ARCH_NATIVE_NAME	"sh"
+
 #endif /* __ASM_SECCOMP_H */
-- 
2.29.2


^ permalink raw reply related

* [PATCH seccomp v2 7/8] xtensa: Enable seccomp architecture tracking
From: YiFei Zhu @ 2020-11-11 13:33 UTC (permalink / raw)
  To: containers
  Cc: linux-sh, Tobin Feldman-Fitzthum, Hubertus Franke, Jack Chen,
	linux-riscv, Andrea Arcangeli, linux-s390, YiFei Zhu, linux-csky,
	Tianyin Xu, linux-xtensa, Kees Cook, Jann Horn, Valentin Rothberg,
	Aleksa Sarai, Josep Torrellas, Will Drewry, linux-parisc,
	linux-kernel, Andy Lutomirski, Dimitrios Skarlatos, David Laight,
	Giuseppe Scrivano, linuxppc-dev, Tycho Andersen
In-Reply-To: <cover.1605101222.git.yifeifz2@illinois.edu>

From: YiFei Zhu <yifeifz2@illinois.edu>

To enable seccomp constant action bitmaps, we need to have a static
mapping to the audit architecture and system call table size. Add these
for xtensa.

Signed-off-by: YiFei Zhu <yifeifz2@illinois.edu>
---
 arch/xtensa/include/asm/Kbuild    |  1 -
 arch/xtensa/include/asm/seccomp.h | 11 +++++++++++
 2 files changed, 11 insertions(+), 1 deletion(-)
 create mode 100644 arch/xtensa/include/asm/seccomp.h

diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild
index c59c42a1221a..9718e9593564 100644
--- a/arch/xtensa/include/asm/Kbuild
+++ b/arch/xtensa/include/asm/Kbuild
@@ -7,5 +7,4 @@ generic-y += mcs_spinlock.h
 generic-y += param.h
 generic-y += qrwlock.h
 generic-y += qspinlock.h
-generic-y += seccomp.h
 generic-y += user.h
diff --git a/arch/xtensa/include/asm/seccomp.h b/arch/xtensa/include/asm/seccomp.h
new file mode 100644
index 000000000000..f1cb6b0a9e1f
--- /dev/null
+++ b/arch/xtensa/include/asm/seccomp.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _ASM_SECCOMP_H
+#define _ASM_SECCOMP_H
+
+#include <asm-generic/seccomp.h>
+
+#define SECCOMP_ARCH_NATIVE		AUDIT_ARCH_XTENSA
+#define SECCOMP_ARCH_NATIVE_NR		NR_syscalls
+#define SECCOMP_ARCH_NATIVE_NAME	"xtensa"
+
+#endif /* _ASM_SECCOMP_H */
-- 
2.29.2


^ permalink raw reply related

* [PATCH seccomp v2 8/8] seccomp/cache: Report cache data through /proc/pid/seccomp_cache
From: YiFei Zhu @ 2020-11-11 13:33 UTC (permalink / raw)
  To: containers
  Cc: linux-sh, Tobin Feldman-Fitzthum, Hubertus Franke, Jack Chen,
	linux-riscv, Andrea Arcangeli, linux-s390, YiFei Zhu, linux-csky,
	Tianyin Xu, linux-xtensa, Kees Cook, Jann Horn, Valentin Rothberg,
	Aleksa Sarai, Josep Torrellas, Will Drewry, linux-parisc,
	linux-kernel, Andy Lutomirski, Dimitrios Skarlatos, David Laight,
	Giuseppe Scrivano, linuxppc-dev, Tycho Andersen
In-Reply-To: <cover.1605101222.git.yifeifz2@illinois.edu>

From: YiFei Zhu <yifeifz2@illinois.edu>

Currently the kernel does not provide an infrastructure to translate
architecture numbers to a human-readable name. Translating syscall
numbers to syscall names is possible through FTRACE_SYSCALL
infrastructure but it does not provide support for compat syscalls.

This will create a file for each PID as /proc/pid/seccomp_cache.
The file will be empty when no seccomp filters are loaded, or be
in the format of:
<arch name> <decimal syscall number> <ALLOW | FILTER>
where ALLOW means the cache is guaranteed to allow the syscall,
and filter means the cache will pass the syscall to the BPF filter.

For the docker default profile on x86_64 it looks like:
x86_64 0 ALLOW
x86_64 1 ALLOW
x86_64 2 ALLOW
x86_64 3 ALLOW
[...]
x86_64 132 ALLOW
x86_64 133 ALLOW
x86_64 134 FILTER
x86_64 135 FILTER
x86_64 136 FILTER
x86_64 137 ALLOW
x86_64 138 ALLOW
x86_64 139 FILTER
x86_64 140 ALLOW
x86_64 141 ALLOW
[...]

This file is guarded by CONFIG_SECCOMP_CACHE_DEBUG with a default
of N because I think certain users of seccomp might not want the
application to know which syscalls are definitely usable. For
the same reason, it is also guarded by CAP_SYS_ADMIN.

Suggested-by: Jann Horn <jannh@google.com>
Link: https://lore.kernel.org/lkml/CAG48ez3Ofqp4crXGksLmZY6=fGrF_tWyUCg7PBkAetvbbOPeOA@mail.gmail.com/
Signed-off-by: YiFei Zhu <yifeifz2@illinois.edu>
---
 arch/Kconfig            | 15 +++++++++++
 fs/proc/base.c          |  6 +++++
 include/linux/seccomp.h |  7 +++++
 kernel/seccomp.c        | 59 +++++++++++++++++++++++++++++++++++++++++
 4 files changed, 87 insertions(+)

diff --git a/arch/Kconfig b/arch/Kconfig
index 56b6ccc0e32d..6e2eb7171da0 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -514,6 +514,21 @@ config SECCOMP_FILTER
 
 	  See Documentation/userspace-api/seccomp_filter.rst for details.
 
+config SECCOMP_CACHE_DEBUG
+	bool "Show seccomp filter cache status in /proc/pid/seccomp_cache"
+	depends on SECCOMP
+	depends on SECCOMP_FILTER && !HAVE_SPARSE_SYSCALL_NR
+	depends on PROC_FS
+	help
+	  This enables the /proc/pid/seccomp_cache interface to monitor
+	  seccomp cache data. The file format is subject to change. Reading
+	  the file requires CAP_SYS_ADMIN.
+
+	  This option is for debugging only. Enabling presents the risk that
+	  an adversary may be able to infer the seccomp filter logic.
+
+	  If unsure, say N.
+
 config HAVE_ARCH_STACKLEAK
 	bool
 	help
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 0f707003dda5..d652f9dbaecc 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -3261,6 +3261,9 @@ static const struct pid_entry tgid_base_stuff[] = {
 #ifdef CONFIG_PROC_PID_ARCH_STATUS
 	ONE("arch_status", S_IRUGO, proc_pid_arch_status),
 #endif
+#ifdef CONFIG_SECCOMP_CACHE_DEBUG
+	ONE("seccomp_cache", S_IRUSR, proc_pid_seccomp_cache),
+#endif
 };
 
 static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx)
@@ -3590,6 +3593,9 @@ static const struct pid_entry tid_base_stuff[] = {
 #ifdef CONFIG_PROC_PID_ARCH_STATUS
 	ONE("arch_status", S_IRUGO, proc_pid_arch_status),
 #endif
+#ifdef CONFIG_SECCOMP_CACHE_DEBUG
+	ONE("seccomp_cache", S_IRUSR, proc_pid_seccomp_cache),
+#endif
 };
 
 static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx)
diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
index 02aef2844c38..76963ec4641a 100644
--- a/include/linux/seccomp.h
+++ b/include/linux/seccomp.h
@@ -121,4 +121,11 @@ static inline long seccomp_get_metadata(struct task_struct *task,
 	return -EINVAL;
 }
 #endif /* CONFIG_SECCOMP_FILTER && CONFIG_CHECKPOINT_RESTORE */
+
+#ifdef CONFIG_SECCOMP_CACHE_DEBUG
+struct seq_file;
+
+int proc_pid_seccomp_cache(struct seq_file *m, struct pid_namespace *ns,
+			   struct pid *pid, struct task_struct *task);
+#endif
 #endif /* _LINUX_SECCOMP_H */
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index d8cf468dbe1e..76f524e320b1 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -553,6 +553,9 @@ void seccomp_filter_release(struct task_struct *tsk)
 {
 	struct seccomp_filter *orig = tsk->seccomp.filter;
 
+	/* We are effectively holding the siglock by not having any sighand. */
+	WARN_ON(tsk->sighand != NULL);
+
 	/* Detach task from its filter tree. */
 	tsk->seccomp.filter = NULL;
 	__seccomp_filter_release(orig);
@@ -2335,3 +2338,59 @@ static int __init seccomp_sysctl_init(void)
 device_initcall(seccomp_sysctl_init)
 
 #endif /* CONFIG_SYSCTL */
+
+#ifdef CONFIG_SECCOMP_CACHE_DEBUG
+/* Currently CONFIG_SECCOMP_CACHE_DEBUG implies SECCOMP_ARCH_NATIVE */
+static void proc_pid_seccomp_cache_arch(struct seq_file *m, const char *name,
+					const void *bitmap, size_t bitmap_size)
+{
+	int nr;
+
+	for (nr = 0; nr < bitmap_size; nr++) {
+		bool cached = test_bit(nr, bitmap);
+		char *status = cached ? "ALLOW" : "FILTER";
+
+		seq_printf(m, "%s %d %s\n", name, nr, status);
+	}
+}
+
+int proc_pid_seccomp_cache(struct seq_file *m, struct pid_namespace *ns,
+			   struct pid *pid, struct task_struct *task)
+{
+	struct seccomp_filter *f;
+	unsigned long flags;
+
+	/*
+	 * We don't want some sandboxed process to know what their seccomp
+	 * filters consist of.
+	 */
+	if (!file_ns_capable(m->file, &init_user_ns, CAP_SYS_ADMIN))
+		return -EACCES;
+
+	if (!lock_task_sighand(task, &flags))
+		return -ESRCH;
+
+	f = READ_ONCE(task->seccomp.filter);
+	if (!f) {
+		unlock_task_sighand(task, &flags);
+		return 0;
+	}
+
+	/* prevent filter from being freed while we are printing it */
+	__get_seccomp_filter(f);
+	unlock_task_sighand(task, &flags);
+
+	proc_pid_seccomp_cache_arch(m, SECCOMP_ARCH_NATIVE_NAME,
+				    f->cache.allow_native,
+				    SECCOMP_ARCH_NATIVE_NR);
+
+#ifdef SECCOMP_ARCH_COMPAT
+	proc_pid_seccomp_cache_arch(m, SECCOMP_ARCH_COMPAT_NAME,
+				    f->cache.allow_compat,
+				    SECCOMP_ARCH_COMPAT_NR);
+#endif /* SECCOMP_ARCH_COMPAT */
+
+	__put_seccomp_filter(f);
+	return 0;
+}
+#endif /* CONFIG_SECCOMP_CACHE_DEBUG */
-- 
2.29.2


^ permalink raw reply related

* Re: [PATCH 1/3] asm-generic/atomic64: Add support for ARCH_ATOMIC
From: Christophe Leroy @ 2020-11-11 13:39 UTC (permalink / raw)
  To: Nicholas Piggin, linuxppc-dev
  Cc: Christophe Leroy, linux-arch, Arnd Bergmann, Peter Zijlstra,
	Boqun Feng, linux-kernel, Alexey Kardashevskiy, Will Deacon
In-Reply-To: <20201111110723.3148665-2-npiggin@gmail.com>

Hello,

Le 11/11/2020 à 12:07, Nicholas Piggin a écrit :
> This passes atomic64 selftest on ppc32 on qemu (uniprocessor only)
> both before and after powerpc is converted to use ARCH_ATOMIC.

Can you explain what this change does and why it is needed ?

Christophe

> 
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
>   include/asm-generic/atomic64.h | 70 +++++++++++++++++++++++++++-------
>   lib/atomic64.c                 | 36 ++++++++---------
>   2 files changed, 75 insertions(+), 31 deletions(-)
> 
> diff --git a/include/asm-generic/atomic64.h b/include/asm-generic/atomic64.h
> index 370f01d4450f..2b1ecb591bb9 100644
> --- a/include/asm-generic/atomic64.h
> +++ b/include/asm-generic/atomic64.h
> @@ -15,19 +15,17 @@ typedef struct {
>   
>   #define ATOMIC64_INIT(i)	{ (i) }
>   
> -extern s64 atomic64_read(const atomic64_t *v);
> -extern void atomic64_set(atomic64_t *v, s64 i);
> -
> -#define atomic64_set_release(v, i)	atomic64_set((v), (i))
> +extern s64 __atomic64_read(const atomic64_t *v);
> +extern void __atomic64_set(atomic64_t *v, s64 i);
>   
>   #define ATOMIC64_OP(op)							\
> -extern void	 atomic64_##op(s64 a, atomic64_t *v);
> +extern void	 __atomic64_##op(s64 a, atomic64_t *v);
>   
>   #define ATOMIC64_OP_RETURN(op)						\
> -extern s64 atomic64_##op##_return(s64 a, atomic64_t *v);
> +extern s64 __atomic64_##op##_return(s64 a, atomic64_t *v);
>   
>   #define ATOMIC64_FETCH_OP(op)						\
> -extern s64 atomic64_fetch_##op(s64 a, atomic64_t *v);
> +extern s64 __atomic64_fetch_##op(s64 a, atomic64_t *v);
>   
>   #define ATOMIC64_OPS(op)	ATOMIC64_OP(op) ATOMIC64_OP_RETURN(op) ATOMIC64_FETCH_OP(op)
>   
> @@ -46,11 +44,57 @@ ATOMIC64_OPS(xor)
>   #undef ATOMIC64_OP_RETURN
>   #undef ATOMIC64_OP
>   
> -extern s64 atomic64_dec_if_positive(atomic64_t *v);
> -#define atomic64_dec_if_positive atomic64_dec_if_positive
> -extern s64 atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n);
> -extern s64 atomic64_xchg(atomic64_t *v, s64 new);
> -extern s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u);
> -#define atomic64_fetch_add_unless atomic64_fetch_add_unless
> +extern s64 __atomic64_dec_if_positive(atomic64_t *v);
> +extern s64 __atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n);
> +extern s64 __atomic64_xchg(atomic64_t *v, s64 new);
> +extern s64 __atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u);
> +
> +#ifdef ARCH_ATOMIC
> +#define arch_atomic64_read __atomic64_read
> +#define arch_atomic64_set __atomic64_set
> +#define arch_atomic64_add __atomic64_add
> +#define arch_atomic64_add_return __atomic64_add_return
> +#define arch_atomic64_fetch_add __atomic64_fetch_add
> +#define arch_atomic64_sub __atomic64_sub
> +#define arch_atomic64_sub_return __atomic64_sub_return
> +#define arch_atomic64_fetch_sub __atomic64_fetch_sub
> +#define arch_atomic64_and __atomic64_and
> +#define arch_atomic64_and_return __atomic64_and_return
> +#define arch_atomic64_fetch_and __atomic64_fetch_and
> +#define arch_atomic64_or __atomic64_or
> +#define arch_atomic64_or_return __atomic64_or_return
> +#define arch_atomic64_fetch_or __atomic64_fetch_or
> +#define arch_atomic64_xor __atomic64_xor
> +#define arch_atomic64_xor_return __atomic64_xor_return
> +#define arch_atomic64_fetch_xor __atomic64_fetch_xor
> +#define arch_atomic64_xchg __atomic64_xchg
> +#define arch_atomic64_cmpxchg __atomic64_cmpxchg
> +#define arch_atomic64_set_release(v, i)	__atomic64_set((v), (i))
> +#define arch_atomic64_dec_if_positive __atomic64_dec_if_positive
> +#define arch_atomic64_fetch_add_unless __atomic64_fetch_add_unless
> +#else
> +#define atomic64_read __atomic64_read
> +#define atomic64_set __atomic64_set
> +#define atomic64_add __atomic64_add
> +#define atomic64_add_return __atomic64_add_return
> +#define atomic64_fetch_add __atomic64_fetch_add
> +#define atomic64_sub __atomic64_sub
> +#define atomic64_sub_return __atomic64_sub_return
> +#define atomic64_fetch_sub __atomic64_fetch_sub
> +#define atomic64_and __atomic64_and
> +#define atomic64_and_return __atomic64_and_return
> +#define atomic64_fetch_and __atomic64_fetch_and
> +#define atomic64_or __atomic64_or
> +#define atomic64_or_return __atomic64_or_return
> +#define atomic64_fetch_or __atomic64_fetch_or
> +#define atomic64_xor __atomic64_xor
> +#define atomic64_xor_return __atomic64_xor_return
> +#define atomic64_fetch_xor __atomic64_fetch_xor
> +#define atomic64_xchg __atomic64_xchg
> +#define atomic64_cmpxchg __atomic64_cmpxchg
> +#define atomic64_set_release(v, i)	__atomic64_set((v), (i))
> +#define atomic64_dec_if_positive __atomic64_dec_if_positive
> +#define atomic64_fetch_add_unless __atomic64_fetch_add_unless
> +#endif
>   
>   #endif  /*  _ASM_GENERIC_ATOMIC64_H  */
> diff --git a/lib/atomic64.c b/lib/atomic64.c
> index e98c85a99787..05aba5e3268f 100644
> --- a/lib/atomic64.c
> +++ b/lib/atomic64.c
> @@ -42,7 +42,7 @@ static inline raw_spinlock_t *lock_addr(const atomic64_t *v)
>   	return &atomic64_lock[addr & (NR_LOCKS - 1)].lock;
>   }
>   
> -s64 atomic64_read(const atomic64_t *v)
> +s64 __atomic64_read(const atomic64_t *v)
>   {
>   	unsigned long flags;
>   	raw_spinlock_t *lock = lock_addr(v);
> @@ -53,9 +53,9 @@ s64 atomic64_read(const atomic64_t *v)
>   	raw_spin_unlock_irqrestore(lock, flags);
>   	return val;
>   }
> -EXPORT_SYMBOL(atomic64_read);
> +EXPORT_SYMBOL(__atomic64_read);
>   
> -void atomic64_set(atomic64_t *v, s64 i)
> +void __atomic64_set(atomic64_t *v, s64 i)
>   {
>   	unsigned long flags;
>   	raw_spinlock_t *lock = lock_addr(v);
> @@ -64,10 +64,10 @@ void atomic64_set(atomic64_t *v, s64 i)
>   	v->counter = i;
>   	raw_spin_unlock_irqrestore(lock, flags);
>   }
> -EXPORT_SYMBOL(atomic64_set);
> +EXPORT_SYMBOL(__atomic64_set);
>   
>   #define ATOMIC64_OP(op, c_op)						\
> -void atomic64_##op(s64 a, atomic64_t *v)				\
> +void __atomic64_##op(s64 a, atomic64_t *v)				\
>   {									\
>   	unsigned long flags;						\
>   	raw_spinlock_t *lock = lock_addr(v);				\
> @@ -76,10 +76,10 @@ void atomic64_##op(s64 a, atomic64_t *v)				\
>   	v->counter c_op a;						\
>   	raw_spin_unlock_irqrestore(lock, flags);			\
>   }									\
> -EXPORT_SYMBOL(atomic64_##op);
> +EXPORT_SYMBOL(__atomic64_##op);
>   
>   #define ATOMIC64_OP_RETURN(op, c_op)					\
> -s64 atomic64_##op##_return(s64 a, atomic64_t *v)			\
> +s64 __atomic64_##op##_return(s64 a, atomic64_t *v)			\
>   {									\
>   	unsigned long flags;						\
>   	raw_spinlock_t *lock = lock_addr(v);				\
> @@ -90,10 +90,10 @@ s64 atomic64_##op##_return(s64 a, atomic64_t *v)			\
>   	raw_spin_unlock_irqrestore(lock, flags);			\
>   	return val;							\
>   }									\
> -EXPORT_SYMBOL(atomic64_##op##_return);
> +EXPORT_SYMBOL(__atomic64_##op##_return);
>   
>   #define ATOMIC64_FETCH_OP(op, c_op)					\
> -s64 atomic64_fetch_##op(s64 a, atomic64_t *v)				\
> +s64 __atomic64_fetch_##op(s64 a, atomic64_t *v)				\
>   {									\
>   	unsigned long flags;						\
>   	raw_spinlock_t *lock = lock_addr(v);				\
> @@ -105,7 +105,7 @@ s64 atomic64_fetch_##op(s64 a, atomic64_t *v)				\
>   	raw_spin_unlock_irqrestore(lock, flags);			\
>   	return val;							\
>   }									\
> -EXPORT_SYMBOL(atomic64_fetch_##op);
> +EXPORT_SYMBOL(__atomic64_fetch_##op);
>   
>   #define ATOMIC64_OPS(op, c_op)						\
>   	ATOMIC64_OP(op, c_op)						\
> @@ -130,7 +130,7 @@ ATOMIC64_OPS(xor, ^=)
>   #undef ATOMIC64_OP_RETURN
>   #undef ATOMIC64_OP
>   
> -s64 atomic64_dec_if_positive(atomic64_t *v)
> +s64 __atomic64_dec_if_positive(atomic64_t *v)
>   {
>   	unsigned long flags;
>   	raw_spinlock_t *lock = lock_addr(v);
> @@ -143,9 +143,9 @@ s64 atomic64_dec_if_positive(atomic64_t *v)
>   	raw_spin_unlock_irqrestore(lock, flags);
>   	return val;
>   }
> -EXPORT_SYMBOL(atomic64_dec_if_positive);
> +EXPORT_SYMBOL(__atomic64_dec_if_positive);
>   
> -s64 atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n)
> +s64 __atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n)
>   {
>   	unsigned long flags;
>   	raw_spinlock_t *lock = lock_addr(v);
> @@ -158,9 +158,9 @@ s64 atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n)
>   	raw_spin_unlock_irqrestore(lock, flags);
>   	return val;
>   }
> -EXPORT_SYMBOL(atomic64_cmpxchg);
> +EXPORT_SYMBOL(__atomic64_cmpxchg);
>   
> -s64 atomic64_xchg(atomic64_t *v, s64 new)
> +s64 __atomic64_xchg(atomic64_t *v, s64 new)
>   {
>   	unsigned long flags;
>   	raw_spinlock_t *lock = lock_addr(v);
> @@ -172,9 +172,9 @@ s64 atomic64_xchg(atomic64_t *v, s64 new)
>   	raw_spin_unlock_irqrestore(lock, flags);
>   	return val;
>   }
> -EXPORT_SYMBOL(atomic64_xchg);
> +EXPORT_SYMBOL(__atomic64_xchg);
>   
> -s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
> +s64 __atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
>   {
>   	unsigned long flags;
>   	raw_spinlock_t *lock = lock_addr(v);
> @@ -188,4 +188,4 @@ s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
>   
>   	return val;
>   }
> -EXPORT_SYMBOL(atomic64_fetch_add_unless);
> +EXPORT_SYMBOL(__atomic64_fetch_add_unless);
> 

^ permalink raw reply

* Re: [PATCH 1/3] asm-generic/atomic64: Add support for ARCH_ATOMIC
From: Peter Zijlstra @ 2020-11-11 13:44 UTC (permalink / raw)
  To: Christophe Leroy
  Cc: Christophe Leroy, linux-arch, Arnd Bergmann, Alexey Kardashevskiy,
	linuxppc-dev, Boqun Feng, linux-kernel, Nicholas Piggin,
	Will Deacon
In-Reply-To: <3086114c-8af6-3863-0cbf-5d3956fcda95@csgroup.eu>

On Wed, Nov 11, 2020 at 02:39:01PM +0100, Christophe Leroy wrote:
> Hello,
> 
> Le 11/11/2020 à 12:07, Nicholas Piggin a écrit :
> > This passes atomic64 selftest on ppc32 on qemu (uniprocessor only)
> > both before and after powerpc is converted to use ARCH_ATOMIC.
> 
> Can you explain what this change does and why it is needed ?

That certainly should've been in the Changelog. This enables atomic
instrumentation, see asm-generic/atomic-instrumented.h. IOW, it makes
atomic ops visible to K*SAN.

^ permalink raw reply

* [PATCH v2 0/8] powernv/memtrace: don't abuse memory hot(un)plug infrastructure for memory allocations
From: David Hildenbrand @ 2020-11-11 14:53 UTC (permalink / raw)
  To: linux-kernel
  Cc: Michal Hocko, Wei Yang, David Hildenbrand, Nicholas Piggin,
	Michal Hocko, linux-mm, Paul Mackerras, Aneesh Kumar K.V,
	Andrew Morton, linuxppc-dev, Rashmica Gupta, Mike Rapoport,
	Oscar Salvador

Based on latest linux/master

powernv/memtrace is the only in-kernel user that rips out random memory
it never added (doesn't own) in order to allocate memory without a
linear mapping. Let's stop abusing memory hot(un)plug infrastructure for
that - use alloc_contig_pages() for allocating memory and remove the
linear mapping manually.

The original idea was discussed in:
 https://lkml.kernel.org/r/48340e96-7e6b-736f-9e23-d3111b915b6e@redhat.com

I only tested via QEMU TCG with a single NUMA node- see patch #8 for more
details.

Error handling and cleanup handling in memtrace code is a mess - that
should definitely get cleaned up sooner or later. Once we have __GFP_ZERO
support for alloc_contig_pages(), we can drop manual clearing. I added
a TODO for now, so this series can go via the powerpc tree - the __GFP_ZERO
change is then better suited via the mm tree, along with support for
__GFP_ZERO.

v1 -> v2:
- Tweaks to patch descriptions
- "powernv/memtrace: don't leak kernel memory to user space"
-- Added. Reported by Michael.
- "powernv/memtrace: fix crashing the kernel when enabling concurrently"
-- Added, discovered while testing.
- "powerpc/mm: protect linear mapping modifications by a mutex"
-- Added. Although we currently won't have concurrency, this is cleaner and
   future-proof.
- "powerepc/book3s64/hash: drop WARN_ON in hash__remove_section_mapping"
-- Added. Suggested by Oscar
- "powernv/memtrace: don't abuse memory hot(un)plug infrastructure for
   memory allocations"
-- Reshuffle the code to make review easier.
-- Add a TODO regarding __GFP_ZERO. Adapt to changed page clearing code.
-- Use GFP_KERNEL | __GFP_THISNODE | __GFP_NOWARN for allocations.


David Hildenbrand (8):
  powernv/memtrace: don't leak kernel memory to user space
  powernv/memtrace: fix crashing the kernel when enabling concurrently
  powerpc/mm: factor out creating/removing linear mapping
  powerpc/mm: protect linear mapping modifications by a mutex
  powerpc/mm: print warning in arch_remove_linear_mapping()
  powerepc/book3s64/hash: drop WARN_ON in hash__remove_section_mapping
  powerpc/mm: remove linear mapping if __add_pages() fails in
    arch_add_memory()
  powernv/memtrace: don't abuse memory hot(un)plug infrastructure for
    memory allocations

 arch/powerpc/mm/book3s64/hash_utils.c     |   1 -
 arch/powerpc/mm/mem.c                     |  53 +++++--
 arch/powerpc/platforms/powernv/Kconfig    |   8 +-
 arch/powerpc/platforms/powernv/memtrace.c | 175 ++++++++++------------
 include/linux/memory_hotplug.h            |   3 +
 5 files changed, 125 insertions(+), 115 deletions(-)

-- 
2.26.2


^ permalink raw reply

* [PATCH v2 1/8] powernv/memtrace: don't leak kernel memory to user space
From: David Hildenbrand @ 2020-11-11 14:53 UTC (permalink / raw)
  To: linux-kernel
  Cc: Michal Hocko, Wei Yang, David Hildenbrand, stable, linux-mm,
	Paul Mackerras, Rashmica Gupta, linuxppc-dev, Andrew Morton,
	Mike Rapoport, Oscar Salvador
In-Reply-To: <20201111145322.15793-1-david@redhat.com>

We currently leak kernel memory to user space, because memory offlining
doesn't do any implicit clearing of memory and we are missing explicit
clearing of memory.

Let's keep it simple and clear pages before removing the linear mapping.

Reproduced in QEMU/TCG with 10 GiB of main memory:
  [root@localhost ~]# dd obs=9G if=/dev/urandom of=/dev/null
  [... wait until "free -m" used counter no longer changes and cancel]
  19665802+0 records in
  1+0 records out
  9663676416 bytes (9.7 GB, 9.0 GiB) copied, 135.548 s, 71.3 MB/s
  [root@localhost ~]# cat /sys/devices/system/memory/block_size_bytes
  40000000
  [root@localhost ~]# echo 0x40000000 > /sys/kernel/debug/powerpc/memtrace/enable
  [  402.978663][ T1086] page:000000001bc4bc74 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x24900
  [  402.980063][ T1086] flags: 0x7ffff000001000(reserved)
  [  402.980415][ T1086] raw: 007ffff000001000 c00c000000924008 c00c000000924008 0000000000000000
  [  402.980627][ T1086] raw: 0000000000000000 0000000000000000 00000001ffffffff 0000000000000000
  [  402.980845][ T1086] page dumped because: unmovable page
  [  402.989608][ T1086] Offlined Pages 16384
  [  403.324155][ T1086] memtrace: Allocated trace memory on node 0 at 0x0000000200000000

Before this patch:
  [root@localhost ~]# hexdump -C /sys/kernel/debug/powerpc/memtrace/00000000/trace  | head
  00000000  c8 25 72 51 4d 26 36 c5  5c c2 56 15 d5 1a cd 10  |.%rQM&6.\.V.....|
  00000010  19 b9 50 b2 cb e3 60 b8  ec 0a f3 ec 4b 3c 39 f0  |..P...`.....K<9.|$
  00000020  4e 5a 4c cf bd 26 19 ff  37 79 13 67 24 b7 b8 57  |NZL..&..7y.g$..W|$
  00000030  98 3e f5 be 6f 14 6a bd  a4 52 bc 6e e9 e0 c1 5d  |.>..o.j..R.n...]|$
  00000040  76 b3 ae b5 88 d7 da e3  64 23 85 2c 10 88 07 b6  |v.......d#.,....|$
  00000050  9a d8 91 de f7 50 27 69  2e 64 9c 6f d3 19 45 79  |.....P'i.d.o..Ey|$
  00000060  6a 6f 8a 61 71 19 1f c7  f1 df 28 26 ca 0f 84 55  |jo.aq.....(&...U|$
  00000070  01 3f be e4 e2 e1 da ff  7b 8c 8e 32 37 b4 24 53  |.?......{..27.$S|$
  00000080  1b 70 30 45 56 e6 8c c4  0e b5 4c fb 9f dd 88 06  |.p0EV.....L.....|$
  00000090  ef c4 18 79 f1 60 b1 5c  79 59 4d f4 36 d7 4a 5c  |...y.`.\yYM.6.J\|$

After this patch:
  [root@localhost ~]# hexdump -C /sys/kernel/debug/powerpc/memtrace/00000000/trace  | head
  00000000  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|
  *
  40000000

Reported-by: Michael Ellerman <mpe@ellerman.id.au>
Fixes: 9d5171a8f248 ("powerpc/powernv: Enable removal of memory for in memory tracing")
Cc: stable@vger.kernel.org # v4.14+
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Rashmica Gupta <rashmica.g@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Wei Yang <richard.weiyang@linux.alibaba.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
 arch/powerpc/platforms/powernv/memtrace.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c
index 6828108486f8..eea1f94482ff 100644
--- a/arch/powerpc/platforms/powernv/memtrace.c
+++ b/arch/powerpc/platforms/powernv/memtrace.c
@@ -67,6 +67,23 @@ static int change_memblock_state(struct memory_block *mem, void *arg)
 	return 0;
 }
 
+static void memtrace_clear_range(unsigned long start_pfn,
+				 unsigned long nr_pages)
+{
+	unsigned long pfn;
+
+	/*
+	 * As pages are offline, we cannot trust the memmap anymore. As HIGHMEM
+	 * does not apply, avoid passing around "struct page" and use
+	 * clear_page() instead directly.
+	 */
+	for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) {
+		if (IS_ALIGNED(pfn, PAGES_PER_SECTION))
+			cond_resched();
+		clear_page(__va(PFN_PHYS(pfn)));
+	}
+}
+
 /* called with device_hotplug_lock held */
 static bool memtrace_offline_pages(u32 nid, u64 start_pfn, u64 nr_pages)
 {
@@ -111,6 +128,11 @@ static u64 memtrace_alloc_node(u32 nid, u64 size)
 	lock_device_hotplug();
 	for (base_pfn = end_pfn; base_pfn > start_pfn; base_pfn -= nr_pages) {
 		if (memtrace_offline_pages(nid, base_pfn, nr_pages) == true) {
+			/*
+			 * Clear the range while we still have a linear
+			 * mapping.
+			 */
+			memtrace_clear_range(base_pfn, nr_pages);
 			/*
 			 * Remove memory in memory block size chunks so that
 			 * iomem resources are always split to the same size and
-- 
2.26.2


^ permalink raw reply related

* [PATCH v2 2/8] powernv/memtrace: fix crashing the kernel when enabling concurrently
From: David Hildenbrand @ 2020-11-11 14:53 UTC (permalink / raw)
  To: linux-kernel
  Cc: David Hildenbrand, stable, linux-mm, Paul Mackerras,
	Rashmica Gupta, linuxppc-dev
In-Reply-To: <20201111145322.15793-1-david@redhat.com>

It's very easy to crash the kernel right now by simply trying to enable
memtrace concurrently, hammering on the "enable" interface

loop.sh:
  #!/bin/bash

  dmesg --console-off

  while true; do
          echo 0x40000000 > /sys/kernel/debug/powerpc/memtrace/enable
  done

[root@localhost ~]# loop.sh &
[root@localhost ~]# loop.sh &

Resulting quickly in a kernel crash. Let's properly protect using a
mutex.

Fixes: 9d5171a8f248 ("powerpc/powernv: Enable removal of memory for in memory tracing")
Cc: stable@vger.kernel.org# v4.14+
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Rashmica Gupta <rashmica.g@gmail.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
 arch/powerpc/platforms/powernv/memtrace.c | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c
index eea1f94482ff..0e42fe2d7b6a 100644
--- a/arch/powerpc/platforms/powernv/memtrace.c
+++ b/arch/powerpc/platforms/powernv/memtrace.c
@@ -30,6 +30,7 @@ struct memtrace_entry {
 	char name[16];
 };
 
+static DEFINE_MUTEX(memtrace_mutex);
 static u64 memtrace_size;
 
 static struct memtrace_entry *memtrace_array;
@@ -279,6 +280,7 @@ static int memtrace_online(void)
 
 static int memtrace_enable_set(void *data, u64 val)
 {
+	int rc = -EAGAIN;
 	u64 bytes;
 
 	/*
@@ -291,25 +293,31 @@ static int memtrace_enable_set(void *data, u64 val)
 		return -EINVAL;
 	}
 
+	mutex_lock(&memtrace_mutex);
+
 	/* Re-add/online previously removed/offlined memory */
 	if (memtrace_size) {
 		if (memtrace_online())
-			return -EAGAIN;
+			goto out_unlock;
 	}
 
-	if (!val)
-		return 0;
+	if (!val) {
+		rc = 0;
+		goto out_unlock;
+	}
 
 	/* Offline and remove memory */
 	if (memtrace_init_regions_runtime(val))
-		return -EINVAL;
+		goto out_unlock;
 
 	if (memtrace_init_debugfs())
-		return -EINVAL;
+		goto out_unlock;
 
 	memtrace_size = val;
-
-	return 0;
+	rc = 0;
+out_unlock:
+	mutex_unlock(&memtrace_mutex);
+	return rc;
 }
 
 static int memtrace_enable_get(void *data, u64 *val)
-- 
2.26.2


^ permalink raw reply related

* [PATCH v2 3/8] powerpc/mm: factor out creating/removing linear mapping
From: David Hildenbrand @ 2020-11-11 14:53 UTC (permalink / raw)
  To: linux-kernel
  Cc: Michal Hocko, Wei Yang, David Hildenbrand, linux-mm,
	Paul Mackerras, Rashmica Gupta, linuxppc-dev, Andrew Morton,
	Mike Rapoport, Oscar Salvador
In-Reply-To: <20201111145322.15793-1-david@redhat.com>

We want to stop abusing memory hotplug infrastructure in memtrace code
to perform allocations and remove the linear mapping. Instead we will use
alloc_contig_pages() and remove the linear mapping manually.

Let's factor out creating/removing the linear mapping into
arch_create_linear_mapping() / arch_remove_linear_mapping() - so in the
future, we might be able to have whole arch_add_memory() /
arch_remove_memory() be implemented in common code.

Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Rashmica Gupta <rashmica.g@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Wei Yang <richard.weiyang@linux.alibaba.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
 arch/powerpc/mm/mem.c          | 41 +++++++++++++++++++++++-----------
 include/linux/memory_hotplug.h |  3 +++
 2 files changed, 31 insertions(+), 13 deletions(-)

diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 01ec2a252f09..8a86d81f8df0 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -120,34 +120,26 @@ static void flush_dcache_range_chunked(unsigned long start, unsigned long stop,
 	}
 }
 
-int __ref arch_add_memory(int nid, u64 start, u64 size,
-			  struct mhp_params *params)
+int __ref arch_create_linear_mapping(int nid, u64 start, u64 size,
+				     struct mhp_params *params)
 {
-	unsigned long start_pfn = start >> PAGE_SHIFT;
-	unsigned long nr_pages = size >> PAGE_SHIFT;
 	int rc;
 
 	start = (unsigned long)__va(start);
 	rc = create_section_mapping(start, start + size, nid,
 				    params->pgprot);
 	if (rc) {
-		pr_warn("Unable to create mapping for hot added memory 0x%llx..0x%llx: %d\n",
+		pr_warn("Unable to create linear mapping for 0x%llx..0x%llx: %d\n",
 			start, start + size, rc);
 		return -EFAULT;
 	}
-
-	return __add_pages(nid, start_pfn, nr_pages, params);
+	return 0;
 }
 
-void __ref arch_remove_memory(int nid, u64 start, u64 size,
-			     struct vmem_altmap *altmap)
+void __ref arch_remove_linear_mapping(u64 start, u64 size)
 {
-	unsigned long start_pfn = start >> PAGE_SHIFT;
-	unsigned long nr_pages = size >> PAGE_SHIFT;
 	int ret;
 
-	__remove_pages(start_pfn, nr_pages, altmap);
-
 	/* Remove htab bolted mappings for this section of memory */
 	start = (unsigned long)__va(start);
 	flush_dcache_range_chunked(start, start + size, FLUSH_CHUNK_SIZE);
@@ -160,6 +152,29 @@ void __ref arch_remove_memory(int nid, u64 start, u64 size,
 	 */
 	vm_unmap_aliases();
 }
+
+int __ref arch_add_memory(int nid, u64 start, u64 size,
+			  struct mhp_params *params)
+{
+	unsigned long start_pfn = start >> PAGE_SHIFT;
+	unsigned long nr_pages = size >> PAGE_SHIFT;
+	int rc;
+
+	rc = arch_create_linear_mapping(nid, start, size, params);
+	if (rc)
+		return rc;
+	return __add_pages(nid, start_pfn, nr_pages, params);
+}
+
+void __ref arch_remove_memory(int nid, u64 start, u64 size,
+			      struct vmem_altmap *altmap)
+{
+	unsigned long start_pfn = start >> PAGE_SHIFT;
+	unsigned long nr_pages = size >> PAGE_SHIFT;
+
+	__remove_pages(start_pfn, nr_pages, altmap);
+	arch_remove_linear_mapping(start, size);
+}
 #endif
 
 #ifndef CONFIG_NEED_MULTIPLE_NODES
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index d65c6fdc5cfc..00b9e9bd3850 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -375,6 +375,9 @@ extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map,
 					  unsigned long pnum);
 extern struct zone *zone_for_pfn_range(int online_type, int nid, unsigned start_pfn,
 		unsigned long nr_pages);
+extern int arch_create_linear_mapping(int nid, u64 start, u64 size,
+				      struct mhp_params *params);
+void arch_remove_linear_mapping(u64 start, u64 size);
 #endif /* CONFIG_MEMORY_HOTPLUG */
 
 #endif /* __LINUX_MEMORY_HOTPLUG_H */
-- 
2.26.2


^ permalink raw reply related

* [PATCH v2 4/8] powerpc/mm: protect linear mapping modifications by a mutex
From: David Hildenbrand @ 2020-11-11 14:53 UTC (permalink / raw)
  To: linux-kernel
  Cc: Michal Hocko, Wei Yang, David Hildenbrand, linux-mm,
	Paul Mackerras, Rashmica Gupta, linuxppc-dev, Andrew Morton,
	Mike Rapoport, Oscar Salvador
In-Reply-To: <20201111145322.15793-1-david@redhat.com>

This code currently relies on mem_hotplug_begin()/mem_hotplug_done() -
create_section_mapping()/remove_section_mapping() implementations
cannot tollerate getting called concurrently.

Let's prepare for callers (memtrace) not holding any such locks (and
don't force them to mess with memory hotplug locks).

Other parts in these functions don't seem to rely on external locking.

Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Rashmica Gupta <rashmica.g@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Wei Yang <richard.weiyang@linux.alibaba.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
 arch/powerpc/mm/mem.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 8a86d81f8df0..ca5c4b54c366 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -58,6 +58,7 @@
 #define CPU_FTR_NOEXECUTE	0
 #endif
 
+static DEFINE_MUTEX(linear_mapping_mutex);
 unsigned long long memory_limit;
 bool init_mem_is_free;
 
@@ -126,8 +127,10 @@ int __ref arch_create_linear_mapping(int nid, u64 start, u64 size,
 	int rc;
 
 	start = (unsigned long)__va(start);
+	mutex_lock(&linear_mapping_mutex);
 	rc = create_section_mapping(start, start + size, nid,
 				    params->pgprot);
+	mutex_unlock(&linear_mapping_mutex);
 	if (rc) {
 		pr_warn("Unable to create linear mapping for 0x%llx..0x%llx: %d\n",
 			start, start + size, rc);
@@ -144,7 +147,9 @@ void __ref arch_remove_linear_mapping(u64 start, u64 size)
 	start = (unsigned long)__va(start);
 	flush_dcache_range_chunked(start, start + size, FLUSH_CHUNK_SIZE);
 
+	mutex_lock(&linear_mapping_mutex);
 	ret = remove_section_mapping(start, start + size);
+	mutex_unlock(&linear_mapping_mutex);
 	WARN_ON_ONCE(ret);
 
 	/* Ensure all vmalloc mappings are flushed in case they also
-- 
2.26.2


^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox