* [PATCH 3/3] powerpc: rewrite atomics to use ARCH_ATOMIC
From: Nicholas Piggin @ 2020-11-11 11:07 UTC (permalink / raw)
To: linuxppc-dev
Cc: Christophe Leroy, linux-arch, Arnd Bergmann, Peter Zijlstra,
Boqun Feng, linux-kernel, Nicholas Piggin, Alexey Kardashevskiy,
Will Deacon
In-Reply-To: <20201111110723.3148665-1-npiggin@gmail.com>
All the cool kids are doing it.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/include/asm/atomic.h | 681 ++++++++++-------------------
arch/powerpc/include/asm/cmpxchg.h | 62 +--
2 files changed, 248 insertions(+), 495 deletions(-)
diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index 8a55eb8cc97b..899aa2403ba7 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -11,185 +11,285 @@
#include <asm/cmpxchg.h>
#include <asm/barrier.h>
+#define ARCH_ATOMIC
+
+#ifndef CONFIG_64BIT
+#include <asm-generic/atomic64.h>
+#endif
+
/*
* Since *_return_relaxed and {cmp}xchg_relaxed are implemented with
* a "bne-" instruction at the end, so an isync is enough as a acquire barrier
* on the platform without lwsync.
*/
#define __atomic_acquire_fence() \
- __asm__ __volatile__(PPC_ACQUIRE_BARRIER "" : : : "memory")
+ asm volatile(PPC_ACQUIRE_BARRIER "" : : : "memory")
#define __atomic_release_fence() \
- __asm__ __volatile__(PPC_RELEASE_BARRIER "" : : : "memory")
+ asm volatile(PPC_RELEASE_BARRIER "" : : : "memory")
-static __inline__ int atomic_read(const atomic_t *v)
-{
- int t;
+#define __atomic_pre_full_fence smp_mb
- __asm__ __volatile__("lwz%U1%X1 %0,%1" : "=r"(t) : "m"(v->counter));
+#define __atomic_post_full_fence smp_mb
- return t;
+#define arch_atomic_read(v) __READ_ONCE((v)->counter)
+#define arch_atomic_set(v, i) __WRITE_ONCE(((v)->counter), (i))
+#ifdef CONFIG_64BIT
+#define ATOMIC64_INIT(i) { (i) }
+#define arch_atomic64_read(v) __READ_ONCE((v)->counter)
+#define arch_atomic64_set(v, i) __WRITE_ONCE(((v)->counter), (i))
+#endif
+
+#define ATOMIC_OP(name, type, dtype, width, asm_op) \
+static inline void arch_##name(dtype a, type *v) \
+{ \
+ dtype t; \
+ \
+ asm volatile( \
+"1: l" #width "arx %0,0,%3 # " #name "\n" \
+"\t" #asm_op " %0,%2,%0 \n" \
+" st" #width "cx. %0,0,%3 \n" \
+" bne- 1b \n" \
+ : "=&r" (t), "+m" (v->counter) \
+ : "r" (a), "r" (&v->counter) \
+ : "cr0", "xer"); \
}
-static __inline__ void atomic_set(atomic_t *v, int i)
-{
- __asm__ __volatile__("stw%U0%X0 %1,%0" : "=m"(v->counter) : "r"(i));
+#define ATOMIC_OP_IMM(name, type, dtype, width, asm_op, imm) \
+static inline void arch_##name(type *v) \
+{ \
+ dtype t; \
+ \
+ asm volatile( \
+"1: l" #width "arx %0,0,%3 # " #name "\n" \
+"\t" #asm_op " %0,%0,%2 \n" \
+" st" #width "cx. %0,0,%3 \n" \
+" bne- 1b \n" \
+ : "=&r" (t), "+m" (v->counter) \
+ : "i" (imm), "r" (&v->counter) \
+ : "cr0", "xer"); \
}
-#define ATOMIC_OP(op, asm_op) \
-static __inline__ void atomic_##op(int a, atomic_t *v) \
+#define ATOMIC_OP_RETURN_RELAXED(name, type, dtype, width, asm_op) \
+static inline dtype arch_##name##_relaxed(dtype a, type *v) \
{ \
- int t; \
+ dtype t; \
\
- __asm__ __volatile__( \
-"1: lwarx %0,0,%3 # atomic_" #op "\n" \
- #asm_op " %0,%2,%0\n" \
-" stwcx. %0,0,%3 \n" \
-" bne- 1b\n" \
+ asm volatile( \
+"1: l" #width "arx %0,0,%3 # " #name "\n" \
+"\t" #asm_op " %0,%2,%0 \n" \
+" st" #width "cx. %0,0,%3 \n" \
+" bne- 1b \n" \
: "=&r" (t), "+m" (v->counter) \
: "r" (a), "r" (&v->counter) \
- : "cc"); \
-} \
+ : "cr0", "xer"); \
+ \
+ return t; \
+}
-#define ATOMIC_OP_RETURN_RELAXED(op, asm_op) \
-static inline int atomic_##op##_return_relaxed(int a, atomic_t *v) \
+#define ATOMIC_OP_IMM_RETURN_RELAXED(name, type, dtype, width, asm_op, imm) \
+static inline dtype arch_##name##_relaxed(type *v) \
{ \
- int t; \
+ dtype t; \
\
- __asm__ __volatile__( \
-"1: lwarx %0,0,%3 # atomic_" #op "_return_relaxed\n" \
- #asm_op " %0,%2,%0\n" \
-" stwcx. %0,0,%3\n" \
-" bne- 1b\n" \
+ asm volatile( \
+"1: l" #width "arx %0,0,%3 # " #name "\n" \
+"\t" #asm_op " %0,%0,%2 \n" \
+" st" #width "cx. %0,0,%3 \n" \
+" bne- 1b \n" \
: "=&r" (t), "+m" (v->counter) \
- : "r" (a), "r" (&v->counter) \
- : "cc"); \
+ : "i" (imm), "r" (&v->counter) \
+ : "cr0", "xer"); \
\
return t; \
}
-#define ATOMIC_FETCH_OP_RELAXED(op, asm_op) \
-static inline int atomic_fetch_##op##_relaxed(int a, atomic_t *v) \
+#define ATOMIC_FETCH_OP_RELAXED(name, type, dtype, width, asm_op) \
+static inline dtype arch_##name##_relaxed(dtype a, type *v) \
{ \
- int res, t; \
+ dtype res, t; \
\
- __asm__ __volatile__( \
-"1: lwarx %0,0,%4 # atomic_fetch_" #op "_relaxed\n" \
- #asm_op " %1,%3,%0\n" \
-" stwcx. %1,0,%4\n" \
-" bne- 1b\n" \
+ asm volatile( \
+"1: l" #width "arx %0,0,%4 # " #name "\n" \
+"\t" #asm_op " %1,%3,%0 \n" \
+" st" #width "cx. %1,0,%4 \n" \
+" bne- 1b \n" \
: "=&r" (res), "=&r" (t), "+m" (v->counter) \
: "r" (a), "r" (&v->counter) \
- : "cc"); \
+ : "cr0", "xer"); \
\
return res; \
}
+#define ATOMIC_FETCH_OP_UNLESS_RELAXED(name, type, dtype, width, asm_op) \
+static inline dtype arch_##name##_relaxed(type *v, dtype a, dtype u) \
+{ \
+ dtype res, t; \
+ /* Operands: %0=res (loaded value) %1=t (value to store) %3=a %4=u %5=&v->counter */ \
+ asm volatile( \
+"1: l" #width "arx %0,0,%5 # " #name "\n" \
+" cmp" #width " 0,%0,%4 \n" \
+" beq- 2f \n" \
+"\t" #asm_op " %1,%3,%0 \n" \
+" st" #width "cx. %1,0,%5 \n" \
+" bne- 1b \n" \
+"2: \n" \
+ : "=&r" (res), "=&r" (t), "+m" (v->counter) \
+ : "r" (a), "r" (u), "r" (&v->counter) \
+ : "cr0", "xer"); \
+ /* res is the value loaded from v; it equals u when the store was skipped */ \
+ return res; \
+}
+
+#define ATOMIC_INC_NOT_ZERO_RELAXED(name, type, dtype, width) \
+static inline dtype arch_##name##_relaxed(type *v) \
+{ \
+ dtype t1, t2; \
+ /* Operands: %0=t1 (loaded value) %1=t2 (loaded value + 1) %3=&v->counter */ \
+ asm volatile( \
+"1: l" #width "arx %0,0,%3 # " #name "\n" \
+" cmp" #width "i 0,%0,0 \n" \
+" beq- 2f \n" \
+" addic %1,%0,1 \n" \
+" st" #width "cx. %1,0,%3 \n" \
+" bne- 1b \n" \
+"2: \n" \
+ : "=&r" (t1), "=&r" (t2), "+m" (v->counter) \
+ : "r" (&v->counter) \
+ : "cr0", "xer"); \
+ /* t1 is the value loaded from v: zero means the increment was skipped */ \
+ return t1; \
+}
+
+#undef ATOMIC_OPS
#define ATOMIC_OPS(op, asm_op) \
- ATOMIC_OP(op, asm_op) \
- ATOMIC_OP_RETURN_RELAXED(op, asm_op) \
- ATOMIC_FETCH_OP_RELAXED(op, asm_op)
+ATOMIC_OP(atomic_##op, atomic_t, int, w, asm_op) \
+ATOMIC_OP_RETURN_RELAXED(atomic_##op##_return, atomic_t, int, w, asm_op) \
+ATOMIC_FETCH_OP_RELAXED(atomic_fetch_##op, atomic_t, int, w, asm_op) \
+ATOMIC_FETCH_OP_UNLESS_RELAXED(atomic_fetch_##op##_unless, atomic_t, int, w, asm_op)
+
+#undef ATOMIC64_OPS
+#define ATOMIC64_OPS(op, asm_op) \
+ATOMIC_OP(atomic64_##op, atomic64_t, u64, d, asm_op) \
+ATOMIC_OP_RETURN_RELAXED(atomic64_##op##_return, atomic64_t, u64, d, asm_op) \
+ATOMIC_FETCH_OP_RELAXED(atomic64_fetch_##op, atomic64_t, u64, d, asm_op) \
+ATOMIC_FETCH_OP_UNLESS_RELAXED(atomic64_fetch_##op##_unless, atomic64_t, u64, d, asm_op)
ATOMIC_OPS(add, add)
+#define arch_atomic_add arch_atomic_add
+#define arch_atomic_add_return_relaxed arch_atomic_add_return_relaxed
+#define arch_atomic_fetch_add_relaxed arch_atomic_fetch_add_relaxed
+#define arch_atomic_fetch_add_unless_relaxed arch_atomic_fetch_add_unless_relaxed
+
ATOMIC_OPS(sub, subf)
+#define arch_atomic_sub arch_atomic_sub
+#define arch_atomic_sub_return_relaxed arch_atomic_sub_return_relaxed
+#define arch_atomic_fetch_sub_relaxed arch_atomic_fetch_sub_relaxed
+/* skip atomic_fetch_sub_unless_relaxed */
-#define atomic_add_return_relaxed atomic_add_return_relaxed
-#define atomic_sub_return_relaxed atomic_sub_return_relaxed
+#ifdef CONFIG_64BIT
+ATOMIC64_OPS(add, add)
+#define arch_atomic64_add arch_atomic64_add
+#define arch_atomic64_add_return_relaxed arch_atomic64_add_return_relaxed
+#define arch_atomic64_fetch_add_relaxed arch_atomic64_fetch_add_relaxed
+#define arch_atomic64_fetch_add_unless_relaxed arch_atomic64_fetch_add_unless_relaxed
-#define atomic_fetch_add_relaxed atomic_fetch_add_relaxed
-#define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed
+ATOMIC64_OPS(sub, subf)
+#define arch_atomic64_sub arch_atomic64_sub
+#define arch_atomic64_sub_return_relaxed arch_atomic64_sub_return_relaxed
+#define arch_atomic64_fetch_sub_relaxed arch_atomic64_fetch_sub_relaxed
+/* skip atomic64_fetch_sub_unless_relaxed */
+#endif
#undef ATOMIC_OPS
#define ATOMIC_OPS(op, asm_op) \
- ATOMIC_OP(op, asm_op) \
- ATOMIC_FETCH_OP_RELAXED(op, asm_op)
+ATOMIC_OP(atomic_##op, atomic_t, int, w, asm_op) \
+ATOMIC_FETCH_OP_RELAXED(atomic_fetch_##op, atomic_t, int, w, asm_op)
+
+#undef ATOMIC64_OPS
+#define ATOMIC64_OPS(op, asm_op) \
+ATOMIC_OP(atomic64_##op, atomic64_t, u64, d, asm_op) \
+ATOMIC_FETCH_OP_RELAXED(atomic64_fetch_##op, atomic64_t, u64, d, asm_op)
ATOMIC_OPS(and, and)
+#define arch_atomic_and arch_atomic_and
+#define arch_atomic_fetch_and_relaxed arch_atomic_fetch_and_relaxed
+
ATOMIC_OPS(or, or)
+#define arch_atomic_or arch_atomic_or
+#define arch_atomic_fetch_or_relaxed arch_atomic_fetch_or_relaxed
+
ATOMIC_OPS(xor, xor)
+#define arch_atomic_xor arch_atomic_xor
+#define arch_atomic_fetch_xor_relaxed arch_atomic_fetch_xor_relaxed
+
+#ifdef CONFIG_64BIT
+ATOMIC64_OPS(and, and)
+#define arch_atomic64_and arch_atomic64_and
+#define arch_atomic64_fetch_and_relaxed arch_atomic64_fetch_and_relaxed
-#define atomic_fetch_and_relaxed atomic_fetch_and_relaxed
-#define atomic_fetch_or_relaxed atomic_fetch_or_relaxed
-#define atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed
+ATOMIC64_OPS(or, or)
+#define arch_atomic64_or arch_atomic64_or
+#define arch_atomic64_fetch_or_relaxed arch_atomic64_fetch_or_relaxed
+
+ATOMIC64_OPS(xor, xor)
+#define arch_atomic64_xor arch_atomic64_xor
+#define arch_atomic64_fetch_xor_relaxed arch_atomic64_fetch_xor_relaxed
+#endif
#undef ATOMIC_OPS
+#define ATOMIC_OPS(op, asm_op, imm) \
+ATOMIC_OP_IMM(atomic_##op, atomic_t, int, w, asm_op, imm) \
+ATOMIC_OP_IMM_RETURN_RELAXED(atomic_##op##_return, atomic_t, int, w, asm_op, imm)
+
+#undef ATOMIC64_OPS
+#define ATOMIC64_OPS(op, asm_op, imm) \
+ATOMIC_OP_IMM(atomic64_##op, atomic64_t, u64, d, asm_op, imm) \
+ATOMIC_OP_IMM_RETURN_RELAXED(atomic64_##op##_return, atomic64_t, u64, d, asm_op, imm)
+
+ATOMIC_OPS(inc, addic, 1)
+#define arch_atomic_inc arch_atomic_inc
+#define arch_atomic_inc_return_relaxed arch_atomic_inc_return_relaxed
+
+ATOMIC_OPS(dec, addic, -1)
+#define arch_atomic_dec arch_atomic_dec
+#define arch_atomic_dec_return_relaxed arch_atomic_dec_return_relaxed
+
+#ifdef CONFIG_64BIT
+ATOMIC64_OPS(inc, addic, 1)
+#define arch_atomic64_inc arch_atomic64_inc
+#define arch_atomic64_inc_return_relaxed arch_atomic64_inc_return_relaxed
+
+ATOMIC64_OPS(dec, addic, -1)
+#define arch_atomic64_dec arch_atomic64_dec
+#define arch_atomic64_dec_return_relaxed arch_atomic64_dec_return_relaxed
+#endif
+
+ATOMIC_INC_NOT_ZERO_RELAXED(atomic_inc_not_zero, atomic_t, int, w)
+#define arch_atomic_inc_not_zero_relaxed(v) arch_atomic_inc_not_zero_relaxed(v)
+
+#ifdef CONFIG_64BIT
+ATOMIC_INC_NOT_ZERO_RELAXED(atomic64_inc_not_zero, atomic64_t, u64, d)
+#define arch_atomic64_inc_not_zero_relaxed(v) arch_atomic64_inc_not_zero_relaxed(v)
+#endif
+
+#undef ATOMIC_INC_NOT_ZERO_RELAXED
+#undef ATOMIC_FETCH_OP_UNLESS_RELAXED
#undef ATOMIC_FETCH_OP_RELAXED
+#undef ATOMIC_OP_IMM_RETURN_RELAXED
#undef ATOMIC_OP_RETURN_RELAXED
+#undef ATOMIC_OP_IMM
#undef ATOMIC_OP
+#undef ATOMIC_OPS
+#undef ATOMIC64_OPS
-static __inline__ void atomic_inc(atomic_t *v)
-{
- int t;
-
- __asm__ __volatile__(
-"1: lwarx %0,0,%2 # atomic_inc\n\
- addic %0,%0,1\n"
-" stwcx. %0,0,%2 \n\
- bne- 1b"
- : "=&r" (t), "+m" (v->counter)
- : "r" (&v->counter)
- : "cc", "xer");
-}
-#define atomic_inc atomic_inc
-
-static __inline__ int atomic_inc_return_relaxed(atomic_t *v)
-{
- int t;
-
- __asm__ __volatile__(
-"1: lwarx %0,0,%2 # atomic_inc_return_relaxed\n"
-" addic %0,%0,1\n"
-" stwcx. %0,0,%2\n"
-" bne- 1b"
- : "=&r" (t), "+m" (v->counter)
- : "r" (&v->counter)
- : "cc", "xer");
-
- return t;
-}
-
-static __inline__ void atomic_dec(atomic_t *v)
-{
- int t;
-
- __asm__ __volatile__(
-"1: lwarx %0,0,%2 # atomic_dec\n\
- addic %0,%0,-1\n"
-" stwcx. %0,0,%2\n\
- bne- 1b"
- : "=&r" (t), "+m" (v->counter)
- : "r" (&v->counter)
- : "cc", "xer");
-}
-#define atomic_dec atomic_dec
-
-static __inline__ int atomic_dec_return_relaxed(atomic_t *v)
-{
- int t;
-
- __asm__ __volatile__(
-"1: lwarx %0,0,%2 # atomic_dec_return_relaxed\n"
-" addic %0,%0,-1\n"
-" stwcx. %0,0,%2\n"
-" bne- 1b"
- : "=&r" (t), "+m" (v->counter)
- : "r" (&v->counter)
- : "cc", "xer");
-
- return t;
-}
-
-#define atomic_inc_return_relaxed atomic_inc_return_relaxed
-#define atomic_dec_return_relaxed atomic_dec_return_relaxed
-
-#define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n)))
-#define atomic_cmpxchg_relaxed(v, o, n) \
- cmpxchg_relaxed(&((v)->counter), (o), (n))
-#define atomic_cmpxchg_acquire(v, o, n) \
- cmpxchg_acquire(&((v)->counter), (o), (n))
+#define arch_atomic_cmpxchg_relaxed(v, o, n) arch_cmpxchg_relaxed(&((v)->counter), (o), (n))
+#define arch_atomic_xchg_relaxed(v, new) arch_xchg_relaxed(&((v)->counter), (new))
-#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
-#define atomic_xchg_relaxed(v, new) xchg_relaxed(&((v)->counter), (new))
+#ifdef CONFIG_64BIT
+#define arch_atomic64_cmpxchg_relaxed(v, o, n) arch_cmpxchg_relaxed(&((v)->counter), (o), (n))
+#define arch_atomic64_xchg_relaxed(v, new) arch_xchg_relaxed(&((v)->counter), (new))
+#endif
/*
* Don't want to override the generic atomic_try_cmpxchg_acquire, because
@@ -203,7 +303,7 @@ atomic_try_cmpxchg_lock(atomic_t *v, int *old, int new)
int r, o = *old;
__asm__ __volatile__ (
-"1:\t" PPC_LWARX(%0,0,%2,1) " # atomic_try_cmpxchg_acquire \n"
+"1:\t" PPC_LWARX(%0,0,%2,1) " # atomic_try_cmpxchg_lock \n"
" cmpw 0,%0,%3 \n"
" bne- 2f \n"
" stwcx. %4,0,%2 \n"
@@ -219,270 +319,41 @@ atomic_try_cmpxchg_lock(atomic_t *v, int *old, int new)
return likely(r == o);
}
-/**
- * atomic_fetch_add_unless - add unless the number is a given value
- * @v: pointer of type atomic_t
- * @a: the amount to add to v...
- * @u: ...unless v is equal to u.
- *
- * Atomically adds @a to @v, so long as it was not @u.
- * Returns the old value of @v.
- */
-static __inline__ int atomic_fetch_add_unless(atomic_t *v, int a, int u)
-{
- int t;
-
- __asm__ __volatile__ (
- PPC_ATOMIC_ENTRY_BARRIER
-"1: lwarx %0,0,%1 # atomic_fetch_add_unless\n\
- cmpw 0,%0,%3 \n\
- beq 2f \n\
- add %0,%2,%0 \n"
-" stwcx. %0,0,%1 \n\
- bne- 1b \n"
- PPC_ATOMIC_EXIT_BARRIER
-" subf %0,%2,%0 \n\
-2:"
- : "=&r" (t)
- : "r" (&v->counter), "r" (a), "r" (u)
- : "cc", "memory");
-
- return t;
-}
-#define atomic_fetch_add_unless atomic_fetch_add_unless
-
-/**
- * atomic_inc_not_zero - increment unless the number is zero
- * @v: pointer of type atomic_t
- *
- * Atomically increments @v by 1, so long as @v is non-zero.
- * Returns non-zero if @v was non-zero, and zero otherwise.
- */
-static __inline__ int atomic_inc_not_zero(atomic_t *v)
-{
- int t1, t2;
-
- __asm__ __volatile__ (
- PPC_ATOMIC_ENTRY_BARRIER
-"1: lwarx %0,0,%2 # atomic_inc_not_zero\n\
- cmpwi 0,%0,0\n\
- beq- 2f\n\
- addic %1,%0,1\n"
-" stwcx. %1,0,%2\n\
- bne- 1b\n"
- PPC_ATOMIC_EXIT_BARRIER
- "\n\
-2:"
- : "=&r" (t1), "=&r" (t2)
- : "r" (&v->counter)
- : "cc", "xer", "memory");
-
- return t1;
-}
-#define atomic_inc_not_zero(v) atomic_inc_not_zero((v))
-
/*
* Atomically test *v and decrement if it is greater than 0.
* The function returns the old value of *v minus 1, even if
* the atomic variable, v, was not decremented.
*/
-static __inline__ int atomic_dec_if_positive(atomic_t *v)
+static inline int atomic_dec_if_positive_relaxed(atomic_t *v)
{
int t;
- __asm__ __volatile__(
- PPC_ATOMIC_ENTRY_BARRIER
-"1: lwarx %0,0,%1 # atomic_dec_if_positive\n\
- cmpwi %0,1\n\
- addi %0,%0,-1\n\
- blt- 2f\n"
-" stwcx. %0,0,%1\n\
- bne- 1b"
- PPC_ATOMIC_EXIT_BARRIER
- "\n\
-2:" : "=&b" (t)
+ asm volatile(
+"1: lwarx %0,0,%1 # atomic_dec_if_positive \n"
+" cmpwi %0,1 \n"
+" addi %0,%0,-1 \n"
+" blt- 2f \n"
+" stwcx. %0,0,%1 \n"
+" bne- 1b \n"
+"2: \n"
+ : "=&b" (t)
: "r" (&v->counter)
: "cc", "memory");
return t;
}
-#define atomic_dec_if_positive atomic_dec_if_positive
-
-#ifdef __powerpc64__
-
-#define ATOMIC64_INIT(i) { (i) }
-
-static __inline__ s64 atomic64_read(const atomic64_t *v)
-{
- s64 t;
-
- __asm__ __volatile__("ld%U1%X1 %0,%1" : "=r"(t) : "m"(v->counter));
-
- return t;
-}
-
-static __inline__ void atomic64_set(atomic64_t *v, s64 i)
-{
- __asm__ __volatile__("std%U0%X0 %1,%0" : "=m"(v->counter) : "r"(i));
-}
-
-#define ATOMIC64_OP(op, asm_op) \
-static __inline__ void atomic64_##op(s64 a, atomic64_t *v) \
-{ \
- s64 t; \
- \
- __asm__ __volatile__( \
-"1: ldarx %0,0,%3 # atomic64_" #op "\n" \
- #asm_op " %0,%2,%0\n" \
-" stdcx. %0,0,%3 \n" \
-" bne- 1b\n" \
- : "=&r" (t), "+m" (v->counter) \
- : "r" (a), "r" (&v->counter) \
- : "cc"); \
-}
-
-#define ATOMIC64_OP_RETURN_RELAXED(op, asm_op) \
-static inline s64 \
-atomic64_##op##_return_relaxed(s64 a, atomic64_t *v) \
-{ \
- s64 t; \
- \
- __asm__ __volatile__( \
-"1: ldarx %0,0,%3 # atomic64_" #op "_return_relaxed\n" \
- #asm_op " %0,%2,%0\n" \
-" stdcx. %0,0,%3\n" \
-" bne- 1b\n" \
- : "=&r" (t), "+m" (v->counter) \
- : "r" (a), "r" (&v->counter) \
- : "cc"); \
- \
- return t; \
-}
-
-#define ATOMIC64_FETCH_OP_RELAXED(op, asm_op) \
-static inline s64 \
-atomic64_fetch_##op##_relaxed(s64 a, atomic64_t *v) \
-{ \
- s64 res, t; \
- \
- __asm__ __volatile__( \
-"1: ldarx %0,0,%4 # atomic64_fetch_" #op "_relaxed\n" \
- #asm_op " %1,%3,%0\n" \
-" stdcx. %1,0,%4\n" \
-" bne- 1b\n" \
- : "=&r" (res), "=&r" (t), "+m" (v->counter) \
- : "r" (a), "r" (&v->counter) \
- : "cc"); \
- \
- return res; \
-}
-
-#define ATOMIC64_OPS(op, asm_op) \
- ATOMIC64_OP(op, asm_op) \
- ATOMIC64_OP_RETURN_RELAXED(op, asm_op) \
- ATOMIC64_FETCH_OP_RELAXED(op, asm_op)
-
-ATOMIC64_OPS(add, add)
-ATOMIC64_OPS(sub, subf)
-
-#define atomic64_add_return_relaxed atomic64_add_return_relaxed
-#define atomic64_sub_return_relaxed atomic64_sub_return_relaxed
-
-#define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed
-#define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed
-
-#undef ATOMIC64_OPS
-#define ATOMIC64_OPS(op, asm_op) \
- ATOMIC64_OP(op, asm_op) \
- ATOMIC64_FETCH_OP_RELAXED(op, asm_op)
-
-ATOMIC64_OPS(and, and)
-ATOMIC64_OPS(or, or)
-ATOMIC64_OPS(xor, xor)
-
-#define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed
-#define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed
-#define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed
-
-#undef ATOPIC64_OPS
-#undef ATOMIC64_FETCH_OP_RELAXED
-#undef ATOMIC64_OP_RETURN_RELAXED
-#undef ATOMIC64_OP
-
-static __inline__ void atomic64_inc(atomic64_t *v)
-{
- s64 t;
-
- __asm__ __volatile__(
-"1: ldarx %0,0,%2 # atomic64_inc\n\
- addic %0,%0,1\n\
- stdcx. %0,0,%2 \n\
- bne- 1b"
- : "=&r" (t), "+m" (v->counter)
- : "r" (&v->counter)
- : "cc", "xer");
-}
-#define atomic64_inc atomic64_inc
-
-static __inline__ s64 atomic64_inc_return_relaxed(atomic64_t *v)
-{
- s64 t;
-
- __asm__ __volatile__(
-"1: ldarx %0,0,%2 # atomic64_inc_return_relaxed\n"
-" addic %0,%0,1\n"
-" stdcx. %0,0,%2\n"
-" bne- 1b"
- : "=&r" (t), "+m" (v->counter)
- : "r" (&v->counter)
- : "cc", "xer");
-
- return t;
-}
-
-static __inline__ void atomic64_dec(atomic64_t *v)
-{
- s64 t;
-
- __asm__ __volatile__(
-"1: ldarx %0,0,%2 # atomic64_dec\n\
- addic %0,%0,-1\n\
- stdcx. %0,0,%2\n\
- bne- 1b"
- : "=&r" (t), "+m" (v->counter)
- : "r" (&v->counter)
- : "cc", "xer");
-}
-#define atomic64_dec atomic64_dec
-
-static __inline__ s64 atomic64_dec_return_relaxed(atomic64_t *v)
-{
- s64 t;
-
- __asm__ __volatile__(
-"1: ldarx %0,0,%2 # atomic64_dec_return_relaxed\n"
-" addic %0,%0,-1\n"
-" stdcx. %0,0,%2\n"
-" bne- 1b"
- : "=&r" (t), "+m" (v->counter)
- : "r" (&v->counter)
- : "cc", "xer");
-
- return t;
-}
-
-#define atomic64_inc_return_relaxed atomic64_inc_return_relaxed
-#define atomic64_dec_return_relaxed atomic64_dec_return_relaxed
+#define atomic_dec_if_positive_relaxed atomic_dec_if_positive_relaxed
+#ifdef CONFIG_64BIT
/*
* Atomically test *v and decrement if it is greater than 0.
* The function returns the old value of *v minus 1.
*/
-static __inline__ s64 atomic64_dec_if_positive(atomic64_t *v)
+static inline s64 atomic64_dec_if_positive_relaxed(atomic64_t *v)
{
s64 t;
- __asm__ __volatile__(
+ asm volatile(
PPC_ATOMIC_ENTRY_BARRIER
"1: ldarx %0,0,%1 # atomic64_dec_if_positive\n\
addic. %0,%0,-1\n\
@@ -497,80 +368,8 @@ static __inline__ s64 atomic64_dec_if_positive(atomic64_t *v)
return t;
}
-#define atomic64_dec_if_positive atomic64_dec_if_positive
-
-#define atomic64_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n)))
-#define atomic64_cmpxchg_relaxed(v, o, n) \
- cmpxchg_relaxed(&((v)->counter), (o), (n))
-#define atomic64_cmpxchg_acquire(v, o, n) \
- cmpxchg_acquire(&((v)->counter), (o), (n))
-
-#define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
-#define atomic64_xchg_relaxed(v, new) xchg_relaxed(&((v)->counter), (new))
-
-/**
- * atomic64_fetch_add_unless - add unless the number is a given value
- * @v: pointer of type atomic64_t
- * @a: the amount to add to v...
- * @u: ...unless v is equal to u.
- *
- * Atomically adds @a to @v, so long as it was not @u.
- * Returns the old value of @v.
- */
-static __inline__ s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
-{
- s64 t;
-
- __asm__ __volatile__ (
- PPC_ATOMIC_ENTRY_BARRIER
-"1: ldarx %0,0,%1 # atomic64_fetch_add_unless\n\
- cmpd 0,%0,%3 \n\
- beq 2f \n\
- add %0,%2,%0 \n"
-" stdcx. %0,0,%1 \n\
- bne- 1b \n"
- PPC_ATOMIC_EXIT_BARRIER
-" subf %0,%2,%0 \n\
-2:"
- : "=&r" (t)
- : "r" (&v->counter), "r" (a), "r" (u)
- : "cc", "memory");
-
- return t;
-}
-#define atomic64_fetch_add_unless atomic64_fetch_add_unless
-
-/**
- * atomic_inc64_not_zero - increment unless the number is zero
- * @v: pointer of type atomic64_t
- *
- * Atomically increments @v by 1, so long as @v is non-zero.
- * Returns non-zero if @v was non-zero, and zero otherwise.
- */
-static __inline__ int atomic64_inc_not_zero(atomic64_t *v)
-{
- s64 t1, t2;
-
- __asm__ __volatile__ (
- PPC_ATOMIC_ENTRY_BARRIER
-"1: ldarx %0,0,%2 # atomic64_inc_not_zero\n\
- cmpdi 0,%0,0\n\
- beq- 2f\n\
- addic %1,%0,1\n\
- stdcx. %1,0,%2\n\
- bne- 1b\n"
- PPC_ATOMIC_EXIT_BARRIER
- "\n\
-2:"
- : "=&r" (t1), "=&r" (t2)
- : "r" (&v->counter)
- : "cc", "xer", "memory");
-
- return t1 != 0;
-}
-#define atomic64_inc_not_zero(v) atomic64_inc_not_zero((v))
-
-#endif /* __powerpc64__ */
+#define atomic64_dec_if_positive_relaxed atomic64_dec_if_positive_relaxed
+#endif
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_ATOMIC_H_ */
diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h
index cf091c4c22e5..181f7e8b3281 100644
--- a/arch/powerpc/include/asm/cmpxchg.h
+++ b/arch/powerpc/include/asm/cmpxchg.h
@@ -192,7 +192,7 @@ __xchg_relaxed(void *ptr, unsigned long x, unsigned int size)
(unsigned long)_x_, sizeof(*(ptr))); \
})
-#define xchg_relaxed(ptr, x) \
+#define arch_xchg_relaxed(ptr, x) \
({ \
__typeof__(*(ptr)) _x_ = (x); \
(__typeof__(*(ptr))) __xchg_relaxed((ptr), \
@@ -448,35 +448,7 @@ __cmpxchg_relaxed(void *ptr, unsigned long old, unsigned long new,
return old;
}
-static __always_inline unsigned long
-__cmpxchg_acquire(void *ptr, unsigned long old, unsigned long new,
- unsigned int size)
-{
- switch (size) {
- case 1:
- return __cmpxchg_u8_acquire(ptr, old, new);
- case 2:
- return __cmpxchg_u16_acquire(ptr, old, new);
- case 4:
- return __cmpxchg_u32_acquire(ptr, old, new);
-#ifdef CONFIG_PPC64
- case 8:
- return __cmpxchg_u64_acquire(ptr, old, new);
-#endif
- }
- BUILD_BUG_ON_MSG(1, "Unsupported size for __cmpxchg_acquire");
- return old;
-}
-#define cmpxchg(ptr, o, n) \
- ({ \
- __typeof__(*(ptr)) _o_ = (o); \
- __typeof__(*(ptr)) _n_ = (n); \
- (__typeof__(*(ptr))) __cmpxchg((ptr), (unsigned long)_o_, \
- (unsigned long)_n_, sizeof(*(ptr))); \
- })
-
-
-#define cmpxchg_local(ptr, o, n) \
+#define arch_cmpxchg_local(ptr, o, n) \
({ \
__typeof__(*(ptr)) _o_ = (o); \
__typeof__(*(ptr)) _n_ = (n); \
@@ -484,7 +456,7 @@ __cmpxchg_acquire(void *ptr, unsigned long old, unsigned long new,
(unsigned long)_n_, sizeof(*(ptr))); \
})
-#define cmpxchg_relaxed(ptr, o, n) \
+#define arch_cmpxchg_relaxed(ptr, o, n) \
({ \
__typeof__(*(ptr)) _o_ = (o); \
__typeof__(*(ptr)) _n_ = (n); \
@@ -493,38 +465,20 @@ __cmpxchg_acquire(void *ptr, unsigned long old, unsigned long new,
sizeof(*(ptr))); \
})
-#define cmpxchg_acquire(ptr, o, n) \
-({ \
- __typeof__(*(ptr)) _o_ = (o); \
- __typeof__(*(ptr)) _n_ = (n); \
- (__typeof__(*(ptr))) __cmpxchg_acquire((ptr), \
- (unsigned long)_o_, (unsigned long)_n_, \
- sizeof(*(ptr))); \
-})
#ifdef CONFIG_PPC64
-#define cmpxchg64(ptr, o, n) \
- ({ \
- BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
- cmpxchg((ptr), (o), (n)); \
- })
-#define cmpxchg64_local(ptr, o, n) \
+#define arch_cmpxchg64_local(ptr, o, n) \
({ \
BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
- cmpxchg_local((ptr), (o), (n)); \
+ arch_cmpxchg_local((ptr), (o), (n)); \
})
-#define cmpxchg64_relaxed(ptr, o, n) \
-({ \
- BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
- cmpxchg_relaxed((ptr), (o), (n)); \
-})
-#define cmpxchg64_acquire(ptr, o, n) \
+#define arch_cmpxchg64_relaxed(ptr, o, n) \
({ \
BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
- cmpxchg_acquire((ptr), (o), (n)); \
+ arch_cmpxchg_relaxed((ptr), (o), (n)); \
})
#else
#include <asm-generic/cmpxchg-local.h>
-#define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n))
+#define arch_cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n))
#endif
#endif /* __KERNEL__ */
--
2.23.0
^ permalink raw reply related
* [PATCH] powerpc/64s/perf: perf interrupt does not have to get_user_pages to access user memory
From: Nicholas Piggin @ 2020-11-11 12:01 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Nicholas Piggin
read_user_stack_slow that walks user address translation by hand is
only required on hash, because a hash fault can not be serviced from
"NMI" context (to avoid re-entering the hash code) so the user stack
can be mapped into Linux page tables but not accessible by the CPU.
Radix MMU mode does not have this restriction. A page fault failure
would indicate the page is not accessible via get_user_pages either,
so avoid this on radix.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/perf/callchain.h | 2 +-
arch/powerpc/perf/callchain_64.c | 3 ++-
2 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/arch/powerpc/perf/callchain.h b/arch/powerpc/perf/callchain.h
index ae24d4a00da6..d6fa6e25234f 100644
--- a/arch/powerpc/perf/callchain.h
+++ b/arch/powerpc/perf/callchain.h
@@ -33,7 +33,7 @@ static inline int __read_user_stack(const void __user *ptr, void *ret,
rc = copy_from_user_nofault(ret, ptr, size);
- if (IS_ENABLED(CONFIG_PPC64) && rc)
+ if (IS_ENABLED(CONFIG_PPC64) && !radix_enabled() && rc)
return read_user_stack_slow(ptr, ret, size);
return rc;
diff --git a/arch/powerpc/perf/callchain_64.c b/arch/powerpc/perf/callchain_64.c
index fed90e827f3a..0777b04a0c56 100644
--- a/arch/powerpc/perf/callchain_64.c
+++ b/arch/powerpc/perf/callchain_64.c
@@ -21,7 +21,8 @@
/*
* On 64-bit we don't want to invoke hash_page on user addresses from
* interrupt context, so if the access faults, we read the page tables
- * to find which page (if any) is mapped and access it directly.
+ * to find which page (if any) is mapped and access it directly. Radix
+ * has no need for this so it doesn't use read_user_stack_slow.
*/
int read_user_stack_slow(const void __user *ptr, void *buf, int nb)
{
--
2.23.0
^ permalink raw reply related
* Re: [PATCH v1 3/4] powerpc/mm: remove linear mapping if __add_pages() fails in arch_add_memory()
From: David Hildenbrand @ 2020-11-11 12:07 UTC (permalink / raw)
To: Oscar Salvador, Mike Rapoport
Cc: Michal Hocko, Wei Yang, linux-kernel, linux-mm, Paul Mackerras,
Rashmica Gupta, linuxppc-dev, Andrew Morton
In-Reply-To: <20201104121109.GA5126@localhost.localdomain>
On 04.11.20 13:11, Oscar Salvador wrote:
> On Wed, Nov 04, 2020 at 02:06:51PM +0200, Mike Rapoport wrote:
>> On Wed, Nov 04, 2020 at 10:50:07AM +0100, osalvador wrote:
>>> On Thu, Oct 29, 2020 at 05:27:17PM +0100, David Hildenbrand wrote:
>>>> Let's revert what we did in case something goes wrong and we return an
>>>> error.
>>>
>>> Dumb question, but should not we do this for other arches as well?
>>
>> It seems arm64 and s390 already do that.
>> x86 could have its arch_add_memory() improved though :)
>
> Right, I only stared at x86 and see it did not have it.
> I guess we want to have all arches aligned with this.
The ultimate goal would be to get rid of arch-specific arch_add_memory()
implementations completely, providing arch_create_linear_mapping() /
arch_remove_linear_mapping() instead (as indicated in patch #1).
The x86 variant certainly needs love, but I'll keep this patch set
powerpc specific, so it can go via the powerpc tree in one piece. I'll
add unifying these implementations onto my todo list.
Thanks!
--
Thanks,
David / dhildenb
^ permalink raw reply
* Re: [PATCH v1 2/4] powerpc/mm: print warning in arch_remove_linear_mapping()
From: David Hildenbrand @ 2020-11-11 12:10 UTC (permalink / raw)
To: osalvador
Cc: Michal Hocko, Wei Yang, linux-kernel, linux-mm, Paul Mackerras,
Rashmica Gupta, linuxppc-dev, Andrew Morton, Mike Rapoport
In-Reply-To: <20201104094255.GA4981@localhost.localdomain>
On 04.11.20 10:42, osalvador wrote:
> On Thu, Oct 29, 2020 at 05:27:16PM +0100, David Hildenbrand wrote:
>> Let's print a warning similar to in arch_add_linear_mapping() instead of
>> WARN_ON_ONCE() and eventually crashing the kernel.
>>
>> Cc: Michael Ellerman <mpe@ellerman.id.au>
>> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
>> Cc: Paul Mackerras <paulus@samba.org>
>> Cc: Rashmica Gupta <rashmica.g@gmail.com>
>> Cc: Andrew Morton <akpm@linux-foundation.org>
>> Cc: Mike Rapoport <rppt@kernel.org>
>> Cc: Michal Hocko <mhocko@suse.com>
>> Cc: Oscar Salvador <osalvador@suse.de>
>> Cc: Wei Yang <richard.weiyang@linux.alibaba.com>
>> Signed-off-by: David Hildenbrand <david@redhat.com>
>> ---
>> arch/powerpc/mm/mem.c | 4 +++-
>> 1 file changed, 3 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
>> index 8a86d81f8df0..685028451dd2 100644
>> --- a/arch/powerpc/mm/mem.c
>> +++ b/arch/powerpc/mm/mem.c
>> @@ -145,7 +145,9 @@ void __ref arch_remove_linear_mapping(u64 start, u64 size)
>> flush_dcache_range_chunked(start, start + size, FLUSH_CHUNK_SIZE);
>>
>> ret = remove_section_mapping(start, start + size);
>> - WARN_ON_ONCE(ret);
>> + if (ret)
>> + pr_warn("Unable to remove linear mapping for 0x%llx..0x%llx: %d\n",
>> + start, start + size, ret);
>
> I guess the fear is to panic on systems that do have panic_on_warn (not
> sure how many production systems have this out there).
Exactly.
> But anyway, being coherent with that, I think you should remove the WARN_ON
> in hash__remove_section_mapping as well.
Thanks, I'll add a patch doing that.
>
> Besides that:
>
> Reviewed-by: Oscar Salvador <osalvador@suse.de>
>
> Not sure if the functions below that also have any sort of WARN_ON.
> native_hpte_removebolted has a VM_WARN_ON, but that is on
> CONFIG_DEBUG_VM so does not really matter.
Right. Thanks!
--
Thanks,
David / dhildenb
^ permalink raw reply
* [PATCH seccomp v2 1/8] csky: Enable seccomp architecture tracking
From: YiFei Zhu @ 2020-11-11 13:33 UTC (permalink / raw)
To: containers
Cc: linux-sh, Tobin Feldman-Fitzthum, Hubertus Franke, Jack Chen,
linux-riscv, Andrea Arcangeli, linux-s390, YiFei Zhu, linux-csky,
Tianyin Xu, linux-xtensa, Kees Cook, Jann Horn, Valentin Rothberg,
Aleksa Sarai, Josep Torrellas, Will Drewry, linux-parisc,
linux-kernel, Andy Lutomirski, Dimitrios Skarlatos, David Laight,
Giuseppe Scrivano, linuxppc-dev, Tycho Andersen
In-Reply-To: <cover.1605101222.git.yifeifz2@illinois.edu>
From: YiFei Zhu <yifeifz2@illinois.edu>
To enable seccomp constant action bitmaps, we need to have a static
mapping to the audit architecture and system call table size. Add these
for csky.
Signed-off-by: YiFei Zhu <yifeifz2@illinois.edu>
---
arch/csky/include/asm/Kbuild | 1 -
arch/csky/include/asm/seccomp.h | 11 +++++++++++
2 files changed, 11 insertions(+), 1 deletion(-)
create mode 100644 arch/csky/include/asm/seccomp.h
diff --git a/arch/csky/include/asm/Kbuild b/arch/csky/include/asm/Kbuild
index 64876e59e2ef..93372255984d 100644
--- a/arch/csky/include/asm/Kbuild
+++ b/arch/csky/include/asm/Kbuild
@@ -4,6 +4,5 @@ generic-y += gpio.h
generic-y += kvm_para.h
generic-y += local64.h
generic-y += qrwlock.h
-generic-y += seccomp.h
generic-y += user.h
generic-y += vmlinux.lds.h
diff --git a/arch/csky/include/asm/seccomp.h b/arch/csky/include/asm/seccomp.h
new file mode 100644
index 000000000000..d33e758126fb
--- /dev/null
+++ b/arch/csky/include/asm/seccomp.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _ASM_SECCOMP_H
+#define _ASM_SECCOMP_H
+
+#include <asm-generic/seccomp.h>
+
+#define SECCOMP_ARCH_NATIVE AUDIT_ARCH_CSKY
+#define SECCOMP_ARCH_NATIVE_NR NR_syscalls
+#define SECCOMP_ARCH_NATIVE_NAME "csky"
+
+#endif /* _ASM_SECCOMP_H */
--
2.29.2
^ permalink raw reply related
* [PATCH seccomp v2 0/8] seccomp: add bitmap cache support on remaining arches and report cache in procfs
From: YiFei Zhu @ 2020-11-11 13:33 UTC (permalink / raw)
To: containers
Cc: linux-sh, Tobin Feldman-Fitzthum, Hubertus Franke, Jack Chen,
linux-riscv, Andrea Arcangeli, linux-s390, YiFei Zhu, linux-csky,
Tianyin Xu, linux-xtensa, Kees Cook, Jann Horn, Valentin Rothberg,
Aleksa Sarai, Josep Torrellas, Will Drewry, linux-parisc,
linux-kernel, Andy Lutomirski, Dimitrios Skarlatos, David Laight,
Giuseppe Scrivano, linuxppc-dev, Tycho Andersen
From: YiFei Zhu <yifeifz2@illinois.edu>
This patch series enables bitmap cache for the remaining arches with
SECCOMP_FILTER, other than MIPS.
I was unable to find any of the arches having subarch-specific NR_syscalls
macros, so generic NR_syscalls is used. SH's syscall_get_arch seems to
only have the 32-bit subarch implementation. I'm not sure if this is
expected.
This series has not been tested; I have not built all the cross compilers
necessary to build test, let alone run the kernel or benchmark the
performance, so help on making sure the bitmap cache works as expected
(selftests/seccomp/{seccomp_benchmark,seccomp_bpf}) would be appreciated.
The series applies on top of Kees's for-next/seccomp branch.
v1 -> v2:
* ppc, sh: s/__SECCOMP_ARCH_LE_BIT/__SECCOMP_ARCH_LE/
* ppc: add "le" suffix to arch name when the arch is little endian.
* ppc: add explanation of why __LITTLE_ENDIAN__ is used to commit message.
YiFei Zhu (8):
csky: Enable seccomp architecture tracking
parisc: Enable seccomp architecture tracking
powerpc: Enable seccomp architecture tracking
riscv: Enable seccomp architecture tracking
s390: Enable seccomp architecture tracking
sh: Enable seccomp architecture tracking
xtensa: Enable seccomp architecture tracking
seccomp/cache: Report cache data through /proc/pid/seccomp_cache
arch/Kconfig | 15 ++++++++
arch/csky/include/asm/Kbuild | 1 -
arch/csky/include/asm/seccomp.h | 11 ++++++
arch/parisc/include/asm/Kbuild | 1 -
arch/parisc/include/asm/seccomp.h | 22 +++++++++++
arch/powerpc/include/asm/seccomp.h | 23 ++++++++++++
arch/riscv/include/asm/seccomp.h | 10 +++++
arch/s390/include/asm/seccomp.h | 9 +++++
arch/sh/include/asm/seccomp.h | 10 +++++
arch/xtensa/include/asm/Kbuild | 1 -
arch/xtensa/include/asm/seccomp.h | 11 ++++++
fs/proc/base.c | 6 +++
include/linux/seccomp.h | 7 ++++
kernel/seccomp.c | 59 ++++++++++++++++++++++++++++++
14 files changed, 183 insertions(+), 3 deletions(-)
create mode 100644 arch/csky/include/asm/seccomp.h
create mode 100644 arch/parisc/include/asm/seccomp.h
create mode 100644 arch/xtensa/include/asm/seccomp.h
base-commit: 38c37e8fd3d2590c4234d8cfbc22158362f0eb04
--
2.29.2
^ permalink raw reply
* [PATCH seccomp v2 2/8] parisc: Enable seccomp architecture tracking
From: YiFei Zhu @ 2020-11-11 13:33 UTC (permalink / raw)
To: containers
Cc: linux-sh, Tobin Feldman-Fitzthum, Hubertus Franke, Jack Chen,
linux-riscv, Andrea Arcangeli, linux-s390, YiFei Zhu, linux-csky,
Tianyin Xu, linux-xtensa, Kees Cook, Jann Horn, Valentin Rothberg,
Aleksa Sarai, Josep Torrellas, Will Drewry, linux-parisc,
linux-kernel, Andy Lutomirski, Dimitrios Skarlatos, David Laight,
Giuseppe Scrivano, linuxppc-dev, Tycho Andersen
In-Reply-To: <cover.1605101222.git.yifeifz2@illinois.edu>
From: YiFei Zhu <yifeifz2@illinois.edu>
To enable seccomp constant action bitmaps, we need to have a static
mapping to the audit architecture and system call table size. Add these
for parisc.
Signed-off-by: YiFei Zhu <yifeifz2@illinois.edu>
---
arch/parisc/include/asm/Kbuild | 1 -
arch/parisc/include/asm/seccomp.h | 22 ++++++++++++++++++++++
2 files changed, 22 insertions(+), 1 deletion(-)
create mode 100644 arch/parisc/include/asm/seccomp.h
diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild
index e3ee5c0bfe80..f16c4db80116 100644
--- a/arch/parisc/include/asm/Kbuild
+++ b/arch/parisc/include/asm/Kbuild
@@ -5,5 +5,4 @@ generated-y += syscall_table_c32.h
generic-y += kvm_para.h
generic-y += local64.h
generic-y += mcs_spinlock.h
-generic-y += seccomp.h
generic-y += user.h
diff --git a/arch/parisc/include/asm/seccomp.h b/arch/parisc/include/asm/seccomp.h
new file mode 100644
index 000000000000..b058b2220322
--- /dev/null
+++ b/arch/parisc/include/asm/seccomp.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _ASM_SECCOMP_H
+#define _ASM_SECCOMP_H
+
+#include <asm-generic/seccomp.h>
+
+#ifdef CONFIG_64BIT
+# define SECCOMP_ARCH_NATIVE AUDIT_ARCH_PARISC64
+# define SECCOMP_ARCH_NATIVE_NR NR_syscalls
+# define SECCOMP_ARCH_NATIVE_NAME "parisc64"
+# ifdef CONFIG_COMPAT
+# define SECCOMP_ARCH_COMPAT AUDIT_ARCH_PARISC
+# define SECCOMP_ARCH_COMPAT_NR NR_syscalls
+# define SECCOMP_ARCH_COMPAT_NAME "parisc"
+# endif
+#else /* !CONFIG_64BIT */
+# define SECCOMP_ARCH_NATIVE AUDIT_ARCH_PARISC
+# define SECCOMP_ARCH_NATIVE_NR NR_syscalls
+# define SECCOMP_ARCH_NATIVE_NAME "parisc"
+#endif
+
+#endif /* _ASM_SECCOMP_H */
--
2.29.2
^ permalink raw reply related
* [PATCH seccomp v2 3/8] powerpc: Enable seccomp architecture tracking
From: YiFei Zhu @ 2020-11-11 13:33 UTC (permalink / raw)
To: containers
Cc: linux-sh, Tobin Feldman-Fitzthum, Hubertus Franke, Jack Chen,
linux-riscv, Andrea Arcangeli, linux-s390, YiFei Zhu, linux-csky,
Tianyin Xu, linux-xtensa, Kees Cook, Jann Horn, Valentin Rothberg,
Aleksa Sarai, Josep Torrellas, Will Drewry, linux-parisc,
linux-kernel, Andy Lutomirski, Dimitrios Skarlatos, David Laight,
Giuseppe Scrivano, linuxppc-dev, Tycho Andersen
In-Reply-To: <cover.1605101222.git.yifeifz2@illinois.edu>
From: YiFei Zhu <yifeifz2@illinois.edu>
To enable seccomp constant action bitmaps, we need to have a static
mapping to the audit architecture and system call table size. Add these
for powerpc.
__LITTLE_ENDIAN__ is used here instead of CONFIG_CPU_LITTLE_ENDIAN
to keep it consistent with asm/syscall.h.
Signed-off-by: YiFei Zhu <yifeifz2@illinois.edu>
---
arch/powerpc/include/asm/seccomp.h | 23 +++++++++++++++++++++++
1 file changed, 23 insertions(+)
diff --git a/arch/powerpc/include/asm/seccomp.h b/arch/powerpc/include/asm/seccomp.h
index 51209f6071c5..ac2033f134f0 100644
--- a/arch/powerpc/include/asm/seccomp.h
+++ b/arch/powerpc/include/asm/seccomp.h
@@ -8,4 +8,27 @@
#include <asm-generic/seccomp.h>
+#ifdef __LITTLE_ENDIAN__
+#define __SECCOMP_ARCH_LE __AUDIT_ARCH_LE
+#define __SECCOMP_ARCH_LE_NAME "le"
+#else
+#define __SECCOMP_ARCH_LE 0
+#define __SECCOMP_ARCH_LE_NAME
+#endif
+
+#ifdef CONFIG_PPC64
+# define SECCOMP_ARCH_NATIVE (AUDIT_ARCH_PPC64 | __SECCOMP_ARCH_LE)
+# define SECCOMP_ARCH_NATIVE_NR NR_syscalls
+# define SECCOMP_ARCH_NATIVE_NAME "ppc64" __SECCOMP_ARCH_LE_NAME
+# ifdef CONFIG_COMPAT
+# define SECCOMP_ARCH_COMPAT (AUDIT_ARCH_PPC | __SECCOMP_ARCH_LE)
+# define SECCOMP_ARCH_COMPAT_NR NR_syscalls
+# define SECCOMP_ARCH_COMPAT_NAME "ppc" __SECCOMP_ARCH_LE_NAME
+# endif
+#else /* !CONFIG_PPC64 */
+# define SECCOMP_ARCH_NATIVE (AUDIT_ARCH_PPC | __SECCOMP_ARCH_LE)
+# define SECCOMP_ARCH_NATIVE_NR NR_syscalls
+# define SECCOMP_ARCH_NATIVE_NAME "ppc" __SECCOMP_ARCH_LE_NAME
+#endif
+
#endif /* _ASM_POWERPC_SECCOMP_H */
--
2.29.2
^ permalink raw reply related
* [PATCH seccomp v2 5/8] s390: Enable seccomp architecture tracking
From: YiFei Zhu @ 2020-11-11 13:33 UTC (permalink / raw)
To: containers
Cc: linux-sh, Tobin Feldman-Fitzthum, Hubertus Franke, Jack Chen,
linux-riscv, Andrea Arcangeli, linux-s390, YiFei Zhu, linux-csky,
Tianyin Xu, linux-xtensa, Kees Cook, Jann Horn, Valentin Rothberg,
Aleksa Sarai, Josep Torrellas, Will Drewry, linux-parisc,
linux-kernel, Andy Lutomirski, Dimitrios Skarlatos, David Laight,
Giuseppe Scrivano, linuxppc-dev, Tycho Andersen
In-Reply-To: <cover.1605101222.git.yifeifz2@illinois.edu>
From: YiFei Zhu <yifeifz2@illinois.edu>
To enable seccomp constant action bitmaps, we need to have a static
mapping to the audit architecture and system call table size. Add these
for s390.
Signed-off-by: YiFei Zhu <yifeifz2@illinois.edu>
---
arch/s390/include/asm/seccomp.h | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/arch/s390/include/asm/seccomp.h b/arch/s390/include/asm/seccomp.h
index 795bbe0d7ca6..71d46f0ba97b 100644
--- a/arch/s390/include/asm/seccomp.h
+++ b/arch/s390/include/asm/seccomp.h
@@ -16,4 +16,13 @@
#include <asm-generic/seccomp.h>
+#define SECCOMP_ARCH_NATIVE AUDIT_ARCH_S390X
+#define SECCOMP_ARCH_NATIVE_NR NR_syscalls
+#define SECCOMP_ARCH_NATIVE_NAME "s390x"
+#ifdef CONFIG_COMPAT
+# define SECCOMP_ARCH_COMPAT AUDIT_ARCH_S390
+# define SECCOMP_ARCH_COMPAT_NR NR_syscalls
+# define SECCOMP_ARCH_COMPAT_NAME "s390"
+#endif
+
#endif /* _ASM_S390_SECCOMP_H */
--
2.29.2
^ permalink raw reply related
* [PATCH seccomp v2 4/8] riscv: Enable seccomp architecture tracking
From: YiFei Zhu @ 2020-11-11 13:33 UTC (permalink / raw)
To: containers
Cc: linux-sh, Tobin Feldman-Fitzthum, Hubertus Franke, Jack Chen,
linux-riscv, Andrea Arcangeli, linux-s390, YiFei Zhu, linux-csky,
Tianyin Xu, linux-xtensa, Kees Cook, Jann Horn, Valentin Rothberg,
Aleksa Sarai, Josep Torrellas, Will Drewry, linux-parisc,
linux-kernel, Andy Lutomirski, Dimitrios Skarlatos, David Laight,
Giuseppe Scrivano, linuxppc-dev, Tycho Andersen
In-Reply-To: <cover.1605101222.git.yifeifz2@illinois.edu>
From: YiFei Zhu <yifeifz2@illinois.edu>
To enable seccomp constant action bitmaps, we need to have a static
mapping to the audit architecture and system call table size. Add these
for riscv.
Signed-off-by: YiFei Zhu <yifeifz2@illinois.edu>
---
arch/riscv/include/asm/seccomp.h | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/arch/riscv/include/asm/seccomp.h b/arch/riscv/include/asm/seccomp.h
index bf7744ee3b3d..c7ee6a3507be 100644
--- a/arch/riscv/include/asm/seccomp.h
+++ b/arch/riscv/include/asm/seccomp.h
@@ -7,4 +7,14 @@
#include <asm-generic/seccomp.h>
+#ifdef CONFIG_64BIT
+# define SECCOMP_ARCH_NATIVE AUDIT_ARCH_RISCV64
+# define SECCOMP_ARCH_NATIVE_NR NR_syscalls
+# define SECCOMP_ARCH_NATIVE_NAME "riscv64"
+#else /* !CONFIG_64BIT */
+# define SECCOMP_ARCH_NATIVE AUDIT_ARCH_RISCV32
+# define SECCOMP_ARCH_NATIVE_NR NR_syscalls
+# define SECCOMP_ARCH_NATIVE_NAME "riscv32"
+#endif
+
#endif /* _ASM_SECCOMP_H */
--
2.29.2
^ permalink raw reply related
* [PATCH seccomp v2 6/8] sh: Enable seccomp architecture tracking
From: YiFei Zhu @ 2020-11-11 13:33 UTC (permalink / raw)
To: containers
Cc: linux-sh, Tobin Feldman-Fitzthum, Hubertus Franke, Jack Chen,
linux-riscv, Andrea Arcangeli, linux-s390, YiFei Zhu, linux-csky,
Tianyin Xu, linux-xtensa, Kees Cook, Jann Horn, Valentin Rothberg,
Aleksa Sarai, Josep Torrellas, Will Drewry, linux-parisc,
linux-kernel, Andy Lutomirski, Dimitrios Skarlatos, David Laight,
Giuseppe Scrivano, linuxppc-dev, Tycho Andersen
In-Reply-To: <cover.1605101222.git.yifeifz2@illinois.edu>
From: YiFei Zhu <yifeifz2@illinois.edu>
To enable seccomp constant action bitmaps, we need to have a static
mapping to the audit architecture and system call table size. Add these
for sh.
Signed-off-by: YiFei Zhu <yifeifz2@illinois.edu>
---
arch/sh/include/asm/seccomp.h | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/arch/sh/include/asm/seccomp.h b/arch/sh/include/asm/seccomp.h
index 54111e4d32b8..d4578395fd66 100644
--- a/arch/sh/include/asm/seccomp.h
+++ b/arch/sh/include/asm/seccomp.h
@@ -8,4 +8,14 @@
#define __NR_seccomp_exit __NR_exit
#define __NR_seccomp_sigreturn __NR_rt_sigreturn
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+#define __SECCOMP_ARCH_LE __AUDIT_ARCH_LE
+#else
+#define __SECCOMP_ARCH_LE 0
+#endif
+
+#define SECCOMP_ARCH_NATIVE (AUDIT_ARCH_SH | __SECCOMP_ARCH_LE)
+#define SECCOMP_ARCH_NATIVE_NR NR_syscalls
+#define SECCOMP_ARCH_NATIVE_NAME "sh"
+
#endif /* __ASM_SECCOMP_H */
--
2.29.2
^ permalink raw reply related
* [PATCH seccomp v2 7/8] xtensa: Enable seccomp architecture tracking
From: YiFei Zhu @ 2020-11-11 13:33 UTC (permalink / raw)
To: containers
Cc: linux-sh, Tobin Feldman-Fitzthum, Hubertus Franke, Jack Chen,
linux-riscv, Andrea Arcangeli, linux-s390, YiFei Zhu, linux-csky,
Tianyin Xu, linux-xtensa, Kees Cook, Jann Horn, Valentin Rothberg,
Aleksa Sarai, Josep Torrellas, Will Drewry, linux-parisc,
linux-kernel, Andy Lutomirski, Dimitrios Skarlatos, David Laight,
Giuseppe Scrivano, linuxppc-dev, Tycho Andersen
In-Reply-To: <cover.1605101222.git.yifeifz2@illinois.edu>
From: YiFei Zhu <yifeifz2@illinois.edu>
To enable seccomp constant action bitmaps, we need to have a static
mapping to the audit architecture and system call table size. Add these
for xtensa.
Signed-off-by: YiFei Zhu <yifeifz2@illinois.edu>
---
arch/xtensa/include/asm/Kbuild | 1 -
arch/xtensa/include/asm/seccomp.h | 11 +++++++++++
2 files changed, 11 insertions(+), 1 deletion(-)
create mode 100644 arch/xtensa/include/asm/seccomp.h
diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild
index c59c42a1221a..9718e9593564 100644
--- a/arch/xtensa/include/asm/Kbuild
+++ b/arch/xtensa/include/asm/Kbuild
@@ -7,5 +7,4 @@ generic-y += mcs_spinlock.h
generic-y += param.h
generic-y += qrwlock.h
generic-y += qspinlock.h
-generic-y += seccomp.h
generic-y += user.h
diff --git a/arch/xtensa/include/asm/seccomp.h b/arch/xtensa/include/asm/seccomp.h
new file mode 100644
index 000000000000..f1cb6b0a9e1f
--- /dev/null
+++ b/arch/xtensa/include/asm/seccomp.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _ASM_SECCOMP_H
+#define _ASM_SECCOMP_H
+
+#include <asm-generic/seccomp.h>
+
+#define SECCOMP_ARCH_NATIVE AUDIT_ARCH_XTENSA
+#define SECCOMP_ARCH_NATIVE_NR NR_syscalls
+#define SECCOMP_ARCH_NATIVE_NAME "xtensa"
+
+#endif /* _ASM_SECCOMP_H */
--
2.29.2
^ permalink raw reply related
* [PATCH seccomp v2 8/8] seccomp/cache: Report cache data through /proc/pid/seccomp_cache
From: YiFei Zhu @ 2020-11-11 13:33 UTC (permalink / raw)
To: containers
Cc: linux-sh, Tobin Feldman-Fitzthum, Hubertus Franke, Jack Chen,
linux-riscv, Andrea Arcangeli, linux-s390, YiFei Zhu, linux-csky,
Tianyin Xu, linux-xtensa, Kees Cook, Jann Horn, Valentin Rothberg,
Aleksa Sarai, Josep Torrellas, Will Drewry, linux-parisc,
linux-kernel, Andy Lutomirski, Dimitrios Skarlatos, David Laight,
Giuseppe Scrivano, linuxppc-dev, Tycho Andersen
In-Reply-To: <cover.1605101222.git.yifeifz2@illinois.edu>
From: YiFei Zhu <yifeifz2@illinois.edu>
Currently the kernel does not provide an infrastructure to translate
architecture numbers to a human-readable name. Translating syscall
numbers to syscall names is possible through FTRACE_SYSCALL
infrastructure but it does not provide support for compat syscalls.
This will create a file for each PID as /proc/pid/seccomp_cache.
The file will be empty when no seccomp filters are loaded, or be
in the format of:
<arch name> <decimal syscall number> <ALLOW | FILTER>
where ALLOW means the cache is guaranteed to allow the syscall,
and FILTER means the cache will pass the syscall to the BPF filter.
For the docker default profile on x86_64 it looks like:
x86_64 0 ALLOW
x86_64 1 ALLOW
x86_64 2 ALLOW
x86_64 3 ALLOW
[...]
x86_64 132 ALLOW
x86_64 133 ALLOW
x86_64 134 FILTER
x86_64 135 FILTER
x86_64 136 FILTER
x86_64 137 ALLOW
x86_64 138 ALLOW
x86_64 139 FILTER
x86_64 140 ALLOW
x86_64 141 ALLOW
[...]
This file is guarded by CONFIG_SECCOMP_CACHE_DEBUG with a default
of N because I think certain users of seccomp might not want the
application to know which syscalls are definitely usable. For
the same reason, it is also guarded by CAP_SYS_ADMIN.
Suggested-by: Jann Horn <jannh@google.com>
Link: https://lore.kernel.org/lkml/CAG48ez3Ofqp4crXGksLmZY6=fGrF_tWyUCg7PBkAetvbbOPeOA@mail.gmail.com/
Signed-off-by: YiFei Zhu <yifeifz2@illinois.edu>
---
arch/Kconfig | 15 +++++++++++
fs/proc/base.c | 6 +++++
include/linux/seccomp.h | 7 +++++
kernel/seccomp.c | 59 +++++++++++++++++++++++++++++++++++++++++
4 files changed, 87 insertions(+)
diff --git a/arch/Kconfig b/arch/Kconfig
index 56b6ccc0e32d..6e2eb7171da0 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -514,6 +514,21 @@ config SECCOMP_FILTER
See Documentation/userspace-api/seccomp_filter.rst for details.
+config SECCOMP_CACHE_DEBUG
+ bool "Show seccomp filter cache status in /proc/pid/seccomp_cache"
+ depends on SECCOMP
+ depends on SECCOMP_FILTER && !HAVE_SPARSE_SYSCALL_NR
+ depends on PROC_FS
+ help
+ This enables the /proc/pid/seccomp_cache interface to monitor
+ seccomp cache data. The file format is subject to change. Reading
+ the file requires CAP_SYS_ADMIN.
+
+ This option is for debugging only. Enabling presents the risk that
+ an adversary may be able to infer the seccomp filter logic.
+
+ If unsure, say N.
+
config HAVE_ARCH_STACKLEAK
bool
help
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 0f707003dda5..d652f9dbaecc 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -3261,6 +3261,9 @@ static const struct pid_entry tgid_base_stuff[] = {
#ifdef CONFIG_PROC_PID_ARCH_STATUS
ONE("arch_status", S_IRUGO, proc_pid_arch_status),
#endif
+#ifdef CONFIG_SECCOMP_CACHE_DEBUG
+ ONE("seccomp_cache", S_IRUSR, proc_pid_seccomp_cache),
+#endif
};
static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx)
@@ -3590,6 +3593,9 @@ static const struct pid_entry tid_base_stuff[] = {
#ifdef CONFIG_PROC_PID_ARCH_STATUS
ONE("arch_status", S_IRUGO, proc_pid_arch_status),
#endif
+#ifdef CONFIG_SECCOMP_CACHE_DEBUG
+ ONE("seccomp_cache", S_IRUSR, proc_pid_seccomp_cache),
+#endif
};
static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx)
diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
index 02aef2844c38..76963ec4641a 100644
--- a/include/linux/seccomp.h
+++ b/include/linux/seccomp.h
@@ -121,4 +121,11 @@ static inline long seccomp_get_metadata(struct task_struct *task,
return -EINVAL;
}
#endif /* CONFIG_SECCOMP_FILTER && CONFIG_CHECKPOINT_RESTORE */
+
+#ifdef CONFIG_SECCOMP_CACHE_DEBUG
+struct seq_file;
+
+int proc_pid_seccomp_cache(struct seq_file *m, struct pid_namespace *ns,
+ struct pid *pid, struct task_struct *task);
+#endif
#endif /* _LINUX_SECCOMP_H */
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index d8cf468dbe1e..76f524e320b1 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -553,6 +553,9 @@ void seccomp_filter_release(struct task_struct *tsk)
{
struct seccomp_filter *orig = tsk->seccomp.filter;
+ /* We are effectively holding the siglock by not having any sighand. */
+ WARN_ON(tsk->sighand != NULL);
+
/* Detach task from its filter tree. */
tsk->seccomp.filter = NULL;
__seccomp_filter_release(orig);
@@ -2335,3 +2338,59 @@ static int __init seccomp_sysctl_init(void)
device_initcall(seccomp_sysctl_init)
#endif /* CONFIG_SYSCTL */
+
+#ifdef CONFIG_SECCOMP_CACHE_DEBUG
+/* Currently CONFIG_SECCOMP_CACHE_DEBUG implies SECCOMP_ARCH_NATIVE */
+static void proc_pid_seccomp_cache_arch(struct seq_file *m, const char *name,
+ const void *bitmap, size_t bitmap_size)
+{
+ int nr;
+
+ for (nr = 0; nr < bitmap_size; nr++) {
+ bool cached = test_bit(nr, bitmap);
+ char *status = cached ? "ALLOW" : "FILTER";
+
+ seq_printf(m, "%s %d %s\n", name, nr, status);
+ }
+}
+
+int proc_pid_seccomp_cache(struct seq_file *m, struct pid_namespace *ns,
+ struct pid *pid, struct task_struct *task)
+{
+ struct seccomp_filter *f;
+ unsigned long flags;
+
+ /*
+ * We don't want some sandboxed process to know what their seccomp
+ * filters consist of.
+ */
+ if (!file_ns_capable(m->file, &init_user_ns, CAP_SYS_ADMIN))
+ return -EACCES;
+
+ if (!lock_task_sighand(task, &flags))
+ return -ESRCH;
+
+ f = READ_ONCE(task->seccomp.filter);
+ if (!f) {
+ unlock_task_sighand(task, &flags);
+ return 0;
+ }
+
+ /* prevent filter from being freed while we are printing it */
+ __get_seccomp_filter(f);
+ unlock_task_sighand(task, &flags);
+
+ proc_pid_seccomp_cache_arch(m, SECCOMP_ARCH_NATIVE_NAME,
+ f->cache.allow_native,
+ SECCOMP_ARCH_NATIVE_NR);
+
+#ifdef SECCOMP_ARCH_COMPAT
+ proc_pid_seccomp_cache_arch(m, SECCOMP_ARCH_COMPAT_NAME,
+ f->cache.allow_compat,
+ SECCOMP_ARCH_COMPAT_NR);
+#endif /* SECCOMP_ARCH_COMPAT */
+
+ __put_seccomp_filter(f);
+ return 0;
+}
+#endif /* CONFIG_SECCOMP_CACHE_DEBUG */
--
2.29.2
^ permalink raw reply related
* Re: [PATCH 1/3] asm-generic/atomic64: Add support for ARCH_ATOMIC
From: Christophe Leroy @ 2020-11-11 13:39 UTC (permalink / raw)
To: Nicholas Piggin, linuxppc-dev
Cc: Christophe Leroy, linux-arch, Arnd Bergmann, Peter Zijlstra,
Boqun Feng, linux-kernel, Alexey Kardashevskiy, Will Deacon
In-Reply-To: <20201111110723.3148665-2-npiggin@gmail.com>
Hello,
Le 11/11/2020 à 12:07, Nicholas Piggin a écrit :
> This passes atomic64 selftest on ppc32 on qemu (uniprocessor only)
> both before and after powerpc is converted to use ARCH_ATOMIC.
Can you explain what this change does and why it is needed ?
Christophe
>
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
> include/asm-generic/atomic64.h | 70 +++++++++++++++++++++++++++-------
> lib/atomic64.c | 36 ++++++++---------
> 2 files changed, 75 insertions(+), 31 deletions(-)
>
> diff --git a/include/asm-generic/atomic64.h b/include/asm-generic/atomic64.h
> index 370f01d4450f..2b1ecb591bb9 100644
> --- a/include/asm-generic/atomic64.h
> +++ b/include/asm-generic/atomic64.h
> @@ -15,19 +15,17 @@ typedef struct {
>
> #define ATOMIC64_INIT(i) { (i) }
>
> -extern s64 atomic64_read(const atomic64_t *v);
> -extern void atomic64_set(atomic64_t *v, s64 i);
> -
> -#define atomic64_set_release(v, i) atomic64_set((v), (i))
> +extern s64 __atomic64_read(const atomic64_t *v);
> +extern void __atomic64_set(atomic64_t *v, s64 i);
>
> #define ATOMIC64_OP(op) \
> -extern void atomic64_##op(s64 a, atomic64_t *v);
> +extern void __atomic64_##op(s64 a, atomic64_t *v);
>
> #define ATOMIC64_OP_RETURN(op) \
> -extern s64 atomic64_##op##_return(s64 a, atomic64_t *v);
> +extern s64 __atomic64_##op##_return(s64 a, atomic64_t *v);
>
> #define ATOMIC64_FETCH_OP(op) \
> -extern s64 atomic64_fetch_##op(s64 a, atomic64_t *v);
> +extern s64 __atomic64_fetch_##op(s64 a, atomic64_t *v);
>
> #define ATOMIC64_OPS(op) ATOMIC64_OP(op) ATOMIC64_OP_RETURN(op) ATOMIC64_FETCH_OP(op)
>
> @@ -46,11 +44,57 @@ ATOMIC64_OPS(xor)
> #undef ATOMIC64_OP_RETURN
> #undef ATOMIC64_OP
>
> -extern s64 atomic64_dec_if_positive(atomic64_t *v);
> -#define atomic64_dec_if_positive atomic64_dec_if_positive
> -extern s64 atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n);
> -extern s64 atomic64_xchg(atomic64_t *v, s64 new);
> -extern s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u);
> -#define atomic64_fetch_add_unless atomic64_fetch_add_unless
> +extern s64 __atomic64_dec_if_positive(atomic64_t *v);
> +extern s64 __atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n);
> +extern s64 __atomic64_xchg(atomic64_t *v, s64 new);
> +extern s64 __atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u);
> +
> +#ifdef ARCH_ATOMIC
> +#define arch_atomic64_read __atomic64_read
> +#define arch_atomic64_set __atomic64_set
> +#define arch_atomic64_add __atomic64_add
> +#define arch_atomic64_add_return __atomic64_add_return
> +#define arch_atomic64_fetch_add __atomic64_fetch_add
> +#define arch_atomic64_sub __atomic64_sub
> +#define arch_atomic64_sub_return __atomic64_sub_return
> +#define arch_atomic64_fetch_sub __atomic64_fetch_sub
> +#define arch_atomic64_and __atomic64_and
> +#define arch_atomic64_and_return __atomic64_and_return
> +#define arch_atomic64_fetch_and __atomic64_fetch_and
> +#define arch_atomic64_or __atomic64_or
> +#define arch_atomic64_or_return __atomic64_or_return
> +#define arch_atomic64_fetch_or __atomic64_fetch_or
> +#define arch_atomic64_xor __atomic64_xor
> +#define arch_atomic64_xor_return __atomic64_xor_return
> +#define arch_atomic64_fetch_xor __atomic64_fetch_xor
> +#define arch_atomic64_xchg __atomic64_xchg
> +#define arch_atomic64_cmpxchg __atomic64_cmpxchg
> +#define arch_atomic64_set_release(v, i) __atomic64_set((v), (i))
> +#define arch_atomic64_dec_if_positive __atomic64_dec_if_positive
> +#define arch_atomic64_fetch_add_unless __atomic64_fetch_add_unless
> +#else
> +#define atomic64_read __atomic64_read
> +#define atomic64_set __atomic64_set
> +#define atomic64_add __atomic64_add
> +#define atomic64_add_return __atomic64_add_return
> +#define atomic64_fetch_add __atomic64_fetch_add
> +#define atomic64_sub __atomic64_sub
> +#define atomic64_sub_return __atomic64_sub_return
> +#define atomic64_fetch_sub __atomic64_fetch_sub
> +#define atomic64_and __atomic64_and
> +#define atomic64_and_return __atomic64_and_return
> +#define atomic64_fetch_and __atomic64_fetch_and
> +#define atomic64_or __atomic64_or
> +#define atomic64_or_return __atomic64_or_return
> +#define atomic64_fetch_or __atomic64_fetch_or
> +#define atomic64_xor __atomic64_xor
> +#define atomic64_xor_return __atomic64_xor_return
> +#define atomic64_fetch_xor __atomic64_fetch_xor
> +#define atomic64_xchg __atomic64_xchg
> +#define atomic64_cmpxchg __atomic64_cmpxchg
> +#define atomic64_set_release(v, i) __atomic64_set((v), (i))
> +#define atomic64_dec_if_positive __atomic64_dec_if_positive
> +#define atomic64_fetch_add_unless __atomic64_fetch_add_unless
> +#endif
>
> #endif /* _ASM_GENERIC_ATOMIC64_H */
> diff --git a/lib/atomic64.c b/lib/atomic64.c
> index e98c85a99787..05aba5e3268f 100644
> --- a/lib/atomic64.c
> +++ b/lib/atomic64.c
> @@ -42,7 +42,7 @@ static inline raw_spinlock_t *lock_addr(const atomic64_t *v)
> return &atomic64_lock[addr & (NR_LOCKS - 1)].lock;
> }
>
> -s64 atomic64_read(const atomic64_t *v)
> +s64 __atomic64_read(const atomic64_t *v)
> {
> unsigned long flags;
> raw_spinlock_t *lock = lock_addr(v);
> @@ -53,9 +53,9 @@ s64 atomic64_read(const atomic64_t *v)
> raw_spin_unlock_irqrestore(lock, flags);
> return val;
> }
> -EXPORT_SYMBOL(atomic64_read);
> +EXPORT_SYMBOL(__atomic64_read);
>
> -void atomic64_set(atomic64_t *v, s64 i)
> +void __atomic64_set(atomic64_t *v, s64 i)
> {
> unsigned long flags;
> raw_spinlock_t *lock = lock_addr(v);
> @@ -64,10 +64,10 @@ void atomic64_set(atomic64_t *v, s64 i)
> v->counter = i;
> raw_spin_unlock_irqrestore(lock, flags);
> }
> -EXPORT_SYMBOL(atomic64_set);
> +EXPORT_SYMBOL(__atomic64_set);
>
> #define ATOMIC64_OP(op, c_op) \
> -void atomic64_##op(s64 a, atomic64_t *v) \
> +void __atomic64_##op(s64 a, atomic64_t *v) \
> { \
> unsigned long flags; \
> raw_spinlock_t *lock = lock_addr(v); \
> @@ -76,10 +76,10 @@ void atomic64_##op(s64 a, atomic64_t *v) \
> v->counter c_op a; \
> raw_spin_unlock_irqrestore(lock, flags); \
> } \
> -EXPORT_SYMBOL(atomic64_##op);
> +EXPORT_SYMBOL(__atomic64_##op);
>
> #define ATOMIC64_OP_RETURN(op, c_op) \
> -s64 atomic64_##op##_return(s64 a, atomic64_t *v) \
> +s64 __atomic64_##op##_return(s64 a, atomic64_t *v) \
> { \
> unsigned long flags; \
> raw_spinlock_t *lock = lock_addr(v); \
> @@ -90,10 +90,10 @@ s64 atomic64_##op##_return(s64 a, atomic64_t *v) \
> raw_spin_unlock_irqrestore(lock, flags); \
> return val; \
> } \
> -EXPORT_SYMBOL(atomic64_##op##_return);
> +EXPORT_SYMBOL(__atomic64_##op##_return);
>
> #define ATOMIC64_FETCH_OP(op, c_op) \
> -s64 atomic64_fetch_##op(s64 a, atomic64_t *v) \
> +s64 __atomic64_fetch_##op(s64 a, atomic64_t *v) \
> { \
> unsigned long flags; \
> raw_spinlock_t *lock = lock_addr(v); \
> @@ -105,7 +105,7 @@ s64 atomic64_fetch_##op(s64 a, atomic64_t *v) \
> raw_spin_unlock_irqrestore(lock, flags); \
> return val; \
> } \
> -EXPORT_SYMBOL(atomic64_fetch_##op);
> +EXPORT_SYMBOL(__atomic64_fetch_##op);
>
> #define ATOMIC64_OPS(op, c_op) \
> ATOMIC64_OP(op, c_op) \
> @@ -130,7 +130,7 @@ ATOMIC64_OPS(xor, ^=)
> #undef ATOMIC64_OP_RETURN
> #undef ATOMIC64_OP
>
> -s64 atomic64_dec_if_positive(atomic64_t *v)
> +s64 __atomic64_dec_if_positive(atomic64_t *v)
> {
> unsigned long flags;
> raw_spinlock_t *lock = lock_addr(v);
> @@ -143,9 +143,9 @@ s64 atomic64_dec_if_positive(atomic64_t *v)
> raw_spin_unlock_irqrestore(lock, flags);
> return val;
> }
> -EXPORT_SYMBOL(atomic64_dec_if_positive);
> +EXPORT_SYMBOL(__atomic64_dec_if_positive);
>
> -s64 atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n)
> +s64 __atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n)
> {
> unsigned long flags;
> raw_spinlock_t *lock = lock_addr(v);
> @@ -158,9 +158,9 @@ s64 atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n)
> raw_spin_unlock_irqrestore(lock, flags);
> return val;
> }
> -EXPORT_SYMBOL(atomic64_cmpxchg);
> +EXPORT_SYMBOL(__atomic64_cmpxchg);
>
> -s64 atomic64_xchg(atomic64_t *v, s64 new)
> +s64 __atomic64_xchg(atomic64_t *v, s64 new)
> {
> unsigned long flags;
> raw_spinlock_t *lock = lock_addr(v);
> @@ -172,9 +172,9 @@ s64 atomic64_xchg(atomic64_t *v, s64 new)
> raw_spin_unlock_irqrestore(lock, flags);
> return val;
> }
> -EXPORT_SYMBOL(atomic64_xchg);
> +EXPORT_SYMBOL(__atomic64_xchg);
>
> -s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
> +s64 __atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
> {
> unsigned long flags;
> raw_spinlock_t *lock = lock_addr(v);
> @@ -188,4 +188,4 @@ s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
>
> return val;
> }
> -EXPORT_SYMBOL(atomic64_fetch_add_unless);
> +EXPORT_SYMBOL(__atomic64_fetch_add_unless);
>
^ permalink raw reply
* Re: [PATCH 1/3] asm-generic/atomic64: Add support for ARCH_ATOMIC
From: Peter Zijlstra @ 2020-11-11 13:44 UTC (permalink / raw)
To: Christophe Leroy
Cc: Christophe Leroy, linux-arch, Arnd Bergmann, Alexey Kardashevskiy,
linuxppc-dev, Boqun Feng, linux-kernel, Nicholas Piggin,
Will Deacon
In-Reply-To: <3086114c-8af6-3863-0cbf-5d3956fcda95@csgroup.eu>
On Wed, Nov 11, 2020 at 02:39:01PM +0100, Christophe Leroy wrote:
> Hello,
>
> Le 11/11/2020 à 12:07, Nicholas Piggin a écrit :
> > This passes atomic64 selftest on ppc32 on qemu (uniprocessor only)
> > both before and after powerpc is converted to use ARCH_ATOMIC.
>
> Can you explain what this change does and why it is needed ?
That certainly should've been in the Changelog. This enables atomic
instrumentation, see asm-generic/atomic-instrumented.h. IOW, it makes
atomic ops visible to K*SAN.
^ permalink raw reply
* [PATCH v2 0/8] powernv/memtrace: don't abuse memory hot(un)plug infrastructure for memory allocations
From: David Hildenbrand @ 2020-11-11 14:53 UTC (permalink / raw)
To: linux-kernel
Cc: Michal Hocko, Wei Yang, David Hildenbrand, Nicholas Piggin,
Michal Hocko, linux-mm, Paul Mackerras, Aneesh Kumar K.V,
Andrew Morton, linuxppc-dev, Rashmica Gupta, Mike Rapoport,
Oscar Salvador
Based on latest linux/master
powernv/memtrace is the only in-kernel user that rips out random memory
it never added (doesn't own) in order to allocate memory without a
linear mapping. Let's stop abusing memory hot(un)plug infrastructure for
that - use alloc_contig_pages() for allocating memory and remove the
linear mapping manually.
The original idea was discussed in:
https://lkml.kernel.org/r/48340e96-7e6b-736f-9e23-d3111b915b6e@redhat.com
I only tested via QEMU TCG with a single NUMA node - see patch #8 for more
details.
Error handling and cleanup handling in memtrace code is a mess - that
should definitely get cleaned up sooner or later. Once we have __GFP_ZERO
support for alloc_contig_pages(), we can drop manual clearing. I added
a TODO for now, so this series can go via the powerpc tree - the __GFP_ZERO
change is then better suited via the mm tree, along with support for
__GFP_ZERO.
v1 -> v2:
- Tweaks to patch descriptions
- "powernv/memtrace: don't leak kernel memory to user space"
-- Added. Reported by Michael.
- "powernv/memtrace: fix crashing the kernel when enabling concurrently"
-- Added, discovered while testing.
- "powerpc/mm: protect linear mapping modifications by a mutex"
-- Added. Although we currently won't have concurrency, this is cleaner and
future-proof.
- "powerpc/book3s64/hash: drop WARN_ON in hash__remove_section_mapping"
-- Added. Suggested by Oscar
- "powernv/memtrace: don't abuse memory hot(un)plug infrastructure for
memory allocations"
-- Reshuffle the code to make review easier.
-- Add a TODO regarding __GFP_ZERO. Adapt to changed page clearing code.
-- Use GFP_KERNEL | __GFP_THISNODE | __GFP_NOWARN for allocations.
David Hildenbrand (8):
powernv/memtrace: don't leak kernel memory to user space
powernv/memtrace: fix crashing the kernel when enabling concurrently
powerpc/mm: factor out creating/removing linear mapping
powerpc/mm: protect linear mapping modifications by a mutex
powerpc/mm: print warning in arch_remove_linear_mapping()
powerpc/book3s64/hash: drop WARN_ON in hash__remove_section_mapping
powerpc/mm: remove linear mapping if __add_pages() fails in
arch_add_memory()
powernv/memtrace: don't abuse memory hot(un)plug infrastructure for
memory allocations
arch/powerpc/mm/book3s64/hash_utils.c | 1 -
arch/powerpc/mm/mem.c | 53 +++++--
arch/powerpc/platforms/powernv/Kconfig | 8 +-
arch/powerpc/platforms/powernv/memtrace.c | 175 ++++++++++------------
include/linux/memory_hotplug.h | 3 +
5 files changed, 125 insertions(+), 115 deletions(-)
--
2.26.2
^ permalink raw reply
* [PATCH v2 1/8] powernv/memtrace: don't leak kernel memory to user space
From: David Hildenbrand @ 2020-11-11 14:53 UTC (permalink / raw)
To: linux-kernel
Cc: Michal Hocko, Wei Yang, David Hildenbrand, stable, linux-mm,
Paul Mackerras, Rashmica Gupta, linuxppc-dev, Andrew Morton,
Mike Rapoport, Oscar Salvador
In-Reply-To: <20201111145322.15793-1-david@redhat.com>
We currently leak kernel memory to user space, because memory offlining
doesn't do any implicit clearing of memory and we are missing explicit
clearing of memory.
Let's keep it simple and clear pages before removing the linear mapping.
Reproduced in QEMU/TCG with 10 GiB of main memory:
[root@localhost ~]# dd obs=9G if=/dev/urandom of=/dev/null
[... wait until "free -m" used counter no longer changes and cancel]
19665802+0 records in
1+0 records out
9663676416 bytes (9.7 GB, 9.0 GiB) copied, 135.548 s, 71.3 MB/s
[root@localhost ~]# cat /sys/devices/system/memory/block_size_bytes
40000000
[root@localhost ~]# echo 0x40000000 > /sys/kernel/debug/powerpc/memtrace/enable
[ 402.978663][ T1086] page:000000001bc4bc74 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x24900
[ 402.980063][ T1086] flags: 0x7ffff000001000(reserved)
[ 402.980415][ T1086] raw: 007ffff000001000 c00c000000924008 c00c000000924008 0000000000000000
[ 402.980627][ T1086] raw: 0000000000000000 0000000000000000 00000001ffffffff 0000000000000000
[ 402.980845][ T1086] page dumped because: unmovable page
[ 402.989608][ T1086] Offlined Pages 16384
[ 403.324155][ T1086] memtrace: Allocated trace memory on node 0 at 0x0000000200000000
Before this patch:
[root@localhost ~]# hexdump -C /sys/kernel/debug/powerpc/memtrace/00000000/trace | head
00000000 c8 25 72 51 4d 26 36 c5 5c c2 56 15 d5 1a cd 10 |.%rQM&6.\.V.....|
00000010 19 b9 50 b2 cb e3 60 b8 ec 0a f3 ec 4b 3c 39 f0 |..P...`.....K<9.|$
00000020 4e 5a 4c cf bd 26 19 ff 37 79 13 67 24 b7 b8 57 |NZL..&..7y.g$..W|$
00000030 98 3e f5 be 6f 14 6a bd a4 52 bc 6e e9 e0 c1 5d |.>..o.j..R.n...]|$
00000040 76 b3 ae b5 88 d7 da e3 64 23 85 2c 10 88 07 b6 |v.......d#.,....|$
00000050 9a d8 91 de f7 50 27 69 2e 64 9c 6f d3 19 45 79 |.....P'i.d.o..Ey|$
00000060 6a 6f 8a 61 71 19 1f c7 f1 df 28 26 ca 0f 84 55 |jo.aq.....(&...U|$
00000070 01 3f be e4 e2 e1 da ff 7b 8c 8e 32 37 b4 24 53 |.?......{..27.$S|$
00000080 1b 70 30 45 56 e6 8c c4 0e b5 4c fb 9f dd 88 06 |.p0EV.....L.....|$
00000090 ef c4 18 79 f1 60 b1 5c 79 59 4d f4 36 d7 4a 5c |...y.`.\yYM.6.J\|$
After this patch:
[root@localhost ~]# hexdump -C /sys/kernel/debug/powerpc/memtrace/00000000/trace | head
00000000 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
*
40000000
Reported-by: Michael Ellerman <mpe@ellerman.id.au>
Fixes: 9d5171a8f248 ("powerpc/powernv: Enable removal of memory for in memory tracing")
Cc: stable@vger.kernel.org # v4.14+
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Rashmica Gupta <rashmica.g@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Wei Yang <richard.weiyang@linux.alibaba.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
arch/powerpc/platforms/powernv/memtrace.c | 22 ++++++++++++++++++++++
1 file changed, 22 insertions(+)
diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c
index 6828108486f8..eea1f94482ff 100644
--- a/arch/powerpc/platforms/powernv/memtrace.c
+++ b/arch/powerpc/platforms/powernv/memtrace.c
@@ -67,6 +67,23 @@ static int change_memblock_state(struct memory_block *mem, void *arg)
return 0;
}
+static void memtrace_clear_range(unsigned long start_pfn,
+ unsigned long nr_pages)
+{
+ unsigned long pfn;
+
+ /*
+ * As pages are offline, we cannot trust the memmap anymore. As HIGHMEM
+ * does not apply, avoid passing around "struct page" and use
+ * clear_page() instead directly.
+ */
+ for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) {
+ if (IS_ALIGNED(pfn, PAGES_PER_SECTION))
+ cond_resched();
+ clear_page(__va(PFN_PHYS(pfn)));
+ }
+}
+
/* called with device_hotplug_lock held */
static bool memtrace_offline_pages(u32 nid, u64 start_pfn, u64 nr_pages)
{
@@ -111,6 +128,11 @@ static u64 memtrace_alloc_node(u32 nid, u64 size)
lock_device_hotplug();
for (base_pfn = end_pfn; base_pfn > start_pfn; base_pfn -= nr_pages) {
if (memtrace_offline_pages(nid, base_pfn, nr_pages) == true) {
+ /*
+ * Clear the range while we still have a linear
+ * mapping.
+ */
+ memtrace_clear_range(base_pfn, nr_pages);
/*
* Remove memory in memory block size chunks so that
* iomem resources are always split to the same size and
--
2.26.2
^ permalink raw reply related
* [PATCH v2 2/8] powernv/memtrace: fix crashing the kernel when enabling concurrently
From: David Hildenbrand @ 2020-11-11 14:53 UTC (permalink / raw)
To: linux-kernel
Cc: David Hildenbrand, stable, linux-mm, Paul Mackerras,
Rashmica Gupta, linuxppc-dev
In-Reply-To: <20201111145322.15793-1-david@redhat.com>
It's very easy to crash the kernel right now by simply trying to enable
memtrace concurrently, hammering on the "enable" interface
loop.sh:
#!/bin/bash
dmesg --console-off
while true; do
echo 0x40000000 > /sys/kernel/debug/powerpc/memtrace/enable
done
[root@localhost ~]# loop.sh &
[root@localhost ~]# loop.sh &
Resulting quickly in a kernel crash. Let's properly protect using a
mutex.
Fixes: 9d5171a8f248 ("powerpc/powernv: Enable removal of memory for in memory tracing")
Cc: stable@vger.kernel.org # v4.14+
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Rashmica Gupta <rashmica.g@gmail.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
arch/powerpc/platforms/powernv/memtrace.c | 22 +++++++++++++++-------
1 file changed, 15 insertions(+), 7 deletions(-)
diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c
index eea1f94482ff..0e42fe2d7b6a 100644
--- a/arch/powerpc/platforms/powernv/memtrace.c
+++ b/arch/powerpc/platforms/powernv/memtrace.c
@@ -30,6 +30,7 @@ struct memtrace_entry {
char name[16];
};
+static DEFINE_MUTEX(memtrace_mutex);
static u64 memtrace_size;
static struct memtrace_entry *memtrace_array;
@@ -279,6 +280,7 @@ static int memtrace_online(void)
static int memtrace_enable_set(void *data, u64 val)
{
+ int rc = -EAGAIN;
u64 bytes;
/*
@@ -291,25 +293,31 @@ static int memtrace_enable_set(void *data, u64 val)
return -EINVAL;
}
+ mutex_lock(&memtrace_mutex);
+
/* Re-add/online previously removed/offlined memory */
if (memtrace_size) {
if (memtrace_online())
- return -EAGAIN;
+ goto out_unlock;
}
- if (!val)
- return 0;
+ if (!val) {
+ rc = 0;
+ goto out_unlock;
+ }
/* Offline and remove memory */
if (memtrace_init_regions_runtime(val))
- return -EINVAL;
+ goto out_unlock;
if (memtrace_init_debugfs())
- return -EINVAL;
+ goto out_unlock;
memtrace_size = val;
-
- return 0;
+ rc = 0;
+out_unlock:
+ mutex_unlock(&memtrace_mutex);
+ return rc;
}
static int memtrace_enable_get(void *data, u64 *val)
--
2.26.2
^ permalink raw reply related
* [PATCH v2 3/8] powerpc/mm: factor out creating/removing linear mapping
From: David Hildenbrand @ 2020-11-11 14:53 UTC (permalink / raw)
To: linux-kernel
Cc: Michal Hocko, Wei Yang, David Hildenbrand, linux-mm,
Paul Mackerras, Rashmica Gupta, linuxppc-dev, Andrew Morton,
Mike Rapoport, Oscar Salvador
In-Reply-To: <20201111145322.15793-1-david@redhat.com>
We want to stop abusing memory hotplug infrastructure in memtrace code
to perform allocations and remove the linear mapping. Instead we will use
alloc_contig_pages() and remove the linear mapping manually.
Let's factor out creating/removing the linear mapping into
arch_create_linear_mapping() / arch_remove_linear_mapping() - so in the
future, we might be able to have whole arch_add_memory() /
arch_remove_memory() be implemented in common code.
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Rashmica Gupta <rashmica.g@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Wei Yang <richard.weiyang@linux.alibaba.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
arch/powerpc/mm/mem.c | 41 +++++++++++++++++++++++-----------
include/linux/memory_hotplug.h | 3 +++
2 files changed, 31 insertions(+), 13 deletions(-)
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 01ec2a252f09..8a86d81f8df0 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -120,34 +120,26 @@ static void flush_dcache_range_chunked(unsigned long start, unsigned long stop,
}
}
-int __ref arch_add_memory(int nid, u64 start, u64 size,
- struct mhp_params *params)
+int __ref arch_create_linear_mapping(int nid, u64 start, u64 size,
+ struct mhp_params *params)
{
- unsigned long start_pfn = start >> PAGE_SHIFT;
- unsigned long nr_pages = size >> PAGE_SHIFT;
int rc;
start = (unsigned long)__va(start);
rc = create_section_mapping(start, start + size, nid,
params->pgprot);
if (rc) {
- pr_warn("Unable to create mapping for hot added memory 0x%llx..0x%llx: %d\n",
+ pr_warn("Unable to create linear mapping for 0x%llx..0x%llx: %d\n",
start, start + size, rc);
return -EFAULT;
}
-
- return __add_pages(nid, start_pfn, nr_pages, params);
+ return 0;
}
-void __ref arch_remove_memory(int nid, u64 start, u64 size,
- struct vmem_altmap *altmap)
+void __ref arch_remove_linear_mapping(u64 start, u64 size)
{
- unsigned long start_pfn = start >> PAGE_SHIFT;
- unsigned long nr_pages = size >> PAGE_SHIFT;
int ret;
- __remove_pages(start_pfn, nr_pages, altmap);
-
/* Remove htab bolted mappings for this section of memory */
start = (unsigned long)__va(start);
flush_dcache_range_chunked(start, start + size, FLUSH_CHUNK_SIZE);
@@ -160,6 +152,29 @@ void __ref arch_remove_memory(int nid, u64 start, u64 size,
*/
vm_unmap_aliases();
}
+
+int __ref arch_add_memory(int nid, u64 start, u64 size,
+ struct mhp_params *params)
+{
+ unsigned long start_pfn = start >> PAGE_SHIFT;
+ unsigned long nr_pages = size >> PAGE_SHIFT;
+ int rc;
+
+ rc = arch_create_linear_mapping(nid, start, size, params);
+ if (rc)
+ return rc;
+ return __add_pages(nid, start_pfn, nr_pages, params);
+}
+
+void __ref arch_remove_memory(int nid, u64 start, u64 size,
+ struct vmem_altmap *altmap)
+{
+ unsigned long start_pfn = start >> PAGE_SHIFT;
+ unsigned long nr_pages = size >> PAGE_SHIFT;
+
+ __remove_pages(start_pfn, nr_pages, altmap);
+ arch_remove_linear_mapping(start, size);
+}
#endif
#ifndef CONFIG_NEED_MULTIPLE_NODES
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index d65c6fdc5cfc..00b9e9bd3850 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -375,6 +375,9 @@ extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map,
unsigned long pnum);
extern struct zone *zone_for_pfn_range(int online_type, int nid, unsigned start_pfn,
unsigned long nr_pages);
+extern int arch_create_linear_mapping(int nid, u64 start, u64 size,
+ struct mhp_params *params);
+void arch_remove_linear_mapping(u64 start, u64 size);
#endif /* CONFIG_MEMORY_HOTPLUG */
#endif /* __LINUX_MEMORY_HOTPLUG_H */
--
2.26.2
^ permalink raw reply related
* [PATCH v2 4/8] powerpc/mm: protect linear mapping modifications by a mutex
From: David Hildenbrand @ 2020-11-11 14:53 UTC (permalink / raw)
To: linux-kernel
Cc: Michal Hocko, Wei Yang, David Hildenbrand, linux-mm,
Paul Mackerras, Rashmica Gupta, linuxppc-dev, Andrew Morton,
Mike Rapoport, Oscar Salvador
In-Reply-To: <20201111145322.15793-1-david@redhat.com>
This code currently relies on mem_hotplug_begin()/mem_hotplug_done() -
create_section_mapping()/remove_section_mapping() implementations
cannot tolerate getting called concurrently.
Let's prepare for callers (memtrace) not holding any such locks (and
don't force them to mess with memory hotplug locks).
Other parts in these functions don't seem to rely on external locking.
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Rashmica Gupta <rashmica.g@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Wei Yang <richard.weiyang@linux.alibaba.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
arch/powerpc/mm/mem.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 8a86d81f8df0..ca5c4b54c366 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -58,6 +58,7 @@
#define CPU_FTR_NOEXECUTE 0
#endif
+static DEFINE_MUTEX(linear_mapping_mutex);
unsigned long long memory_limit;
bool init_mem_is_free;
@@ -126,8 +127,10 @@ int __ref arch_create_linear_mapping(int nid, u64 start, u64 size,
int rc;
start = (unsigned long)__va(start);
+ mutex_lock(&linear_mapping_mutex);
rc = create_section_mapping(start, start + size, nid,
params->pgprot);
+ mutex_unlock(&linear_mapping_mutex);
if (rc) {
pr_warn("Unable to create linear mapping for 0x%llx..0x%llx: %d\n",
start, start + size, rc);
@@ -144,7 +147,9 @@ void __ref arch_remove_linear_mapping(u64 start, u64 size)
start = (unsigned long)__va(start);
flush_dcache_range_chunked(start, start + size, FLUSH_CHUNK_SIZE);
+ mutex_lock(&linear_mapping_mutex);
ret = remove_section_mapping(start, start + size);
+ mutex_unlock(&linear_mapping_mutex);
WARN_ON_ONCE(ret);
/* Ensure all vmalloc mappings are flushed in case they also
--
2.26.2
^ permalink raw reply related
* [PATCH v2 5/8] powerpc/mm: print warning in arch_remove_linear_mapping()
From: David Hildenbrand @ 2020-11-11 14:53 UTC (permalink / raw)
To: linux-kernel
Cc: Michal Hocko, Wei Yang, David Hildenbrand, linux-mm,
Paul Mackerras, Rashmica Gupta, linuxppc-dev, Andrew Morton,
Mike Rapoport, Oscar Salvador
In-Reply-To: <20201111145322.15793-1-david@redhat.com>
Let's print a warning similar to the one in arch_create_linear_mapping()
instead of WARN_ON_ONCE() and potentially crashing the kernel.
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Rashmica Gupta <rashmica.g@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Wei Yang <richard.weiyang@linux.alibaba.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
arch/powerpc/mm/mem.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index ca5c4b54c366..c5755b9efb64 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -150,7 +150,9 @@ void __ref arch_remove_linear_mapping(u64 start, u64 size)
mutex_lock(&linear_mapping_mutex);
ret = remove_section_mapping(start, start + size);
mutex_unlock(&linear_mapping_mutex);
- WARN_ON_ONCE(ret);
+ if (ret)
+ pr_warn("Unable to remove linear mapping for 0x%llx..0x%llx: %d\n",
+ start, start + size, ret);
/* Ensure all vmalloc mappings are flushed in case they also
* hit that section of memory
--
2.26.2
^ permalink raw reply related
* [PATCH v2 6/8] powerpc/book3s64/hash: drop WARN_ON in hash__remove_section_mapping
From: David Hildenbrand @ 2020-11-11 14:53 UTC (permalink / raw)
To: linux-kernel
Cc: Michal Hocko, Wei Yang, David Hildenbrand, Nicholas Piggin,
linux-mm, Paul Mackerras, Aneesh Kumar K.V, Rashmica Gupta,
linuxppc-dev, Andrew Morton, Mike Rapoport, Oscar Salvador
In-Reply-To: <20201111145322.15793-1-david@redhat.com>
The single caller (arch_remove_linear_mapping()) prints a proper warning
when this function fails. No need to potentially crash the kernel - let's
drop this WARN_ON.
Suggested-by: Oscar Salvador <osalvador@suse.de>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Rashmica Gupta <rashmica.g@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Wei Yang <richard.weiyang@linux.alibaba.com>
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
Cc: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
arch/powerpc/mm/book3s64/hash_utils.c | 1 -
1 file changed, 1 deletion(-)
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index 24702c0a92e0..d2dcb7757c68 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -845,7 +845,6 @@ int hash__remove_section_mapping(unsigned long start, unsigned long end)
{
int rc = htab_remove_mapping(start, end, mmu_linear_psize,
mmu_kernel_ssize);
- WARN_ON(rc < 0);
if (resize_hpt_for_hotplug(memblock_phys_mem_size()) == -ENOSPC)
pr_warn("Hash collision while resizing HPT\n");
--
2.26.2
^ permalink raw reply related
* [PATCH v2 7/8] powerpc/mm: remove linear mapping if __add_pages() fails in arch_add_memory()
From: David Hildenbrand @ 2020-11-11 14:53 UTC (permalink / raw)
To: linux-kernel
Cc: Michal Hocko, Wei Yang, David Hildenbrand, linux-mm,
Paul Mackerras, Rashmica Gupta, linuxppc-dev, Andrew Morton,
Mike Rapoport, Oscar Salvador
In-Reply-To: <20201111145322.15793-1-david@redhat.com>
Let's revert what we did in case something goes wrong and we return an
error - as already done on arm64 and s390x.
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Rashmica Gupta <rashmica.g@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Wei Yang <richard.weiyang@linux.alibaba.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
arch/powerpc/mm/mem.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index c5755b9efb64..8b946ec68d1b 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -170,7 +170,10 @@ int __ref arch_add_memory(int nid, u64 start, u64 size,
rc = arch_create_linear_mapping(nid, start, size, params);
if (rc)
return rc;
- return __add_pages(nid, start_pfn, nr_pages, params);
+ rc = __add_pages(nid, start_pfn, nr_pages, params);
+ if (rc)
+ arch_remove_linear_mapping(start, size);
+ return rc;
}
void __ref arch_remove_memory(int nid, u64 start, u64 size,
--
2.26.2
^ permalink raw reply related
* [PATCH v2 8/8] powernv/memtrace: don't abuse memory hot(un)plug infrastructure for memory allocations
From: David Hildenbrand @ 2020-11-11 14:53 UTC (permalink / raw)
To: linux-kernel
Cc: Michal Hocko, Wei Yang, David Hildenbrand, Michal Hocko, linux-mm,
Paul Mackerras, Rashmica Gupta, linuxppc-dev, Andrew Morton,
Mike Rapoport, Oscar Salvador
In-Reply-To: <20201111145322.15793-1-david@redhat.com>
Let's use alloc_contig_pages() for allocating memory and remove the
linear mapping manually via arch_remove_linear_mapping(). Mark all pages
PG_offline, such that they will definitely not get touched - e.g.,
when hibernating. When freeing memory, try to revert what we did.
The original idea was discussed in:
https://lkml.kernel.org/r/48340e96-7e6b-736f-9e23-d3111b915b6e@redhat.com
This is similar to CONFIG_DEBUG_PAGEALLOC handling on other
architectures, whereby only single pages are unmapped from the linear
mapping. Let's mimic what memory hot(un)plug would do with the linear
mapping.
We now need MEMORY_HOTPLUG and CONTIG_ALLOC as dependencies. Add a TODO
that we want to use __GFP_ZERO for clearing once alloc_contig_pages()
understands that.
Tested in QEMU/TCG with 10 GiB of main memory:
[root@localhost ~]# echo 0x40000000 > /sys/kernel/debug/powerpc/memtrace/enable
[ 105.903043][ T1080] memtrace: Allocated trace memory on node 0 at 0x0000000080000000
[root@localhost ~]# echo 0x40000000 > /sys/kernel/debug/powerpc/memtrace/enable
[ 145.042493][ T1080] radix-mmu: Mapped 0x0000000080000000-0x00000000c0000000 with 64.0 KiB pages
[ 145.049019][ T1080] memtrace: Freed trace memory back on node 0
[ 145.333960][ T1080] memtrace: Allocated trace memory on node 0 at 0x0000000080000000
[root@localhost ~]# echo 0x80000000 > /sys/kernel/debug/powerpc/memtrace/enable
[ 213.606916][ T1080] radix-mmu: Mapped 0x0000000080000000-0x00000000c0000000 with 64.0 KiB pages
[ 213.613855][ T1080] memtrace: Freed trace memory back on node 0
[ 214.185094][ T1080] memtrace: Allocated trace memory on node 0 at 0x0000000080000000
[root@localhost ~]# echo 0x100000000 > /sys/kernel/debug/powerpc/memtrace/enable
[ 234.874872][ T1080] radix-mmu: Mapped 0x0000000080000000-0x0000000100000000 with 64.0 KiB pages
[ 234.886974][ T1080] memtrace: Freed trace memory back on node 0
[ 234.890153][ T1080] memtrace: Failed to allocate trace memory on node 0
[root@localhost ~]# echo 0x40000000 > /sys/kernel/debug/powerpc/memtrace/enable
[ 259.490196][ T1080] memtrace: Allocated trace memory on node 0 at 0x0000000080000000
I also made sure allocated memory is properly zeroed.
Note 1: We currently won't be allocating from ZONE_MOVABLE - because our
pages are not movable. However, as we don't run with any memory
hot(un)plug mechanism around, we could make an exception to
increase the chance of allocations succeeding.
Note 2: PG_reserved isn't sufficient. E.g., kernel_page_present() used
along PG_reserved in hibernation code will always return "true"
on powerpc, resulting in the pages getting touched. It's too
generic - e.g., indicates boot allocations.
Note 3: For now, we keep using memory_block_size_bytes() as minimum
granularity.
Suggested-by: Michal Hocko <mhocko@kernel.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Rashmica Gupta <rashmica.g@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Wei Yang <richard.weiyang@linux.alibaba.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
arch/powerpc/platforms/powernv/Kconfig | 8 +-
arch/powerpc/platforms/powernv/memtrace.c | 163 ++++++++--------------
2 files changed, 62 insertions(+), 109 deletions(-)
diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig
index 938803eab0ad..619b093a0657 100644
--- a/arch/powerpc/platforms/powernv/Kconfig
+++ b/arch/powerpc/platforms/powernv/Kconfig
@@ -27,11 +27,11 @@ config OPAL_PRD
recovery diagnostics on OpenPower machines
config PPC_MEMTRACE
- bool "Enable removal of RAM from kernel mappings for tracing"
- depends on PPC_POWERNV && MEMORY_HOTREMOVE
+ bool "Enable runtime allocation of RAM for tracing"
+ depends on PPC_POWERNV && MEMORY_HOTPLUG && CONTIG_ALLOC
help
- Enabling this option allows for the removal of memory (RAM)
- from the kernel mappings to be used for hardware tracing.
+ Enabling this option allows for runtime allocation of memory (RAM)
+ for hardware tracing.
config PPC_VAS
bool "IBM Virtual Accelerator Switchboard (VAS)"
diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c
index 0e42fe2d7b6a..5fc9408bb0b3 100644
--- a/arch/powerpc/platforms/powernv/memtrace.c
+++ b/arch/powerpc/platforms/powernv/memtrace.c
@@ -51,33 +51,12 @@ static const struct file_operations memtrace_fops = {
.open = simple_open,
};
-static int check_memblock_online(struct memory_block *mem, void *arg)
-{
- if (mem->state != MEM_ONLINE)
- return -1;
-
- return 0;
-}
-
-static int change_memblock_state(struct memory_block *mem, void *arg)
-{
- unsigned long state = (unsigned long)arg;
-
- mem->state = state;
-
- return 0;
-}
-
static void memtrace_clear_range(unsigned long start_pfn,
unsigned long nr_pages)
{
unsigned long pfn;
- /*
- * As pages are offline, we cannot trust the memmap anymore. As HIGHMEM
- * does not apply, avoid passing around "struct page" and use
- * clear_page() instead directly.
- */
+ /* As HIGHMEM does not apply, use clear_page() directly. */
for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) {
if (IS_ALIGNED(pfn, PAGES_PER_SECTION))
cond_resched();
@@ -85,72 +64,39 @@ static void memtrace_clear_range(unsigned long start_pfn,
}
}
-/* called with device_hotplug_lock held */
-static bool memtrace_offline_pages(u32 nid, u64 start_pfn, u64 nr_pages)
-{
- const unsigned long start = PFN_PHYS(start_pfn);
- const unsigned long size = PFN_PHYS(nr_pages);
-
- if (walk_memory_blocks(start, size, NULL, check_memblock_online))
- return false;
-
- walk_memory_blocks(start, size, (void *)MEM_GOING_OFFLINE,
- change_memblock_state);
-
- if (offline_pages(start_pfn, nr_pages)) {
- walk_memory_blocks(start, size, (void *)MEM_ONLINE,
- change_memblock_state);
- return false;
- }
-
- walk_memory_blocks(start, size, (void *)MEM_OFFLINE,
- change_memblock_state);
-
-
- return true;
-}
-
static u64 memtrace_alloc_node(u32 nid, u64 size)
{
- u64 start_pfn, end_pfn, nr_pages, pfn;
- u64 base_pfn;
- u64 bytes = memory_block_size_bytes();
+ const unsigned long nr_pages = PHYS_PFN(size);
+ unsigned long pfn, start_pfn;
+ struct page *page;
- if (!node_spanned_pages(nid))
+ /*
+ * Trace memory needs to be aligned to the size, which is guaranteed
+ * by alloc_contig_pages().
+ */
+ page = alloc_contig_pages(nr_pages, GFP_KERNEL | __GFP_THISNODE |
+ __GFP_NOWARN, nid, NULL);
+ if (!page)
return 0;
+ start_pfn = page_to_pfn(page);
- start_pfn = node_start_pfn(nid);
- end_pfn = node_end_pfn(nid);
- nr_pages = size >> PAGE_SHIFT;
-
- /* Trace memory needs to be aligned to the size */
- end_pfn = round_down(end_pfn - nr_pages, nr_pages);
-
- lock_device_hotplug();
- for (base_pfn = end_pfn; base_pfn > start_pfn; base_pfn -= nr_pages) {
- if (memtrace_offline_pages(nid, base_pfn, nr_pages) == true) {
- /*
- * Clear the range while we still have a linear
- * mapping.
- */
- memtrace_clear_range(base_pfn, nr_pages);
- /*
- * Remove memory in memory block size chunks so that
- * iomem resources are always split to the same size and
- * we never try to remove memory that spans two iomem
- * resources.
- */
- end_pfn = base_pfn + nr_pages;
- for (pfn = base_pfn; pfn < end_pfn; pfn += bytes>> PAGE_SHIFT) {
- __remove_memory(nid, pfn << PAGE_SHIFT, bytes);
- }
- unlock_device_hotplug();
- return base_pfn << PAGE_SHIFT;
- }
- }
- unlock_device_hotplug();
+ /*
+ * Clear the range while we still have a linear mapping.
+ *
+ * TODO: use __GFP_ZERO with alloc_contig_pages() once supported.
+ */
+ memtrace_clear_range(start_pfn, nr_pages);
- return 0;
+ /*
+ * Set pages PageOffline(), to indicate that nobody (e.g., hibernation,
+ * dumping, ...) should be touching these pages.
+ */
+ for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++)
+ __SetPageOffline(pfn_to_page(pfn));
+
+ arch_remove_linear_mapping(PFN_PHYS(start_pfn), size);
+
+ return PFN_PHYS(start_pfn);
}
static int memtrace_init_regions_runtime(u64 size)
@@ -220,16 +166,30 @@ static int memtrace_init_debugfs(void)
return ret;
}
-static int online_mem_block(struct memory_block *mem, void *arg)
+static int memtrace_free(int nid, u64 start, u64 size)
{
- return device_online(&mem->dev);
+ struct mhp_params params = { .pgprot = PAGE_KERNEL };
+ const unsigned long nr_pages = PHYS_PFN(size);
+ const unsigned long start_pfn = PHYS_PFN(start);
+ unsigned long pfn;
+ int ret;
+
+ ret = arch_create_linear_mapping(nid, start, size, &params);
+ if (ret)
+ return ret;
+
+ for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++)
+ __ClearPageOffline(pfn_to_page(pfn));
+
+ free_contig_range(start_pfn, nr_pages);
+ return 0;
}
/*
- * Iterate through the chunks of memory we have removed from the kernel
- * and attempt to add them back to the kernel.
+ * Iterate through the chunks of memory we allocated and attempt to expose
+ * them back to the kernel.
*/
-static int memtrace_online(void)
+static int memtrace_free_regions(void)
{
int i, ret = 0;
struct memtrace_entry *ent;
@@ -237,7 +197,7 @@ static int memtrace_online(void)
for (i = memtrace_array_nr - 1; i >= 0; i--) {
ent = &memtrace_array[i];
- /* We have onlined this chunk previously */
+ /* We have freed this chunk previously */
if (ent->nid == NUMA_NO_NODE)
continue;
@@ -247,30 +207,25 @@ static int memtrace_online(void)
ent->mem = 0;
}
- if (add_memory(ent->nid, ent->start, ent->size, MHP_NONE)) {
- pr_err("Failed to add trace memory to node %d\n",
+ if (memtrace_free(ent->nid, ent->start, ent->size)) {
+ pr_err("Failed to free trace memory on node %d\n",
ent->nid);
ret += 1;
continue;
}
- lock_device_hotplug();
- walk_memory_blocks(ent->start, ent->size, NULL,
- online_mem_block);
- unlock_device_hotplug();
-
/*
- * Memory was added successfully so clean up references to it
- * so on reentry we can tell that this chunk was added.
+ * Memory was freed successfully so clean up references to it
+ * so on reentry we can tell that this chunk was freed.
*/
debugfs_remove_recursive(ent->dir);
- pr_info("Added trace memory back to node %d\n", ent->nid);
+ pr_info("Freed trace memory back on node %d\n", ent->nid);
ent->size = ent->start = ent->nid = NUMA_NO_NODE;
}
if (ret)
return ret;
- /* If all chunks of memory were added successfully, reset globals */
+ /* If all chunks of memory were freed successfully, reset globals */
kfree(memtrace_array);
memtrace_array = NULL;
memtrace_size = 0;
@@ -295,18 +250,16 @@ static int memtrace_enable_set(void *data, u64 val)
mutex_lock(&memtrace_mutex);
- /* Re-add/online previously removed/offlined memory */
- if (memtrace_size) {
- if (memtrace_online())
- goto out_unlock;
- }
+ /* Free all previously allocated memory. */
+ if (memtrace_size && memtrace_free_regions())
+ goto out_unlock;
if (!val) {
rc = 0;
goto out_unlock;
}
- /* Offline and remove memory */
+ /* Allocate memory. */
if (memtrace_init_regions_runtime(val))
goto out_unlock;
--
2.26.2
^ permalink raw reply related
* Re: [PATCH 3/3] powerpc: rewrite atomics to use ARCH_ATOMIC
From: kernel test robot @ 2020-11-11 19:07 UTC (permalink / raw)
To: Nicholas Piggin, linuxppc-dev
Cc: Christophe Leroy, kbuild-all, Arnd Bergmann, Peter Zijlstra,
Boqun Feng, linux-kernel, Nicholas Piggin, Alexey Kardashevskiy,
Will Deacon
In-Reply-To: <20201111110723.3148665-4-npiggin@gmail.com>
[-- Attachment #1: Type: text/plain, Size: 13242 bytes --]
Hi Nicholas,
I love your patch! Perhaps something to improve:
[auto build test WARNING on powerpc/next]
[also build test WARNING on asm-generic/master linus/master v5.10-rc3 next-20201111]
[cannot apply to scottwood/next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]
url: https://github.com/0day-ci/linux/commits/Nicholas-Piggin/powerpc-convert-to-use-ARCH_ATOMIC/20201111-190941
base: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
config: powerpc-allyesconfig (attached as .config)
compiler: powerpc64-linux-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# https://github.com/0day-ci/linux/commit/9e1bec8fe216b0745c647e52c40d1f0033fb4efd
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review Nicholas-Piggin/powerpc-convert-to-use-ARCH_ATOMIC/20201111-190941
git checkout 9e1bec8fe216b0745c647e52c40d1f0033fb4efd
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=powerpc
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>
All warnings (new ones prefixed by >>):
In file included from arch/powerpc/include/asm/atomic.h:11,
from include/linux/atomic.h:7,
from include/linux/rcupdate.h:25,
from include/linux/rculist.h:11,
from include/linux/sched/signal.h:5,
from drivers/gpu/drm/drm_lock.c:37:
drivers/gpu/drm/drm_lock.c: In function 'drm_lock_take':
>> arch/powerpc/include/asm/cmpxchg.h:463:41: warning: passing argument 1 of '__cmpxchg_relaxed' discards 'volatile' qualifier from pointer target type [-Wdiscarded-qualifiers]
463 | (__typeof__(*(ptr))) __cmpxchg_relaxed((ptr), \
| ^~~~~
include/linux/atomic.h:73:9: note: in expansion of macro 'arch_cmpxchg_relaxed'
73 | typeof(op##_relaxed(args)) __ret; \
| ^~
include/linux/atomic-arch-fallback.h:52:2: note: in expansion of macro '__atomic_op_fence'
52 | __atomic_op_fence(arch_cmpxchg, __VA_ARGS__)
| ^~~~~~~~~~~~~~~~~
include/asm-generic/atomic-instrumented.h:1685:2: note: in expansion of macro 'arch_cmpxchg'
1685 | arch_cmpxchg(__ai_ptr, __VA_ARGS__); \
| ^~~~~~~~~~~~
drivers/gpu/drm/drm_lock.c:75:10: note: in expansion of macro 'cmpxchg'
75 | prev = cmpxchg(lock, old, new);
| ^~~~~~~
arch/powerpc/include/asm/cmpxchg.h:432:25: note: expected 'void *' but argument is of type 'volatile unsigned int *'
432 | __cmpxchg_relaxed(void *ptr, unsigned long old, unsigned long new,
| ~~~~~~^~~
>> arch/powerpc/include/asm/cmpxchg.h:463:41: warning: passing argument 1 of '__cmpxchg_relaxed' discards 'volatile' qualifier from pointer target type [-Wdiscarded-qualifiers]
463 | (__typeof__(*(ptr))) __cmpxchg_relaxed((ptr), \
| ^~~~~
include/linux/atomic.h:75:10: note: in expansion of macro 'arch_cmpxchg_relaxed'
75 | __ret = op##_relaxed(args); \
| ^~
include/linux/atomic-arch-fallback.h:52:2: note: in expansion of macro '__atomic_op_fence'
52 | __atomic_op_fence(arch_cmpxchg, __VA_ARGS__)
| ^~~~~~~~~~~~~~~~~
include/asm-generic/atomic-instrumented.h:1685:2: note: in expansion of macro 'arch_cmpxchg'
1685 | arch_cmpxchg(__ai_ptr, __VA_ARGS__); \
| ^~~~~~~~~~~~
drivers/gpu/drm/drm_lock.c:75:10: note: in expansion of macro 'cmpxchg'
75 | prev = cmpxchg(lock, old, new);
| ^~~~~~~
arch/powerpc/include/asm/cmpxchg.h:432:25: note: expected 'void *' but argument is of type 'volatile unsigned int *'
432 | __cmpxchg_relaxed(void *ptr, unsigned long old, unsigned long new,
| ~~~~~~^~~
drivers/gpu/drm/drm_lock.c: In function 'drm_lock_transfer':
>> arch/powerpc/include/asm/cmpxchg.h:463:41: warning: passing argument 1 of '__cmpxchg_relaxed' discards 'volatile' qualifier from pointer target type [-Wdiscarded-qualifiers]
463 | (__typeof__(*(ptr))) __cmpxchg_relaxed((ptr), \
| ^~~~~
include/linux/atomic.h:73:9: note: in expansion of macro 'arch_cmpxchg_relaxed'
73 | typeof(op##_relaxed(args)) __ret; \
| ^~
include/linux/atomic-arch-fallback.h:52:2: note: in expansion of macro '__atomic_op_fence'
52 | __atomic_op_fence(arch_cmpxchg, __VA_ARGS__)
| ^~~~~~~~~~~~~~~~~
include/asm-generic/atomic-instrumented.h:1685:2: note: in expansion of macro 'arch_cmpxchg'
1685 | arch_cmpxchg(__ai_ptr, __VA_ARGS__); \
| ^~~~~~~~~~~~
drivers/gpu/drm/drm_lock.c:118:10: note: in expansion of macro 'cmpxchg'
118 | prev = cmpxchg(lock, old, new);
| ^~~~~~~
arch/powerpc/include/asm/cmpxchg.h:432:25: note: expected 'void *' but argument is of type 'volatile unsigned int *'
432 | __cmpxchg_relaxed(void *ptr, unsigned long old, unsigned long new,
| ~~~~~~^~~
>> arch/powerpc/include/asm/cmpxchg.h:463:41: warning: passing argument 1 of '__cmpxchg_relaxed' discards 'volatile' qualifier from pointer target type [-Wdiscarded-qualifiers]
463 | (__typeof__(*(ptr))) __cmpxchg_relaxed((ptr), \
| ^~~~~
include/linux/atomic.h:75:10: note: in expansion of macro 'arch_cmpxchg_relaxed'
75 | __ret = op##_relaxed(args); \
| ^~
include/linux/atomic-arch-fallback.h:52:2: note: in expansion of macro '__atomic_op_fence'
52 | __atomic_op_fence(arch_cmpxchg, __VA_ARGS__)
| ^~~~~~~~~~~~~~~~~
include/asm-generic/atomic-instrumented.h:1685:2: note: in expansion of macro 'arch_cmpxchg'
1685 | arch_cmpxchg(__ai_ptr, __VA_ARGS__); \
| ^~~~~~~~~~~~
drivers/gpu/drm/drm_lock.c:118:10: note: in expansion of macro 'cmpxchg'
118 | prev = cmpxchg(lock, old, new);
| ^~~~~~~
arch/powerpc/include/asm/cmpxchg.h:432:25: note: expected 'void *' but argument is of type 'volatile unsigned int *'
432 | __cmpxchg_relaxed(void *ptr, unsigned long old, unsigned long new,
| ~~~~~~^~~
drivers/gpu/drm/drm_lock.c: In function 'drm_legacy_lock_free':
>> arch/powerpc/include/asm/cmpxchg.h:463:41: warning: passing argument 1 of '__cmpxchg_relaxed' discards 'volatile' qualifier from pointer target type [-Wdiscarded-qualifiers]
463 | (__typeof__(*(ptr))) __cmpxchg_relaxed((ptr), \
| ^~~~~
include/linux/atomic.h:73:9: note: in expansion of macro 'arch_cmpxchg_relaxed'
73 | typeof(op##_relaxed(args)) __ret; \
| ^~
include/linux/atomic-arch-fallback.h:52:2: note: in expansion of macro '__atomic_op_fence'
52 | __atomic_op_fence(arch_cmpxchg, __VA_ARGS__)
| ^~~~~~~~~~~~~~~~~
include/asm-generic/atomic-instrumented.h:1685:2: note: in expansion of macro 'arch_cmpxchg'
1685 | arch_cmpxchg(__ai_ptr, __VA_ARGS__); \
| ^~~~~~~~~~~~
drivers/gpu/drm/drm_lock.c:141:10: note: in expansion of macro 'cmpxchg'
141 | prev = cmpxchg(lock, old, new);
| ^~~~~~~
arch/powerpc/include/asm/cmpxchg.h:432:25: note: expected 'void *' but argument is of type 'volatile unsigned int *'
432 | __cmpxchg_relaxed(void *ptr, unsigned long old, unsigned long new,
| ~~~~~~^~~
>> arch/powerpc/include/asm/cmpxchg.h:463:41: warning: passing argument 1 of '__cmpxchg_relaxed' discards 'volatile' qualifier from pointer target type [-Wdiscarded-qualifiers]
463 | (__typeof__(*(ptr))) __cmpxchg_relaxed((ptr), \
| ^~~~~
include/linux/atomic.h:75:10: note: in expansion of macro 'arch_cmpxchg_relaxed'
75 | __ret = op##_relaxed(args); \
| ^~
include/linux/atomic-arch-fallback.h:52:2: note: in expansion of macro '__atomic_op_fence'
52 | __atomic_op_fence(arch_cmpxchg, __VA_ARGS__)
| ^~~~~~~~~~~~~~~~~
include/asm-generic/atomic-instrumented.h:1685:2: note: in expansion of macro 'arch_cmpxchg'
1685 | arch_cmpxchg(__ai_ptr, __VA_ARGS__); \
| ^~~~~~~~~~~~
drivers/gpu/drm/drm_lock.c:141:10: note: in expansion of macro 'cmpxchg'
141 | prev = cmpxchg(lock, old, new);
| ^~~~~~~
arch/powerpc/include/asm/cmpxchg.h:432:25: note: expected 'void *' but argument is of type 'volatile unsigned int *'
432 | __cmpxchg_relaxed(void *ptr, unsigned long old, unsigned long new,
| ~~~~~~^~~
drivers/gpu/drm/drm_lock.c: In function 'drm_legacy_idlelock_release':
>> arch/powerpc/include/asm/cmpxchg.h:463:41: warning: passing argument 1 of '__cmpxchg_relaxed' discards 'volatile' qualifier from pointer target type [-Wdiscarded-qualifiers]
463 | (__typeof__(*(ptr))) __cmpxchg_relaxed((ptr), \
| ^~~~~
include/linux/atomic.h:73:9: note: in expansion of macro 'arch_cmpxchg_relaxed'
73 | typeof(op##_relaxed(args)) __ret; \
| ^~
include/linux/atomic-arch-fallback.h:52:2: note: in expansion of macro '__atomic_op_fence'
52 | __atomic_op_fence(arch_cmpxchg, __VA_ARGS__)
| ^~~~~~~~~~~~~~~~~
include/asm-generic/atomic-instrumented.h:1685:2: note: in expansion of macro 'arch_cmpxchg'
1685 | arch_cmpxchg(__ai_ptr, __VA_ARGS__); \
| ^~~~~~~~~~~~
drivers/gpu/drm/drm_lock.c:319:12: note: in expansion of macro 'cmpxchg'
319 | prev = cmpxchg(lock, old, DRM_KERNEL_CONTEXT);
| ^~~~~~~
arch/powerpc/include/asm/cmpxchg.h:432:25: note: expected 'void *' but argument is of type 'volatile unsigned int *'
432 | __cmpxchg_relaxed(void *ptr, unsigned long old, unsigned long new,
| ~~~~~~^~~
>> arch/powerpc/include/asm/cmpxchg.h:463:41: warning: passing argument 1 of '__cmpxchg_relaxed' discards 'volatile' qualifier from pointer target type [-Wdiscarded-qualifiers]
463 | (__typeof__(*(ptr))) __cmpxchg_relaxed((ptr), \
| ^~~~~
include/linux/atomic.h:75:10: note: in expansion of macro 'arch_cmpxchg_relaxed'
75 | __ret = op##_relaxed(args); \
| ^~
include/linux/atomic-arch-fallback.h:52:2: note: in expansion of macro '__atomic_op_fence'
52 | __atomic_op_fence(arch_cmpxchg, __VA_ARGS__)
| ^~~~~~~~~~~~~~~~~
include/asm-generic/atomic-instrumented.h:1685:2: note: in expansion of macro 'arch_cmpxchg'
1685 | arch_cmpxchg(__ai_ptr, __VA_ARGS__); \
| ^~~~~~~~~~~~
drivers/gpu/drm/drm_lock.c:319:12: note: in expansion of macro 'cmpxchg'
319 | prev = cmpxchg(lock, old, DRM_KERNEL_CONTEXT);
| ^~~~~~~
arch/powerpc/include/asm/cmpxchg.h:432:25: note: expected 'void *' but argument is of type 'volatile unsigned int *'
432 | __cmpxchg_relaxed(void *ptr, unsigned long old, unsigned long new,
| ~~~~~~^~~
vim +463 arch/powerpc/include/asm/cmpxchg.h
56c08e6d226c860 Boqun Feng 2015-12-15 450
9e1bec8fe216b07 Nicholas Piggin 2020-11-11 451 #define arch_cmpxchg_local(ptr, o, n) \
ae3a197e3d0bfe3 David Howells 2012-03-28 452 ({ \
ae3a197e3d0bfe3 David Howells 2012-03-28 453 __typeof__(*(ptr)) _o_ = (o); \
ae3a197e3d0bfe3 David Howells 2012-03-28 454 __typeof__(*(ptr)) _n_ = (n); \
ae3a197e3d0bfe3 David Howells 2012-03-28 455 (__typeof__(*(ptr))) __cmpxchg_local((ptr), (unsigned long)_o_, \
ae3a197e3d0bfe3 David Howells 2012-03-28 456 (unsigned long)_n_, sizeof(*(ptr))); \
ae3a197e3d0bfe3 David Howells 2012-03-28 457 })
ae3a197e3d0bfe3 David Howells 2012-03-28 458
9e1bec8fe216b07 Nicholas Piggin 2020-11-11 459 #define arch_cmpxchg_relaxed(ptr, o, n) \
56c08e6d226c860 Boqun Feng 2015-12-15 460 ({ \
56c08e6d226c860 Boqun Feng 2015-12-15 461 __typeof__(*(ptr)) _o_ = (o); \
56c08e6d226c860 Boqun Feng 2015-12-15 462 __typeof__(*(ptr)) _n_ = (n); \
56c08e6d226c860 Boqun Feng 2015-12-15 @463 (__typeof__(*(ptr))) __cmpxchg_relaxed((ptr), \
56c08e6d226c860 Boqun Feng 2015-12-15 464 (unsigned long)_o_, (unsigned long)_n_, \
56c08e6d226c860 Boqun Feng 2015-12-15 465 sizeof(*(ptr))); \
56c08e6d226c860 Boqun Feng 2015-12-15 466 })
56c08e6d226c860 Boqun Feng 2015-12-15 467
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 71460 bytes --]
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox