* [PATCH v6 1/3] riscv: io: avoid null-pointer arithmetic in PIO helpers
2026-07-01 12:29 [PATCH v6 0/3] riscv: fix PIO helpers and add optimized percpu ops Yunhui Cui
@ 2026-07-01 12:29 ` Yunhui Cui
2026-07-01 12:40 ` sashiko-bot
2026-07-01 12:29 ` [PATCH v6 2/3] riscv: introduce percpu.h into include/asm Yunhui Cui
2026-07-01 12:29 ` [PATCH v6 3/3] riscv: store percpu offset into thread_info Yunhui Cui
2 siblings, 1 reply; 8+ messages in thread
From: Yunhui Cui @ 2026-07-01 12:29 UTC (permalink / raw)
To: pjw, palmer, aou, alex, dennis, tj, cl, ast, daniel, andrii,
martin.lau, eddyz87, memxor, song, yonghong.song, jolsa, bjorn,
pulehui, puranjay, thuth, ajones, ben.dooks, rkrcmar, cuiyunhui,
samuel.holland, zong.li, conor.dooley, tglx, debug, seanwascoding,
andybnac, menglong8.dong, cyrilbur, wangruikang, atishp, apatel,
linux-riscv, linux-kernel, linux-mm, bpf, arnd, nathan,
nick.desaulniers+lkml, morbo, justinstitt, qingfang.deng,
linux-arch, llvm
When port I/O is not supported, exposing the port-string helpers is
unnecessary, and it can make clang diagnose null-pointer arithmetic in the
PCI_IOBASE-based address expression. Keep the MMIO string helpers
available as before, but only provide the port I/O variants when
CONFIG_HAS_IOPORT is enabled.
Signed-off-by: Yunhui Cui <cuiyunhui@bytedance.com>
---
arch/riscv/include/asm/io.h | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/arch/riscv/include/asm/io.h b/arch/riscv/include/asm/io.h
index 09bb5f57a9d34..92d5f831f3495 100644
--- a/arch/riscv/include/asm/io.h
+++ b/arch/riscv/include/asm/io.h
@@ -102,12 +102,14 @@ __io_reads_ins(reads, u32, l, __io_br(), __io_ar(addr))
#define readsw(addr, buffer, count) __readsw(addr, buffer, count)
#define readsl(addr, buffer, count) __readsl(addr, buffer, count)
+#ifdef CONFIG_HAS_IOPORT
__io_reads_ins(ins, u8, b, __io_pbr(), __io_par(addr))
__io_reads_ins(ins, u16, w, __io_pbr(), __io_par(addr))
__io_reads_ins(ins, u32, l, __io_pbr(), __io_par(addr))
#define insb(addr, buffer, count) __insb(PCI_IOBASE + (addr), buffer, count)
#define insw(addr, buffer, count) __insw(PCI_IOBASE + (addr), buffer, count)
#define insl(addr, buffer, count) __insl(PCI_IOBASE + (addr), buffer, count)
+#endif
__io_writes_outs(writes, u8, b, __io_bw(), __io_aw())
__io_writes_outs(writes, u16, w, __io_bw(), __io_aw())
@@ -116,26 +118,32 @@ __io_writes_outs(writes, u32, l, __io_bw(), __io_aw())
#define writesw(addr, buffer, count) __writesw(addr, buffer, count)
#define writesl(addr, buffer, count) __writesl(addr, buffer, count)
+#ifdef CONFIG_HAS_IOPORT
__io_writes_outs(outs, u8, b, __io_pbw(), __io_paw())
__io_writes_outs(outs, u16, w, __io_pbw(), __io_paw())
__io_writes_outs(outs, u32, l, __io_pbw(), __io_paw())
#define outsb(addr, buffer, count) __outsb(PCI_IOBASE + (addr), buffer, count)
#define outsw(addr, buffer, count) __outsw(PCI_IOBASE + (addr), buffer, count)
#define outsl(addr, buffer, count) __outsl(PCI_IOBASE + (addr), buffer, count)
+#endif
#ifdef CONFIG_64BIT
__io_reads_ins(reads, u64, q, __io_br(), __io_ar(addr))
#define readsq(addr, buffer, count) __readsq(addr, buffer, count)
+#ifdef CONFIG_HAS_IOPORT
__io_reads_ins(ins, u64, q, __io_pbr(), __io_par(addr))
#define insq(addr, buffer, count) __insq(PCI_IOBASE + (addr), buffer, count)
+#endif
__io_writes_outs(writes, u64, q, __io_bw(), __io_aw())
#define writesq(addr, buffer, count) __writesq(addr, buffer, count)
+#ifdef CONFIG_HAS_IOPORT
__io_writes_outs(outs, u64, q, __io_pbr(), __io_paw())
#define outsq(addr, buffer, count) __outsq(PCI_IOBASE + (addr), buffer, count)
#endif
+#endif
#include <asm-generic/io.h>
--
2.39.5
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH v6 2/3] riscv: introduce percpu.h into include/asm
2026-07-01 12:29 [PATCH v6 0/3] riscv: fix PIO helpers and add optimized percpu ops Yunhui Cui
2026-07-01 12:29 ` [PATCH v6 1/3] riscv: io: avoid null-pointer arithmetic in PIO helpers Yunhui Cui
@ 2026-07-01 12:29 ` Yunhui Cui
2026-07-01 12:46 ` sashiko-bot
2026-07-01 13:12 ` bot+bpf-ci
2026-07-01 12:29 ` [PATCH v6 3/3] riscv: store percpu offset into thread_info Yunhui Cui
2 siblings, 2 replies; 8+ messages in thread
From: Yunhui Cui @ 2026-07-01 12:29 UTC (permalink / raw)
To: pjw, palmer, aou, alex, dennis, tj, cl, ast, daniel, andrii,
martin.lau, eddyz87, memxor, song, yonghong.song, jolsa, bjorn,
pulehui, puranjay, thuth, ajones, ben.dooks, rkrcmar, cuiyunhui,
samuel.holland, zong.li, conor.dooley, tglx, debug, seanwascoding,
andybnac, menglong8.dong, cyrilbur, wangruikang, atishp, apatel,
linux-riscv, linux-kernel, linux-mm, bpf, arnd, nathan,
nick.desaulniers+lkml, morbo, justinstitt, qingfang.deng,
linux-arch, llvm
Add RISC-V specific this_cpu helpers so common percpu operations can use
short architecture sequences instead of the generic implementation.
Native-width operations use AMOs, while 8/16-bit operations use Zabha when
available and a local 32-bit LR/SC fallback otherwise.
Signed-off-by: Yunhui Cui <cuiyunhui@bytedance.com>
---
arch/riscv/include/asm/percpu.h | 284 ++++++++++++++++++++++++++++++++
1 file changed, 284 insertions(+)
create mode 100644 arch/riscv/include/asm/percpu.h
diff --git a/arch/riscv/include/asm/percpu.h b/arch/riscv/include/asm/percpu.h
new file mode 100644
index 0000000000000..34483253e946f
--- /dev/null
+++ b/arch/riscv/include/asm/percpu.h
@@ -0,0 +1,284 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifndef __ASM_PERCPU_H
+#define __ASM_PERCPU_H
+
+#include <linux/bits.h>
+#include <linux/preempt.h>
+
+#include <asm/alternative-macros.h>
+#include <asm/cmpxchg.h>
+#include <asm/cpufeature-macros.h>
+#include <asm/hwcap.h>
+
+#define PERCPU_RW_OPS(sz) \
+static inline unsigned long __percpu_read_##sz(void *ptr) \
+{ \
+ return READ_ONCE(*(u##sz *)ptr); \
+} \
+ \
+static inline void __percpu_write_##sz(void *ptr, unsigned long val) \
+{ \
+ WRITE_ONCE(*(u##sz *)ptr, (u##sz)val); \
+}
+
+PERCPU_RW_OPS(8)
+PERCPU_RW_OPS(16)
+PERCPU_RW_OPS(32)
+
+#ifdef CONFIG_64BIT
+PERCPU_RW_OPS(64)
+#endif
+
+#define __PERCPU_AMO_OP_CASE(sfx, name, sz, amo_insn) \
+static inline void \
+__percpu_##name##_amo_case_##sz(void *ptr, unsigned long val) \
+{ \
+ asm volatile ( \
+ "amo" #amo_insn #sfx " zero, %[val], %[ptr]" \
+ : [ptr] "+A" (*(u##sz *)ptr) \
+ : [val] "r" ((u##sz)(val)) \
+ : "memory"); \
+}
+
+#ifdef CONFIG_64BIT
+#define PERCPU_OP(name, amo_insn) \
+ __PERCPU_AMO_OP_CASE(.w, name, 32, amo_insn) \
+ __PERCPU_AMO_OP_CASE(.d, name, 64, amo_insn)
+#else
+#define PERCPU_OP(name, amo_insn) \
+ __PERCPU_AMO_OP_CASE(.w, name, 32, amo_insn)
+#endif
+
+PERCPU_OP(add, add)
+PERCPU_OP(andnot, and)
+PERCPU_OP(or, or)
+
+/*
+ * Only this_cpu_add_return_*() needs a returned value at present, so
+ * PERCPU_RET_OP() always returns "old + val"; do not use it for other ops.
+ */
+#define __PERCPU_AMO_RET_OP_CASE(sfx, name, sz, amo_insn) \
+static inline u##sz \
+__percpu_##name##_return_amo_case_##sz(void *ptr, unsigned long val) \
+{ \
+ register u##sz ret; \
+ \
+ asm volatile ( \
+ "amo" #amo_insn #sfx " %[ret], %[val], %[ptr]" \
+ : [ptr] "+A" (*(u##sz *)ptr), [ret] "=r" (ret) \
+ : [val] "r" ((u##sz)(val)) \
+ : "memory"); \
+ \
+ return ret + val; \
+}
+
+#ifdef CONFIG_64BIT
+#define PERCPU_RET_OP(name, amo_insn) \
+ __PERCPU_AMO_RET_OP_CASE(.w, name, 32, amo_insn) \
+ __PERCPU_AMO_RET_OP_CASE(.d, name, 64, amo_insn)
+#else
+#define PERCPU_RET_OP(name, amo_insn) \
+ __PERCPU_AMO_RET_OP_CASE(.w, name, 32, amo_insn)
+#endif
+
+PERCPU_RET_OP(add, add)
+
+#define PERCPU_8_16_GET_SHIFT(ptr) (((unsigned long)(ptr) & 0x3) * BITS_PER_BYTE)
+#define PERCPU_8_16_GET_MASK(sz) GENMASK((sz) - 1, 0)
+#define PERCPU_8_16_GET_PTR32(ptr) ((u32 *)((unsigned long)(ptr) & ~0x3))
+
+#define PERCPU_8_16_OP(name, amo_insn, sz, sfx, val_type, new_val_expr, asm_op) \
+static inline void __percpu_##name##_amo_case_##sz(void *ptr, unsigned long val) \
+{ \
+ if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) && \
+ riscv_has_extension_unlikely(RISCV_ISA_EXT_ZABHA)) { \
+ asm volatile ("amo" #amo_insn #sfx " zero, %[val], %[ptr]" \
+ : [ptr] "+A"(*(val_type *)ptr) \
+ : [val] "r"((val_type)((new_val_expr) & PERCPU_8_16_GET_MASK(sz))) \
+ : "memory"); \
+ } else { \
+ u32 *ptr32 = PERCPU_8_16_GET_PTR32(ptr); \
+ const unsigned long shift = PERCPU_8_16_GET_SHIFT(ptr); \
+ const u32 mask = PERCPU_8_16_GET_MASK(sz) << shift; \
+ const val_type val_trunc = (val_type)((new_val_expr) \
+ & PERCPU_8_16_GET_MASK(sz)); \
+ u32 old, new; \
+ u32 field; /* scratch: holds (old & mask) before the shift, so needs 32 bits */ \
+ \
+ asm volatile ( \
+ "0: lr.w %0, %2\n" \
+ "and %3, %0, %4\n" \
+ "srl %3, %3, %5\n" \
+ #asm_op " %3, %3, %6\n" \
+ "and %3, %3, %8\n" \
+ "sll %3, %3, %5\n" \
+ "and %1, %0, %7\n" \
+ "or %1, %1, %3\n" \
+ "sc.w %1, %1, %2\n" \
+ "bnez %1, 0b\n" \
+ : "=&r"(old), "=&r"(new), "+A"(*ptr32), "=&r"(field) \
+ : "r"(mask), "r"(shift), "r"(val_trunc), "r"(~mask), \
+ "r"(PERCPU_8_16_GET_MASK(sz)) \
+ : "memory"); \
+ } \
+}
+
+#define PERCPU_OP_8_16(op_name, op, expr, final_op) \
+ PERCPU_8_16_OP(op_name, op, 8, .b, u8, expr, final_op) \
+ PERCPU_8_16_OP(op_name, op, 16, .h, u16, expr, final_op)
+
+PERCPU_OP_8_16(add, add, val, add)
+PERCPU_OP_8_16(andnot, and, ~(val), and)
+PERCPU_OP_8_16(or, or, val, or)
+
+#define PERCPU_8_16_RET_OP(name, amo_insn, sz, sfx, val_type, new_val_expr) \
+static inline val_type __percpu_##name##_return_amo_case_##sz(void *ptr, unsigned long val) \
+{ \
+ if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) && \
+ riscv_has_extension_unlikely(RISCV_ISA_EXT_ZABHA)) { \
+ register val_type ret; \
+ asm volatile ("amo" #amo_insn #sfx " %[ret], %[val], %[ptr]" \
+ : [ptr] "+A"(*(val_type *)ptr), [ret] "=r"(ret) \
+ : [val] "r"((val_type)((new_val_expr) & PERCPU_8_16_GET_MASK(sz))) \
+ : "memory"); \
+ return ret + (val_type)((new_val_expr) & PERCPU_8_16_GET_MASK(sz)); \
+ } else { \
+ u32 *ptr32 = PERCPU_8_16_GET_PTR32(ptr); \
+ const unsigned long shift = PERCPU_8_16_GET_SHIFT(ptr); \
+ const u32 mask = (PERCPU_8_16_GET_MASK(sz) << shift); \
+ const u32 inv_mask = ~mask; \
+ const val_type val_trunc = (val_type)((new_val_expr) \
+ & PERCPU_8_16_GET_MASK(sz)); \
+ u32 old, new, field; \
+ \
+ asm volatile ( \
+ "0: lr.w %0, %3\n" \
+ "and %1, %0, %4\n" \
+ "srl %1, %1, %5\n" \
+ "add %1, %1, %6\n" \
+ "and %1, %1, %7\n" \
+ "sll %1, %1, %5\n" \
+ "and %2, %0, %8\n" \
+ "or %2, %2, %1\n" \
+ "sc.w %2, %2, %3\n" \
+ "bnez %2, 0b\n" \
+ : "=&r"(old), "=&r"(field), "=&r"(new), "+A"(*ptr32) \
+ : "r"(mask), "r"(shift), "r"(val_trunc), "r"(PERCPU_8_16_GET_MASK(sz)), \
+ "r"(inv_mask) \
+ : "memory"); \
+ return (val_type)(field >> shift); \
+ } \
+}
+
+PERCPU_8_16_RET_OP(add, add, 8, .b, u8, val)
+PERCPU_8_16_RET_OP(add, add, 16, .h, u16, val)
+
+#define _pcp_protect(op, pcp, ...) \
+({ \
+ preempt_disable_notrace(); \
+ op(raw_cpu_ptr(&(pcp)), __VA_ARGS__); \
+ preempt_enable_notrace(); \
+})
+
+#define _pcp_protect_return(op, pcp, args...) \
+({ \
+ typeof(pcp) __retval; \
+ preempt_disable_notrace(); \
+ __retval = (typeof(pcp))op(raw_cpu_ptr(&(pcp)), ##args); \
+ preempt_enable_notrace(); \
+ __retval; \
+})
+
+#define this_cpu_read_1(pcp) _pcp_protect_return(__percpu_read_8, pcp)
+#define this_cpu_read_2(pcp) _pcp_protect_return(__percpu_read_16, pcp)
+#define this_cpu_read_4(pcp) _pcp_protect_return(__percpu_read_32, pcp)
+
+#ifdef CONFIG_64BIT
+#define this_cpu_read_8(pcp) _pcp_protect_return(__percpu_read_64, pcp)
+#endif
+
+#define this_cpu_write_1(pcp, val) _pcp_protect(__percpu_write_8, pcp, (unsigned long)(val))
+#define this_cpu_write_2(pcp, val) _pcp_protect(__percpu_write_16, pcp, (unsigned long)(val))
+#define this_cpu_write_4(pcp, val) _pcp_protect(__percpu_write_32, pcp, (unsigned long)(val))
+
+#ifdef CONFIG_64BIT
+#define this_cpu_write_8(pcp, val) _pcp_protect(__percpu_write_64, pcp, (unsigned long)(val))
+#endif
+
+#define this_cpu_add_1(pcp, val) _pcp_protect(__percpu_add_amo_case_8, pcp, val)
+#define this_cpu_add_2(pcp, val) _pcp_protect(__percpu_add_amo_case_16, pcp, val)
+#define this_cpu_add_4(pcp, val) _pcp_protect(__percpu_add_amo_case_32, pcp, val)
+
+#ifdef CONFIG_64BIT
+#define this_cpu_add_8(pcp, val) _pcp_protect(__percpu_add_amo_case_64, pcp, val)
+#endif
+
+#define this_cpu_add_return_1(pcp, val) \
+_pcp_protect_return(__percpu_add_return_amo_case_8, pcp, val)
+
+#define this_cpu_add_return_2(pcp, val) \
+_pcp_protect_return(__percpu_add_return_amo_case_16, pcp, val)
+
+#define this_cpu_add_return_4(pcp, val) \
+_pcp_protect_return(__percpu_add_return_amo_case_32, pcp, val)
+
+#ifdef CONFIG_64BIT
+#define this_cpu_add_return_8(pcp, val) \
+_pcp_protect_return(__percpu_add_return_amo_case_64, pcp, val)
+#endif
+
+#define this_cpu_and_1(pcp, val) _pcp_protect(__percpu_andnot_amo_case_8, pcp, ~(val))
+#define this_cpu_and_2(pcp, val) _pcp_protect(__percpu_andnot_amo_case_16, pcp, ~(val))
+#define this_cpu_and_4(pcp, val) _pcp_protect(__percpu_andnot_amo_case_32, pcp, val)
+
+#ifdef CONFIG_64BIT
+#define this_cpu_and_8(pcp, val) _pcp_protect(__percpu_andnot_amo_case_64, pcp, val)
+#endif
+
+#define this_cpu_or_1(pcp, val) _pcp_protect(__percpu_or_amo_case_8, pcp, val)
+#define this_cpu_or_2(pcp, val) _pcp_protect(__percpu_or_amo_case_16, pcp, val)
+#define this_cpu_or_4(pcp, val) _pcp_protect(__percpu_or_amo_case_32, pcp, val)
+
+#ifdef CONFIG_64BIT
+#define this_cpu_or_8(pcp, val) _pcp_protect(__percpu_or_amo_case_64, pcp, val)
+#endif
+
+#define this_cpu_xchg_1(pcp, val) _pcp_protect_return(xchg_relaxed, pcp, val)
+#define this_cpu_xchg_2(pcp, val) _pcp_protect_return(xchg_relaxed, pcp, val)
+#define this_cpu_xchg_4(pcp, val) _pcp_protect_return(xchg_relaxed, pcp, val)
+
+#ifdef CONFIG_64BIT
+#define this_cpu_xchg_8(pcp, val) _pcp_protect_return(xchg_relaxed, pcp, val)
+#endif
+
+#define this_cpu_cmpxchg_1(pcp, o, n) _pcp_protect_return(cmpxchg_relaxed, pcp, o, n)
+#define this_cpu_cmpxchg_2(pcp, o, n) _pcp_protect_return(cmpxchg_relaxed, pcp, o, n)
+#define this_cpu_cmpxchg_4(pcp, o, n) _pcp_protect_return(cmpxchg_relaxed, pcp, o, n)
+
+#ifdef CONFIG_64BIT
+#define this_cpu_cmpxchg_8(pcp, o, n) _pcp_protect_return(cmpxchg_relaxed, pcp, o, n)
+
+#define this_cpu_cmpxchg64(pcp, o, n) this_cpu_cmpxchg_8(pcp, o, n)
+#endif
+
+#ifdef system_has_cmpxchg128
+#define this_cpu_cmpxchg128(pcp, o, n) \
+({ \
+ u128 ret__; \
+ typeof(pcp) *ptr__; \
+ \
+ preempt_disable_notrace(); \
+ ptr__ = raw_cpu_ptr(&(pcp)); \
+ if (system_has_cmpxchg128()) \
+ ret__ = cmpxchg128_local(ptr__, (o), (n)); \
+ else \
+ ret__ = this_cpu_generic_cmpxchg(pcp, (o), (n)); \
+ preempt_enable_notrace(); \
+ ret__; \
+})
+#endif
+
+#include <asm-generic/percpu.h>
+
+#endif /* __ASM_PERCPU_H */
--
2.39.5
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH v6 3/3] riscv: store percpu offset into thread_info
2026-07-01 12:29 [PATCH v6 0/3] riscv: fix PIO helpers and add optimized percpu ops Yunhui Cui
2026-07-01 12:29 ` [PATCH v6 1/3] riscv: io: avoid null-pointer arithmetic in PIO helpers Yunhui Cui
2026-07-01 12:29 ` [PATCH v6 2/3] riscv: introduce percpu.h into include/asm Yunhui Cui
@ 2026-07-01 12:29 ` Yunhui Cui
2 siblings, 0 replies; 8+ messages in thread
From: Yunhui Cui @ 2026-07-01 12:29 UTC (permalink / raw)
To: pjw, palmer, aou, alex, dennis, tj, cl, ast, daniel, andrii,
martin.lau, eddyz87, memxor, song, yonghong.song, jolsa, bjorn,
pulehui, puranjay, thuth, ajones, ben.dooks, rkrcmar, cuiyunhui,
samuel.holland, zong.li, conor.dooley, tglx, debug, seanwascoding,
andybnac, menglong8.dong, cyrilbur, wangruikang, atishp, apatel,
linux-riscv, linux-kernel, linux-mm, bpf, arnd, nathan,
nick.desaulniers+lkml, morbo, justinstitt, qingfang.deng,
linux-arch, llvm
RISC-V percpu addressing currently derives the base offset from the CPU
number and __per_cpu_offset[]. Cache the current CPU percpu offset in
thread_info so percpu accesses can load it directly.
Keep the cached value up to date for the boot CPU, context switches and
secondary CPU bringup. Initialize secondary idle tasks before they start
running so early percpu accesses use the secondary CPU offset rather than
inheriting the boot CPU value.
Link: https://lists.riscv.org/g/tech-privileged/topic/risc_v_tech_arch_review/113437553?page=2
Signed-off-by: Yunhui Cui <cuiyunhui@bytedance.com>
---
arch/riscv/include/asm/asm.h | 6 +-----
arch/riscv/include/asm/percpu.h | 4 ++++
arch/riscv/include/asm/switch_to.h | 8 ++++++++
arch/riscv/include/asm/thread_info.h | 3 ++-
arch/riscv/kernel/asm-offsets.c | 1 +
arch/riscv/kernel/smpboot.c | 8 ++++++++
arch/riscv/net/bpf_jit_comp64.c | 9 +--------
7 files changed, 25 insertions(+), 14 deletions(-)
diff --git a/arch/riscv/include/asm/asm.h b/arch/riscv/include/asm/asm.h
index e9e8ba83e632f..137a49488325e 100644
--- a/arch/riscv/include/asm/asm.h
+++ b/arch/riscv/include/asm/asm.h
@@ -91,11 +91,7 @@
#ifdef CONFIG_SMP
.macro asm_per_cpu dst sym tmp
- lw \tmp, TASK_TI_CPU_NUM(tp)
- slli \tmp, \tmp, RISCV_LGPTR
- la \dst, __per_cpu_offset
- add \dst, \dst, \tmp
- REG_L \tmp, 0(\dst)
+ REG_L \tmp, TASK_TI_PCPU_OFFSET(tp)
la \dst, \sym
add \dst, \dst, \tmp
.endm
diff --git a/arch/riscv/include/asm/percpu.h b/arch/riscv/include/asm/percpu.h
index 34483253e946f..52926c27f1f00 100644
--- a/arch/riscv/include/asm/percpu.h
+++ b/arch/riscv/include/asm/percpu.h
@@ -9,7 +9,9 @@
#include <asm/alternative-macros.h>
#include <asm/cmpxchg.h>
#include <asm/cpufeature-macros.h>
+#include <asm/current.h>
#include <asm/hwcap.h>
+#include <asm/thread_info.h>
#define PERCPU_RW_OPS(sz) \
static inline unsigned long __percpu_read_##sz(void *ptr) \
@@ -279,6 +281,8 @@ _pcp_protect_return(__percpu_add_return_amo_case_64, pcp, val)
})
#endif
+#define __my_cpu_offset (((struct thread_info *)current)->pcpu_offset)
+
#include <asm-generic/percpu.h>
#endif /* __ASM_PERCPU_H */
diff --git a/arch/riscv/include/asm/switch_to.h b/arch/riscv/include/asm/switch_to.h
index 0e71eb82f920c..733b6cd306e40 100644
--- a/arch/riscv/include/asm/switch_to.h
+++ b/arch/riscv/include/asm/switch_to.h
@@ -88,6 +88,13 @@ static inline void __switch_to_envcfg(struct task_struct *next)
:: "r" (next->thread.envcfg) : "memory");
}
+static inline void __switch_to_pcpu_offset(struct task_struct *next)
+{
+#ifdef CONFIG_SMP
+ next->thread_info.pcpu_offset = __my_cpu_offset;
+#endif
+}
+
extern struct task_struct *__switch_to(struct task_struct *,
struct task_struct *);
@@ -122,6 +129,7 @@ do { \
if (switch_to_should_flush_icache(__next)) \
local_flush_icache_all(); \
__switch_to_envcfg(__next); \
+ __switch_to_pcpu_offset(__next); \
((last) = __switch_to(__prev, __next)); \
} while (0)
diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h
index 55019fdfa9eca..f10ba62b61016 100644
--- a/arch/riscv/include/asm/thread_info.h
+++ b/arch/riscv/include/asm/thread_info.h
@@ -53,6 +53,7 @@
struct thread_info {
unsigned long flags; /* low level flags */
int preempt_count; /* 0=>preemptible, <0=>BUG */
+ int cpu;
/*
* These stack pointers are overwritten on every system call or
* exception. SP is also saved to the stack it can be recovered when
@@ -60,8 +61,8 @@ struct thread_info {
*/
long kernel_sp; /* Kernel stack pointer */
long user_sp; /* User stack pointer */
- int cpu;
unsigned long syscall_work; /* SYSCALL_WORK_ flags */
+ unsigned long pcpu_offset;
#ifdef CONFIG_SHADOW_CALL_STACK
void *scs_base;
void *scs_sp;
diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c
index a75f0cfea1e9f..20d46c28fdde9 100644
--- a/arch/riscv/kernel/asm-offsets.c
+++ b/arch/riscv/kernel/asm-offsets.c
@@ -38,6 +38,7 @@ void asm_offsets(void)
OFFSET(TASK_THREAD_SUM, task_struct, thread.sum);
OFFSET(TASK_TI_CPU, task_struct, thread_info.cpu);
+ OFFSET(TASK_TI_PCPU_OFFSET, task_struct, thread_info.pcpu_offset);
OFFSET(TASK_TI_PREEMPT_COUNT, task_struct, thread_info.preempt_count);
OFFSET(TASK_TI_KERNEL_SP, task_struct, thread_info.kernel_sp);
OFFSET(TASK_TI_USER_SP, task_struct, thread_info.user_sp);
diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c
index f6ef57930b50a..7876854d16279 100644
--- a/arch/riscv/kernel/smpboot.c
+++ b/arch/riscv/kernel/smpboot.c
@@ -191,6 +191,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
{
int ret;
tidle->thread_info.cpu = cpu;
+ tidle->thread_info.pcpu_offset = per_cpu_offset(cpu);
ret = start_secondary_cpu(cpu, tidle);
if (!ret) {
@@ -208,6 +209,11 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
}
#endif
+void __init smp_prepare_boot_cpu(void)
+{
+ __my_cpu_offset = per_cpu_offset(smp_processor_id());
+}
+
void __init smp_cpus_done(unsigned int max_cpus)
{
}
@@ -233,6 +239,8 @@ asmlinkage __visible void smp_callin(void)
mmgrab(mm);
current->active_mm = mm;
+ __my_cpu_offset = per_cpu_offset(smp_processor_id());
+
#ifdef CONFIG_HOTPLUG_PARALLEL
cpuhp_ap_sync_alive();
#endif
diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c
index c03c1de16b79a..eab93d5258e9e 100644
--- a/arch/riscv/net/bpf_jit_comp64.c
+++ b/arch/riscv/net/bpf_jit_comp64.c
@@ -1395,15 +1395,8 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
if (rd != rs)
emit_mv(rd, rs, ctx);
#ifdef CONFIG_SMP
- /* Load current CPU number in T1 */
- emit_lw(RV_REG_T1, offsetof(struct thread_info, cpu),
+ emit_ld(RV_REG_T1, offsetof(struct thread_info, pcpu_offset),
RV_REG_TP, ctx);
- /* Load address of __per_cpu_offset array in T2 */
- emit_addr(RV_REG_T2, (u64)&__per_cpu_offset, extra_pass, ctx);
- /* Get address of __per_cpu_offset[cpu] in T1 */
- emit_sh3add(RV_REG_T1, RV_REG_T1, RV_REG_T2, ctx);
- /* Load __per_cpu_offset[cpu] in T1 */
- emit_ld(RV_REG_T1, 0, RV_REG_T1, ctx);
/* Add the offset to Rd */
emit_add(rd, rd, RV_REG_T1, ctx);
#endif
--
2.39.5
^ permalink raw reply related [flat|nested] 8+ messages in thread