Building the Linux kernel with Clang and LLVM
 help / color / mirror / Atom feed
* [PATCH v5 0/3] riscv: improve percpu helpers and PIO mapping
@ 2026-07-01  4:20 Yunhui Cui
  2026-07-01  4:20 ` [PATCH v5 1/3] riscv: io: avoid null-pointer arithmetic in PIO helpers Yunhui Cui
                   ` (2 more replies)
  0 siblings, 3 replies; 8+ messages in thread
From: Yunhui Cui @ 2026-07-01  4:20 UTC (permalink / raw)
  To: pjw, palmer, aou, alex, dennis, tj, cl, ast, daniel, andrii,
	martin.lau, eddyz87, memxor, song, yonghong.song, jolsa, bjorn,
	pulehui, puranjay, thuth, ajones, ben.dooks, rkrcmar, cuiyunhui,
	samuel.holland, zong.li, conor.dooley, tglx, debug, seanwascoding,
	andybnac, menglong8.dong, cyrilbur, wangruikang, atishp, apatel,
	linux-riscv, linux-kernel, linux-mm, bpf, arnd, nathan,
	nick.desaulniers+lkml, morbo, justinstitt, qingfang.deng,
	linux-arch, llvm

Changes in v5:
- Keep the PIO helper fix local to RISC-V: drop the generic
  HAS_IOPORT_MAP dependency change and the generic ioport_map() change
  from v4.
- Add the missing linux/bits.h include for GENMASK and BITS_PER_BYTE.
- Fix the 8/16-bit LR/SC fallback to mask subword results before writing
  them back.
- Use early-clobber constraints in the 8/16-bit add_return LR/SC fallback.
- Fix this_cpu_and_4/8 to pass the intended operand to RISC-V amoand.
- Initialize the secondary idle task pcpu_offset before starting the CPU.

Yunhui Cui (3):
  riscv: io: avoid null-pointer arithmetic in PIO helpers
  riscv: introduce percpu.h into include/asm
  riscv: store percpu offset into thread_info

 arch/riscv/include/asm/asm.h         |   6 +-
 arch/riscv/include/asm/io.h          |  26 ++-
 arch/riscv/include/asm/percpu.h      | 287 +++++++++++++++++++++++++++
 arch/riscv/include/asm/switch_to.h   |   8 +
 arch/riscv/include/asm/thread_info.h |   3 +-
 arch/riscv/kernel/asm-offsets.c      |   1 +
 arch/riscv/kernel/smpboot.c          |   8 +
 arch/riscv/net/bpf_jit_comp64.c      |   9 +-
 8 files changed, 326 insertions(+), 22 deletions(-)
 create mode 100644 arch/riscv/include/asm/percpu.h

-- 
2.39.5


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH v5 1/3] riscv: io: avoid null-pointer arithmetic in PIO helpers
  2026-07-01  4:20 [PATCH v5 0/3] riscv: improve percpu helpers and PIO mapping Yunhui Cui
@ 2026-07-01  4:20 ` Yunhui Cui
  2026-07-01  6:24   ` Arnd Bergmann
  2026-07-01  4:20 ` [PATCH v5 2/3] riscv: introduce percpu.h into include/asm Yunhui Cui
  2026-07-01  4:20 ` [PATCH v5 3/3] riscv: store percpu offset into thread_info Yunhui Cui
  2 siblings, 1 reply; 8+ messages in thread
From: Yunhui Cui @ 2026-07-01  4:20 UTC (permalink / raw)
  To: pjw, palmer, aou, alex, dennis, tj, cl, ast, daniel, andrii,
	martin.lau, eddyz87, memxor, song, yonghong.song, jolsa, bjorn,
	pulehui, puranjay, thuth, ajones, ben.dooks, rkrcmar, cuiyunhui,
	samuel.holland, zong.li, conor.dooley, tglx, debug, seanwascoding,
	andybnac, menglong8.dong, cyrilbur, wangruikang, atishp, apatel,
	linux-riscv, linux-kernel, linux-mm, bpf, arnd, nathan,
	nick.desaulniers+lkml, morbo, justinstitt, qingfang.deng,
	linux-arch, llvm

RISC-V implements its own string I/O helpers so port I/O can use the
architecture-specific ordering rules.  These helpers build an I/O address
from PCI_IOBASE and the port number before calling the raw accessors.

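When port I/O is not supported, exposing the port-string helpers is
unnecessary, and it can also make clang diagnose null-pointer arithmetic
in the PCI_IOBASE-based address expression.  Keep the MMIO string helpers
available as before, but only provide the port I/O variants when
CONFIG_HAS_IOPORT is enabled.  Also add PCI_IO_ADDR(), which computes the
port address in unsigned long arithmetic before casting it to
void __iomem *, and use it in the port I/O variants.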

Signed-off-by: Yunhui Cui <cuiyunhui@bytedance.com>
---
 arch/riscv/include/asm/io.h | 26 ++++++++++++++++++--------
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/arch/riscv/include/asm/io.h b/arch/riscv/include/asm/io.h
index 09bb5f57a9d34..6f5d70313c83e 100644
--- a/arch/riscv/include/asm/io.h
+++ b/arch/riscv/include/asm/io.h
@@ -56,6 +56,8 @@
 #define __io_pbw()	RISCV_FENCE(iow, o)
 #define __io_paw()	RISCV_FENCE(o, io)
 
+#define PCI_IO_ADDR(addr)	((void __iomem *)((unsigned long)PCI_IOBASE + (addr)))
+
 /*
  * Accesses from a single hart to a single I/O address must be ordered.  This
  * allows us to use the raw read macros, but we still need to fence before and
@@ -102,12 +104,14 @@ __io_reads_ins(reads, u32, l, __io_br(), __io_ar(addr))
 #define readsw(addr, buffer, count) __readsw(addr, buffer, count)
 #define readsl(addr, buffer, count) __readsl(addr, buffer, count)
 
+#ifdef CONFIG_HAS_IOPORT
 __io_reads_ins(ins,  u8, b, __io_pbr(), __io_par(addr))
 __io_reads_ins(ins, u16, w, __io_pbr(), __io_par(addr))
 __io_reads_ins(ins, u32, l, __io_pbr(), __io_par(addr))
-#define insb(addr, buffer, count) __insb(PCI_IOBASE + (addr), buffer, count)
-#define insw(addr, buffer, count) __insw(PCI_IOBASE + (addr), buffer, count)
-#define insl(addr, buffer, count) __insl(PCI_IOBASE + (addr), buffer, count)
+#define insb(addr, buffer, count) __insb(PCI_IO_ADDR(addr), buffer, count)
+#define insw(addr, buffer, count) __insw(PCI_IO_ADDR(addr), buffer, count)
+#define insl(addr, buffer, count) __insl(PCI_IO_ADDR(addr), buffer, count)
+#endif
 
 __io_writes_outs(writes,  u8, b, __io_bw(), __io_aw())
 __io_writes_outs(writes, u16, w, __io_bw(), __io_aw())
@@ -116,25 +120,31 @@ __io_writes_outs(writes, u32, l, __io_bw(), __io_aw())
 #define writesw(addr, buffer, count) __writesw(addr, buffer, count)
 #define writesl(addr, buffer, count) __writesl(addr, buffer, count)
 
+#ifdef CONFIG_HAS_IOPORT
 __io_writes_outs(outs,  u8, b, __io_pbw(), __io_paw())
 __io_writes_outs(outs, u16, w, __io_pbw(), __io_paw())
 __io_writes_outs(outs, u32, l, __io_pbw(), __io_paw())
-#define outsb(addr, buffer, count) __outsb(PCI_IOBASE + (addr), buffer, count)
-#define outsw(addr, buffer, count) __outsw(PCI_IOBASE + (addr), buffer, count)
-#define outsl(addr, buffer, count) __outsl(PCI_IOBASE + (addr), buffer, count)
+#define outsb(addr, buffer, count) __outsb(PCI_IO_ADDR(addr), buffer, count)
+#define outsw(addr, buffer, count) __outsw(PCI_IO_ADDR(addr), buffer, count)
+#define outsl(addr, buffer, count) __outsl(PCI_IO_ADDR(addr), buffer, count)
+#endif
 
 #ifdef CONFIG_64BIT
 __io_reads_ins(reads, u64, q, __io_br(), __io_ar(addr))
 #define readsq(addr, buffer, count) __readsq(addr, buffer, count)
 
+#ifdef CONFIG_HAS_IOPORT
 __io_reads_ins(ins, u64, q, __io_pbr(), __io_par(addr))
-#define insq(addr, buffer, count) __insq(PCI_IOBASE + (addr), buffer, count)
+#define insq(addr, buffer, count) __insq(PCI_IO_ADDR(addr), buffer, count)
+#endif
 
 __io_writes_outs(writes, u64, q, __io_bw(), __io_aw())
 #define writesq(addr, buffer, count) __writesq(addr, buffer, count)
 
+#ifdef CONFIG_HAS_IOPORT
 __io_writes_outs(outs, u64, q, __io_pbr(), __io_paw())
-#define outsq(addr, buffer, count) __outsq(PCI_IOBASE + (addr), buffer, count)
+#define outsq(addr, buffer, count) __outsq(PCI_IO_ADDR(addr), buffer, count)
+#endif
 #endif
 
 #include <asm-generic/io.h>
-- 
2.39.5


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v5 2/3] riscv: introduce percpu.h into include/asm
  2026-07-01  4:20 [PATCH v5 0/3] riscv: improve percpu helpers and PIO mapping Yunhui Cui
  2026-07-01  4:20 ` [PATCH v5 1/3] riscv: io: avoid null-pointer arithmetic in PIO helpers Yunhui Cui
@ 2026-07-01  4:20 ` Yunhui Cui
  2026-07-01  5:05   ` bot+bpf-ci
  2026-07-01  4:20 ` [PATCH v5 3/3] riscv: store percpu offset into thread_info Yunhui Cui
  2 siblings, 1 reply; 8+ messages in thread
From: Yunhui Cui @ 2026-07-01  4:20 UTC (permalink / raw)
  To: pjw, palmer, aou, alex, dennis, tj, cl, ast, daniel, andrii,
	martin.lau, eddyz87, memxor, song, yonghong.song, jolsa, bjorn,
	pulehui, puranjay, thuth, ajones, ben.dooks, rkrcmar, cuiyunhui,
	samuel.holland, zong.li, conor.dooley, tglx, debug, seanwascoding,
	andybnac, menglong8.dong, cyrilbur, wangruikang, atishp, apatel,
	linux-riscv, linux-kernel, linux-mm, bpf, arnd, nathan,
	nick.desaulniers+lkml, morbo, justinstitt, qingfang.deng,
	linux-arch, llvm

Add RISC-V specific this_cpu helpers so common percpu operations can use
short architecture sequences instead of the generic implementation.
Native-width operations use AMOs, while 8/16-bit operations use Zabha when
available and a local 32-bit LR/SC fallback otherwise.

Signed-off-by: Yunhui Cui <cuiyunhui@bytedance.com>
---
 arch/riscv/include/asm/percpu.h | 283 ++++++++++++++++++++++++++++++++
 1 file changed, 283 insertions(+)
 create mode 100644 arch/riscv/include/asm/percpu.h

diff --git a/arch/riscv/include/asm/percpu.h b/arch/riscv/include/asm/percpu.h
new file mode 100644
index 0000000000000..46f1901d3bb88
--- /dev/null
+++ b/arch/riscv/include/asm/percpu.h
@@ -0,0 +1,283 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifndef __ASM_PERCPU_H
+#define __ASM_PERCPU_H
+
+#include <linux/bits.h>
+#include <linux/preempt.h>
+
+#include <asm/alternative-macros.h>
+#include <asm/cpufeature-macros.h>
+#include <asm/hwcap.h>
+
+#define PERCPU_RW_OPS(sz)						\
+static inline unsigned long __percpu_read_##sz(void *ptr)		\
+{									\
+	return READ_ONCE(*(u##sz *)ptr);				\
+}									\
+									\
+static inline void __percpu_write_##sz(void *ptr, unsigned long val)	\
+{									\
+	WRITE_ONCE(*(u##sz *)ptr, (u##sz)val);				\
+}
+
+PERCPU_RW_OPS(8)
+PERCPU_RW_OPS(16)
+PERCPU_RW_OPS(32)
+
+#ifdef CONFIG_64BIT
+PERCPU_RW_OPS(64)
+#endif
+
+#define __PERCPU_AMO_OP_CASE(sfx, name, sz, amo_insn)			\
+static inline void							\
+__percpu_##name##_amo_case_##sz(void *ptr, unsigned long val)		\
+{									\
+	asm volatile (							\
+		"amo" #amo_insn #sfx " zero, %[val], %[ptr]"		\
+		: [ptr] "+A" (*(u##sz *)ptr)				\
+		: [val] "r" ((u##sz)(val))				\
+		: "memory");						\
+}
+
+#ifdef CONFIG_64BIT
+#define PERCPU_OP(name, amo_insn)					\
+	__PERCPU_AMO_OP_CASE(.w, name, 32, amo_insn)			\
+	__PERCPU_AMO_OP_CASE(.d, name, 64, amo_insn)
+#else
+#define PERCPU_OP(name, amo_insn)					\
+	__PERCPU_AMO_OP_CASE(.w, name, 32, amo_insn)
+#endif
+
+PERCPU_OP(add, add)
+PERCPU_OP(andnot, and)
+PERCPU_OP(or, or)
+
+/*
+ * Only this_cpu_add_return_*() needs the result of the operation, so
+ * PERCPU_RET_OP() supports addition only: it returns the old value + val.
+ */
+#define __PERCPU_AMO_RET_OP_CASE(sfx, name, sz, amo_insn)		\
+static inline u##sz							\
+__percpu_##name##_return_amo_case_##sz(void *ptr, unsigned long val)	\
+{									\
+	register u##sz ret;						\
+									\
+	asm volatile (							\
+		"amo" #amo_insn #sfx " %[ret], %[val], %[ptr]"		\
+		: [ptr] "+A" (*(u##sz *)ptr), [ret] "=r" (ret)		\
+		: [val] "r" ((u##sz)(val))				\
+		: "memory");						\
+									\
+	return ret + val;						\
+}
+
+#ifdef CONFIG_64BIT
+#define PERCPU_RET_OP(name, amo_insn)					\
+	__PERCPU_AMO_RET_OP_CASE(.w, name, 32, amo_insn)		\
+	__PERCPU_AMO_RET_OP_CASE(.d, name, 64, amo_insn)
+#else
+#define PERCPU_RET_OP(name, amo_insn)					\
+	__PERCPU_AMO_RET_OP_CASE(.w, name, 32, amo_insn)
+#endif
+
+PERCPU_RET_OP(add, add)
+
+#define PERCPU_8_16_GET_SHIFT(ptr)	(((unsigned long)(ptr) & 0x3) * BITS_PER_BYTE)
+#define PERCPU_8_16_GET_MASK(sz)	GENMASK((sz) - 1, 0)
+#define PERCPU_8_16_GET_PTR32(ptr)	((u32 *)((unsigned long)(ptr) & ~0x3))
+
+#define PERCPU_8_16_OP(name, amo_insn, sz, sfx, val_type, new_val_expr, asm_op)			\
+static inline void __percpu_##name##_amo_case_##sz(void *ptr, unsigned long val)		\
+{												\
+	if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) &&						\
+		riscv_has_extension_unlikely(RISCV_ISA_EXT_ZABHA)) {				\
+		asm volatile ("amo" #amo_insn #sfx " zero, %[val], %[ptr]"			\
+			: [ptr] "+A"(*(val_type *)ptr)						\
+			: [val] "r"((val_type)((new_val_expr) & PERCPU_8_16_GET_MASK(sz)))	\
+			: "memory");								\
+	} else {										\
+		u32 *ptr32 = PERCPU_8_16_GET_PTR32(ptr);					\
+		const unsigned long shift = PERCPU_8_16_GET_SHIFT(ptr);				\
+		const u32 mask = PERCPU_8_16_GET_MASK(sz) << shift;				\
+		const val_type val_trunc = (val_type)((new_val_expr)				\
+					   & PERCPU_8_16_GET_MASK(sz));				\
+		u32 retx, rc;									\
+		val_type new_val_type;								\
+												\
+		asm volatile (									\
+			"0: lr.w %0, %2\n"							\
+			"and %3, %0, %4\n"							\
+			"srl %3, %3, %5\n"							\
+			#asm_op " %3, %3, %6\n"							\
+			"and %3, %3, %8\n"						\
+			"sll %3, %3, %5\n"							\
+			"and %1, %0, %7\n"							\
+			"or %1, %1, %3\n"							\
+			"sc.w %1, %1, %2\n"							\
+			"bnez %1, 0b\n"								\
+			: "=&r"(retx), "=&r"(rc), "+A"(*ptr32), "=&r"(new_val_type)		\
+			: "r"(mask), "r"(shift), "r"(val_trunc), "r"(~mask),			\
+			  "r"(PERCPU_8_16_GET_MASK(sz))						\
+			: "memory");								\
+		}										\
+}
+
+#define PERCPU_OP_8_16(op_name, op, expr, final_op)			\
+	PERCPU_8_16_OP(op_name, op, 8, .b, u8, expr, final_op);		\
+	PERCPU_8_16_OP(op_name, op, 16, .h, u16, expr, final_op)
+
+PERCPU_OP_8_16(add, add, val, add)
+PERCPU_OP_8_16(andnot, and, ~(val), and)
+PERCPU_OP_8_16(or, or, val, or)
+
+#define PERCPU_8_16_RET_OP(name, amo_insn, sz, sfx, val_type, new_val_expr)			\
+static inline val_type __percpu_##name##_return_amo_case_##sz(void *ptr, unsigned long val)	\
+{												\
+	if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) &&						\
+		riscv_has_extension_unlikely(RISCV_ISA_EXT_ZABHA)) {				\
+		register val_type ret;								\
+		asm volatile ("amo" #amo_insn #sfx " %[ret], %[val], %[ptr]"			\
+			: [ptr] "+A"(*(val_type *)ptr), [ret] "=r"(ret)				\
+			: [val] "r"((val_type)((new_val_expr) & PERCPU_8_16_GET_MASK(sz)))	\
+			: "memory");								\
+		return ret + (val_type)((new_val_expr) & PERCPU_8_16_GET_MASK(sz));		\
+	} else {										\
+		u32 *ptr32 = PERCPU_8_16_GET_PTR32(ptr);					\
+		const unsigned long shift = PERCPU_8_16_GET_SHIFT(ptr);				\
+		const u32 mask = (PERCPU_8_16_GET_MASK(sz) << shift);				\
+		const u32 inv_mask = ~mask;							\
+		const val_type val_trunc = (val_type)((new_val_expr)				\
+					   & PERCPU_8_16_GET_MASK(sz));				\
+		u32 old, new, tmp;								\
+												\
+		asm volatile (									\
+			"0: lr.w %0, %3\n"							\
+			"and %1, %0, %4\n"							\
+			"srl %1, %1, %5\n"							\
+			"add %1, %1, %6\n"							\
+			"and %1, %1, %7\n"							\
+			"sll %1, %1, %5\n"							\
+			"and %2, %0, %8\n"							\
+			"or %2, %2, %1\n"							\
+			"sc.w %2, %2, %3\n"							\
+			"bnez %2, 0b\n"								\
+			: "=&r"(old), "=&r"(tmp), "=&r"(new), "+A"(*ptr32)			\
+			: "r"(mask), "r"(shift), "r"(val_trunc), "r"(PERCPU_8_16_GET_MASK(sz)), \
+			"r"(inv_mask)								\
+			: "memory");								\
+		return (val_type)(tmp >> shift);						\
+	}											\
+}
+
+PERCPU_8_16_RET_OP(add, add, 8, .b, u8, val)
+PERCPU_8_16_RET_OP(add, add, 16, .h, u16, val)
+
+#define _pcp_protect(op, pcp, ...)					\
+({									\
+	preempt_disable_notrace();					\
+	op(raw_cpu_ptr(&(pcp)), __VA_ARGS__);				\
+	preempt_enable_notrace();					\
+})
+
+#define _pcp_protect_return(op, pcp, args...)				\
+({									\
+	typeof(pcp) __retval;						\
+	preempt_disable_notrace();					\
+	__retval = (typeof(pcp))op(raw_cpu_ptr(&(pcp)), ##args);	\
+	preempt_enable_notrace();					\
+	__retval;							\
+})
+
+#define this_cpu_read_1(pcp)		_pcp_protect_return(__percpu_read_8, pcp)
+#define this_cpu_read_2(pcp)		_pcp_protect_return(__percpu_read_16, pcp)
+#define this_cpu_read_4(pcp)		_pcp_protect_return(__percpu_read_32, pcp)
+
+#ifdef CONFIG_64BIT
+#define this_cpu_read_8(pcp)		_pcp_protect_return(__percpu_read_64, pcp)
+#endif
+
+#define this_cpu_write_1(pcp, val)	_pcp_protect(__percpu_write_8, pcp, (unsigned long)val)
+#define this_cpu_write_2(pcp, val)	_pcp_protect(__percpu_write_16, pcp, (unsigned long)val)
+#define this_cpu_write_4(pcp, val)	_pcp_protect(__percpu_write_32, pcp, (unsigned long)val)
+
+#ifdef CONFIG_64BIT
+#define this_cpu_write_8(pcp, val)	_pcp_protect(__percpu_write_64, pcp, (unsigned long)val)
+#endif
+
+#define this_cpu_add_1(pcp, val)	_pcp_protect(__percpu_add_amo_case_8, pcp, val)
+#define this_cpu_add_2(pcp, val)	_pcp_protect(__percpu_add_amo_case_16, pcp, val)
+#define this_cpu_add_4(pcp, val)	_pcp_protect(__percpu_add_amo_case_32, pcp, val)
+
+#ifdef CONFIG_64BIT
+#define this_cpu_add_8(pcp, val)	_pcp_protect(__percpu_add_amo_case_64, pcp, val)
+#endif
+
+#define this_cpu_add_return_1(pcp, val)		\
+_pcp_protect_return(__percpu_add_return_amo_case_8, pcp, val)
+
+#define this_cpu_add_return_2(pcp, val)		\
+_pcp_protect_return(__percpu_add_return_amo_case_16, pcp, val)
+
+#define this_cpu_add_return_4(pcp, val)		\
+_pcp_protect_return(__percpu_add_return_amo_case_32, pcp, val)
+
+#ifdef CONFIG_64BIT
+#define this_cpu_add_return_8(pcp, val)		\
+_pcp_protect_return(__percpu_add_return_amo_case_64, pcp, val)
+#endif
+
+#define this_cpu_and_1(pcp, val)	_pcp_protect(__percpu_andnot_amo_case_8, pcp, ~(val))
+#define this_cpu_and_2(pcp, val)	_pcp_protect(__percpu_andnot_amo_case_16, pcp, ~(val))
+#define this_cpu_and_4(pcp, val)	_pcp_protect(__percpu_andnot_amo_case_32, pcp, val)
+
+#ifdef CONFIG_64BIT
+#define this_cpu_and_8(pcp, val)	_pcp_protect(__percpu_andnot_amo_case_64, pcp, val)
+#endif
+
+#define this_cpu_or_1(pcp, val)	_pcp_protect(__percpu_or_amo_case_8, pcp, val)
+#define this_cpu_or_2(pcp, val)	_pcp_protect(__percpu_or_amo_case_16, pcp, val)
+#define this_cpu_or_4(pcp, val)	_pcp_protect(__percpu_or_amo_case_32, pcp, val)
+
+#ifdef CONFIG_64BIT
+#define this_cpu_or_8(pcp, val)	_pcp_protect(__percpu_or_amo_case_64, pcp, val)
+#endif
+
+#define this_cpu_xchg_1(pcp, val)	_pcp_protect_return(xchg_relaxed, pcp, val)
+#define this_cpu_xchg_2(pcp, val)	_pcp_protect_return(xchg_relaxed, pcp, val)
+#define this_cpu_xchg_4(pcp, val)	_pcp_protect_return(xchg_relaxed, pcp, val)
+
+#ifdef CONFIG_64BIT
+#define this_cpu_xchg_8(pcp, val)	_pcp_protect_return(xchg_relaxed, pcp, val)
+#endif
+
+#define this_cpu_cmpxchg_1(pcp, o, n)	_pcp_protect_return(cmpxchg_relaxed, pcp, o, n)
+#define this_cpu_cmpxchg_2(pcp, o, n)	_pcp_protect_return(cmpxchg_relaxed, pcp, o, n)
+#define this_cpu_cmpxchg_4(pcp, o, n)	_pcp_protect_return(cmpxchg_relaxed, pcp, o, n)
+
+#ifdef CONFIG_64BIT
+#define this_cpu_cmpxchg_8(pcp, o, n)	_pcp_protect_return(cmpxchg_relaxed, pcp, o, n)
+
+#define this_cpu_cmpxchg64(pcp, o, n)	this_cpu_cmpxchg_8(pcp, o, n)
+#endif
+
+#ifdef system_has_cmpxchg128
+#define this_cpu_cmpxchg128(pcp, o, n)					\
+({									\
+	u128 ret__;							\
+	typeof(pcp) *ptr__;						\
+									\
+	preempt_disable_notrace();					\
+	ptr__ = raw_cpu_ptr(&(pcp));					\
+	if (system_has_cmpxchg128())					\
+		ret__ = cmpxchg128_local(ptr__, (o), (n));		\
+	else								\
+		ret__ = this_cpu_generic_cmpxchg(pcp, (o), (n));	\
+	preempt_enable_notrace();					\
+	ret__;								\
+})
+#endif
+
+#include <asm-generic/percpu.h>
+
+#endif /* __ASM_PERCPU_H */
-- 
2.39.5


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v5 3/3] riscv: store percpu offset into thread_info
  2026-07-01  4:20 [PATCH v5 0/3] riscv: improve percpu helpers and PIO mapping Yunhui Cui
  2026-07-01  4:20 ` [PATCH v5 1/3] riscv: io: avoid null-pointer arithmetic in PIO helpers Yunhui Cui
  2026-07-01  4:20 ` [PATCH v5 2/3] riscv: introduce percpu.h into include/asm Yunhui Cui
@ 2026-07-01  4:20 ` Yunhui Cui
  2 siblings, 0 replies; 8+ messages in thread
From: Yunhui Cui @ 2026-07-01  4:20 UTC (permalink / raw)
  To: pjw, palmer, aou, alex, dennis, tj, cl, ast, daniel, andrii,
	martin.lau, eddyz87, memxor, song, yonghong.song, jolsa, bjorn,
	pulehui, puranjay, thuth, ajones, ben.dooks, rkrcmar, cuiyunhui,
	samuel.holland, zong.li, conor.dooley, tglx, debug, seanwascoding,
	andybnac, menglong8.dong, cyrilbur, wangruikang, atishp, apatel,
	linux-riscv, linux-kernel, linux-mm, bpf, arnd, nathan,
	nick.desaulniers+lkml, morbo, justinstitt, qingfang.deng,
	linux-arch, llvm

RISC-V percpu addressing currently derives the base offset from the CPU
number and __per_cpu_offset[]. Cache the current CPU's percpu offset in
thread_info so percpu accesses can load it directly.

Keep the cached value up to date for the boot CPU, context switches and
secondary CPU bringup. Initialize secondary idle tasks before they start
running so early percpu accesses use the secondary CPU offset rather than
inheriting the boot CPU value.

Link: https://lists.riscv.org/g/tech-privileged/topic/risc_v_tech_arch_review/113437553?page=2
Signed-off-by: Yunhui Cui <cuiyunhui@bytedance.com>
---
 arch/riscv/include/asm/asm.h         | 6 +-----
 arch/riscv/include/asm/percpu.h      | 4 ++++
 arch/riscv/include/asm/switch_to.h   | 8 ++++++++
 arch/riscv/include/asm/thread_info.h | 3 ++-
 arch/riscv/kernel/asm-offsets.c      | 1 +
 arch/riscv/kernel/smpboot.c          | 8 ++++++++
 arch/riscv/net/bpf_jit_comp64.c      | 9 +--------
 7 files changed, 25 insertions(+), 14 deletions(-)

diff --git a/arch/riscv/include/asm/asm.h b/arch/riscv/include/asm/asm.h
index e9e8ba83e632f..137a49488325e 100644
--- a/arch/riscv/include/asm/asm.h
+++ b/arch/riscv/include/asm/asm.h
@@ -91,11 +91,7 @@
 
 #ifdef CONFIG_SMP
 .macro asm_per_cpu dst sym tmp
-	lw    \tmp, TASK_TI_CPU_NUM(tp)
-	slli  \tmp, \tmp, RISCV_LGPTR
-	la    \dst, __per_cpu_offset
-	add   \dst, \dst, \tmp
-	REG_L \tmp, 0(\dst)
+	REG_L \tmp, TASK_TI_PCPU_OFFSET(tp)
 	la    \dst, \sym
 	add   \dst, \dst, \tmp
 .endm
diff --git a/arch/riscv/include/asm/percpu.h b/arch/riscv/include/asm/percpu.h
index 46f1901d3bb88..dad975b55d0c3 100644
--- a/arch/riscv/include/asm/percpu.h
+++ b/arch/riscv/include/asm/percpu.h
@@ -8,7 +8,9 @@
 
 #include <asm/alternative-macros.h>
 #include <asm/cpufeature-macros.h>
+#include <asm/current.h>
 #include <asm/hwcap.h>
+#include <asm/thread_info.h>
 
 #define PERCPU_RW_OPS(sz)						\
 static inline unsigned long __percpu_read_##sz(void *ptr)		\
@@ -278,6 +280,8 @@ _pcp_protect_return(__percpu_add_return_amo_case_64, pcp, val)
 })
 #endif
 
+#define __my_cpu_offset (((struct thread_info *)current)->pcpu_offset)
+
 #include <asm-generic/percpu.h>
 
 #endif /* __ASM_PERCPU_H */
diff --git a/arch/riscv/include/asm/switch_to.h b/arch/riscv/include/asm/switch_to.h
index 0e71eb82f920c..733b6cd306e40 100644
--- a/arch/riscv/include/asm/switch_to.h
+++ b/arch/riscv/include/asm/switch_to.h
@@ -88,6 +88,13 @@ static inline void __switch_to_envcfg(struct task_struct *next)
 			:: "r" (next->thread.envcfg) : "memory");
 }
 
+static inline void __switch_to_pcpu_offset(struct task_struct *next)
+{
+#ifdef CONFIG_SMP
+	next->thread_info.pcpu_offset = __my_cpu_offset;
+#endif
+}
+
 extern struct task_struct *__switch_to(struct task_struct *,
 				       struct task_struct *);
 
@@ -122,6 +129,7 @@ do {							\
 	if (switch_to_should_flush_icache(__next))	\
 		local_flush_icache_all();		\
 	__switch_to_envcfg(__next);			\
+	__switch_to_pcpu_offset(__next);		\
 	((last) = __switch_to(__prev, __next));		\
 } while (0)
 
diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h
index 55019fdfa9eca..f10ba62b61016 100644
--- a/arch/riscv/include/asm/thread_info.h
+++ b/arch/riscv/include/asm/thread_info.h
@@ -53,6 +53,7 @@
 struct thread_info {
 	unsigned long		flags;		/* low level flags */
 	int                     preempt_count;  /* 0=>preemptible, <0=>BUG */
+	int			cpu;
 	/*
 	 * These stack pointers are overwritten on every system call or
 	 * exception.  SP is also saved to the stack it can be recovered when
@@ -60,8 +61,8 @@ struct thread_info {
 	 */
 	long			kernel_sp;	/* Kernel stack pointer */
 	long			user_sp;	/* User stack pointer */
-	int			cpu;
 	unsigned long		syscall_work;	/* SYSCALL_WORK_ flags */
+	unsigned long		pcpu_offset;
 #ifdef CONFIG_SHADOW_CALL_STACK
 	void			*scs_base;
 	void			*scs_sp;
diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c
index a75f0cfea1e9f..20d46c28fdde9 100644
--- a/arch/riscv/kernel/asm-offsets.c
+++ b/arch/riscv/kernel/asm-offsets.c
@@ -38,6 +38,7 @@ void asm_offsets(void)
 	OFFSET(TASK_THREAD_SUM, task_struct, thread.sum);
 
 	OFFSET(TASK_TI_CPU, task_struct, thread_info.cpu);
+	OFFSET(TASK_TI_PCPU_OFFSET, task_struct, thread_info.pcpu_offset);
 	OFFSET(TASK_TI_PREEMPT_COUNT, task_struct, thread_info.preempt_count);
 	OFFSET(TASK_TI_KERNEL_SP, task_struct, thread_info.kernel_sp);
 	OFFSET(TASK_TI_USER_SP, task_struct, thread_info.user_sp);
diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c
index f6ef57930b50a..7876854d16279 100644
--- a/arch/riscv/kernel/smpboot.c
+++ b/arch/riscv/kernel/smpboot.c
@@ -191,6 +191,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
 {
 	int ret;
 	tidle->thread_info.cpu = cpu;
+	tidle->thread_info.pcpu_offset = per_cpu_offset(cpu);
 
 	ret = start_secondary_cpu(cpu, tidle);
 	if (!ret) {
@@ -208,6 +209,11 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
 }
 #endif
 
+void __init smp_prepare_boot_cpu(void)
+{
+	__my_cpu_offset = per_cpu_offset(smp_processor_id());
+}
+
 void __init smp_cpus_done(unsigned int max_cpus)
 {
 }
@@ -233,6 +239,8 @@ asmlinkage __visible void smp_callin(void)
 	mmgrab(mm);
 	current->active_mm = mm;
 
+	__my_cpu_offset = per_cpu_offset(smp_processor_id());
+
 #ifdef CONFIG_HOTPLUG_PARALLEL
 	cpuhp_ap_sync_alive();
 #endif
diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c
index c03c1de16b79a..eab93d5258e9e 100644
--- a/arch/riscv/net/bpf_jit_comp64.c
+++ b/arch/riscv/net/bpf_jit_comp64.c
@@ -1395,15 +1395,8 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
 			if (rd != rs)
 				emit_mv(rd, rs, ctx);
 #ifdef CONFIG_SMP
-			/* Load current CPU number in T1 */
-			emit_lw(RV_REG_T1, offsetof(struct thread_info, cpu),
+			emit_ld(RV_REG_T1, offsetof(struct thread_info, pcpu_offset),
 				RV_REG_TP, ctx);
-			/* Load address of __per_cpu_offset array in T2 */
-			emit_addr(RV_REG_T2, (u64)&__per_cpu_offset, extra_pass, ctx);
-			/* Get address of __per_cpu_offset[cpu] in T1 */
-			emit_sh3add(RV_REG_T1, RV_REG_T1, RV_REG_T2, ctx);
-			/* Load __per_cpu_offset[cpu] in T1 */
-			emit_ld(RV_REG_T1, 0, RV_REG_T1, ctx);
 			/* Add the offset to Rd */
 			emit_add(rd, rd, RV_REG_T1, ctx);
 #endif
-- 
2.39.5


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH v5 2/3] riscv: introduce percpu.h into include/asm
  2026-07-01  4:20 ` [PATCH v5 2/3] riscv: introduce percpu.h into include/asm Yunhui Cui
@ 2026-07-01  5:05   ` bot+bpf-ci
  2026-07-01 11:34     ` [External] " yunhui cui
  0 siblings, 1 reply; 8+ messages in thread
From: bot+bpf-ci @ 2026-07-01  5:05 UTC (permalink / raw)
  To: cuiyunhui, pjw, palmer, aou, alex, dennis, tj, cl, ast, daniel,
	andrii, martin.lau, eddyz87, memxor, song, yonghong.song, jolsa,
	bjorn, pulehui, puranjay, thuth, ajones, ben.dooks, rkrcmar,
	cuiyunhui, samuel.holland, zong.li, conor.dooley, tglx, debug,
	seanwascoding, andybnac, menglong8.dong, cyrilbur, wangruikang,
	atishp, apatel, linux-riscv, linux-kernel, linux-mm, bpf, arnd,
	nathan, nick.desaulniers+lkml, morbo, justinstitt, qingfang.deng,
	linux-arch
  Cc: ast, andrii, daniel, martin.lau, eddyz87, yonghong.song, clm,
	ihor.solodrai

[-- Attachment #1: Type: text/plain, Size: 4577 bytes --]

> diff --git a/arch/riscv/include/asm/percpu.h b/arch/riscv/include/asm/percpu.h
> new file mode 100644
> index 000000000000..46f1901d3bb8
> --- /dev/null
> +++ b/arch/riscv/include/asm/percpu.h
> @@ -0,0 +1,283 @@

[ ... ]

> +#define PERCPU_8_16_OP(name, amo_insn, sz, sfx, val_type, new_val_expr, asm_op)			\
> +static inline void __percpu_##name##_amo_case_##sz(void *ptr, unsigned long val)		\
> +{												\
> +	if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) &&						\
> +		riscv_has_extension_unlikely(RISCV_ISA_EXT_ZABHA)) {				\
> +		asm volatile ("amo" #amo_insn #sfx " zero, %[val], %[ptr]"			\
> +			: [ptr] "+A"(*(val_type *)ptr)						\
> +			: [val] "r"((val_type)((new_val_expr) & PERCPU_8_16_GET_MASK(sz)))	\
> +			: "memory");								\
> +	} else {										\
> +		u32 *ptr32 = PERCPU_8_16_GET_PTR32(ptr);					\
> +		const unsigned long shift = PERCPU_8_16_GET_SHIFT(ptr);				\
> +		const u32 mask = PERCPU_8_16_GET_MASK(sz) << shift;				\
> +		const val_type val_trunc = (val_type)((new_val_expr)				\
> +					   & PERCPU_8_16_GET_MASK(sz));				\
> +		u32 retx, rc;									\
> +		val_type new_val_type;								\
> +												\
> +		asm volatile (									\
> +			"0: lr.w %0, %2\n"							\
> +			"and %3, %0, %4\n"							\
> +			"srl %3, %3, %5\n"							\
> +			#asm_op " %3, %3, %6\n"							\
> +			"and %3, %3, %8\n"						\
> +			"sll %3, %3, %5\n"							\
> +			"and %1, %0, %7\n"							\
> +			"or %1, %1, %3\n"							\
> +			"sc.w %1, %1, %2\n"							\
> +			"bnez %1, 0b\n"								\
> +			: "=&r"(retx), "=&r"(rc), "+A"(*ptr32), "=&r"(new_val_type)		\
> +			: "r"(mask), "r"(shift), "r"(val_trunc), "r"(~mask),			\
> +			  "r"(PERCPU_8_16_GET_MASK(sz))						\
> +			: "memory");								\
> +		}										\
> +}

[ ... ]

> +#define PERCPU_8_16_RET_OP(name, amo_insn, sz, sfx, val_type, new_val_expr)			\
> +static inline val_type __percpu_##name##_return_amo_case_##sz(void *ptr, unsigned long val)	\
> +{												\
> +	if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) &&						\
> +		riscv_has_extension_unlikely(RISCV_ISA_EXT_ZABHA)) {				\
> +		register val_type ret;								\
> +		asm volatile ("amo" #amo_insn #sfx " %[ret], %[val], %[ptr]"			\
> +			: [ptr] "+A"(*(val_type *)ptr), [ret] "=r"(ret)				\
> +			: [val] "r"((val_type)((new_val_expr) & PERCPU_8_16_GET_MASK(sz)))	\
> +			: "memory");								\
> +		return ret + (val_type)((new_val_expr) & PERCPU_8_16_GET_MASK(sz));		\
> +	} else {										\
> +		u32 *ptr32 = PERCPU_8_16_GET_PTR32(ptr);					\
> +		const unsigned long shift = PERCPU_8_16_GET_SHIFT(ptr);				\
> +		const u32 mask = (PERCPU_8_16_GET_MASK(sz) << shift);				\
> +		const u32 inv_mask = ~mask;							\
> +		const val_type val_trunc = (val_type)((new_val_expr)				\
> +					   & PERCPU_8_16_GET_MASK(sz));				\
> +		u32 old, new, tmp;								\

This isn't a bug, but the two LR/SC fallback loops name the same roles
differently.  The non-return loop uses:

	u32 retx, rc;
	val_type new_val_type;

while the return loop above uses:

	u32 old, new, tmp;

The loaded word, the scratch/merge register and the computed field play the
same role in both loops.  Could they use one consistent set of names (e.g.
old/new/tmp) so the two loops read as clearly parallel?  As a smaller point,
new_val_type reads like a type name rather than a value, and new is a
keyword in C++ (though not in C).

> +												\
> +		asm volatile (									\
> +			"0: lr.w %0, %3\n"							\
> +			"and %1, %0, %4\n"							\
> +			"srl %1, %1, %5\n"							\
> +			"add %1, %1, %6\n"							\
> +			"and %1, %1, %7\n"							\
> +			"sll %1, %1, %5\n"							\
> +			"and %2, %0, %8\n"							\
> +			"or %2, %2, %1\n"							\
> +			"sc.w %2, %2, %3\n"							\
> +			"bnez %2, 0b\n"								\

This isn't a bug, but the read-modify-write LR/SC sequence here is nearly
identical to the one in PERCPU_8_16_OP, differing only in operand numbering.
The non-return loop parameterises the operation via #asm_op, but this loop
hardcodes add:

	"add %1, %1, %6\n"

Was the intent to keep the return path add-only, or could the two loops share
one templated sequence?

> +			: "=&r"(old), "=&r"(tmp), "=&r"(new), "+A"(*ptr32)			\
> +			: "r"(mask), "r"(shift), "r"(val_trunc), "r"(PERCPU_8_16_GET_MASK(sz)), \
> +			"r"(inv_mask)								\
> +			: "memory");								\
> +		return (val_type)(tmp >> shift);						\
> +	}											\
> +}

[ ... ]


---
An AI reviewed your patch. Please fix the bug, or reply by email explaining why it's not a bug.
See: https://github.com/kernel-patches/vmtest/blob/master/ci/claude/README.md

CI run summary: https://github.com/kernel-patches/bpf/actions/runs/28494034360

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v5 1/3] riscv: io: avoid null-pointer arithmetic in PIO helpers
  2026-07-01  4:20 ` [PATCH v5 1/3] riscv: io: avoid null-pointer arithmetic in PIO helpers Yunhui Cui
@ 2026-07-01  6:24   ` Arnd Bergmann
  2026-07-01 11:57     ` [External] " yunhui cui
  0 siblings, 1 reply; 8+ messages in thread
From: Arnd Bergmann @ 2026-07-01  6:24 UTC (permalink / raw)
  To: Yunhui Cui, Paul Walmsley, Palmer Dabbelt, Albert Ou,
	Alexandre Ghiti, Dennis Zhou, Tejun Heo,
	Christoph Lameter (Ampere), Alexei Starovoitov, Daniel Borkmann,
	Andrii Nakryiko, Martin KaFai Lau, Eduard Zingerman,
	Kumar Kartikeya Dwivedi, Song Liu, Yonghong Song, Jiri Olsa,
	Björn Töpel, pulehui, puranjay, Thomas Huth,
	Andrew Jones, Ben Dooks, Radim Krčmář,
	Samuel Holland, Zong Li, Conor.Dooley, Thomas Gleixner,
	Deepak Gupta, seanwascoding, Andy Chiu, menglong8.dong, cyrilbur,
	Vivian Wang, Atish Patra, Anup Patel, linux-riscv, linux-kernel,
	linux-mm, bpf, Nathan Chancellor, Nick Desaulniers, Bill Wendling,
	Justin Stitt, qingfang.deng, Linux-Arch, llvm

On Wed, Jul 1, 2026, at 06:20, Yunhui Cui wrote:
> @@ -56,6 +56,8 @@
>  #define __io_pbw()	RISCV_FENCE(iow, o)
>  #define __io_paw()	RISCV_FENCE(o, io)
> 
> +#define PCI_IO_ADDR(addr)	((void __iomem *)((unsigned long)PCI_IOBASE + (addr)))

This does not seem to have any purpose, is this just left over
from the previous version?

> 
> +#ifdef CONFIG_HAS_IOPORT
>  __io_reads_ins(ins,  u8, b, __io_pbr(), __io_par(addr))
>  __io_reads_ins(ins, u16, w, __io_pbr(), __io_par(addr))
>  __io_reads_ins(ins, u32, l, __io_pbr(), __io_par(addr))
> -#define insb(addr, buffer, count) __insb(PCI_IOBASE + (addr), buffer, count)
> -#define insw(addr, buffer, count) __insw(PCI_IOBASE + (addr), buffer, count)
> -#define insl(addr, buffer, count) __insl(PCI_IOBASE + (addr), buffer, count)
> +#define insb(addr, buffer, count) __insb(PCI_IO_ADDR(addr), buffer, count)
> +#define insw(addr, buffer, count) __insw(PCI_IO_ADDR(addr), buffer, count)
> +#define insl(addr, buffer, count) __insl(PCI_IO_ADDR(addr), buffer, count)
> +#endif
 
As far as I can tell, just adding the #ifdef here should be sufficient.

      Arnd

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [External] Re: [PATCH v5 2/3] riscv: introduce percpu.h into include/asm
  2026-07-01  5:05   ` bot+bpf-ci
@ 2026-07-01 11:34     ` yunhui cui
  0 siblings, 0 replies; 8+ messages in thread
From: yunhui cui @ 2026-07-01 11:34 UTC (permalink / raw)
  To: bot+bpf-ci
  Cc: pjw, palmer, aou, alex, dennis, tj, cl, ast, daniel, andrii,
	martin.lau, eddyz87, memxor, song, yonghong.song, jolsa, bjorn,
	pulehui, puranjay, thuth, ajones, ben.dooks, rkrcmar,
	samuel.holland, zong.li, conor.dooley, tglx, debug, seanwascoding,
	andybnac, menglong8.dong, cyrilbur, wangruikang, atishp, apatel,
	linux-riscv, linux-kernel, linux-mm, bpf, arnd, nathan,
	nick.desaulniers+lkml, morbo, justinstitt, qingfang.deng, llvm,
	martin.lau, clm, ihor.solodrai

Hi bot+bpf-ci,

On Wed, Jul 1, 2026 at 1:06 PM <bot+bpf-ci@kernel.org> wrote:
>
> > diff --git a/arch/riscv/include/asm/percpu.h b/arch/riscv/include/asm/percpu.h
> > new file mode 100644
> > index 000000000000..46f1901d3bb8
> > --- /dev/null
> > +++ b/arch/riscv/include/asm/percpu.h
> > @@ -0,0 +1,283 @@
>
> [ ... ]
>
> > +#define PERCPU_8_16_OP(name, amo_insn, sz, sfx, val_type, new_val_expr, asm_op)                      \
> > +static inline void __percpu_##name##_amo_case_##sz(void *ptr, unsigned long val)             \
> > +{                                                                                            \
> > +     if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) &&                                               \
> > +             riscv_has_extension_unlikely(RISCV_ISA_EXT_ZABHA)) {                            \
> > +             asm volatile ("amo" #amo_insn #sfx " zero, %[val], %[ptr]"                      \
> > +                     : [ptr] "+A"(*(val_type *)ptr)                                          \
> > +                     : [val] "r"((val_type)((new_val_expr) & PERCPU_8_16_GET_MASK(sz)))      \
> > +                     : "memory");                                                            \
> > +     } else {                                                                                \
> > +             u32 *ptr32 = PERCPU_8_16_GET_PTR32(ptr);                                        \
> > +             const unsigned long shift = PERCPU_8_16_GET_SHIFT(ptr);                         \
> > +             const u32 mask = PERCPU_8_16_GET_MASK(sz) << shift;                             \
> > +             const val_type val_trunc = (val_type)((new_val_expr)                            \
> > +                                        & PERCPU_8_16_GET_MASK(sz));                         \
> > +             u32 retx, rc;                                                                   \
> > +             val_type new_val_type;                                                          \
> > +                                                                                             \

This does not impact functional correctness. I will rename LR/SC
fallback local variables to use consistent naming, and keep the return
helper separate since it only handles add operations for now.

> > +             asm volatile (                                                                  \
> > +                     "0: lr.w %0, %2\n"                                                      \
> > +                     "and %3, %0, %4\n"                                                      \
> > +                     "srl %3, %3, %5\n"                                                      \
> > +                     #asm_op " %3, %3, %6\n"                                                 \
> > +                     "and %3, %3, %8\n"                                              \
> > +                     "sll %3, %3, %5\n"                                                      \
> > +                     "and %1, %0, %7\n"                                                      \
> > +                     "or %1, %1, %3\n"                                                       \
> > +                     "sc.w %1, %1, %2\n"                                                     \
> > +                     "bnez %1, 0b\n"                                                         \
> > +                     : "=&r"(retx), "=&r"(rc), "+A"(*ptr32), "=&r"(new_val_type)             \
> > +                     : "r"(mask), "r"(shift), "r"(val_trunc), "r"(~mask),                    \
> > +                       "r"(PERCPU_8_16_GET_MASK(sz))                                         \
> > +                     : "memory");                                                            \
> > +             }                                                                               \
> > +}
>
> [ ... ]
>
> > +#define PERCPU_8_16_RET_OP(name, amo_insn, sz, sfx, val_type, new_val_expr)                  \
> > +static inline val_type __percpu_##name##_return_amo_case_##sz(void *ptr, unsigned long val)  \
> > +{                                                                                            \
> > +     if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) &&                                               \
> > +             riscv_has_extension_unlikely(RISCV_ISA_EXT_ZABHA)) {                            \
> > +             register val_type ret;                                                          \
> > +             asm volatile ("amo" #amo_insn #sfx " %[ret], %[val], %[ptr]"                    \
> > +                     : [ptr] "+A"(*(val_type *)ptr), [ret] "=r"(ret)                         \
> > +                     : [val] "r"((val_type)((new_val_expr) & PERCPU_8_16_GET_MASK(sz)))      \
> > +                     : "memory");                                                            \
> > +             return ret + (val_type)((new_val_expr) & PERCPU_8_16_GET_MASK(sz));             \
> > +     } else {                                                                                \
> > +             u32 *ptr32 = PERCPU_8_16_GET_PTR32(ptr);                                        \
> > +             const unsigned long shift = PERCPU_8_16_GET_SHIFT(ptr);                         \
> > +             const u32 mask = (PERCPU_8_16_GET_MASK(sz) << shift);                           \
> > +             const u32 inv_mask = ~mask;                                                     \
> > +             const val_type val_trunc = (val_type)((new_val_expr)                            \
> > +                                        & PERCPU_8_16_GET_MASK(sz));                         \
> > +             u32 old, new, tmp;                                                              \
>
> This isn't a bug, but the two LR/SC fallback loops name the same roles
> differently.  The non-return loop uses:
>
>         u32 retx, rc;
>         val_type new_val_type;
>
> while the return loop above uses:
>
>         u32 old, new, tmp;
>
> The loaded word, the scratch/merge register and the computed field play the
> same role in both loops.  Could they use one consistent set of names (e.g.
> old/new/tmp) so the two loops read as clearly parallel?  As a smaller point,
> new_val_type reads like a type name rather than a value, and new shadows a
> common keyword.
>
> > +                                                                                             \
> > +             asm volatile (                                                                  \
> > +                     "0: lr.w %0, %3\n"                                                      \
> > +                     "and %1, %0, %4\n"                                                      \
> > +                     "srl %1, %1, %5\n"                                                      \
> > +                     "add %1, %1, %6\n"                                                      \
> > +                     "and %1, %1, %7\n"                                                      \
> > +                     "sll %1, %1, %5\n"                                                      \
> > +                     "and %2, %0, %8\n"                                                      \
> > +                     "or %2, %2, %1\n"                                                       \
> > +                     "sc.w %2, %2, %3\n"                                                     \
> > +                     "bnez %2, 0b\n"                                                         \
>
> This isn't a bug, but the read-modify-write LR/SC sequence here is nearly
> identical to the one in PERCPU_8_16_OP, differing only in operand numbering.
> The non-return loop parameterises the operation via #asm_op, but this loop
> hardcodes add:
>
>         "add %1, %1, %6\n"
>
> Was the intent to keep the return path add-only, or could the two loops share
> one templated sequence?
>
> > +                     : "=&r"(old), "=&r"(tmp), "=&r"(new), "+A"(*ptr32)                      \
> > +                     : "r"(mask), "r"(shift), "r"(val_trunc), "r"(PERCPU_8_16_GET_MASK(sz)), \
> > +                     "r"(inv_mask)                                                           \
> > +                     : "memory");                                                            \
> > +             return (val_type)(tmp >> shift);                                                \
> > +     }                                                                                       \
> > +}
>
> [ ... ]
>
>
> ---
> AI reviewed your patch. Please fix the bug or email reply why it's not a bug.
> See: https://github.com/kernel-patches/vmtest/blob/master/ci/claude/README.md
>
> CI run summary: https://github.com/kernel-patches/bpf/actions/runs/28494034360

Thanks,
Yunhui

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [External] Re: [PATCH v5 1/3] riscv: io: avoid null-pointer arithmetic in PIO helpers
  2026-07-01  6:24   ` Arnd Bergmann
@ 2026-07-01 11:57     ` yunhui cui
  0 siblings, 0 replies; 8+ messages in thread
From: yunhui cui @ 2026-07-01 11:57 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: Paul Walmsley, Palmer Dabbelt, Albert Ou, Alexandre Ghiti,
	Dennis Zhou, Tejun Heo, Christoph Lameter (Ampere),
	Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
	Martin KaFai Lau, Eduard Zingerman, Kumar Kartikeya Dwivedi,
	Song Liu, Yonghong Song, Jiri Olsa, Björn Töpel,
	pulehui, puranjay, Thomas Huth, Andrew Jones, Ben Dooks,
	Radim Krčmář, Samuel Holland, Zong Li,
	Conor.Dooley, Thomas Gleixner, Deepak Gupta, seanwascoding,
	Andy Chiu, menglong8.dong, cyrilbur, Vivian Wang, Atish Patra,
	Anup Patel, linux-riscv, linux-kernel, linux-mm, bpf,
	Nathan Chancellor, Nick Desaulniers, Bill Wendling, Justin Stitt,
	qingfang.deng, Linux-Arch, llvm

Hi Arnd,

On Wed, Jul 1, 2026 at 2:25 PM Arnd Bergmann <arnd@arndb.de> wrote:
>
> On Wed, Jul 1, 2026, at 06:20, Yunhui Cui wrote:
> > @@ -56,6 +56,8 @@
> >  #define __io_pbw()   RISCV_FENCE(iow, o)
> >  #define __io_paw()   RISCV_FENCE(o, io)
> >
> > +#define PCI_IO_ADDR(addr)    ((void __iomem *)((unsigned long)PCI_IOBASE + (addr)))
>
> This does not seem to have any purpose, is this just left over
> from the previous version?
>
> >
> > +#ifdef CONFIG_HAS_IOPORT
> >  __io_reads_ins(ins,  u8, b, __io_pbr(), __io_par(addr))
> >  __io_reads_ins(ins, u16, w, __io_pbr(), __io_par(addr))
> >  __io_reads_ins(ins, u32, l, __io_pbr(), __io_par(addr))
> > -#define insb(addr, buffer, count) __insb(PCI_IOBASE + (addr), buffer, count)
> > -#define insw(addr, buffer, count) __insw(PCI_IOBASE + (addr), buffer, count)
> > -#define insl(addr, buffer, count) __insl(PCI_IOBASE + (addr), buffer, count)
> > +#define insb(addr, buffer, count) __insb(PCI_IO_ADDR(addr), buffer, count)
> > +#define insw(addr, buffer, count) __insw(PCI_IO_ADDR(addr), buffer, count)
> > +#define insl(addr, buffer, count) __insl(PCI_IO_ADDR(addr), buffer, count)
> > +#endif
>
> As far as I can tell, just adding the #ifdef here should be sufficient.

Okay.

>
>       Arnd

Thanks,
Yunhui

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2026-07-01 11:57 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-07-01  4:20 [PATCH v5 0/3] riscv: improve percpu helpers and PIO mapping Yunhui Cui
2026-07-01  4:20 ` [PATCH v5 1/3] riscv: io: avoid null-pointer arithmetic in PIO helpers Yunhui Cui
2026-07-01  6:24   ` Arnd Bergmann
2026-07-01 11:57     ` [External] " yunhui cui
2026-07-01  4:20 ` [PATCH v5 2/3] riscv: introduce percpu.h into include/asm Yunhui Cui
2026-07-01  5:05   ` bot+bpf-ci
2026-07-01 11:34     ` [External] " yunhui cui
2026-07-01  4:20 ` [PATCH v5 3/3] riscv: store percpu offset into thread_info Yunhui Cui

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.