On Thu, 20 Nov 2025 17:25:34 +0800 hev wrote: > On Thu, Nov 20, 2025 at 3:46 PM George Guo > wrote: > > > > From: George Guo > > > > Implement 128-bit atomic compare-and-exchange using LoongArch's > > LL.D/SC.Q instructions. > > > > At the same time, fix BPF scheduler test failures (scx_central, > > scx_qmap) caused by kmalloc_nolock_noprof returning NULL due to > > missing 128-bit atomics. The NULL returns led to -ENOMEM errors > > during scheduler initialization, causing test cases to fail. > > > > Verified by testing with the scx_qmap scheduler (located in > > tools/sched_ext/). The test was built with `make` and run as > > ./tools/sched_ext/build/bin/scx_qmap. > > > > Signed-off-by: George Guo > > --- > > arch/loongarch/include/asm/cmpxchg.h | 46 > > ++++++++++++++++++++++++++++++++++++ 1 file changed, 46 > > insertions(+) > > > > diff --git a/arch/loongarch/include/asm/cmpxchg.h > > b/arch/loongarch/include/asm/cmpxchg.h index > > 979fde61bba8a42cb4f019f13ded2a3119d4aaf4..5f8d418595cf62ec3153dd3825d80ac1fb31e883 > > 100644 --- a/arch/loongarch/include/asm/cmpxchg.h +++ > > b/arch/loongarch/include/asm/cmpxchg.h @@ -111,6 +111,43 @@ > > __arch_xchg(volatile void *ptr, unsigned long x, int size) __ret; > > \ }) > > > > +union __u128_halves { > > + u128 full; > > + struct { > > + u64 low; > > + u64 high; > > + }; > > +}; > > + > > +#define __cmpxchg128_asm(ld, st, ptr, old, new) > > \ +({ > > \ > > + union __u128_halves __old, __new, __ret; > > \ > > + volatile u64 *__ptr = (volatile u64 *)(ptr); > > \ > > + > > \ > > + __old.full = (old); > > \ > > + __new.full = (new); > > \ > > + > > \ > > + __asm__ __volatile__( > > \ > > + "1: " ld " %0, %4 # 128-bit cmpxchg low \n" > > \ > > + " " ld " %1, %5 # 128-bit cmpxchg high \n" > > \ > > + " bne %0, %z6, 2f \n" > > \ > > + " bne %1, %z7, 2f \n" > > \ > > + " move $t0, %z8 \n" > > \ > > + " move $t1, %z9 \n" > > \ > > + " " st " $t0, $t1, %2 \n" > > \ > > + " beqz $t0, 1b \n" > > \ > > + "2: \n" > > \ > > + __WEAK_LLSC_MB > > 
\ > > + : "=&r" (__ret.low), "=&r" (__ret.high), > > \ > > + "=ZB" (__ptr[0]), "=ZB" (__ptr[1]) > > \ > > + : "ZB" (__ptr[0]), "ZB" (__ptr[1]), > > \ > > Address operand constraints: > - ld.d: "m" > - ll.d: "ZC" > - sc.q: "r" > Thanks for your advice. Could you tell me how to find these constraints? > > + "Jr" (__old.low), "Jr" (__old.high), > > \ > > + "Jr" (__new.low), "Jr" (__new.high) > > \ > > + : "t0", "t1", "memory"); > > \ > > + > > \ > > + __ret.full; > > \ +}) > > + > > static inline unsigned int __cmpxchg_small(volatile void *ptr, > > unsigned int old, unsigned int new, unsigned int size) > > { > > @@ -198,6 +235,15 @@ __cmpxchg(volatile void *ptr, unsigned long > > old, unsigned long new, unsigned int __res; > > \ }) > > > > +/* cmpxchg128 */ > > +#define system_has_cmpxchg128() 1 > > + > > +#define arch_cmpxchg128(ptr, o, n) > > \ +({ > > \ > > + BUILD_BUG_ON(sizeof(*(ptr)) != 16); > > \ > > + __cmpxchg128_asm("ll.d", "sc.d", ptr, o, n); > > \ > > "sc.d" -> "sc.q" > > __cmpxchg128_asm doesn’t have multiple variants, so no need to > genericize it? > > > +}) > > + > > #ifdef CONFIG_64BIT > > #define arch_cmpxchg64_local(ptr, o, n) > > \ ({ > > \ > > > > -- > > 2.48.1 > > > > > > -- > Rui