All of lore.kernel.org
 help / color / mirror / Atom feed
From: Boqun Feng <boqun.feng@gmail.com>
To: Nicholas Piggin <npiggin@gmail.com>
Cc: linuxppc-dev@lists.ozlabs.org, Will Deacon <will@kernel.org>,
	Peter Zijlstra <peterz@infradead.org>,
	Michael Ellerman <mpe@ellerman.id.au>,
	Arnd Bergmann <arnd@arndb.de>,
	Christophe Leroy <christophe.leroy@c-s.fr>,
	Alexey Kardashevskiy <aik@ozlabs.ru>,
	linux-kernel@vger.kernel.org, linux-arch@vger.kernel.org
Subject: Re: [PATCH 3/3] powerpc: rewrite atomics to use ARCH_ATOMIC
Date: Fri, 13 Nov 2020 23:30:12 +0800	[thread overview]
Message-ID: <20201113153012.GD286534@boqun-archlinux> (raw)
In-Reply-To: <20201111110723.3148665-4-npiggin@gmail.com>

Hi Nicholas,

On Wed, Nov 11, 2020 at 09:07:23PM +1000, Nicholas Piggin wrote:
> All the cool kids are doing it.
> 
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
>  arch/powerpc/include/asm/atomic.h  | 681 ++++++++++-------------------
>  arch/powerpc/include/asm/cmpxchg.h |  62 +--
>  2 files changed, 248 insertions(+), 495 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
> index 8a55eb8cc97b..899aa2403ba7 100644
> --- a/arch/powerpc/include/asm/atomic.h
> +++ b/arch/powerpc/include/asm/atomic.h
> @@ -11,185 +11,285 @@
>  #include <asm/cmpxchg.h>
>  #include <asm/barrier.h>
>  
> +#define ARCH_ATOMIC
> +
> +#ifndef CONFIG_64BIT
> +#include <asm-generic/atomic64.h>
> +#endif
> +
>  /*
>   * Since *_return_relaxed and {cmp}xchg_relaxed are implemented with
>   * a "bne-" instruction at the end, so an isync is enough as a acquire barrier
>   * on the platform without lwsync.
>   */
>  #define __atomic_acquire_fence()					\
> -	__asm__ __volatile__(PPC_ACQUIRE_BARRIER "" : : : "memory")
> +	asm volatile(PPC_ACQUIRE_BARRIER "" : : : "memory")
>  
>  #define __atomic_release_fence()					\
> -	__asm__ __volatile__(PPC_RELEASE_BARRIER "" : : : "memory")
> +	asm volatile(PPC_RELEASE_BARRIER "" : : : "memory")
>  
> -static __inline__ int atomic_read(const atomic_t *v)
> -{
> -	int t;
> +#define __atomic_pre_full_fence		smp_mb
>  
> -	__asm__ __volatile__("lwz%U1%X1 %0,%1" : "=r"(t) : "m"(v->counter));
> +#define __atomic_post_full_fence	smp_mb
>  

Do you need to define __atomic_{pre,post}_full_fence for PPC? IIRC, they
are default smp_mb__{before,atomic}_atomic(), so are smp_mb() defautly
on PPC.

> -	return t;
> +#define arch_atomic_read(v)			__READ_ONCE((v)->counter)
> +#define arch_atomic_set(v, i)			__WRITE_ONCE(((v)->counter), (i))
> +#ifdef CONFIG_64BIT
> +#define ATOMIC64_INIT(i)			{ (i) }
> +#define arch_atomic64_read(v)			__READ_ONCE((v)->counter)
> +#define arch_atomic64_set(v, i)			__WRITE_ONCE(((v)->counter), (i))
> +#endif
> +
[...]
>  
> +#define ATOMIC_FETCH_OP_UNLESS_RELAXED(name, type, dtype, width, asm_op) \
> +static inline int arch_##name##_relaxed(type *v, dtype a, dtype u)	\

I don't think we have atomic_fetch_*_unless_relaxed() at atomic APIs,
ditto for:

	atomic_fetch_add_unless_relaxed()
	atomic_inc_not_zero_relaxed()
	atomic_dec_if_positive_relaxed()

, and we don't have the _acquire() and _release() variants for them
either, and if you don't define their fully-ordered version (e.g.
atomic_inc_not_zero()), atomic-arch-fallback.h will use read and cmpxchg
to implement them, and I think not what we want.

[...]
>  
>  #endif /* __KERNEL__ */
>  #endif /* _ASM_POWERPC_ATOMIC_H_ */
> diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h
> index cf091c4c22e5..181f7e8b3281 100644
> --- a/arch/powerpc/include/asm/cmpxchg.h
> +++ b/arch/powerpc/include/asm/cmpxchg.h
> @@ -192,7 +192,7 @@ __xchg_relaxed(void *ptr, unsigned long x, unsigned int size)
>       		(unsigned long)_x_, sizeof(*(ptr))); 			     \
>    })
>  
> -#define xchg_relaxed(ptr, x)						\
> +#define arch_xchg_relaxed(ptr, x)					\
>  ({									\
>  	__typeof__(*(ptr)) _x_ = (x);					\
>  	(__typeof__(*(ptr))) __xchg_relaxed((ptr),			\
> @@ -448,35 +448,7 @@ __cmpxchg_relaxed(void *ptr, unsigned long old, unsigned long new,
>  	return old;
>  }
>  
> -static __always_inline unsigned long
> -__cmpxchg_acquire(void *ptr, unsigned long old, unsigned long new,
> -		  unsigned int size)
> -{
> -	switch (size) {
> -	case 1:
> -		return __cmpxchg_u8_acquire(ptr, old, new);
> -	case 2:
> -		return __cmpxchg_u16_acquire(ptr, old, new);
> -	case 4:
> -		return __cmpxchg_u32_acquire(ptr, old, new);
> -#ifdef CONFIG_PPC64
> -	case 8:
> -		return __cmpxchg_u64_acquire(ptr, old, new);
> -#endif
> -	}
> -	BUILD_BUG_ON_MSG(1, "Unsupported size for __cmpxchg_acquire");
> -	return old;
> -}
> -#define cmpxchg(ptr, o, n)						 \
> -  ({									 \
> -     __typeof__(*(ptr)) _o_ = (o);					 \
> -     __typeof__(*(ptr)) _n_ = (n);					 \
> -     (__typeof__(*(ptr))) __cmpxchg((ptr), (unsigned long)_o_,		 \
> -				    (unsigned long)_n_, sizeof(*(ptr))); \
> -  })
> -
> -

If you remove {atomic_}_cmpxchg_{,_acquire}() and use the version
provided by atomic-arch-fallback.h, then a fail cmpxchg or
cmpxchg_acquire() will still result into a full barrier or a acquire
barrier after the RMW operation, the barrier is not necessary and
probably this is not what we want?

Regards,
Boqun

> -#define cmpxchg_local(ptr, o, n)					 \
> +#define arch_cmpxchg_local(ptr, o, n)					 \
>    ({									 \
>       __typeof__(*(ptr)) _o_ = (o);					 \
>       __typeof__(*(ptr)) _n_ = (n);					 \
> @@ -484,7 +456,7 @@ __cmpxchg_acquire(void *ptr, unsigned long old, unsigned long new,
>  				    (unsigned long)_n_, sizeof(*(ptr))); \
>    })
>  
> -#define cmpxchg_relaxed(ptr, o, n)					\
> +#define arch_cmpxchg_relaxed(ptr, o, n)					\
>  ({									\
>  	__typeof__(*(ptr)) _o_ = (o);					\
>  	__typeof__(*(ptr)) _n_ = (n);					\
> @@ -493,38 +465,20 @@ __cmpxchg_acquire(void *ptr, unsigned long old, unsigned long new,
>  			sizeof(*(ptr)));				\
>  })
>  
> -#define cmpxchg_acquire(ptr, o, n)					\
> -({									\
> -	__typeof__(*(ptr)) _o_ = (o);					\
> -	__typeof__(*(ptr)) _n_ = (n);					\
> -	(__typeof__(*(ptr))) __cmpxchg_acquire((ptr),			\
> -			(unsigned long)_o_, (unsigned long)_n_,		\
> -			sizeof(*(ptr)));				\
> -})
>  #ifdef CONFIG_PPC64
> -#define cmpxchg64(ptr, o, n)						\
> -  ({									\
> -	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
> -	cmpxchg((ptr), (o), (n));					\
> -  })
> -#define cmpxchg64_local(ptr, o, n)					\
> +#define arch_cmpxchg64_local(ptr, o, n)					\
>    ({									\
>  	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
> -	cmpxchg_local((ptr), (o), (n));					\
> +	arch_cmpxchg_local((ptr), (o), (n));				\
>    })
> -#define cmpxchg64_relaxed(ptr, o, n)					\
> -({									\
> -	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
> -	cmpxchg_relaxed((ptr), (o), (n));				\
> -})
> -#define cmpxchg64_acquire(ptr, o, n)					\
> +#define arch_cmpxchg64_relaxed(ptr, o, n)				\
>  ({									\
>  	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
> -	cmpxchg_acquire((ptr), (o), (n));				\
> +	arch_cmpxchg_relaxed((ptr), (o), (n));				\
>  })
>  #else
>  #include <asm-generic/cmpxchg-local.h>
> -#define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n))
> +#define arch_cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n))
>  #endif
>  
>  #endif /* __KERNEL__ */
> -- 
> 2.23.0
> 

WARNING: multiple messages have this Message-ID (diff)
From: Boqun Feng <boqun.feng@gmail.com>
To: Nicholas Piggin <npiggin@gmail.com>
Cc: Christophe Leroy <christophe.leroy@c-s.fr>,
	linux-arch@vger.kernel.org, Arnd Bergmann <arnd@arndb.de>,
	Peter Zijlstra <peterz@infradead.org>,
	linuxppc-dev@lists.ozlabs.org, linux-kernel@vger.kernel.org,
	Alexey Kardashevskiy <aik@ozlabs.ru>,
	Will Deacon <will@kernel.org>
Subject: Re: [PATCH 3/3] powerpc: rewrite atomics to use ARCH_ATOMIC
Date: Fri, 13 Nov 2020 23:30:12 +0800	[thread overview]
Message-ID: <20201113153012.GD286534@boqun-archlinux> (raw)
In-Reply-To: <20201111110723.3148665-4-npiggin@gmail.com>

Hi Nicholas,

On Wed, Nov 11, 2020 at 09:07:23PM +1000, Nicholas Piggin wrote:
> All the cool kids are doing it.
> 
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
>  arch/powerpc/include/asm/atomic.h  | 681 ++++++++++-------------------
>  arch/powerpc/include/asm/cmpxchg.h |  62 +--
>  2 files changed, 248 insertions(+), 495 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
> index 8a55eb8cc97b..899aa2403ba7 100644
> --- a/arch/powerpc/include/asm/atomic.h
> +++ b/arch/powerpc/include/asm/atomic.h
> @@ -11,185 +11,285 @@
>  #include <asm/cmpxchg.h>
>  #include <asm/barrier.h>
>  
> +#define ARCH_ATOMIC
> +
> +#ifndef CONFIG_64BIT
> +#include <asm-generic/atomic64.h>
> +#endif
> +
>  /*
>   * Since *_return_relaxed and {cmp}xchg_relaxed are implemented with
>   * a "bne-" instruction at the end, so an isync is enough as a acquire barrier
>   * on the platform without lwsync.
>   */
>  #define __atomic_acquire_fence()					\
> -	__asm__ __volatile__(PPC_ACQUIRE_BARRIER "" : : : "memory")
> +	asm volatile(PPC_ACQUIRE_BARRIER "" : : : "memory")
>  
>  #define __atomic_release_fence()					\
> -	__asm__ __volatile__(PPC_RELEASE_BARRIER "" : : : "memory")
> +	asm volatile(PPC_RELEASE_BARRIER "" : : : "memory")
>  
> -static __inline__ int atomic_read(const atomic_t *v)
> -{
> -	int t;
> +#define __atomic_pre_full_fence		smp_mb
>  
> -	__asm__ __volatile__("lwz%U1%X1 %0,%1" : "=r"(t) : "m"(v->counter));
> +#define __atomic_post_full_fence	smp_mb
>  

Do you need to define __atomic_{pre,post}_full_fence for PPC? IIRC, they
are default smp_mb__{before,atomic}_atomic(), so are smp_mb() defautly
on PPC.

> -	return t;
> +#define arch_atomic_read(v)			__READ_ONCE((v)->counter)
> +#define arch_atomic_set(v, i)			__WRITE_ONCE(((v)->counter), (i))
> +#ifdef CONFIG_64BIT
> +#define ATOMIC64_INIT(i)			{ (i) }
> +#define arch_atomic64_read(v)			__READ_ONCE((v)->counter)
> +#define arch_atomic64_set(v, i)			__WRITE_ONCE(((v)->counter), (i))
> +#endif
> +
[...]
>  
> +#define ATOMIC_FETCH_OP_UNLESS_RELAXED(name, type, dtype, width, asm_op) \
> +static inline int arch_##name##_relaxed(type *v, dtype a, dtype u)	\

I don't think we have atomic_fetch_*_unless_relaxed() at atomic APIs,
ditto for:

	atomic_fetch_add_unless_relaxed()
	atomic_inc_not_zero_relaxed()
	atomic_dec_if_positive_relaxed()

, and we don't have the _acquire() and _release() variants for them
either, and if you don't define their fully-ordered version (e.g.
atomic_inc_not_zero()), atomic-arch-fallback.h will use read and cmpxchg
to implement them, and I think not what we want.

[...]
>  
>  #endif /* __KERNEL__ */
>  #endif /* _ASM_POWERPC_ATOMIC_H_ */
> diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h
> index cf091c4c22e5..181f7e8b3281 100644
> --- a/arch/powerpc/include/asm/cmpxchg.h
> +++ b/arch/powerpc/include/asm/cmpxchg.h
> @@ -192,7 +192,7 @@ __xchg_relaxed(void *ptr, unsigned long x, unsigned int size)
>       		(unsigned long)_x_, sizeof(*(ptr))); 			     \
>    })
>  
> -#define xchg_relaxed(ptr, x)						\
> +#define arch_xchg_relaxed(ptr, x)					\
>  ({									\
>  	__typeof__(*(ptr)) _x_ = (x);					\
>  	(__typeof__(*(ptr))) __xchg_relaxed((ptr),			\
> @@ -448,35 +448,7 @@ __cmpxchg_relaxed(void *ptr, unsigned long old, unsigned long new,
>  	return old;
>  }
>  
> -static __always_inline unsigned long
> -__cmpxchg_acquire(void *ptr, unsigned long old, unsigned long new,
> -		  unsigned int size)
> -{
> -	switch (size) {
> -	case 1:
> -		return __cmpxchg_u8_acquire(ptr, old, new);
> -	case 2:
> -		return __cmpxchg_u16_acquire(ptr, old, new);
> -	case 4:
> -		return __cmpxchg_u32_acquire(ptr, old, new);
> -#ifdef CONFIG_PPC64
> -	case 8:
> -		return __cmpxchg_u64_acquire(ptr, old, new);
> -#endif
> -	}
> -	BUILD_BUG_ON_MSG(1, "Unsupported size for __cmpxchg_acquire");
> -	return old;
> -}
> -#define cmpxchg(ptr, o, n)						 \
> -  ({									 \
> -     __typeof__(*(ptr)) _o_ = (o);					 \
> -     __typeof__(*(ptr)) _n_ = (n);					 \
> -     (__typeof__(*(ptr))) __cmpxchg((ptr), (unsigned long)_o_,		 \
> -				    (unsigned long)_n_, sizeof(*(ptr))); \
> -  })
> -
> -

If you remove {atomic_}_cmpxchg_{,_acquire}() and use the version
provided by atomic-arch-fallback.h, then a fail cmpxchg or
cmpxchg_acquire() will still result into a full barrier or a acquire
barrier after the RMW operation, the barrier is not necessary and
probably this is not what we want?

Regards,
Boqun

> -#define cmpxchg_local(ptr, o, n)					 \
> +#define arch_cmpxchg_local(ptr, o, n)					 \
>    ({									 \
>       __typeof__(*(ptr)) _o_ = (o);					 \
>       __typeof__(*(ptr)) _n_ = (n);					 \
> @@ -484,7 +456,7 @@ __cmpxchg_acquire(void *ptr, unsigned long old, unsigned long new,
>  				    (unsigned long)_n_, sizeof(*(ptr))); \
>    })
>  
> -#define cmpxchg_relaxed(ptr, o, n)					\
> +#define arch_cmpxchg_relaxed(ptr, o, n)					\
>  ({									\
>  	__typeof__(*(ptr)) _o_ = (o);					\
>  	__typeof__(*(ptr)) _n_ = (n);					\
> @@ -493,38 +465,20 @@ __cmpxchg_acquire(void *ptr, unsigned long old, unsigned long new,
>  			sizeof(*(ptr)));				\
>  })
>  
> -#define cmpxchg_acquire(ptr, o, n)					\
> -({									\
> -	__typeof__(*(ptr)) _o_ = (o);					\
> -	__typeof__(*(ptr)) _n_ = (n);					\
> -	(__typeof__(*(ptr))) __cmpxchg_acquire((ptr),			\
> -			(unsigned long)_o_, (unsigned long)_n_,		\
> -			sizeof(*(ptr)));				\
> -})
>  #ifdef CONFIG_PPC64
> -#define cmpxchg64(ptr, o, n)						\
> -  ({									\
> -	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
> -	cmpxchg((ptr), (o), (n));					\
> -  })
> -#define cmpxchg64_local(ptr, o, n)					\
> +#define arch_cmpxchg64_local(ptr, o, n)					\
>    ({									\
>  	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
> -	cmpxchg_local((ptr), (o), (n));					\
> +	arch_cmpxchg_local((ptr), (o), (n));				\
>    })
> -#define cmpxchg64_relaxed(ptr, o, n)					\
> -({									\
> -	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
> -	cmpxchg_relaxed((ptr), (o), (n));				\
> -})
> -#define cmpxchg64_acquire(ptr, o, n)					\
> +#define arch_cmpxchg64_relaxed(ptr, o, n)				\
>  ({									\
>  	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
> -	cmpxchg_acquire((ptr), (o), (n));				\
> +	arch_cmpxchg_relaxed((ptr), (o), (n));				\
>  })
>  #else
>  #include <asm-generic/cmpxchg-local.h>
> -#define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n))
> +#define arch_cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n))
>  #endif
>  
>  #endif /* __KERNEL__ */
> -- 
> 2.23.0
> 

  parent reply	other threads:[~2020-11-13 15:30 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-11-11 11:07 [PATCH 0/3] powerpc: convert to use ARCH_ATOMIC Nicholas Piggin
2020-11-11 11:07 ` Nicholas Piggin
2020-11-11 11:07 ` [PATCH 1/3] asm-generic/atomic64: Add support for ARCH_ATOMIC Nicholas Piggin
2020-11-11 11:07   ` Nicholas Piggin
2020-11-11 13:39   ` Christophe Leroy
2020-11-11 13:39     ` Christophe Leroy
2020-11-11 13:44     ` Peter Zijlstra
2020-11-11 13:44       ` Peter Zijlstra
2020-11-16  1:48       ` Nicholas Piggin
2020-11-16  1:48         ` Nicholas Piggin
2020-11-11 11:07 ` [PATCH 2/3] powerpc/64s/iommu: don't use atomic_ function on atomic64_t type Nicholas Piggin
2020-11-11 11:07   ` Nicholas Piggin
2020-11-11 11:07 ` [PATCH 3/3] powerpc: rewrite atomics to use ARCH_ATOMIC Nicholas Piggin
2020-11-11 11:07   ` Nicholas Piggin
2020-11-11 19:07   ` kernel test robot
2020-11-11 19:07     ` kernel test robot
2020-11-11 19:07     ` kernel test robot
2020-11-13  5:05   ` kernel test robot
2020-11-13  5:05     ` kernel test robot
2020-11-13  5:05     ` kernel test robot
2020-11-13 15:30   ` Boqun Feng [this message]
2020-11-13 15:30     ` Boqun Feng
2020-12-22  3:52     ` Nicholas Piggin
2020-12-22  3:52       ` Nicholas Piggin
2020-12-23  2:45       ` Boqun Feng
2020-12-23  2:45         ` Boqun Feng
2020-12-15 11:01 ` [PATCH 0/3] powerpc: convert " Michael Ellerman
2020-12-15 11:01   ` Michael Ellerman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20201113153012.GD286534@boqun-archlinux \
    --to=boqun.feng@gmail.com \
    --cc=aik@ozlabs.ru \
    --cc=arnd@arndb.de \
    --cc=christophe.leroy@c-s.fr \
    --cc=linux-arch@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linuxppc-dev@lists.ozlabs.org \
    --cc=mpe@ellerman.id.au \
    --cc=npiggin@gmail.com \
    --cc=peterz@infradead.org \
    --cc=will@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.