public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] locking/pvqspinlock: Use try_cmpxchg_acquire() in trylock_clear_pending()
@ 2024-03-25 14:09 Uros Bizjak
  2024-03-25 14:09 ` [PATCH 2/2] locking/pvqspinlock: Use try_cmpxchg() in qspinlock_paravirt.h Uros Bizjak
                   ` (3 more replies)
  0 siblings, 4 replies; 11+ messages in thread
From: Uros Bizjak @ 2024-03-25 14:09 UTC (permalink / raw)
  To: linux-kernel
  Cc: Uros Bizjak, Peter Zijlstra, Ingo Molnar, Will Deacon,
	Waiman Long, Boqun Feng

Use try_cmpxchg_acquire(*ptr, &old, new) instead of
cmpxchg_relaxed(*ptr, old, new) == old in trylock_clear_pending().
x86 CMPXCHG instruction returns success in ZF flag, so this change
saves a compare after cmpxchg.

Also change the return type of the function to bool.

No functional change intended.

Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Will Deacon <will@kernel.org>
Cc: Waiman Long <longman@redhat.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
---
 kernel/locking/qspinlock_paravirt.h | 31 ++++++++++++-----------------
 1 file changed, 13 insertions(+), 18 deletions(-)

diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index 169950fe1aad..77ba80bd95f9 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -116,11 +116,12 @@ static __always_inline void set_pending(struct qspinlock *lock)
  * barrier. Therefore, an atomic cmpxchg_acquire() is used to acquire the
  * lock just to be sure that it will get it.
  */
-static __always_inline int trylock_clear_pending(struct qspinlock *lock)
+static __always_inline bool trylock_clear_pending(struct qspinlock *lock)
 {
+	u16 old = _Q_PENDING_VAL;
+
 	return !READ_ONCE(lock->locked) &&
-	       (cmpxchg_acquire(&lock->locked_pending, _Q_PENDING_VAL,
-				_Q_LOCKED_VAL) == _Q_PENDING_VAL);
+	       try_cmpxchg_acquire(&lock->locked_pending, &old, _Q_LOCKED_VAL);
 }
 #else /* _Q_PENDING_BITS == 8 */
 static __always_inline void set_pending(struct qspinlock *lock)
@@ -128,27 +129,21 @@ static __always_inline void set_pending(struct qspinlock *lock)
 	atomic_or(_Q_PENDING_VAL, &lock->val);
 }
 
-static __always_inline int trylock_clear_pending(struct qspinlock *lock)
+static __always_inline bool trylock_clear_pending(struct qspinlock *lock)
 {
-	int val = atomic_read(&lock->val);
-
-	for (;;) {
-		int old, new;
-
-		if (val  & _Q_LOCKED_MASK)
-			break;
+	int old, new;
 
+	old = atomic_read(&lock->val);
+	do {
+		if (old & _Q_LOCKED_MASK)
+			return false;
 		/*
 		 * Try to clear pending bit & set locked bit
 		 */
-		old = val;
-		new = (val & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL;
-		val = atomic_cmpxchg_acquire(&lock->val, old, new);
+		new = (old & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL;
+	} while (!atomic_try_cmpxchg_acquire (&lock->val, &old, new));
 
-		if (val == old)
-			return 1;
-	}
-	return 0;
+	return true;
 }
 #endif /* _Q_PENDING_BITS == 8 */
 
-- 
2.44.0


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 2/2] locking/pvqspinlock: Use try_cmpxchg() in qspinlock_paravirt.h
  2024-03-25 14:09 [PATCH 1/2] locking/pvqspinlock: Use try_cmpxchg_acquire() in trylock_clear_pending() Uros Bizjak
@ 2024-03-25 14:09 ` Uros Bizjak
  2024-03-28  2:22   ` Waiman Long
  2024-04-11 13:24   ` Ingo Molnar
  2024-03-28  1:52 ` [PATCH 1/2] locking/pvqspinlock: Use try_cmpxchg_acquire() in trylock_clear_pending() Waiman Long
                   ` (2 subsequent siblings)
  3 siblings, 2 replies; 11+ messages in thread
From: Uros Bizjak @ 2024-03-25 14:09 UTC (permalink / raw)
  To: linux-kernel
  Cc: Uros Bizjak, Peter Zijlstra, Ingo Molnar, Will Deacon,
	Waiman Long, Boqun Feng

Use try_cmpxchg(*ptr, &old, new) instead of
cmpxchg(*ptr, old, new) == old in qspinlock_paravirt.h
x86 CMPXCHG instruction returns success in ZF flag, so
this change saves a compare after cmpxchg.

No functional change intended.

Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Will Deacon <will@kernel.org>
Cc: Waiman Long <longman@redhat.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
---
 kernel/locking/qspinlock_paravirt.h | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index 77ba80bd95f9..3db5f811260f 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -86,9 +86,10 @@ static inline bool pv_hybrid_queued_unfair_trylock(struct qspinlock *lock)
 	 */
 	for (;;) {
 		int val = atomic_read(&lock->val);
+		u8 old = 0;
 
 		if (!(val & _Q_LOCKED_PENDING_MASK) &&
-		   (cmpxchg_acquire(&lock->locked, 0, _Q_LOCKED_VAL) == 0)) {
+		    try_cmpxchg_acquire(&lock->locked, &old, _Q_LOCKED_VAL)) {
 			lockevent_inc(pv_lock_stealing);
 			return true;
 		}
@@ -211,8 +212,9 @@ static struct qspinlock **pv_hash(struct qspinlock *lock, struct pv_node *node)
 	int hopcnt = 0;
 
 	for_each_hash_entry(he, offset, hash) {
+		struct qspinlock *old = NULL;
 		hopcnt++;
-		if (!cmpxchg(&he->lock, NULL, lock)) {
+		if (try_cmpxchg(&he->lock, &old, lock)) {
 			WRITE_ONCE(he->node, node);
 			lockevent_pv_hop(hopcnt);
 			return &he->lock;
@@ -355,7 +357,7 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
 static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
 {
 	struct pv_node *pn = (struct pv_node *)node;
-
+	enum vcpu_state old = vcpu_halted;
 	/*
 	 * If the vCPU is indeed halted, advance its state to match that of
 	 * pv_wait_node(). If OTOH this fails, the vCPU was running and will
@@ -372,8 +374,7 @@ static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
 	 * subsequent writes.
 	 */
 	smp_mb__before_atomic();
-	if (cmpxchg_relaxed(&pn->state, vcpu_halted, vcpu_hashed)
-	    != vcpu_halted)
+	if (!try_cmpxchg_relaxed(&pn->state, &old, vcpu_hashed))
 		return;
 
 	/*
@@ -541,15 +542,14 @@ __pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked)
 #ifndef __pv_queued_spin_unlock
 __visible __lockfunc void __pv_queued_spin_unlock(struct qspinlock *lock)
 {
-	u8 locked;
+	u8 locked = _Q_LOCKED_VAL;
 
 	/*
 	 * We must not unlock if SLOW, because in that case we must first
 	 * unhash. Otherwise it would be possible to have multiple @lock
 	 * entries, which would be BAD.
 	 */
-	locked = cmpxchg_release(&lock->locked, _Q_LOCKED_VAL, 0);
-	if (likely(locked == _Q_LOCKED_VAL))
+	if (try_cmpxchg_release(&lock->locked, &locked, 0);
 		return;
 
 	__pv_queued_spin_unlock_slowpath(lock, locked);
-- 
2.44.0


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/2] locking/pvqspinlock: Use try_cmpxchg_acquire() in trylock_clear_pending()
  2024-03-25 14:09 [PATCH 1/2] locking/pvqspinlock: Use try_cmpxchg_acquire() in trylock_clear_pending() Uros Bizjak
  2024-03-25 14:09 ` [PATCH 2/2] locking/pvqspinlock: Use try_cmpxchg() in qspinlock_paravirt.h Uros Bizjak
@ 2024-03-28  1:52 ` Waiman Long
  2024-04-11 13:33 ` [tip: locking/core] " tip-bot2 for Uros Bizjak
  2024-04-12  9:46 ` [tip: locking/core] " tip-bot2 for Uros Bizjak
  3 siblings, 0 replies; 11+ messages in thread
From: Waiman Long @ 2024-03-28  1:52 UTC (permalink / raw)
  To: Uros Bizjak, linux-kernel
  Cc: Peter Zijlstra, Ingo Molnar, Will Deacon, Boqun Feng

On 3/25/24 10:09, Uros Bizjak wrote:
> Use try_cmpxchg_acquire(*ptr, &old, new) instead of
> cmpxchg_relaxed(*ptr, old, new) == old in trylock_clear_pending().
> x86 CMPXCHG instruction returns success in ZF flag, so this change
> saves a compare after cmpxchg.
>
> Also change the return type of the function to bool.
>
> No functional change intended.
>
> Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
> Cc: Peter Zijlstra <peterz@infradead.org>
> Cc: Ingo Molnar <mingo@redhat.com>
> Cc: Will Deacon <will@kernel.org>
> Cc: Waiman Long <longman@redhat.com>
> Cc: Boqun Feng <boqun.feng@gmail.com>
> ---
>   kernel/locking/qspinlock_paravirt.h | 31 ++++++++++++-----------------
>   1 file changed, 13 insertions(+), 18 deletions(-)
>
> diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
> index 169950fe1aad..77ba80bd95f9 100644
> --- a/kernel/locking/qspinlock_paravirt.h
> +++ b/kernel/locking/qspinlock_paravirt.h
> @@ -116,11 +116,12 @@ static __always_inline void set_pending(struct qspinlock *lock)
>    * barrier. Therefore, an atomic cmpxchg_acquire() is used to acquire the
>    * lock just to be sure that it will get it.
>    */
> -static __always_inline int trylock_clear_pending(struct qspinlock *lock)
> +static __always_inline bool trylock_clear_pending(struct qspinlock *lock)
>   {
> +	u16 old = _Q_PENDING_VAL;
> +
>   	return !READ_ONCE(lock->locked) &&
> -	       (cmpxchg_acquire(&lock->locked_pending, _Q_PENDING_VAL,
> -				_Q_LOCKED_VAL) == _Q_PENDING_VAL);
> +	       try_cmpxchg_acquire(&lock->locked_pending, &old, _Q_LOCKED_VAL);
>   }
>   #else /* _Q_PENDING_BITS == 8 */
>   static __always_inline void set_pending(struct qspinlock *lock)
> @@ -128,27 +129,21 @@ static __always_inline void set_pending(struct qspinlock *lock)
>   	atomic_or(_Q_PENDING_VAL, &lock->val);
>   }
>   
> -static __always_inline int trylock_clear_pending(struct qspinlock *lock)
> +static __always_inline bool trylock_clear_pending(struct qspinlock *lock)
>   {
> -	int val = atomic_read(&lock->val);
> -
> -	for (;;) {
> -		int old, new;
> -
> -		if (val  & _Q_LOCKED_MASK)
> -			break;
> +	int old, new;
>   
> +	old = atomic_read(&lock->val);
> +	do {
> +		if (old & _Q_LOCKED_MASK)
> +			return false;
>   		/*
>   		 * Try to clear pending bit & set locked bit
>   		 */
> -		old = val;
> -		new = (val & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL;
> -		val = atomic_cmpxchg_acquire(&lock->val, old, new);
> +		new = (old & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL;
> +	} while (!atomic_try_cmpxchg_acquire (&lock->val, &old, new));
>   
> -		if (val == old)
> -			return 1;
> -	}
> -	return 0;
> +	return true;
>   }
>   #endif /* _Q_PENDING_BITS == 8 */
>   
Reviewed-by: Waiman Long <longman@redhat.com>


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/2] locking/pvqspinlock: Use try_cmpxchg() in qspinlock_paravirt.h
  2024-03-25 14:09 ` [PATCH 2/2] locking/pvqspinlock: Use try_cmpxchg() in qspinlock_paravirt.h Uros Bizjak
@ 2024-03-28  2:22   ` Waiman Long
  2024-04-11 13:24   ` Ingo Molnar
  1 sibling, 0 replies; 11+ messages in thread
From: Waiman Long @ 2024-03-28  2:22 UTC (permalink / raw)
  To: Uros Bizjak, linux-kernel
  Cc: Peter Zijlstra, Ingo Molnar, Will Deacon, Boqun Feng


On 3/25/24 10:09, Uros Bizjak wrote:
> Use try_cmpxchg(*ptr, &old, new) instead of
> cmpxchg(*ptr, old, new) == old in qspinlock_paravirt.h
> x86 CMPXCHG instruction returns success in ZF flag, so
> this change saves a compare after cmpxchg.
>
> No functional change intended.
>
> Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
> Cc: Peter Zijlstra <peterz@infradead.org>
> Cc: Ingo Molnar <mingo@redhat.com>
> Cc: Will Deacon <will@kernel.org>
> Cc: Waiman Long <longman@redhat.com>
> Cc: Boqun Feng <boqun.feng@gmail.com>
> ---
>   kernel/locking/qspinlock_paravirt.h | 16 ++++++++--------
>   1 file changed, 8 insertions(+), 8 deletions(-)
>
> diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
> index 77ba80bd95f9..3db5f811260f 100644
> --- a/kernel/locking/qspinlock_paravirt.h
> +++ b/kernel/locking/qspinlock_paravirt.h
> @@ -86,9 +86,10 @@ static inline bool pv_hybrid_queued_unfair_trylock(struct qspinlock *lock)
>   	 */
>   	for (;;) {
>   		int val = atomic_read(&lock->val);
> +		u8 old = 0;
>   
>   		if (!(val & _Q_LOCKED_PENDING_MASK) &&
> -		   (cmpxchg_acquire(&lock->locked, 0, _Q_LOCKED_VAL) == 0)) {
> +		    try_cmpxchg_acquire(&lock->locked, &old, _Q_LOCKED_VAL)) {
>   			lockevent_inc(pv_lock_stealing);
>   			return true;
>   		}
> @@ -211,8 +212,9 @@ static struct qspinlock **pv_hash(struct qspinlock *lock, struct pv_node *node)
>   	int hopcnt = 0;
>   
>   	for_each_hash_entry(he, offset, hash) {
> +		struct qspinlock *old = NULL;
>   		hopcnt++;
> -		if (!cmpxchg(&he->lock, NULL, lock)) {
> +		if (try_cmpxchg(&he->lock, &old, lock)) {
>   			WRITE_ONCE(he->node, node);
>   			lockevent_pv_hop(hopcnt);
>   			return &he->lock;
> @@ -355,7 +357,7 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
>   static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
>   {
>   	struct pv_node *pn = (struct pv_node *)node;
> -
> +	enum vcpu_state old = vcpu_halted;
>   	/*
>   	 * If the vCPU is indeed halted, advance its state to match that of
>   	 * pv_wait_node(). If OTOH this fails, the vCPU was running and will
> @@ -372,8 +374,7 @@ static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
>   	 * subsequent writes.
>   	 */
>   	smp_mb__before_atomic();
> -	if (cmpxchg_relaxed(&pn->state, vcpu_halted, vcpu_hashed)
> -	    != vcpu_halted)
> +	if (!try_cmpxchg_relaxed(&pn->state, &old, vcpu_hashed))
>   		return;
>   
>   	/*
> @@ -541,15 +542,14 @@ __pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked)
>   #ifndef __pv_queued_spin_unlock
>   __visible __lockfunc void __pv_queued_spin_unlock(struct qspinlock *lock)
>   {
> -	u8 locked;
> +	u8 locked = _Q_LOCKED_VAL;
>   
>   	/*
>   	 * We must not unlock if SLOW, because in that case we must first
>   	 * unhash. Otherwise it would be possible to have multiple @lock
>   	 * entries, which would be BAD.
>   	 */
> -	locked = cmpxchg_release(&lock->locked, _Q_LOCKED_VAL, 0);
> -	if (likely(locked == _Q_LOCKED_VAL))
> +	if (try_cmpxchg_release(&lock->locked, &locked, 0);
>   		return;
>   
>   	__pv_queued_spin_unlock_slowpath(lock, locked);
Reviewed-by: Waiman Long <longman@redhat.com>


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/2] locking/pvqspinlock: Use try_cmpxchg() in qspinlock_paravirt.h
  2024-03-25 14:09 ` [PATCH 2/2] locking/pvqspinlock: Use try_cmpxchg() in qspinlock_paravirt.h Uros Bizjak
  2024-03-28  2:22   ` Waiman Long
@ 2024-04-11 13:24   ` Ingo Molnar
  2024-04-11 13:35     ` Uros Bizjak
  1 sibling, 1 reply; 11+ messages in thread
From: Ingo Molnar @ 2024-04-11 13:24 UTC (permalink / raw)
  To: Uros Bizjak
  Cc: linux-kernel, Peter Zijlstra, Ingo Molnar, Will Deacon,
	Waiman Long, Boqun Feng


* Uros Bizjak <ubizjak@gmail.com> wrote:

> -	locked = cmpxchg_release(&lock->locked, _Q_LOCKED_VAL, 0);
> -	if (likely(locked == _Q_LOCKED_VAL))
> +	if (try_cmpxchg_release(&lock->locked, &locked, 0);
>  		return;                                   ^------------ ???

This doesn't appear to be a particularly well-tested patch. ;-)

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [tip: locking/core] locking/pvqspinlock: Use try_cmpxchg_acquire() in trylock_clear_pending()
  2024-03-25 14:09 [PATCH 1/2] locking/pvqspinlock: Use try_cmpxchg_acquire() in trylock_clear_pending() Uros Bizjak
  2024-03-25 14:09 ` [PATCH 2/2] locking/pvqspinlock: Use try_cmpxchg() in qspinlock_paravirt.h Uros Bizjak
  2024-03-28  1:52 ` [PATCH 1/2] locking/pvqspinlock: Use try_cmpxchg_acquire() in trylock_clear_pending() Waiman Long
@ 2024-04-11 13:33 ` tip-bot2 for Uros Bizjak
  2024-04-11 16:31   ` Linus Torvalds
  2024-04-12  9:46 ` [tip: locking/core] " tip-bot2 for Uros Bizjak
  3 siblings, 1 reply; 11+ messages in thread
From: tip-bot2 for Uros Bizjak @ 2024-04-11 13:33 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Uros Bizjak, Ingo Molnar, Waiman Long, Linus Torvalds, x86,
	linux-kernel

The following commit has been merged into the locking/core branch of tip:

Commit-ID:     7316aec453e4f5cb7bd0d7b856c01ed71d64eafc
Gitweb:        https://git.kernel.org/tip/7316aec453e4f5cb7bd0d7b856c01ed71d64eafc
Author:        Uros Bizjak <ubizjak@gmail.com>
AuthorDate:    Mon, 25 Mar 2024 15:09:32 +01:00
Committer:     Ingo Molnar <mingo@kernel.org>
CommitterDate: Thu, 11 Apr 2024 15:19:08 +02:00

locking/pvqspinlock: Use try_cmpxchg_acquire() in trylock_clear_pending()

Use try_cmpxchg_acquire(*ptr, &old, new) instead of
cmpxchg_relaxed(*ptr, old, new) == old in trylock_clear_pending().

x86 CMPXCHG instruction returns success in ZF flag, so this change
saves a compare after CMPXCHG.

Also change the return type of the function to bool.

No functional change intended.

Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Reviewed-by: Waiman Long <longman@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: https://lore.kernel.org/r/20240325140943.815051-1-ubizjak@gmail.com
---
 kernel/locking/qspinlock_paravirt.h | 31 +++++++++++-----------------
 1 file changed, 13 insertions(+), 18 deletions(-)

diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index 169950f..77ba80b 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -116,11 +116,12 @@ static __always_inline void set_pending(struct qspinlock *lock)
  * barrier. Therefore, an atomic cmpxchg_acquire() is used to acquire the
  * lock just to be sure that it will get it.
  */
-static __always_inline int trylock_clear_pending(struct qspinlock *lock)
+static __always_inline bool trylock_clear_pending(struct qspinlock *lock)
 {
+	u16 old = _Q_PENDING_VAL;
+
 	return !READ_ONCE(lock->locked) &&
-	       (cmpxchg_acquire(&lock->locked_pending, _Q_PENDING_VAL,
-				_Q_LOCKED_VAL) == _Q_PENDING_VAL);
+	       try_cmpxchg_acquire(&lock->locked_pending, &old, _Q_LOCKED_VAL);
 }
 #else /* _Q_PENDING_BITS == 8 */
 static __always_inline void set_pending(struct qspinlock *lock)
@@ -128,27 +129,21 @@ static __always_inline void set_pending(struct qspinlock *lock)
 	atomic_or(_Q_PENDING_VAL, &lock->val);
 }
 
-static __always_inline int trylock_clear_pending(struct qspinlock *lock)
+static __always_inline bool trylock_clear_pending(struct qspinlock *lock)
 {
-	int val = atomic_read(&lock->val);
-
-	for (;;) {
-		int old, new;
-
-		if (val  & _Q_LOCKED_MASK)
-			break;
+	int old, new;
 
+	old = atomic_read(&lock->val);
+	do {
+		if (old & _Q_LOCKED_MASK)
+			return false;
 		/*
 		 * Try to clear pending bit & set locked bit
 		 */
-		old = val;
-		new = (val & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL;
-		val = atomic_cmpxchg_acquire(&lock->val, old, new);
+		new = (old & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL;
+	} while (!atomic_try_cmpxchg_acquire (&lock->val, &old, new));
 
-		if (val == old)
-			return 1;
-	}
-	return 0;
+	return true;
 }
 #endif /* _Q_PENDING_BITS == 8 */
 

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/2] locking/pvqspinlock: Use try_cmpxchg() in qspinlock_paravirt.h
  2024-04-11 13:24   ` Ingo Molnar
@ 2024-04-11 13:35     ` Uros Bizjak
  2024-04-11 19:08       ` Uros Bizjak
  0 siblings, 1 reply; 11+ messages in thread
From: Uros Bizjak @ 2024-04-11 13:35 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: linux-kernel, Peter Zijlstra, Ingo Molnar, Will Deacon,
	Waiman Long, Boqun Feng

On Thu, Apr 11, 2024 at 3:24 PM Ingo Molnar <mingo@kernel.org> wrote:
>
>
> * Uros Bizjak <ubizjak@gmail.com> wrote:
>
> > -     locked = cmpxchg_release(&lock->locked, _Q_LOCKED_VAL, 0);
> > -     if (likely(locked == _Q_LOCKED_VAL))
> > +     if (try_cmpxchg_release(&lock->locked, &locked, 0);
> >               return;                                   ^------------ ???
>
> This doesn't appear to be a particularly well-tested patch. ;-)

Ouch, embarrassing... oh it is a generic function, conditionally compiled with

#ifndef __pv_queued_spin_unlock
#endif

and x86 defines its own function ...

I concentrated on different settings of _Q_PENDING_BITS and the above
slipped through.

Thanks,
Uros.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [tip: locking/core] locking/pvqspinlock: Use try_cmpxchg_acquire() in trylock_clear_pending()
  2024-04-11 13:33 ` [tip: locking/core] " tip-bot2 for Uros Bizjak
@ 2024-04-11 16:31   ` Linus Torvalds
  2024-04-12  9:42     ` [PATCH v2] " Ingo Molnar
  0 siblings, 1 reply; 11+ messages in thread
From: Linus Torvalds @ 2024-04-11 16:31 UTC (permalink / raw)
  To: linux-kernel
  Cc: linux-tip-commits, Uros Bizjak, Ingo Molnar, Waiman Long, x86

On Thu, 11 Apr 2024 at 06:33, tip-bot2 for Uros Bizjak
<tip-bot2@linutronix.de> wrote:
>
> Use try_cmpxchg_acquire(*ptr, &old, new) instead of
> cmpxchg_relaxed(*ptr, old, new) == old in trylock_clear_pending().

The above commit message is horribly confusing and wrong.

I was going "that's not right", because it says "use acquire instead
of relaxed" memory ordering, and then goes on to say "No functional
change intended".

But it turns out the *code* was always acquire, and it's only the
commit message that is wrong, presumably due to a bit too much
cut-and-paste.

But please fix the commit message, and use the right memory ordering
in the explanations too.

            Linus

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/2] locking/pvqspinlock: Use try_cmpxchg() in qspinlock_paravirt.h
  2024-04-11 13:35     ` Uros Bizjak
@ 2024-04-11 19:08       ` Uros Bizjak
  0 siblings, 0 replies; 11+ messages in thread
From: Uros Bizjak @ 2024-04-11 19:08 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: linux-kernel, Peter Zijlstra, Ingo Molnar, Will Deacon,
	Waiman Long, Boqun Feng

On Thu, Apr 11, 2024 at 3:35 PM Uros Bizjak <ubizjak@gmail.com> wrote:
>
> On Thu, Apr 11, 2024 at 3:24 PM Ingo Molnar <mingo@kernel.org> wrote:
> >
> >
> > * Uros Bizjak <ubizjak@gmail.com> wrote:
> >
> > > -     locked = cmpxchg_release(&lock->locked, _Q_LOCKED_VAL, 0);
> > > -     if (likely(locked == _Q_LOCKED_VAL))
> > > +     if (try_cmpxchg_release(&lock->locked, &locked, 0);
> > >               return;                                   ^------------ ???
> >
> > This doesn't appear to be a particularly well-tested patch. ;-)
>
> Ouch, embarrassing... oh it is a generic function, conditionally compiled with
>
> #ifndef __pv_queued_spin_unlock
> #endif
>
> and x86 defines its own function ...

Looking at the assembly of the fixed function, it looks like the improved
generic function is better than the x86_64 special asm:

This is the new generic function:

0000000000000750 <__pv_queued_spin_unlock>:
 750:    f3 0f 1e fa              endbr64
 754:    b8 01 00 00 00           mov    $0x1,%eax
 759:    31 d2                    xor    %edx,%edx
 75b:    f0 0f b0 17              lock cmpxchg %dl,(%rdi)
 75f:    75 05                    jne    766 <__pv_queued_spin_unlock+0x16>
 761:    e9 00 00 00 00           jmp    766 <__pv_queued_spin_unlock+0x16>
            762: R_X86_64_PLT32    __x86_return_thunk-0x4
 766:    0f b6 f0                 movzbl %al,%esi
 769:    e9 02 ff ff ff           jmp    670 <__pv_queued_spin_unlock_slowpath>

and the x86_64 asm version:

0000000000000050 <__raw_callee_save___pv_queued_spin_unlock>:
  50:    f3 0f 1e fa              endbr64
  54:    52                       push   %rdx
  55:    b8 01 00 00 00           mov    $0x1,%eax
  5a:    31 d2                    xor    %edx,%edx
  5c:    f0 0f b0 17              lock cmpxchg %dl,(%rdi)
  60:    3c 01                    cmp    $0x1,%al
  62:    75 06                    jne    6a <.slowpath>
  64:    5a                       pop    %rdx
  65:    e9 00 00 00 00           jmp    6a <.slowpath>

I didn't investigate slowpath, but the generic fast-path part is
certainly better than x86_64 special asm.

Uros.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH v2] locking/pvqspinlock: Use try_cmpxchg_acquire() in trylock_clear_pending()
  2024-04-11 16:31   ` Linus Torvalds
@ 2024-04-12  9:42     ` Ingo Molnar
  0 siblings, 0 replies; 11+ messages in thread
From: Ingo Molnar @ 2024-04-12  9:42 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, linux-tip-commits, Uros Bizjak, Waiman Long, x86


* Linus Torvalds <torvalds@linux-foundation.org> wrote:

> On Thu, 11 Apr 2024 at 06:33, tip-bot2 for Uros Bizjak
> <tip-bot2@linutronix.de> wrote:
> >
> > Use try_cmpxchg_acquire(*ptr, &old, new) instead of
> > cmpxchg_relaxed(*ptr, old, new) == old in trylock_clear_pending().
> 
> The above commit message is horribly confusing and wrong.
> 
> I was going "that's not right", because it says "use acquire instead
> of relaxed" memory ordering, and then goes on to say "No functional
> change intended".
> 
> But it turns out the *code* was always acquire, and it's only the
> commit message that is wrong, presumably due to a bit too much
> cut-and-paste.

Yeah, the replacement is cmpxchg_acquire() => try_cmpxchg_acquire(), with 
no change to memory ordering.

> But please fix the commit message, and use the right memory ordering
> in the explanations too.

Done, find below the new patch, with a hopefully better commit message.

I also added your Reviewed-by tag optimistically :)

Thanks,

	Ingo

===========================>
From: Uros Bizjak <ubizjak@gmail.com>
Date: Mon, 25 Mar 2024 15:09:32 +0100
Subject: [PATCH] locking/pvqspinlock: Use try_cmpxchg_acquire() in trylock_clear_pending()

Replace this pattern in trylock_clear_pending():

    cmpxchg_acquire(*ptr, old, new) == old

... with the simpler and faster:

    try_cmpxchg_acquire(*ptr, &old, new)

The x86 CMPXCHG instruction returns success in the ZF flag, so this change
saves a compare after the CMPXCHG.

Also change the return type of the function to bool and streamline
the control flow in the _Q_PENDING_BITS == 8 variant a bit.

No functional change intended.

Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Reviewed-by: Waiman Long <longman@redhat.com>
Reviewed-by: Linus Torvalds <torvalds@linux-foundation.org>
Link: https://lore.kernel.org/r/20240325140943.815051-1-ubizjak@gmail.com
---
 kernel/locking/qspinlock_paravirt.h | 31 +++++++++++++------------------
 1 file changed, 13 insertions(+), 18 deletions(-)

diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index 169950fe1aad..77ba80bd95f9 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -116,11 +116,12 @@ static __always_inline void set_pending(struct qspinlock *lock)
  * barrier. Therefore, an atomic cmpxchg_acquire() is used to acquire the
  * lock just to be sure that it will get it.
  */
-static __always_inline int trylock_clear_pending(struct qspinlock *lock)
+static __always_inline bool trylock_clear_pending(struct qspinlock *lock)
 {
+	u16 old = _Q_PENDING_VAL;
+
 	return !READ_ONCE(lock->locked) &&
-	       (cmpxchg_acquire(&lock->locked_pending, _Q_PENDING_VAL,
-				_Q_LOCKED_VAL) == _Q_PENDING_VAL);
+	       try_cmpxchg_acquire(&lock->locked_pending, &old, _Q_LOCKED_VAL);
 }
 #else /* _Q_PENDING_BITS == 8 */
 static __always_inline void set_pending(struct qspinlock *lock)
@@ -128,27 +129,21 @@ static __always_inline void set_pending(struct qspinlock *lock)
 	atomic_or(_Q_PENDING_VAL, &lock->val);
 }
 
-static __always_inline int trylock_clear_pending(struct qspinlock *lock)
+static __always_inline bool trylock_clear_pending(struct qspinlock *lock)
 {
-	int val = atomic_read(&lock->val);
-
-	for (;;) {
-		int old, new;
-
-		if (val  & _Q_LOCKED_MASK)
-			break;
+	int old, new;
 
+	old = atomic_read(&lock->val);
+	do {
+		if (old & _Q_LOCKED_MASK)
+			return false;
 		/*
 		 * Try to clear pending bit & set locked bit
 		 */
-		old = val;
-		new = (val & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL;
-		val = atomic_cmpxchg_acquire(&lock->val, old, new);
+		new = (old & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL;
+	} while (!atomic_try_cmpxchg_acquire (&lock->val, &old, new));
 
-		if (val == old)
-			return 1;
-	}
-	return 0;
+	return true;
 }
 #endif /* _Q_PENDING_BITS == 8 */
 

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [tip: locking/core] locking/pvqspinlock: Use try_cmpxchg_acquire() in trylock_clear_pending()
  2024-03-25 14:09 [PATCH 1/2] locking/pvqspinlock: Use try_cmpxchg_acquire() in trylock_clear_pending() Uros Bizjak
                   ` (2 preceding siblings ...)
  2024-04-11 13:33 ` [tip: locking/core] " tip-bot2 for Uros Bizjak
@ 2024-04-12  9:46 ` tip-bot2 for Uros Bizjak
  3 siblings, 0 replies; 11+ messages in thread
From: tip-bot2 for Uros Bizjak @ 2024-04-12  9:46 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Uros Bizjak, Ingo Molnar, Waiman Long, Linus Torvalds, x86,
	linux-kernel

The following commit has been merged into the locking/core branch of tip:

Commit-ID:     6a97734f2222e0352f1900e3eb3167e9069b91bb
Gitweb:        https://git.kernel.org/tip/6a97734f2222e0352f1900e3eb3167e9069b91bb
Author:        Uros Bizjak <ubizjak@gmail.com>
AuthorDate:    Mon, 25 Mar 2024 15:09:32 +01:00
Committer:     Ingo Molnar <mingo@kernel.org>
CommitterDate: Fri, 12 Apr 2024 11:40:51 +02:00

locking/pvqspinlock: Use try_cmpxchg_acquire() in trylock_clear_pending()

Replace this pattern in trylock_clear_pending():

    cmpxchg_acquire(*ptr, old, new) == old

... with the simpler and faster:

    try_cmpxchg_acquire(*ptr, &old, new)

The x86 CMPXCHG instruction returns success in the ZF flag, so this change
saves a compare after the CMPXCHG.

Also change the return type of the function to bool and streamline
the control flow in the _Q_PENDING_BITS == 8 variant a bit.

No functional change intended.

Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Reviewed-by: Waiman Long <longman@redhat.com>
Reviewed-by: Linus Torvalds <torvalds@linux-foundation.org>
Link: https://lore.kernel.org/r/20240325140943.815051-1-ubizjak@gmail.com
---
 kernel/locking/qspinlock_paravirt.h | 31 +++++++++++-----------------
 1 file changed, 13 insertions(+), 18 deletions(-)

diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index 169950f..77ba80b 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -116,11 +116,12 @@ static __always_inline void set_pending(struct qspinlock *lock)
  * barrier. Therefore, an atomic cmpxchg_acquire() is used to acquire the
  * lock just to be sure that it will get it.
  */
-static __always_inline int trylock_clear_pending(struct qspinlock *lock)
+static __always_inline bool trylock_clear_pending(struct qspinlock *lock)
 {
+	u16 old = _Q_PENDING_VAL;
+
 	return !READ_ONCE(lock->locked) &&
-	       (cmpxchg_acquire(&lock->locked_pending, _Q_PENDING_VAL,
-				_Q_LOCKED_VAL) == _Q_PENDING_VAL);
+	       try_cmpxchg_acquire(&lock->locked_pending, &old, _Q_LOCKED_VAL);
 }
 #else /* _Q_PENDING_BITS == 8 */
 static __always_inline void set_pending(struct qspinlock *lock)
@@ -128,27 +129,21 @@ static __always_inline void set_pending(struct qspinlock *lock)
 	atomic_or(_Q_PENDING_VAL, &lock->val);
 }
 
-static __always_inline int trylock_clear_pending(struct qspinlock *lock)
+static __always_inline bool trylock_clear_pending(struct qspinlock *lock)
 {
-	int val = atomic_read(&lock->val);
-
-	for (;;) {
-		int old, new;
-
-		if (val  & _Q_LOCKED_MASK)
-			break;
+	int old, new;
 
+	old = atomic_read(&lock->val);
+	do {
+		if (old & _Q_LOCKED_MASK)
+			return false;
 		/*
 		 * Try to clear pending bit & set locked bit
 		 */
-		old = val;
-		new = (val & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL;
-		val = atomic_cmpxchg_acquire(&lock->val, old, new);
+		new = (old & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL;
+	} while (!atomic_try_cmpxchg_acquire (&lock->val, &old, new));
 
-		if (val == old)
-			return 1;
-	}
-	return 0;
+	return true;
 }
 #endif /* _Q_PENDING_BITS == 8 */
 

^ permalink raw reply related	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2024-04-12  9:46 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-03-25 14:09 [PATCH 1/2] locking/pvqspinlock: Use try_cmpxchg_acquire() in trylock_clear_pending() Uros Bizjak
2024-03-25 14:09 ` [PATCH 2/2] locking/pvqspinlock: Use try_cmpxchg() in qspinlock_paravirt.h Uros Bizjak
2024-03-28  2:22   ` Waiman Long
2024-04-11 13:24   ` Ingo Molnar
2024-04-11 13:35     ` Uros Bizjak
2024-04-11 19:08       ` Uros Bizjak
2024-03-28  1:52 ` [PATCH 1/2] locking/pvqspinlock: Use try_cmpxchg_acquire() in trylock_clear_pending() Waiman Long
2024-04-11 13:33 ` [tip: locking/core] " tip-bot2 for Uros Bizjak
2024-04-11 16:31   ` Linus Torvalds
2024-04-12  9:42     ` [PATCH v2] " Ingo Molnar
2024-04-12  9:46 ` [tip: locking/core] " tip-bot2 for Uros Bizjak

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox