All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] locking/pvqspinlock: Use try_cmpxchg_acquire() in trylock_clear_pending()
@ 2024-03-25 14:09 Uros Bizjak
  2024-03-25 14:09 ` [PATCH 2/2] locking/pvqspinlock: Use try_cmpxchg() in qspinlock_paravirt.h Uros Bizjak
                   ` (3 more replies)
  0 siblings, 4 replies; 11+ messages in thread
From: Uros Bizjak @ 2024-03-25 14:09 UTC (permalink / raw)
  To: linux-kernel
  Cc: Uros Bizjak, Peter Zijlstra, Ingo Molnar, Will Deacon,
	Waiman Long, Boqun Feng

Use try_cmpxchg_acquire(*ptr, &old, new) instead of
cmpxchg_relaxed(*ptr, old, new) == old in trylock_clear_pending().
x86 CMPXCHG instruction returns success in ZF flag, so this change
saves a compare after cmpxchg.

Also change the return type of the function to bool.

No functional change intended.

Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Will Deacon <will@kernel.org>
Cc: Waiman Long <longman@redhat.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
---
 kernel/locking/qspinlock_paravirt.h | 31 ++++++++++++-----------------
 1 file changed, 13 insertions(+), 18 deletions(-)

diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index 169950fe1aad..77ba80bd95f9 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -116,11 +116,12 @@ static __always_inline void set_pending(struct qspinlock *lock)
  * barrier. Therefore, an atomic cmpxchg_acquire() is used to acquire the
  * lock just to be sure that it will get it.
  */
-static __always_inline int trylock_clear_pending(struct qspinlock *lock)
+static __always_inline bool trylock_clear_pending(struct qspinlock *lock)
 {
+	u16 old = _Q_PENDING_VAL;
+
 	return !READ_ONCE(lock->locked) &&
-	       (cmpxchg_acquire(&lock->locked_pending, _Q_PENDING_VAL,
-				_Q_LOCKED_VAL) == _Q_PENDING_VAL);
+	       try_cmpxchg_acquire(&lock->locked_pending, &old, _Q_LOCKED_VAL);
 }
 #else /* _Q_PENDING_BITS == 8 */
 static __always_inline void set_pending(struct qspinlock *lock)
@@ -128,27 +129,21 @@ static __always_inline void set_pending(struct qspinlock *lock)
 	atomic_or(_Q_PENDING_VAL, &lock->val);
 }
 
-static __always_inline int trylock_clear_pending(struct qspinlock *lock)
+static __always_inline bool trylock_clear_pending(struct qspinlock *lock)
 {
-	int val = atomic_read(&lock->val);
-
-	for (;;) {
-		int old, new;
-
-		if (val  & _Q_LOCKED_MASK)
-			break;
+	int old, new;
 
+	old = atomic_read(&lock->val);
+	do {
+		if (old & _Q_LOCKED_MASK)
+			return false;
 		/*
 		 * Try to clear pending bit & set locked bit
 		 */
-		old = val;
-		new = (val & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL;
-		val = atomic_cmpxchg_acquire(&lock->val, old, new);
+		new = (old & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL;
+	} while (!atomic_try_cmpxchg_acquire (&lock->val, &old, new));
 
-		if (val == old)
-			return 1;
-	}
-	return 0;
+	return true;
 }
 #endif /* _Q_PENDING_BITS == 8 */
 
-- 
2.44.0


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 2/2] locking/pvqspinlock: Use try_cmpxchg() in qspinlock_paravirt.h
  2024-03-25 14:09 [PATCH 1/2] locking/pvqspinlock: Use try_cmpxchg_acquire() in trylock_clear_pending() Uros Bizjak
@ 2024-03-25 14:09 ` Uros Bizjak
  2024-03-28  2:22   ` Waiman Long
  2024-04-11 13:24   ` Ingo Molnar
  2024-03-28  1:52 ` [PATCH 1/2] locking/pvqspinlock: Use try_cmpxchg_acquire() in trylock_clear_pending() Waiman Long
                   ` (2 subsequent siblings)
  3 siblings, 2 replies; 11+ messages in thread
From: Uros Bizjak @ 2024-03-25 14:09 UTC (permalink / raw)
  To: linux-kernel
  Cc: Uros Bizjak, Peter Zijlstra, Ingo Molnar, Will Deacon,
	Waiman Long, Boqun Feng

Use try_cmpxchg(*ptr, &old, new) instead of
cmpxchg(*ptr, old, new) == old in qspinlock_paravirt.h
x86 CMPXCHG instruction returns success in ZF flag, so
this change saves a compare after cmpxchg.

No functional change intended.

Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Will Deacon <will@kernel.org>
Cc: Waiman Long <longman@redhat.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
---
 kernel/locking/qspinlock_paravirt.h | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index 77ba80bd95f9..3db5f811260f 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -86,9 +86,10 @@ static inline bool pv_hybrid_queued_unfair_trylock(struct qspinlock *lock)
 	 */
 	for (;;) {
 		int val = atomic_read(&lock->val);
+		u8 old = 0;
 
 		if (!(val & _Q_LOCKED_PENDING_MASK) &&
-		   (cmpxchg_acquire(&lock->locked, 0, _Q_LOCKED_VAL) == 0)) {
+		    try_cmpxchg_acquire(&lock->locked, &old, _Q_LOCKED_VAL)) {
 			lockevent_inc(pv_lock_stealing);
 			return true;
 		}
@@ -211,8 +212,9 @@ static struct qspinlock **pv_hash(struct qspinlock *lock, struct pv_node *node)
 	int hopcnt = 0;
 
 	for_each_hash_entry(he, offset, hash) {
+		struct qspinlock *old = NULL;
 		hopcnt++;
-		if (!cmpxchg(&he->lock, NULL, lock)) {
+		if (try_cmpxchg(&he->lock, &old, lock)) {
 			WRITE_ONCE(he->node, node);
 			lockevent_pv_hop(hopcnt);
 			return &he->lock;
@@ -355,7 +357,7 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
 static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
 {
 	struct pv_node *pn = (struct pv_node *)node;
-
+	enum vcpu_state old = vcpu_halted;
 	/*
 	 * If the vCPU is indeed halted, advance its state to match that of
 	 * pv_wait_node(). If OTOH this fails, the vCPU was running and will
@@ -372,8 +374,7 @@ static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
 	 * subsequent writes.
 	 */
 	smp_mb__before_atomic();
-	if (cmpxchg_relaxed(&pn->state, vcpu_halted, vcpu_hashed)
-	    != vcpu_halted)
+	if (!try_cmpxchg_relaxed(&pn->state, &old, vcpu_hashed))
 		return;
 
 	/*
@@ -541,15 +542,14 @@ __pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked)
 #ifndef __pv_queued_spin_unlock
 __visible __lockfunc void __pv_queued_spin_unlock(struct qspinlock *lock)
 {
-	u8 locked;
+	u8 locked = _Q_LOCKED_VAL;
 
 	/*
 	 * We must not unlock if SLOW, because in that case we must first
 	 * unhash. Otherwise it would be possible to have multiple @lock
 	 * entries, which would be BAD.
 	 */
-	locked = cmpxchg_release(&lock->locked, _Q_LOCKED_VAL, 0);
-	if (likely(locked == _Q_LOCKED_VAL))
+	if (try_cmpxchg_release(&lock->locked, &locked, 0);
 		return;
 
 	__pv_queued_spin_unlock_slowpath(lock, locked);
-- 
2.44.0


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/2] locking/pvqspinlock: Use try_cmpxchg_acquire() in trylock_clear_pending()
  2024-03-25 14:09 [PATCH 1/2] locking/pvqspinlock: Use try_cmpxchg_acquire() in trylock_clear_pending() Uros Bizjak
  2024-03-25 14:09 ` [PATCH 2/2] locking/pvqspinlock: Use try_cmpxchg() in qspinlock_paravirt.h Uros Bizjak
@ 2024-03-28  1:52 ` Waiman Long
  2024-04-11 13:33 ` [tip: locking/core] " tip-bot2 for Uros Bizjak
  2024-04-12  9:46 ` [tip: locking/core] " tip-bot2 for Uros Bizjak
  3 siblings, 0 replies; 11+ messages in thread
From: Waiman Long @ 2024-03-28  1:52 UTC (permalink / raw)
  To: Uros Bizjak, linux-kernel
  Cc: Peter Zijlstra, Ingo Molnar, Will Deacon, Boqun Feng

On 3/25/24 10:09, Uros Bizjak wrote:
> Use try_cmpxchg_acquire(*ptr, &old, new) instead of
> cmpxchg_relaxed(*ptr, old, new) == old in trylock_clear_pending().
> x86 CMPXCHG instruction returns success in ZF flag, so this change
> saves a compare after cmpxchg.
>
> Also change the return type of the function to bool.
>
> No functional change intended.
>
> Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
> Cc: Peter Zijlstra <peterz@infradead.org>
> Cc: Ingo Molnar <mingo@redhat.com>
> Cc: Will Deacon <will@kernel.org>
> Cc: Waiman Long <longman@redhat.com>
> Cc: Boqun Feng <boqun.feng@gmail.com>
> ---
>   kernel/locking/qspinlock_paravirt.h | 31 ++++++++++++-----------------
>   1 file changed, 13 insertions(+), 18 deletions(-)
>
> diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
> index 169950fe1aad..77ba80bd95f9 100644
> --- a/kernel/locking/qspinlock_paravirt.h
> +++ b/kernel/locking/qspinlock_paravirt.h
> @@ -116,11 +116,12 @@ static __always_inline void set_pending(struct qspinlock *lock)
>    * barrier. Therefore, an atomic cmpxchg_acquire() is used to acquire the
>    * lock just to be sure that it will get it.
>    */
> -static __always_inline int trylock_clear_pending(struct qspinlock *lock)
> +static __always_inline bool trylock_clear_pending(struct qspinlock *lock)
>   {
> +	u16 old = _Q_PENDING_VAL;
> +
>   	return !READ_ONCE(lock->locked) &&
> -	       (cmpxchg_acquire(&lock->locked_pending, _Q_PENDING_VAL,
> -				_Q_LOCKED_VAL) == _Q_PENDING_VAL);
> +	       try_cmpxchg_acquire(&lock->locked_pending, &old, _Q_LOCKED_VAL);
>   }
>   #else /* _Q_PENDING_BITS == 8 */
>   static __always_inline void set_pending(struct qspinlock *lock)
> @@ -128,27 +129,21 @@ static __always_inline void set_pending(struct qspinlock *lock)
>   	atomic_or(_Q_PENDING_VAL, &lock->val);
>   }
>   
> -static __always_inline int trylock_clear_pending(struct qspinlock *lock)
> +static __always_inline bool trylock_clear_pending(struct qspinlock *lock)
>   {
> -	int val = atomic_read(&lock->val);
> -
> -	for (;;) {
> -		int old, new;
> -
> -		if (val  & _Q_LOCKED_MASK)
> -			break;
> +	int old, new;
>   
> +	old = atomic_read(&lock->val);
> +	do {
> +		if (old & _Q_LOCKED_MASK)
> +			return false;
>   		/*
>   		 * Try to clear pending bit & set locked bit
>   		 */
> -		old = val;
> -		new = (val & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL;
> -		val = atomic_cmpxchg_acquire(&lock->val, old, new);
> +		new = (old & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL;
> +	} while (!atomic_try_cmpxchg_acquire (&lock->val, &old, new));
>   
> -		if (val == old)
> -			return 1;
> -	}
> -	return 0;
> +	return true;
>   }
>   #endif /* _Q_PENDING_BITS == 8 */
>   
Reviewed-by: Waiman Long <longman@redhat.com>


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/2] locking/pvqspinlock: Use try_cmpxchg() in qspinlock_paravirt.h
  2024-03-25 14:09 ` [PATCH 2/2] locking/pvqspinlock: Use try_cmpxchg() in qspinlock_paravirt.h Uros Bizjak
@ 2024-03-28  2:22   ` Waiman Long
  2024-04-11 13:24   ` Ingo Molnar
  1 sibling, 0 replies; 11+ messages in thread
From: Waiman Long @ 2024-03-28  2:22 UTC (permalink / raw)
  To: Uros Bizjak, linux-kernel
  Cc: Peter Zijlstra, Ingo Molnar, Will Deacon, Boqun Feng


On 3/25/24 10:09, Uros Bizjak wrote:
> Use try_cmpxchg(*ptr, &old, new) instead of
> cmpxchg(*ptr, old, new) == old in qspinlock_paravirt.h
> x86 CMPXCHG instruction returns success in ZF flag, so
> this change saves a compare after cmpxchg.
>
> No functional change intended.
>
> Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
> Cc: Peter Zijlstra <peterz@infradead.org>
> Cc: Ingo Molnar <mingo@redhat.com>
> Cc: Will Deacon <will@kernel.org>
> Cc: Waiman Long <longman@redhat.com>
> Cc: Boqun Feng <boqun.feng@gmail.com>
> ---
>   kernel/locking/qspinlock_paravirt.h | 16 ++++++++--------
>   1 file changed, 8 insertions(+), 8 deletions(-)
>
> diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
> index 77ba80bd95f9..3db5f811260f 100644
> --- a/kernel/locking/qspinlock_paravirt.h
> +++ b/kernel/locking/qspinlock_paravirt.h
> @@ -86,9 +86,10 @@ static inline bool pv_hybrid_queued_unfair_trylock(struct qspinlock *lock)
>   	 */
>   	for (;;) {
>   		int val = atomic_read(&lock->val);
> +		u8 old = 0;
>   
>   		if (!(val & _Q_LOCKED_PENDING_MASK) &&
> -		   (cmpxchg_acquire(&lock->locked, 0, _Q_LOCKED_VAL) == 0)) {
> +		    try_cmpxchg_acquire(&lock->locked, &old, _Q_LOCKED_VAL)) {
>   			lockevent_inc(pv_lock_stealing);
>   			return true;
>   		}
> @@ -211,8 +212,9 @@ static struct qspinlock **pv_hash(struct qspinlock *lock, struct pv_node *node)
>   	int hopcnt = 0;
>   
>   	for_each_hash_entry(he, offset, hash) {
> +		struct qspinlock *old = NULL;
>   		hopcnt++;
> -		if (!cmpxchg(&he->lock, NULL, lock)) {
> +		if (try_cmpxchg(&he->lock, &old, lock)) {
>   			WRITE_ONCE(he->node, node);
>   			lockevent_pv_hop(hopcnt);
>   			return &he->lock;
> @@ -355,7 +357,7 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
>   static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
>   {
>   	struct pv_node *pn = (struct pv_node *)node;
> -
> +	enum vcpu_state old = vcpu_halted;
>   	/*
>   	 * If the vCPU is indeed halted, advance its state to match that of
>   	 * pv_wait_node(). If OTOH this fails, the vCPU was running and will
> @@ -372,8 +374,7 @@ static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
>   	 * subsequent writes.
>   	 */
>   	smp_mb__before_atomic();
> -	if (cmpxchg_relaxed(&pn->state, vcpu_halted, vcpu_hashed)
> -	    != vcpu_halted)
> +	if (!try_cmpxchg_relaxed(&pn->state, &old, vcpu_hashed))
>   		return;
>   
>   	/*
> @@ -541,15 +542,14 @@ __pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked)
>   #ifndef __pv_queued_spin_unlock
>   __visible __lockfunc void __pv_queued_spin_unlock(struct qspinlock *lock)
>   {
> -	u8 locked;
> +	u8 locked = _Q_LOCKED_VAL;
>   
>   	/*
>   	 * We must not unlock if SLOW, because in that case we must first
>   	 * unhash. Otherwise it would be possible to have multiple @lock
>   	 * entries, which would be BAD.
>   	 */
> -	locked = cmpxchg_release(&lock->locked, _Q_LOCKED_VAL, 0);
> -	if (likely(locked == _Q_LOCKED_VAL))
> +	if (try_cmpxchg_release(&lock->locked, &locked, 0);
>   		return;
>   
>   	__pv_queued_spin_unlock_slowpath(lock, locked);
Reviewed-by: Waiman Long <longman@redhat.com>


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/2] locking/pvqspinlock: Use try_cmpxchg() in qspinlock_paravirt.h
  2024-03-25 14:09 ` [PATCH 2/2] locking/pvqspinlock: Use try_cmpxchg() in qspinlock_paravirt.h Uros Bizjak
  2024-03-28  2:22   ` Waiman Long
@ 2024-04-11 13:24   ` Ingo Molnar
  2024-04-11 13:35     ` Uros Bizjak
  1 sibling, 1 reply; 11+ messages in thread
From: Ingo Molnar @ 2024-04-11 13:24 UTC (permalink / raw)
  To: Uros Bizjak
  Cc: linux-kernel, Peter Zijlstra, Ingo Molnar, Will Deacon,
	Waiman Long, Boqun Feng


* Uros Bizjak <ubizjak@gmail.com> wrote:

> -	locked = cmpxchg_release(&lock->locked, _Q_LOCKED_VAL, 0);
> -	if (likely(locked == _Q_LOCKED_VAL))
> +	if (try_cmpxchg_release(&lock->locked, &locked, 0);
>  		return;                                   ^------------ ???

This doesn't appear to be a particularly well-tested patch. ;-)

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [tip: locking/core] locking/pvqspinlock: Use try_cmpxchg_acquire() in trylock_clear_pending()
  2024-03-25 14:09 [PATCH 1/2] locking/pvqspinlock: Use try_cmpxchg_acquire() in trylock_clear_pending() Uros Bizjak
  2024-03-25 14:09 ` [PATCH 2/2] locking/pvqspinlock: Use try_cmpxchg() in qspinlock_paravirt.h Uros Bizjak
  2024-03-28  1:52 ` [PATCH 1/2] locking/pvqspinlock: Use try_cmpxchg_acquire() in trylock_clear_pending() Waiman Long
@ 2024-04-11 13:33 ` tip-bot2 for Uros Bizjak
  2024-04-11 16:31   ` Linus Torvalds
  2024-04-12  9:46 ` [tip: locking/core] " tip-bot2 for Uros Bizjak
  3 siblings, 1 reply; 11+ messages in thread
From: tip-bot2 for Uros Bizjak @ 2024-04-11 13:33 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Uros Bizjak, Ingo Molnar, Waiman Long, Linus Torvalds, x86,
	linux-kernel

The following commit has been merged into the locking/core branch of tip:

Commit-ID:     7316aec453e4f5cb7bd0d7b856c01ed71d64eafc
Gitweb:        https://git.kernel.org/tip/7316aec453e4f5cb7bd0d7b856c01ed71d64eafc
Author:        Uros Bizjak <ubizjak@gmail.com>
AuthorDate:    Mon, 25 Mar 2024 15:09:32 +01:00
Committer:     Ingo Molnar <mingo@kernel.org>
CommitterDate: Thu, 11 Apr 2024 15:19:08 +02:00

locking/pvqspinlock: Use try_cmpxchg_acquire() in trylock_clear_pending()

Use try_cmpxchg_acquire(*ptr, &old, new) instead of
cmpxchg_relaxed(*ptr, old, new) == old in trylock_clear_pending().

x86 CMPXCHG instruction returns success in ZF flag, so this change
saves a compare after CMPXCHG.

Also change the return type of the function to bool.

No functional change intended.

Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Reviewed-by: Waiman Long <longman@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: https://lore.kernel.org/r/20240325140943.815051-1-ubizjak@gmail.com
---
 kernel/locking/qspinlock_paravirt.h | 31 +++++++++++-----------------
 1 file changed, 13 insertions(+), 18 deletions(-)

diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index 169950f..77ba80b 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -116,11 +116,12 @@ static __always_inline void set_pending(struct qspinlock *lock)
  * barrier. Therefore, an atomic cmpxchg_acquire() is used to acquire the
  * lock just to be sure that it will get it.
  */
-static __always_inline int trylock_clear_pending(struct qspinlock *lock)
+static __always_inline bool trylock_clear_pending(struct qspinlock *lock)
 {
+	u16 old = _Q_PENDING_VAL;
+
 	return !READ_ONCE(lock->locked) &&
-	       (cmpxchg_acquire(&lock->locked_pending, _Q_PENDING_VAL,
-				_Q_LOCKED_VAL) == _Q_PENDING_VAL);
+	       try_cmpxchg_acquire(&lock->locked_pending, &old, _Q_LOCKED_VAL);
 }
 #else /* _Q_PENDING_BITS == 8 */
 static __always_inline void set_pending(struct qspinlock *lock)
@@ -128,27 +129,21 @@ static __always_inline void set_pending(struct qspinlock *lock)
 	atomic_or(_Q_PENDING_VAL, &lock->val);
 }
 
-static __always_inline int trylock_clear_pending(struct qspinlock *lock)
+static __always_inline bool trylock_clear_pending(struct qspinlock *lock)
 {
-	int val = atomic_read(&lock->val);
-
-	for (;;) {
-		int old, new;
-
-		if (val  & _Q_LOCKED_MASK)
-			break;
+	int old, new;
 
+	old = atomic_read(&lock->val);
+	do {
+		if (old & _Q_LOCKED_MASK)
+			return false;
 		/*
 		 * Try to clear pending bit & set locked bit
 		 */
-		old = val;
-		new = (val & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL;
-		val = atomic_cmpxchg_acquire(&lock->val, old, new);
+		new = (old & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL;
+	} while (!atomic_try_cmpxchg_acquire (&lock->val, &old, new));
 
-		if (val == old)
-			return 1;
-	}
-	return 0;
+	return true;
 }
 #endif /* _Q_PENDING_BITS == 8 */
 

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/2] locking/pvqspinlock: Use try_cmpxchg() in qspinlock_paravirt.h
  2024-04-11 13:24   ` Ingo Molnar
@ 2024-04-11 13:35     ` Uros Bizjak
  2024-04-11 19:08       ` Uros Bizjak
  0 siblings, 1 reply; 11+ messages in thread
From: Uros Bizjak @ 2024-04-11 13:35 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: linux-kernel, Peter Zijlstra, Ingo Molnar, Will Deacon,
	Waiman Long, Boqun Feng

On Thu, Apr 11, 2024 at 3:24 PM Ingo Molnar <mingo@kernel.org> wrote:
>
>
> * Uros Bizjak <ubizjak@gmail.com> wrote:
>
> > -     locked = cmpxchg_release(&lock->locked, _Q_LOCKED_VAL, 0);
> > -     if (likely(locked == _Q_LOCKED_VAL))
> > +     if (try_cmpxchg_release(&lock->locked, &locked, 0);
> >               return;                                   ^------------ ???
>
> This doesn't appear to be a particularly well-tested patch. ;-)

Ouch, embarrassing... oh it is a generic function, conditionally compiled with

#ifndef __pv_queued_spin_lock
#endif

and x86 defines its own function ...

I concentrated on different settings of _Q_PENDING_BITS and the above
slipped through.

Thanks,
Uros.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [tip: locking/core] locking/pvqspinlock: Use try_cmpxchg_acquire() in trylock_clear_pending()
  2024-04-11 13:33 ` [tip: locking/core] " tip-bot2 for Uros Bizjak
@ 2024-04-11 16:31   ` Linus Torvalds
  2024-04-12  9:42     ` [PATCH v2] " Ingo Molnar
  0 siblings, 1 reply; 11+ messages in thread
From: Linus Torvalds @ 2024-04-11 16:31 UTC (permalink / raw)
  To: linux-kernel
  Cc: linux-tip-commits, Uros Bizjak, Ingo Molnar, Waiman Long, x86

On Thu, 11 Apr 2024 at 06:33, tip-bot2 for Uros Bizjak
<tip-bot2@linutronix.de> wrote:
>
> Use try_cmpxchg_acquire(*ptr, &old, new) instead of
> cmpxchg_relaxed(*ptr, old, new) == old in trylock_clear_pending().

The above commit message is horribly confusing and wrong.

I was going "that's not right", because it says "use acquire instead
of relaxed" memory ordering, and then goes on to say "No functional
change intended".

But it turns out the *code* was always acquire, and it's only the
commit message that is wrong, presumably due to a bit too much
cut-and-paste.

But please fix the commit message, and use the right memory ordering
in the explanations too.

            Linus

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/2] locking/pvqspinlock: Use try_cmpxchg() in qspinlock_paravirt.h
  2024-04-11 13:35     ` Uros Bizjak
@ 2024-04-11 19:08       ` Uros Bizjak
  0 siblings, 0 replies; 11+ messages in thread
From: Uros Bizjak @ 2024-04-11 19:08 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: linux-kernel, Peter Zijlstra, Ingo Molnar, Will Deacon,
	Waiman Long, Boqun Feng

On Thu, Apr 11, 2024 at 3:35 PM Uros Bizjak <ubizjak@gmail.com> wrote:
>
> On Thu, Apr 11, 2024 at 3:24 PM Ingo Molnar <mingo@kernel.org> wrote:
> >
> >
> > * Uros Bizjak <ubizjak@gmail.com> wrote:
> >
> > > -     locked = cmpxchg_release(&lock->locked, _Q_LOCKED_VAL, 0);
> > > -     if (likely(locked == _Q_LOCKED_VAL))
> > > +     if (try_cmpxchg_release(&lock->locked, &locked, 0);
> > >               return;                                   ^------------ ???
> >
> > This doesn't appear to be a particularly well-tested patch. ;-)
>
> Ouch, embarrassing... oh it is a generic function, conditionally compiled with
>
> #ifndef __pv_queued_spin_lock
> #endif
>
> and x86 defines its own function ...

Looking at the assembly of the fixed function, it looks that improved
generic function is better than x86_64 special asm:

This is the new generic function:

0000000000000750 <__pv_queued_spin_unlock>:
 750:    f3 0f 1e fa              endbr64
 754:    b8 01 00 00 00           mov    $0x1,%eax
 759:    31 d2                    xor    %edx,%edx
 75b:    f0 0f b0 17              lock cmpxchg %dl,(%rdi)
 75f:    75 05                    jne    766 <__pv_queued_spin_unlock+0x16>
 761:    e9 00 00 00 00           jmp    766 <__pv_queued_spin_unlock+0x16>
            762: R_X86_64_PLT32    __x86_return_thunk-0x4
 766:    0f b6 f0                 movzbl %al,%esi
 769:    e9 02 ff ff ff           jmp    670 <__pv_queued_spin_unlock_slowpath>

and  the x86_64 asm version:

0000000000000050 <__raw_callee_save___pv_queued_spin_unlock>:
  50:    f3 0f 1e fa              endbr64
  54:    52                       push   %rdx
  55:    b8 01 00 00 00           mov    $0x1,%eax
  5a:    31 d2                    xor    %edx,%edx
  5c:    f0 0f b0 17              lock cmpxchg %dl,(%rdi)
  60:    3c 01                    cmp    $0x1,%al
  62:    75 06                    jne    6a <.slowpath>
  64:    5a                       pop    %rdx
  65:    e9 00 00 00 00           jmp    6a <.slowpath>

I didn't investigate slowpath, but the generic fast-path part is
certainly better than x86_64 special asm.

Uros.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH v2] locking/pvqspinlock: Use try_cmpxchg_acquire() in trylock_clear_pending()
  2024-04-11 16:31   ` Linus Torvalds
@ 2024-04-12  9:42     ` Ingo Molnar
  0 siblings, 0 replies; 11+ messages in thread
From: Ingo Molnar @ 2024-04-12  9:42 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, linux-tip-commits, Uros Bizjak, Waiman Long, x86


* Linus Torvalds <torvalds@linux-foundation.org> wrote:

> On Thu, 11 Apr 2024 at 06:33, tip-bot2 for Uros Bizjak
> <tip-bot2@linutronix.de> wrote:
> >
> > Use try_cmpxchg_acquire(*ptr, &old, new) instead of
> > cmpxchg_relaxed(*ptr, old, new) == old in trylock_clear_pending().
> 
> The above commit message is horribly confusing and wrong.
> 
> I was going "that's not right", because it says "use acquire instead
> of relaxed" memory ordering, and then goes on to say "No functional
> change intended".
> 
> But it turns out the *code* was always acquire, and it's only the
> commit message that is wrong, presumably due to a bit too much
> cut-and-paste.

Yeah, the replacement is cmpxchg_acquire() => try_cmpxchg_acquire(), with 
no change to memory ordering.

> But please fix the commit message, and use the right memory ordering
> in the explanations too.

Done, find below the new patch, which a hopefully better commit message.

I also added your Reviewed-by tag optimistically :)

Thanks,

	Ingo

===========================>
From: Uros Bizjak <ubizjak@gmail.com>
Date: Mon, 25 Mar 2024 15:09:32 +0100
Subject: [PATCH] locking/pvqspinlock: Use try_cmpxchg_acquire() in trylock_clear_pending()

Replace this pattern in trylock_clear_pending():

    cmpxchg_acquire(*ptr, old, new) == old

... with the simpler and faster:

    try_cmpxchg_acquire(*ptr, &old, new)

The x86 CMPXCHG instruction returns success in the ZF flag, so this change
saves a compare after the CMPXCHG.

Also change the return type of the function to bool and streamline
the control flow in the _Q_PENDING_BITS == 8 variant a bit.

No functional change intended.

Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Reviewed-by: Waiman Long <longman@redhat.com>
Reviewed-by: Linus Torvalds <torvalds@linux-foundation.org>
Link: https://lore.kernel.org/r/20240325140943.815051-1-ubizjak@gmail.com
---
 kernel/locking/qspinlock_paravirt.h | 31 +++++++++++++------------------
 1 file changed, 13 insertions(+), 18 deletions(-)

diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index 169950fe1aad..77ba80bd95f9 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -116,11 +116,12 @@ static __always_inline void set_pending(struct qspinlock *lock)
  * barrier. Therefore, an atomic cmpxchg_acquire() is used to acquire the
  * lock just to be sure that it will get it.
  */
-static __always_inline int trylock_clear_pending(struct qspinlock *lock)
+static __always_inline bool trylock_clear_pending(struct qspinlock *lock)
 {
+	u16 old = _Q_PENDING_VAL;
+
 	return !READ_ONCE(lock->locked) &&
-	       (cmpxchg_acquire(&lock->locked_pending, _Q_PENDING_VAL,
-				_Q_LOCKED_VAL) == _Q_PENDING_VAL);
+	       try_cmpxchg_acquire(&lock->locked_pending, &old, _Q_LOCKED_VAL);
 }
 #else /* _Q_PENDING_BITS == 8 */
 static __always_inline void set_pending(struct qspinlock *lock)
@@ -128,27 +129,21 @@ static __always_inline void set_pending(struct qspinlock *lock)
 	atomic_or(_Q_PENDING_VAL, &lock->val);
 }
 
-static __always_inline int trylock_clear_pending(struct qspinlock *lock)
+static __always_inline bool trylock_clear_pending(struct qspinlock *lock)
 {
-	int val = atomic_read(&lock->val);
-
-	for (;;) {
-		int old, new;
-
-		if (val  & _Q_LOCKED_MASK)
-			break;
+	int old, new;
 
+	old = atomic_read(&lock->val);
+	do {
+		if (old & _Q_LOCKED_MASK)
+			return false;
 		/*
 		 * Try to clear pending bit & set locked bit
 		 */
-		old = val;
-		new = (val & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL;
-		val = atomic_cmpxchg_acquire(&lock->val, old, new);
+		new = (old & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL;
+	} while (!atomic_try_cmpxchg_acquire (&lock->val, &old, new));
 
-		if (val == old)
-			return 1;
-	}
-	return 0;
+	return true;
 }
 #endif /* _Q_PENDING_BITS == 8 */
 

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [tip: locking/core] locking/pvqspinlock: Use try_cmpxchg_acquire() in trylock_clear_pending()
  2024-03-25 14:09 [PATCH 1/2] locking/pvqspinlock: Use try_cmpxchg_acquire() in trylock_clear_pending() Uros Bizjak
                   ` (2 preceding siblings ...)
  2024-04-11 13:33 ` [tip: locking/core] " tip-bot2 for Uros Bizjak
@ 2024-04-12  9:46 ` tip-bot2 for Uros Bizjak
  3 siblings, 0 replies; 11+ messages in thread
From: tip-bot2 for Uros Bizjak @ 2024-04-12  9:46 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Uros Bizjak, Ingo Molnar, Waiman Long, Linus Torvalds, x86,
	linux-kernel

The following commit has been merged into the locking/core branch of tip:

Commit-ID:     6a97734f2222e0352f1900e3eb3167e9069b91bb
Gitweb:        https://git.kernel.org/tip/6a97734f2222e0352f1900e3eb3167e9069b91bb
Author:        Uros Bizjak <ubizjak@gmail.com>
AuthorDate:    Mon, 25 Mar 2024 15:09:32 +01:00
Committer:     Ingo Molnar <mingo@kernel.org>
CommitterDate: Fri, 12 Apr 2024 11:40:51 +02:00

locking/pvqspinlock: Use try_cmpxchg_acquire() in trylock_clear_pending()

Replace this pattern in trylock_clear_pending():

    cmpxchg_acquire(*ptr, old, new) == old

... with the simpler and faster:

    try_cmpxchg_acquire(*ptr, &old, new)

The x86 CMPXCHG instruction returns success in the ZF flag, so this change
saves a compare after the CMPXCHG.

Also change the return type of the function to bool and streamline
the control flow in the _Q_PENDING_BITS == 8 variant a bit.

No functional change intended.

Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Reviewed-by: Waiman Long <longman@redhat.com>
Reviewed-by: Linus Torvalds <torvalds@linux-foundation.org>
Link: https://lore.kernel.org/r/20240325140943.815051-1-ubizjak@gmail.com
---
 kernel/locking/qspinlock_paravirt.h | 31 +++++++++++-----------------
 1 file changed, 13 insertions(+), 18 deletions(-)

diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index 169950f..77ba80b 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -116,11 +116,12 @@ static __always_inline void set_pending(struct qspinlock *lock)
  * barrier. Therefore, an atomic cmpxchg_acquire() is used to acquire the
  * lock just to be sure that it will get it.
  */
-static __always_inline int trylock_clear_pending(struct qspinlock *lock)
+static __always_inline bool trylock_clear_pending(struct qspinlock *lock)
 {
+	u16 old = _Q_PENDING_VAL;
+
 	return !READ_ONCE(lock->locked) &&
-	       (cmpxchg_acquire(&lock->locked_pending, _Q_PENDING_VAL,
-				_Q_LOCKED_VAL) == _Q_PENDING_VAL);
+	       try_cmpxchg_acquire(&lock->locked_pending, &old, _Q_LOCKED_VAL);
 }
 #else /* _Q_PENDING_BITS == 8 */
 static __always_inline void set_pending(struct qspinlock *lock)
@@ -128,27 +129,21 @@ static __always_inline void set_pending(struct qspinlock *lock)
 	atomic_or(_Q_PENDING_VAL, &lock->val);
 }
 
-static __always_inline int trylock_clear_pending(struct qspinlock *lock)
+static __always_inline bool trylock_clear_pending(struct qspinlock *lock)
 {
-	int val = atomic_read(&lock->val);
-
-	for (;;) {
-		int old, new;
-
-		if (val  & _Q_LOCKED_MASK)
-			break;
+	int old, new;
 
+	old = atomic_read(&lock->val);
+	do {
+		if (old & _Q_LOCKED_MASK)
+			return false;
 		/*
 		 * Try to clear pending bit & set locked bit
 		 */
-		old = val;
-		new = (val & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL;
-		val = atomic_cmpxchg_acquire(&lock->val, old, new);
+		new = (old & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL;
+	} while (!atomic_try_cmpxchg_acquire (&lock->val, &old, new));
 
-		if (val == old)
-			return 1;
-	}
-	return 0;
+	return true;
 }
 #endif /* _Q_PENDING_BITS == 8 */
 

^ permalink raw reply related	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2024-04-12  9:46 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-03-25 14:09 [PATCH 1/2] locking/pvqspinlock: Use try_cmpxchg_acquire() in trylock_clear_pending() Uros Bizjak
2024-03-25 14:09 ` [PATCH 2/2] locking/pvqspinlock: Use try_cmpxchg() in qspinlock_paravirt.h Uros Bizjak
2024-03-28  2:22   ` Waiman Long
2024-04-11 13:24   ` Ingo Molnar
2024-04-11 13:35     ` Uros Bizjak
2024-04-11 19:08       ` Uros Bizjak
2024-03-28  1:52 ` [PATCH 1/2] locking/pvqspinlock: Use try_cmpxchg_acquire() in trylock_clear_pending() Waiman Long
2024-04-11 13:33 ` [tip: locking/core] " tip-bot2 for Uros Bizjak
2024-04-11 16:31   ` Linus Torvalds
2024-04-12  9:42     ` [PATCH v2] " Ingo Molnar
2024-04-12  9:46 ` [tip: locking/core] " tip-bot2 for Uros Bizjak

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.