[tip:locking/core] locking/qrwlock: Make use of _{acquire|release|relaxed}() atomics

All of lore.kernel.org
 help / color / mirror / Atom feed

From: tip-bot for Will Deacon <tipbot@zytor.com>
To: linux-tip-commits@vger.kernel.org
Cc: linux-kernel@vger.kernel.org, will.deacon@arm.com,
	tglx@linutronix.de, hpa@zytor.com, torvalds@linux-foundation.org,
	mingo@kernel.org, peterz@infradead.org
Subject: [tip:locking/core] locking/qrwlock: Make use of _{acquire|release|relaxed}() atomics
Date: Wed, 12 Aug 2015 05:36:07 -0700	[thread overview]
Message-ID: <tip-77e430e3e45662b696dc49aa53ea0f7ac63f2574@git.kernel.org> (raw)
In-Reply-To: <1438880084-18856-7-git-send-email-will.deacon@arm.com>

Commit-ID:  77e430e3e45662b696dc49aa53ea0f7ac63f2574
Gitweb:     http://git.kernel.org/tip/77e430e3e45662b696dc49aa53ea0f7ac63f2574
Author:     Will Deacon <will.deacon@arm.com>
AuthorDate: Thu, 6 Aug 2015 17:54:42 +0100
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Wed, 12 Aug 2015 11:59:06 +0200

locking/qrwlock: Make use of _{acquire|release|relaxed}() atomics

The qrwlock implementation is slightly heavy in its use of memory
barriers, mainly through the use of _cmpxchg() and _return() atomics, which
imply full barrier semantics.

This patch modifies the qrwlock code to use the more relaxed atomic
routines so that we can reduce the unnecessary barrier overhead on
weakly-ordered architectures.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman.Long@hp.com
Cc: paulmck@linux.vnet.ibm.com
Link: http://lkml.kernel.org/r/1438880084-18856-7-git-send-email-will.deacon@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/asm-generic/qrwlock.h | 13 ++++++-------
 kernel/locking/qrwlock.c      | 24 ++++++++++++------------
 2 files changed, 18 insertions(+), 19 deletions(-)

diff --git a/include/asm-generic/qrwlock.h b/include/asm-generic/qrwlock.h
index eb673dd..54a8e65 100644
--- a/include/asm-generic/qrwlock.h
+++ b/include/asm-generic/qrwlock.h
@@ -68,7 +68,7 @@ static inline int queued_read_trylock(struct qrwlock *lock)
 
 	cnts = atomic_read(&lock->cnts);
 	if (likely(!(cnts & _QW_WMASK))) {
-		cnts = (u32)atomic_add_return(_QR_BIAS, &lock->cnts);
+		cnts = (u32)atomic_add_return_acquire(_QR_BIAS, &lock->cnts);
 		if (likely(!(cnts & _QW_WMASK)))
 			return 1;
 		atomic_sub(_QR_BIAS, &lock->cnts);
@@ -89,8 +89,8 @@ static inline int queued_write_trylock(struct qrwlock *lock)
 	if (unlikely(cnts))
 		return 0;
 
-	return likely(atomic_cmpxchg(&lock->cnts,
-				     cnts, cnts | _QW_LOCKED) == cnts);
+	return likely(atomic_cmpxchg_acquire(&lock->cnts,
+					     cnts, cnts | _QW_LOCKED) == cnts);
 }
 /**
  * queued_read_lock - acquire read lock of a queue rwlock
@@ -100,7 +100,7 @@ static inline void queued_read_lock(struct qrwlock *lock)
 {
 	u32 cnts;
 
-	cnts = atomic_add_return(_QR_BIAS, &lock->cnts);
+	cnts = atomic_add_return_acquire(_QR_BIAS, &lock->cnts);
 	if (likely(!(cnts & _QW_WMASK)))
 		return;
 
@@ -115,7 +115,7 @@ static inline void queued_read_lock(struct qrwlock *lock)
 static inline void queued_write_lock(struct qrwlock *lock)
 {
 	/* Optimize for the unfair lock case where the fair flag is 0. */
-	if (atomic_cmpxchg(&lock->cnts, 0, _QW_LOCKED) == 0)
+	if (atomic_cmpxchg_acquire(&lock->cnts, 0, _QW_LOCKED) == 0)
 		return;
 
 	queued_write_lock_slowpath(lock);
@@ -130,8 +130,7 @@ static inline void queued_read_unlock(struct qrwlock *lock)
 	/*
 	 * Atomically decrement the reader count
 	 */
-	smp_mb__before_atomic();
-	atomic_sub(_QR_BIAS, &lock->cnts);
+	(void)atomic_sub_return_release(_QR_BIAS, &lock->cnts);
 }
 
 /**
diff --git a/kernel/locking/qrwlock.c b/kernel/locking/qrwlock.c
index 6a7a3b8..f17a3e3 100644
--- a/kernel/locking/qrwlock.c
+++ b/kernel/locking/qrwlock.c
@@ -55,7 +55,7 @@ rspin_until_writer_unlock(struct qrwlock *lock, u32 cnts)
 {
 	while ((cnts & _QW_WMASK) == _QW_LOCKED) {
 		cpu_relax_lowlatency();
-		cnts = smp_load_acquire((u32 *)&lock->cnts);
+		cnts = atomic_read_acquire(&lock->cnts);
 	}
 }
 
@@ -74,8 +74,9 @@ void queued_read_lock_slowpath(struct qrwlock *lock, u32 cnts)
 		 * Readers in interrupt context will get the lock immediately
 		 * if the writer is just waiting (not holding the lock yet).
 		 * The rspin_until_writer_unlock() function returns immediately
-		 * in this case. Otherwise, they will spin until the lock
-		 * is available without waiting in the queue.
+		 * in this case. Otherwise, they will spin (with ACQUIRE
+		 * semantics) until the lock is available without waiting in
+		 * the queue.
 		 */
 		rspin_until_writer_unlock(lock, cnts);
 		return;
@@ -88,12 +89,11 @@ void queued_read_lock_slowpath(struct qrwlock *lock, u32 cnts)
 	arch_spin_lock(&lock->lock);
 
 	/*
-	 * At the head of the wait queue now, increment the reader count
-	 * and wait until the writer, if it has the lock, has gone away.
-	 * At ths stage, it is not possible for a writer to remain in the
-	 * waiting state (_QW_WAITING). So there won't be any deadlock.
+	 * The ACQUIRE semantics of the following spinning code ensure
+	 * that accesses can't leak upwards out of our subsequent critical
+	 * section in the case that the lock is currently held for write.
 	 */
-	cnts = atomic_add_return(_QR_BIAS, &lock->cnts) - _QR_BIAS;
+	cnts = atomic_add_return_acquire(_QR_BIAS, &lock->cnts) - _QR_BIAS;
 	rspin_until_writer_unlock(lock, cnts);
 
 	/*
@@ -116,7 +116,7 @@ void queued_write_lock_slowpath(struct qrwlock *lock)
 
 	/* Try to acquire the lock directly if no reader is present */
 	if (!atomic_read(&lock->cnts) &&
-	    (atomic_cmpxchg(&lock->cnts, 0, _QW_LOCKED) == 0))
+	    (atomic_cmpxchg_acquire(&lock->cnts, 0, _QW_LOCKED) == 0))
 		goto unlock;
 
 	/*
@@ -127,7 +127,7 @@ void queued_write_lock_slowpath(struct qrwlock *lock)
 		struct __qrwlock *l = (struct __qrwlock *)lock;
 
 		if (!READ_ONCE(l->wmode) &&
-		   (cmpxchg(&l->wmode, 0, _QW_WAITING) == 0))
+		   (cmpxchg_relaxed(&l->wmode, 0, _QW_WAITING) == 0))
 			break;
 
 		cpu_relax_lowlatency();
@@ -137,8 +137,8 @@ void queued_write_lock_slowpath(struct qrwlock *lock)
 	for (;;) {
 		cnts = atomic_read(&lock->cnts);
 		if ((cnts == _QW_WAITING) &&
-		    (atomic_cmpxchg(&lock->cnts, _QW_WAITING,
-				    _QW_LOCKED) == _QW_WAITING))
+		    (atomic_cmpxchg_acquire(&lock->cnts, _QW_WAITING,
+					    _QW_LOCKED) == _QW_WAITING))
 			break;
 
 		cpu_relax_lowlatency();

next prev parent reply	other threads:[~2015-08-12 12:36 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-08-06 16:54 [PATCH v5 0/8] Add generic support for relaxed atomics Will Deacon
2015-08-06 16:54 ` [PATCH v5 1/8] atomics: add acquire/release/relaxed variants of some atomic operations Will Deacon
2015-08-12 12:34   ` [tip:locking/core] locking/atomics: Add _{acquire|release|relaxed}() " tip-bot for Will Deacon
2015-08-06 16:54 ` [PATCH v5 2/8] asm-generic: rework atomic-long.h to avoid bulk code duplication Will Deacon
2015-08-12 12:34   ` [tip:locking/core] locking, asm-generic: Rework " tip-bot for Will Deacon
2015-08-06 16:54 ` [PATCH v5 3/8] asm-generic: add relaxed/acquire/release variants for atomic_long_t Will Deacon
2015-08-12 12:35   ` [tip:locking/core] locking, asm-generic: Add _{relaxed|acquire|release}() variants for 'atomic_long_t' tip-bot for Will Deacon
2015-08-06 16:54 ` [PATCH v5 4/8] lockref: remove homebrew cmpxchg64_relaxed macro definition Will Deacon
2015-08-12 12:35   ` [tip:locking/core] locking/lockref: Remove homebrew cmpxchg64_relaxed() " tip-bot for Will Deacon
2015-08-06 16:54 ` [PATCH v5 5/8] locking/qrwlock: implement queue_write_unlock using smp_store_release Will Deacon
2015-08-12 12:35   ` [tip:locking/core] locking/qrwlock: Implement queue_write_unlock() using smp_store_release() tip-bot for Will Deacon
2015-08-06 16:54 ` [PATCH v5 6/8] locking/qrwlock: make use of acquire/release/relaxed atomics Will Deacon
2015-08-12 12:36   ` tip-bot for Will Deacon [this message]
2015-08-06 16:54 ` [PATCH v5 7/8] include/llist: use linux/atomic.h instead of asm/cmpxchg.h Will Deacon
2015-08-12 12:36   ` [tip:locking/core] locking, include/llist: Use linux/atomic.h " tip-bot for Will Deacon
2015-08-06 16:54 ` [PATCH v5 8/8] ARM: atomics: define our SMP atomics in terms of _relaxed operations Will Deacon
2015-08-12 12:36   ` [tip:locking/core] locking, ARM, atomics: Define our SMP atomics in terms of _relaxed() operations tip-bot for Will Deacon
2015-08-07 15:13 ` [PATCH v5 0/8] Add generic support for relaxed atomics Peter Zijlstra
2015-08-07 16:29   ` Will Deacon
2015-08-26  4:28 ` Boqun Feng
2015-08-26 10:41   ` Will Deacon
2015-08-26 11:35     ` Boqun Feng

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:eb673dd dfblob:54a8e65 dfblob:6a7a3b8 dfblob:f17a3e3 )
 OR (
bs:"[tip:locking/core] locking/qrwlock: Make use of _{acquire|release|relaxed}() atomics" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=tip-77e430e3e45662b696dc49aa53ea0f7ac63f2574@git.kernel.org \
    --to=tipbot@zytor.com \
    --cc=hpa@zytor.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-tip-commits@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=peterz@infradead.org \
    --cc=tglx@linutronix.de \
    --cc=torvalds@linux-foundation.org \
    --cc=will.deacon@arm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.