linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Michel Lespinasse <walken@google.com>
To: Rik van Riel <riel@redhat.com>, Ingo Molnar <mingo@redhat.com>,
	"Paul E. McKenney" <paulmck@us.ibm.com>,
	David Howells <dhowells@redhat.com>,
	Thomas Gleixner <tglx@linutronix.de>,
	Eric Dumazet <edumazet@google.com>,
	"Eric W. Biederman" <ebiederm@xmission.com>,
	Manfred Spraul <manfred@colorfullife.com>
Cc: linux-kernel@vger.kernel.org
Subject: [RFC PATCH 1/6] kernel: implement queue spinlock API
Date: Tue, 22 Jan 2013 15:13:30 -0800	[thread overview]
Message-ID: <1358896415-28569-2-git-send-email-walken@google.com> (raw)
In-Reply-To: <1358896415-28569-1-git-send-email-walken@google.com>

Introduce queue spinlocks, to be used in situations where good throughput
is desired even under occasional high-contention conditions.

This initial implementation is based on the classic MCS spinlock,
because I think this represents the nicest API we can hope for in a
fast queue spinlock algorithm. The MCS spinlock has known limitations
in that it performs very well under high contention, but is not as
good as the ticket spinlock under low contention. I will address these
limitations in a later patch, which will propose an alternative,
higher performance implementation using (mostly) the same API.

Sample use case acquiring mystruct->lock:

  struct q_spinlock_node node;

  q_spin_lock(&mystruct->lock, &node);
  ...
  q_spin_unlock(&mystruct->lock, &node);

The q_spinlock_node must be used by a single lock holder / waiter, and it must
remain allocated for the entire duration for which the lock is held (or waited on).

Signed-off-by: Michel Lespinasse <walken@google.com>

---
 arch/x86/include/asm/queue_spinlock.h |   20 ++++++++
 include/asm-generic/queue_spinlock.h  |    7 +++
 include/linux/queue_spinlock.h        |   61 ++++++++++++++++++++++++
 kernel/Makefile                       |    2 +-
 kernel/queue_spinlock.c               |   82 +++++++++++++++++++++++++++++++++
 5 files changed, 171 insertions(+), 1 deletions(-)
 create mode 100644 arch/x86/include/asm/queue_spinlock.h
 create mode 100644 include/asm-generic/queue_spinlock.h
 create mode 100644 include/linux/queue_spinlock.h
 create mode 100644 kernel/queue_spinlock.c

diff --git a/arch/x86/include/asm/queue_spinlock.h b/arch/x86/include/asm/queue_spinlock.h
new file mode 100644
index 000000000000..655e01c1c2e8
--- /dev/null
+++ b/arch/x86/include/asm/queue_spinlock.h
@@ -0,0 +1,20 @@
+#ifndef _ASM_QUEUE_SPINLOCK_H
+#define _ASM_QUEUE_SPINLOCK_H
+
+#if defined(CONFIG_X86_PPRO_FENCE) || defined(CONFIG_X86_OOSTORE)
+
+#define q_spin_lock_mb() mb()
+#define q_spin_unlock_mb() mb()
+
+#else
+
+/*
+ * x86 memory ordering only needs compiler barriers to implement
+ * acquire load / release store semantics
+ */
+#define q_spin_lock_mb() barrier()
+#define q_spin_unlock_mb() barrier()
+
+#endif
+
+#endif /* _ASM_QUEUE_SPINLOCK_H */
diff --git a/include/asm-generic/queue_spinlock.h b/include/asm-generic/queue_spinlock.h
new file mode 100644
index 000000000000..01fb9fbfb8dc
--- /dev/null
+++ b/include/asm-generic/queue_spinlock.h
@@ -0,0 +1,7 @@
+#ifndef _ASM_GENERIC_QUEUE_SPINLOCK_H
+#define _ASM_GENERIC_QUEUE_SPINLOCK_H
+
+#define q_spin_lock_mb() mb()
+#define q_spin_unlock_mb() mb()
+
+#endif /* _ASM_GENERIC_QUEUE_SPINLOCK_H */
diff --git a/include/linux/queue_spinlock.h b/include/linux/queue_spinlock.h
new file mode 100644
index 000000000000..84b2470d232b
--- /dev/null
+++ b/include/linux/queue_spinlock.h
@@ -0,0 +1,61 @@
+#ifndef __LINUX_QUEUE_SPINLOCK_H
+#define __LINUX_QUEUE_SPINLOCK_H
+
+#include <linux/bug.h>
+
+struct q_spinlock_node {
+	struct q_spinlock_node *next;
+	bool wait;
+};
+
+struct q_spinlock {
+	struct q_spinlock_node *node;
+};
+
+#define __Q_SPIN_LOCK_UNLOCKED(name) { NULL }
+
+#ifdef CONFIG_SMP
+
+void q_spin_lock(struct q_spinlock *lock, struct q_spinlock_node *node);
+void q_spin_lock_bh(struct q_spinlock *lock, struct q_spinlock_node *node);
+void q_spin_unlock(struct q_spinlock *lock, struct q_spinlock_node *node);
+void q_spin_unlock_bh(struct q_spinlock *lock, struct q_spinlock_node *node);
+
+static inline void q_spin_lock_init(struct q_spinlock *lock)
+{
+	lock->node = NULL;
+}
+
+static inline void assert_q_spin_locked(struct q_spinlock *lock)
+{
+	BUG_ON(!lock->node);
+}
+
+static inline void assert_q_spin_unlocked(struct q_spinlock *lock)
+{
+	BUG_ON(lock->node);
+}
+
+#else /* !CONFIG_SMP */
+
+static inline void
+q_spin_lock(struct q_spinlock *lock, struct q_spinlock_node *node) {}
+
+static inline void
+q_spin_lock_bh(struct q_spinlock *lock, struct q_spinlock_node *node) {}
+
+static inline void
+q_spin_unlock(struct q_spinlock *lock, struct q_spinlock_node *node) {}
+
+static inline void
+q_spin_unlock_bh(struct q_spinlock *lock, struct q_spinlock_node *node) {}
+
+static inline void q_spin_lock_init(struct q_spinlock *lock) {}
+
+static inline void assert_q_spin_locked(struct q_spinlock *lock) {}
+
+static inline void assert_q_spin_unlocked(struct q_spinlock *lock) {}
+
+#endif /* CONFIG_SMP */
+
+#endif /* __LINUX_QUEUE_SPINLOCK_H */
diff --git a/kernel/Makefile b/kernel/Makefile
index 86e3285ae7e5..ec91cfef4d4e 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -49,7 +49,7 @@ obj-$(CONFIG_SMP) += smp.o
 ifneq ($(CONFIG_SMP),y)
 obj-y += up.o
 endif
-obj-$(CONFIG_SMP) += spinlock.o
+obj-$(CONFIG_SMP) += spinlock.o queue_spinlock.o
 obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
 obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
 obj-$(CONFIG_UID16) += uid16.o
diff --git a/kernel/queue_spinlock.c b/kernel/queue_spinlock.c
new file mode 100644
index 000000000000..20304f0bc852
--- /dev/null
+++ b/kernel/queue_spinlock.c
@@ -0,0 +1,82 @@
+#include <linux/queue_spinlock.h>
+#include <linux/export.h>
+#include <linux/preempt.h>
+#include <linux/bottom_half.h>
+#include <asm/barrier.h>
+#include <asm/processor.h>	/* for cpu_relax() */
+#include <asm/queue_spinlock.h>
+
+static inline void
+__q_spin_lock(struct q_spinlock *lock, struct q_spinlock_node *node)
+{
+	struct q_spinlock_node *prev;
+
+	node->next = NULL;
+	prev = xchg(&lock->node, node);
+	if (likely(!prev))
+		/*
+		 * Acquired the lock.
+		 * xchg() already includes a full memory barrier.
+		 */
+		return;
+
+	node->wait = true;
+	smp_wmb();
+	ACCESS_ONCE(prev->next) = node;
+	while (ACCESS_ONCE(node->wait))
+		cpu_relax();
+	q_spin_lock_mb();	/* guarantee acquire load semantics */
+}
+
+static inline void
+__q_spin_unlock(struct q_spinlock *lock, struct q_spinlock_node *node)
+{
+	struct q_spinlock_node *next;
+	if (likely(!(next = ACCESS_ONCE(node->next)))) {
+		/*
+		 * Try to release lock.
+		 * cmpxchg() already includes a full memory barrier.
+		 */
+		if (likely(cmpxchg(&lock->node, node, NULL) == node))
+			return;
+
+		/*
+		 * We raced with __q_spin_lock().
+		 * Wait for next thread to be known.
+		 */
+		while (!(next = ACCESS_ONCE(node->next)))
+			cpu_relax();
+	}
+	q_spin_unlock_mb();	/* guarantee release store semantics */
+	ACCESS_ONCE(next->wait) = false;
+}
+
+void q_spin_lock(struct q_spinlock *lock, struct q_spinlock_node *node)
+{
+	preempt_disable();
+	__q_spin_lock(lock, node);
+}
+EXPORT_SYMBOL(q_spin_lock);
+
+void q_spin_lock_bh(struct q_spinlock *lock, struct q_spinlock_node *node)
+{
+	local_bh_disable();
+	preempt_disable();
+	__q_spin_lock(lock, node);
+}
+EXPORT_SYMBOL(q_spin_lock_bh);
+
+void q_spin_unlock(struct q_spinlock *lock, struct q_spinlock_node *node)
+{
+	__q_spin_unlock(lock, node);
+	preempt_enable();
+}
+EXPORT_SYMBOL(q_spin_unlock);
+
+void q_spin_unlock_bh(struct q_spinlock *lock, struct q_spinlock_node *node)
+{
+	__q_spin_unlock(lock, node);
+	preempt_enable_no_resched();
+	local_bh_enable_ip((unsigned long)__builtin_return_address(0));
+}
+EXPORT_SYMBOL(q_spin_unlock_bh);
-- 
1.7.7.3

  reply	other threads:[~2013-01-22 23:15 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-01-22 23:13 [RFC PATCH 0/6] fast queue spinlocks Michel Lespinasse
2013-01-22 23:13 ` Michel Lespinasse [this message]
2013-02-07 22:34   ` [RFC PATCH 1/6] kernel: implement queue spinlock API Paul E. McKenney
2013-02-07 22:56     ` Eric Dumazet
2013-02-07 23:53       ` Paul E. McKenney
2013-02-07 23:58       ` Michel Lespinasse
2013-02-08  0:03         ` Eric Dumazet
2013-02-08  0:40           ` Paul E. McKenney
2013-02-08  3:48             ` Michel Lespinasse
2013-02-08  4:36               ` Paul E. McKenney
2013-02-08  5:03                 ` Paul E. McKenney
2013-02-08  5:11                   ` Michel Lespinasse
2013-02-08 16:17                     ` Paul E. McKenney
2013-02-07 23:14     ` John Stultz
2013-02-08  0:35     ` Michel Lespinasse
2013-01-22 23:13 ` [RFC PATCH 2/6] net: convert qdisc busylock to use " Michel Lespinasse
2013-01-22 23:13 ` [RFC PATCH 3/6] ipc: convert ipc objects " Michel Lespinasse
2013-01-22 23:13 ` [RFC PATCH 4/6] kernel: faster queue spinlock implementation Michel Lespinasse
2013-01-23 21:55   ` Rik van Riel
2013-01-23 23:52     ` Michel Lespinasse
2013-01-24  0:18   ` Eric Dumazet
2013-01-25 20:30   ` [RFC PATCH 7/6] kernel: document fast queue spinlocks Rik van Riel
2013-01-22 23:13 ` [RFC PATCH 5/6] net: qdisc busylock updates to account for queue spinlock api change Michel Lespinasse
2013-01-22 23:13 ` [RFC PATCH 6/6] ipc: object locking " Michel Lespinasse
2013-01-22 23:17 ` [RFC PATCH 0/6] fast queue spinlocks Michel Lespinasse

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1358896415-28569-2-git-send-email-walken@google.com \
    --to=walken@google.com \
    --cc=dhowells@redhat.com \
    --cc=ebiederm@xmission.com \
    --cc=edumazet@google.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=manfred@colorfullife.com \
    --cc=mingo@redhat.com \
    --cc=paulmck@us.ibm.com \
    --cc=riel@redhat.com \
    --cc=tglx@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).