[PATCH 02/13] locking/qspinlock: inline mcs_spinlock functions into qspinlock

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Nicholas Piggin <npiggin@gmail.com>
To: Peter Zijlstra <peterz@infradead.org>
Cc: Nicholas Piggin <npiggin@gmail.com>,
	Ingo Molnar <mingo@redhat.com>, Will Deacon <will@kernel.org>,
	Waiman Long <longman@redhat.com>,
	Boqun Feng <boqun.feng@gmail.com>,
	linux-kernel@vger.kernel.org
Subject: [PATCH 02/13] locking/qspinlock: inline mcs_spinlock functions into qspinlock
Date: Tue,  5 Jul 2022 00:38:09 +1000	[thread overview]
Message-ID: <20220704143820.3071004-3-npiggin@gmail.com> (raw)
In-Reply-To: <20220704143820.3071004-1-npiggin@gmail.com>

qspinlock uses mcs_spinlock for the struct type (.next, .locked, and the
misplaced .count), and arch_mcs_spin_{un}lock_contended(). These can be
trivially inlined into qspinlock, and the unused mcs_spinlock code
removed.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/alpha/include/asm/Kbuild       |   1 -
 arch/arc/include/asm/Kbuild         |   1 -
 arch/arm/include/asm/mcs_spinlock.h |  24 ------
 arch/arm64/include/asm/Kbuild       |   1 -
 arch/hexagon/include/asm/Kbuild     |   1 -
 arch/ia64/include/asm/Kbuild        |   1 -
 arch/m68k/include/asm/Kbuild        |   1 -
 arch/microblaze/include/asm/Kbuild  |   1 -
 arch/mips/include/asm/Kbuild        |   1 -
 arch/nios2/include/asm/Kbuild       |   1 -
 arch/parisc/include/asm/Kbuild      |   1 -
 arch/powerpc/include/asm/Kbuild     |   1 -
 arch/s390/include/asm/Kbuild        |   1 -
 arch/sh/include/asm/Kbuild          |   1 -
 arch/sparc/include/asm/Kbuild       |   1 -
 arch/um/include/asm/Kbuild          |   1 -
 arch/x86/include/asm/Kbuild         |   1 -
 arch/xtensa/include/asm/Kbuild      |   1 -
 include/asm-generic/mcs_spinlock.h  |  13 ---
 kernel/locking/mcs_spinlock.h       | 121 ----------------------------
 kernel/locking/qspinlock.c          |  38 ++++-----
 kernel/locking/qspinlock_paravirt.h |  55 ++++++-------
 22 files changed, 43 insertions(+), 225 deletions(-)
 delete mode 100644 arch/arm/include/asm/mcs_spinlock.h
 delete mode 100644 include/asm-generic/mcs_spinlock.h
 delete mode 100644 kernel/locking/mcs_spinlock.h

diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild
index 42911c8340c7..d21cf7b3173a 100644
--- a/arch/alpha/include/asm/Kbuild
+++ b/arch/alpha/include/asm/Kbuild
@@ -3,4 +3,3 @@
 generated-y += syscall_table.h
 generic-y += export.h
 generic-y += kvm_para.h
-generic-y += mcs_spinlock.h
diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild
index 3c1afa524b9c..5ae4337a9301 100644
--- a/arch/arc/include/asm/Kbuild
+++ b/arch/arc/include/asm/Kbuild
@@ -1,6 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
 generic-y += extable.h
 generic-y += kvm_para.h
-generic-y += mcs_spinlock.h
 generic-y += parport.h
 generic-y += user.h
diff --git a/arch/arm/include/asm/mcs_spinlock.h b/arch/arm/include/asm/mcs_spinlock.h
deleted file mode 100644
index 529d2cf4d06f..000000000000
--- a/arch/arm/include/asm/mcs_spinlock.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __ASM_MCS_LOCK_H
-#define __ASM_MCS_LOCK_H
-
-#ifdef CONFIG_SMP
-#include <asm/spinlock.h>
-
-/* MCS spin-locking. */
-#define arch_mcs_spin_lock_contended(lock)				\
-do {									\
-	/* Ensure prior stores are observed before we enter wfe. */	\
-	smp_mb();							\
-	while (!(smp_load_acquire(lock)))				\
-		wfe();							\
-} while (0)								\
-
-#define arch_mcs_spin_unlock_contended(lock)				\
-do {									\
-	smp_store_release(lock, 1);					\
-	dsb_sev();							\
-} while (0)
-
-#endif	/* CONFIG_SMP */
-#endif	/* __ASM_MCS_LOCK_H */
diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index 5c8ee5a541d2..57e9ad366d25 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -1,6 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
 generic-y += early_ioremap.h
-generic-y += mcs_spinlock.h
 generic-y += qrwlock.h
 generic-y += qspinlock.h
 generic-y += parport.h
diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild
index 3ece3c93fe08..37bbf99f66d4 100644
--- a/arch/hexagon/include/asm/Kbuild
+++ b/arch/hexagon/include/asm/Kbuild
@@ -2,4 +2,3 @@
 generic-y += extable.h
 generic-y += iomap.h
 generic-y += kvm_para.h
-generic-y += mcs_spinlock.h
diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild
index f994c1daf9d4..a0198c12e339 100644
--- a/arch/ia64/include/asm/Kbuild
+++ b/arch/ia64/include/asm/Kbuild
@@ -1,5 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0
 generated-y += syscall_table.h
 generic-y += kvm_para.h
-generic-y += mcs_spinlock.h
 generic-y += vtime.h
diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild
index 1b720299deb1..8dbef73ce01d 100644
--- a/arch/m68k/include/asm/Kbuild
+++ b/arch/m68k/include/asm/Kbuild
@@ -3,5 +3,4 @@ generated-y += syscall_table.h
 generic-y += export.h
 generic-y += extable.h
 generic-y += kvm_para.h
-generic-y += mcs_spinlock.h
 generic-y += spinlock.h
diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild
index a055f5dbe00a..7615a27e0851 100644
--- a/arch/microblaze/include/asm/Kbuild
+++ b/arch/microblaze/include/asm/Kbuild
@@ -3,7 +3,6 @@ generated-y += syscall_table.h
 generic-y += cmpxchg.h
 generic-y += extable.h
 generic-y += kvm_para.h
-generic-y += mcs_spinlock.h
 generic-y += parport.h
 generic-y += syscalls.h
 generic-y += tlb.h
diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild
index dee172716581..65cedca08771 100644
--- a/arch/mips/include/asm/Kbuild
+++ b/arch/mips/include/asm/Kbuild
@@ -9,7 +9,6 @@ generated-y += unistd_nr_o32.h
 
 generic-y += export.h
 generic-y += kvm_para.h
-generic-y += mcs_spinlock.h
 generic-y += parport.h
 generic-y += qrwlock.h
 generic-y += qspinlock.h
diff --git a/arch/nios2/include/asm/Kbuild b/arch/nios2/include/asm/Kbuild
index 7fe7437555fb..5718eee9665c 100644
--- a/arch/nios2/include/asm/Kbuild
+++ b/arch/nios2/include/asm/Kbuild
@@ -2,6 +2,5 @@
 generic-y += cmpxchg.h
 generic-y += extable.h
 generic-y += kvm_para.h
-generic-y += mcs_spinlock.h
 generic-y += spinlock.h
 generic-y += user.h
diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild
index e6e7f74c8ac9..1f0c28d74c88 100644
--- a/arch/parisc/include/asm/Kbuild
+++ b/arch/parisc/include/asm/Kbuild
@@ -2,5 +2,4 @@
 generated-y += syscall_table_32.h
 generated-y += syscall_table_64.h
 generic-y += kvm_para.h
-generic-y += mcs_spinlock.h
 generic-y += user.h
diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild
index bcf95ce0964f..813a8c3405ad 100644
--- a/arch/powerpc/include/asm/Kbuild
+++ b/arch/powerpc/include/asm/Kbuild
@@ -4,7 +4,6 @@ generated-y += syscall_table_64.h
 generated-y += syscall_table_spu.h
 generic-y += export.h
 generic-y += kvm_types.h
-generic-y += mcs_spinlock.h
 generic-y += qrwlock.h
 generic-y += vtime.h
 generic-y += early_ioremap.h
diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild
index 1a18d7b82f86..8b036a4ee2ca 100644
--- a/arch/s390/include/asm/Kbuild
+++ b/arch/s390/include/asm/Kbuild
@@ -7,4 +7,3 @@ generated-y += unistd_nr.h
 generic-y += asm-offsets.h
 generic-y += export.h
 generic-y += kvm_types.h
-generic-y += mcs_spinlock.h
diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild
index fc44d9c88b41..3192f19bcf85 100644
--- a/arch/sh/include/asm/Kbuild
+++ b/arch/sh/include/asm/Kbuild
@@ -1,5 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0
 generated-y += syscall_table.h
 generic-y += kvm_para.h
-generic-y += mcs_spinlock.h
 generic-y += parport.h
diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild
index 0b9d98ced34a..f0b913f7ba05 100644
--- a/arch/sparc/include/asm/Kbuild
+++ b/arch/sparc/include/asm/Kbuild
@@ -3,4 +3,3 @@ generated-y += syscall_table_32.h
 generated-y += syscall_table_64.h
 generic-y += export.h
 generic-y += kvm_para.h
-generic-y += mcs_spinlock.h
diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild
index b2d834a29f3a..04080c0c1aec 100644
--- a/arch/um/include/asm/Kbuild
+++ b/arch/um/include/asm/Kbuild
@@ -14,7 +14,6 @@ generic-y += hw_irq.h
 generic-y += irq_regs.h
 generic-y += irq_work.h
 generic-y += kdebug.h
-generic-y += mcs_spinlock.h
 generic-y += mmiowb.h
 generic-y += module.lds.h
 generic-y += param.h
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index 1e51650b79d7..beb7683f7b8f 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -10,4 +10,3 @@ generated-y += xen-hypercalls.h
 
 generic-y += early_ioremap.h
 generic-y += export.h
-generic-y += mcs_spinlock.h
diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild
index fa07c686cbcc..29ae65cb68c2 100644
--- a/arch/xtensa/include/asm/Kbuild
+++ b/arch/xtensa/include/asm/Kbuild
@@ -2,7 +2,6 @@
 generated-y += syscall_table.h
 generic-y += extable.h
 generic-y += kvm_para.h
-generic-y += mcs_spinlock.h
 generic-y += param.h
 generic-y += parport.h
 generic-y += qrwlock.h
diff --git a/include/asm-generic/mcs_spinlock.h b/include/asm-generic/mcs_spinlock.h
deleted file mode 100644
index 10cd4ffc6ba2..000000000000
--- a/include/asm-generic/mcs_spinlock.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef __ASM_MCS_SPINLOCK_H
-#define __ASM_MCS_SPINLOCK_H
-
-/*
- * Architectures can define their own:
- *
- *   arch_mcs_spin_lock_contended(l)
- *   arch_mcs_spin_unlock_contended(l)
- *
- * See kernel/locking/mcs_spinlock.c.
- */
-
-#endif /* __ASM_MCS_SPINLOCK_H */
diff --git a/kernel/locking/mcs_spinlock.h b/kernel/locking/mcs_spinlock.h
deleted file mode 100644
index 85251d8771d9..000000000000
--- a/kernel/locking/mcs_spinlock.h
+++ /dev/null
@@ -1,121 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * MCS lock defines
- *
- * This file contains the main data structure and API definitions of MCS lock.
- *
- * The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spin-lock
- * with the desirable properties of being fair, and with each cpu trying
- * to acquire the lock spinning on a local variable.
- * It avoids expensive cache bounces that common test-and-set spin-lock
- * implementations incur.
- */
-#ifndef __LINUX_MCS_SPINLOCK_H
-#define __LINUX_MCS_SPINLOCK_H
-
-#include <asm/mcs_spinlock.h>
-
-struct mcs_spinlock {
-	struct mcs_spinlock *next;
-	int locked; /* 1 if lock acquired */
-	int count;  /* nesting count, see qspinlock.c */
-};
-
-#ifndef arch_mcs_spin_lock_contended
-/*
- * Using smp_cond_load_acquire() provides the acquire semantics
- * required so that subsequent operations happen after the
- * lock is acquired. Additionally, some architectures such as
- * ARM64 would like to do spin-waiting instead of purely
- * spinning, and smp_cond_load_acquire() provides that behavior.
- */
-#define arch_mcs_spin_lock_contended(l)					\
-do {									\
-	smp_cond_load_acquire(l, VAL);					\
-} while (0)
-#endif
-
-#ifndef arch_mcs_spin_unlock_contended
-/*
- * smp_store_release() provides a memory barrier to ensure all
- * operations in the critical section has been completed before
- * unlocking.
- */
-#define arch_mcs_spin_unlock_contended(l)				\
-	smp_store_release((l), 1)
-#endif
-
-/*
- * Note: the smp_load_acquire/smp_store_release pair is not
- * sufficient to form a full memory barrier across
- * cpus for many architectures (except x86) for mcs_unlock and mcs_lock.
- * For applications that need a full barrier across multiple cpus
- * with mcs_unlock and mcs_lock pair, smp_mb__after_unlock_lock() should be
- * used after mcs_lock.
- */
-
-/*
- * In order to acquire the lock, the caller should declare a local node and
- * pass a reference of the node to this function in addition to the lock.
- * If the lock has already been acquired, then this will proceed to spin
- * on this node->locked until the previous lock holder sets the node->locked
- * in mcs_spin_unlock().
- */
-static inline
-void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
-{
-	struct mcs_spinlock *prev;
-
-	/* Init node */
-	node->locked = 0;
-	node->next   = NULL;
-
-	/*
-	 * We rely on the full barrier with global transitivity implied by the
-	 * below xchg() to order the initialization stores above against any
-	 * observation of @node. And to provide the ACQUIRE ordering associated
-	 * with a LOCK primitive.
-	 */
-	prev = xchg(lock, node);
-	if (likely(prev == NULL)) {
-		/*
-		 * Lock acquired, don't need to set node->locked to 1. Threads
-		 * only spin on its own node->locked value for lock acquisition.
-		 * However, since this thread can immediately acquire the lock
-		 * and does not proceed to spin on its own node->locked, this
-		 * value won't be used. If a debug mode is needed to
-		 * audit lock status, then set node->locked value here.
-		 */
-		return;
-	}
-	WRITE_ONCE(prev->next, node);
-
-	/* Wait until the lock holder passes the lock down. */
-	arch_mcs_spin_lock_contended(&node->locked);
-}
-
-/*
- * Releases the lock. The caller should pass in the corresponding node that
- * was used to acquire the lock.
- */
-static inline
-void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
-{
-	struct mcs_spinlock *next = READ_ONCE(node->next);
-
-	if (likely(!next)) {
-		/*
-		 * Release the lock by setting it to NULL
-		 */
-		if (likely(cmpxchg_release(lock, node, NULL) == node))
-			return;
-		/* Wait until the next pointer is set */
-		while (!(next = READ_ONCE(node->next)))
-			cpu_relax();
-	}
-
-	/* Pass lock to next waiter. */
-	arch_mcs_spin_unlock_contended(&next->locked);
-}
-
-#endif /* __LINUX_MCS_SPINLOCK_H */
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index a0fc21d99199..32f401e966ab 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -66,11 +66,10 @@
  *
  */
 
-#include "mcs_spinlock.h"
 #define MAX_NODES	4
 
 /*
- * On 64-bit architectures, the mcs_spinlock structure will be 16 bytes in
+ * On 64-bit architectures, the qnode structure will be 16 bytes in
  * size and four of them will fit nicely in one 64-byte cacheline. For
  * pvqspinlock, however, we need more space for extra data. To accommodate
  * that, we insert two more long words to pad it up to 32 bytes. IOW, only
@@ -80,7 +79,9 @@
  * qspinlocks.
  */
 struct qnode {
-	struct mcs_spinlock mcs;
+	struct qnode *next;
+	int locked; /* 1 if lock acquired */
+	int count;  /* nesting count */
 #ifdef CONFIG_PARAVIRT_SPINLOCKS
 	int			cpu;
 	u8			state;
@@ -124,18 +125,18 @@ static inline __pure u32 encode_tail(int cpu, int idx)
 	return tail;
 }
 
-static inline __pure struct mcs_spinlock *decode_tail(u32 tail)
+static inline __pure struct qnode *decode_tail(u32 tail)
 {
 	int cpu = (tail >> _Q_TAIL_CPU_OFFSET) - 1;
 	int idx = (tail &  _Q_TAIL_IDX_MASK) >> _Q_TAIL_IDX_OFFSET;
 
-	return per_cpu_ptr(&qnodes[idx].mcs, cpu);
+	return per_cpu_ptr(&qnodes[idx], cpu);
 }
 
 static inline __pure
-struct mcs_spinlock *grab_mcs_node(struct mcs_spinlock *base, int idx)
+struct qnode *grab_qnode(struct qnode *base, int idx)
 {
-	return &((struct qnode *)base + idx)->mcs;
+	return &base[idx];
 }
 
 #define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK)
@@ -271,13 +272,13 @@ static __always_inline void set_locked(struct qspinlock *lock)
  * all the PV callbacks.
  */
 
-static __always_inline void __pv_init_node(struct mcs_spinlock *node) { }
-static __always_inline void __pv_wait_node(struct mcs_spinlock *node,
-					   struct mcs_spinlock *prev) { }
+static __always_inline void __pv_init_node(struct qnode *node) { }
+static __always_inline void __pv_wait_node(struct qnode *node,
+					   struct qnode *prev) { }
 static __always_inline void __pv_kick_node(struct qspinlock *lock,
-					   struct mcs_spinlock *node) { }
+					   struct qnode *node) { }
 static __always_inline u32  __pv_wait_head_or_lock(struct qspinlock *lock,
-						   struct mcs_spinlock *node)
+						   struct qnode *node)
 						   { return 0; }
 
 #define pv_enabled()		false
@@ -316,7 +317,7 @@ static __always_inline u32  __pv_wait_head_or_lock(struct qspinlock *lock,
  */
 void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 {
-	struct mcs_spinlock *prev, *next, *node;
+	struct qnode *prev, *next, *node;
 	u32 old, tail;
 	int idx;
 
@@ -399,7 +400,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 queue:
 	lockevent_inc(lock_slowpath);
 pv_queue:
-	node = this_cpu_ptr(&qnodes[0].mcs);
+	node = this_cpu_ptr(&qnodes[0]);
 	idx = node->count++;
 	tail = encode_tail(smp_processor_id(), idx);
 
@@ -421,7 +422,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 		goto release;
 	}
 
-	node = grab_mcs_node(node, idx);
+	node = grab_qnode(node, idx);
 
 	/*
 	 * Keep counts of non-zero index values:
@@ -475,7 +476,8 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 		WRITE_ONCE(prev->next, node);
 
 		pv_wait_node(node, prev);
-		arch_mcs_spin_lock_contended(&node->locked);
+		/* Wait for mcs node lock to be released */
+		smp_cond_load_acquire(&node->locked, VAL);
 
 		/*
 		 * While waiting for the MCS lock, the next pointer may have
@@ -554,7 +556,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 	if (!next)
 		next = smp_cond_load_relaxed(&node->next, (VAL));
 
-	arch_mcs_spin_unlock_contended(&next->locked);
+	smp_store_release(&next->locked, 1); /* unlock the mcs node lock */
 	pv_kick_node(lock, next);
 
 release:
@@ -563,7 +565,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 	/*
 	 * release the node
 	 */
-	__this_cpu_dec(qnodes[0].mcs.count);
+	__this_cpu_dec(qnodes[0].count);
 }
 EXPORT_SYMBOL(queued_spin_lock_slowpath);
 
diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index b6a175155f36..cce3d3dde216 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -267,16 +267,12 @@ pv_wait_early(struct qnode *prev, int loop)
 }
 
 /*
- * Initialize the PV part of the mcs_spinlock node.
+ * Initialize the PV part of the qnode.
  */
-static void pv_init_node(struct mcs_spinlock *node)
+static void pv_init_node(struct qnode *node)
 {
-	struct qnode *pn = (struct qnode *)node;
-
-	BUILD_BUG_ON(sizeof(struct qnode) > sizeof(struct qnode));
-
-	pn->cpu = smp_processor_id();
-	pn->state = vcpu_running;
+	node->cpu = smp_processor_id();
+	node->state = vcpu_running;
 }
 
 /*
@@ -284,10 +280,8 @@ static void pv_init_node(struct mcs_spinlock *node)
  * pv_kick_node() is used to set _Q_SLOW_VAL and fill in hash table on its
  * behalf.
  */
-static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
+static void pv_wait_node(struct qnode *node, struct qnode *prev)
 {
-	struct qnode *pn = (struct qnode *)node;
-	struct qnode *pp = (struct qnode *)prev;
 	int loop;
 	bool wait_early;
 
@@ -295,7 +289,7 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
 		for (wait_early = false, loop = SPIN_THRESHOLD; loop; loop--) {
 			if (READ_ONCE(node->locked))
 				return;
-			if (pv_wait_early(pp, loop)) {
+			if (pv_wait_early(prev, loop)) {
 				wait_early = true;
 				break;
 			}
@@ -303,20 +297,20 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
 		}
 
 		/*
-		 * Order pn->state vs pn->locked thusly:
+		 * Order node->state vs node->locked thusly:
 		 *
-		 * [S] pn->state = vcpu_halted	  [S] next->locked = 1
-		 *     MB			      MB
-		 * [L] pn->locked		[RmW] pn->state = vcpu_hashed
+		 * [S] node->state = vcpu_halted  [S] next->locked = 1
+		 *     MB                             MB
+		 * [L] node->locked             [RmW] node->state = vcpu_hashed
 		 *
 		 * Matches the cmpxchg() from pv_kick_node().
 		 */
-		smp_store_mb(pn->state, vcpu_halted);
+		smp_store_mb(node->state, vcpu_halted);
 
 		if (!READ_ONCE(node->locked)) {
 			lockevent_inc(pv_wait_node);
 			lockevent_cond_inc(pv_wait_early, wait_early);
-			pv_wait(&pn->state, vcpu_halted);
+			pv_wait(&node->state, vcpu_halted);
 		}
 
 		/*
@@ -324,7 +318,7 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
 		 * value so that pv_wait_head_or_lock() knows to not also try
 		 * to hash this lock.
 		 */
-		cmpxchg(&pn->state, vcpu_halted, vcpu_running);
+		cmpxchg(&node->state, vcpu_halted, vcpu_running);
 
 		/*
 		 * If the locked flag is still not set after wakeup, it is a
@@ -351,10 +345,8 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
  * such that they're waiting in pv_wait_head_or_lock(), this avoids a
  * wake/sleep cycle.
  */
-static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
+static void pv_kick_node(struct qspinlock *lock, struct qnode *node)
 {
-	struct qnode *pn = (struct qnode *)node;
-
 	/*
 	 * If the vCPU is indeed halted, advance its state to match that of
 	 * pv_wait_node(). If OTOH this fails, the vCPU was running and will
@@ -363,15 +355,15 @@ static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
 	 * Matches with smp_store_mb() and cmpxchg() in pv_wait_node()
 	 *
 	 * The write to next->locked in arch_mcs_spin_unlock_contended()
-	 * must be ordered before the read of pn->state in the cmpxchg()
+	 * must be ordered before the read of node->state in the cmpxchg()
 	 * below for the code to work correctly. To guarantee full ordering
 	 * irrespective of the success or failure of the cmpxchg(),
 	 * a relaxed version with explicit barrier is used. The control
-	 * dependency will order the reading of pn->state before any
+	 * dependency will order the reading of node->state before any
 	 * subsequent writes.
 	 */
 	smp_mb__before_atomic();
-	if (cmpxchg_relaxed(&pn->state, vcpu_halted, vcpu_hashed)
+	if (cmpxchg_relaxed(&node->state, vcpu_halted, vcpu_hashed)
 	    != vcpu_halted)
 		return;
 
@@ -383,7 +375,7 @@ static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
 	 * needed.
 	 */
 	WRITE_ONCE(lock->locked, _Q_SLOW_VAL);
-	(void)pv_hash(lock, pn);
+	(void)pv_hash(lock, node);
 }
 
 /*
@@ -394,9 +386,8 @@ static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
  * The current value of the lock will be returned for additional processing.
  */
 static u32
-pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
+pv_wait_head_or_lock(struct qspinlock *lock, struct qnode *node)
 {
-	struct qnode *pn = (struct qnode *)node;
 	struct qspinlock **lp = NULL;
 	int waitcnt = 0;
 	int loop;
@@ -405,7 +396,7 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
 	 * If pv_kick_node() already advanced our state, we don't need to
 	 * insert ourselves into the hash table anymore.
 	 */
-	if (READ_ONCE(pn->state) == vcpu_hashed)
+	if (READ_ONCE(node->state) == vcpu_hashed)
 		lp = (struct qspinlock **)1;
 
 	/*
@@ -418,7 +409,7 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
 		 * Set correct vCPU state to be used by queue node wait-early
 		 * mechanism.
 		 */
-		WRITE_ONCE(pn->state, vcpu_running);
+		WRITE_ONCE(node->state, vcpu_running);
 
 		/*
 		 * Set the pending bit in the active lock spinning loop to
@@ -434,7 +425,7 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
 
 
 		if (!lp) { /* ONCE */
-			lp = pv_hash(lock, pn);
+			lp = pv_hash(lock, node);
 
 			/*
 			 * We must hash before setting _Q_SLOW_VAL, such that
@@ -458,7 +449,7 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
 				goto gotlock;
 			}
 		}
-		WRITE_ONCE(pn->state, vcpu_hashed);
+		WRITE_ONCE(node->state, vcpu_hashed);
 		lockevent_inc(pv_wait_head);
 		lockevent_cond_inc(pv_wait_again, waitcnt);
 		pv_wait(&lock->locked, _Q_SLOW_VAL);
-- 
2.35.1

next prev parent reply	other threads:[~2022-07-04 14:38 UTC|newest]

Thread overview: 38+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-07-04 14:38 [PATCH 00/13] locking/qspinlock: simplify code generation Nicholas Piggin
2022-07-04 14:38 ` [PATCH 01/13] locking/qspinlock: remove pv_node abstraction Nicholas Piggin
2022-07-06 23:23   ` Boqun Feng
2022-07-04 14:38 ` Nicholas Piggin [this message]
2022-07-05 16:57   ` [PATCH 02/13] locking/qspinlock: inline mcs_spinlock functions into qspinlock Peter Zijlstra
2022-07-12  0:06     ` Nicholas Piggin
2022-07-04 14:38 ` [PATCH 03/13] locking/qspinlock: split common mcs queueing code into its own function Nicholas Piggin
2022-07-05 17:01   ` Peter Zijlstra
2022-07-12  0:10     ` Nicholas Piggin
2022-07-04 14:38 ` [PATCH 04/13] locking/qspinlock: move pv lock word helpers into qspinlock.c Nicholas Piggin
2022-07-05 19:34   ` Waiman Long
2022-07-12  0:11     ` Nicholas Piggin
2022-07-04 14:38 ` [PATCH 05/13] locking/qspinlock: be less clever with the preprocessor Nicholas Piggin
2022-07-05 17:08   ` Peter Zijlstra
2022-07-12  0:29     ` Nicholas Piggin
2022-07-05 20:02   ` Waiman Long
2022-07-12  0:33     ` Nicholas Piggin
2022-07-04 14:38 ` [PATCH 06/13] locking/qspinlock: merge qspinlock_paravirt.h into qspinlock.c Nicholas Piggin
2022-07-05 17:20   ` Peter Zijlstra
2022-07-05 17:36     ` Peter Zijlstra
2022-07-12  0:46       ` Nicholas Piggin
2022-07-06 13:35     ` Waiman Long
2022-07-06 14:16       ` Peter Zijlstra
2022-07-04 14:38 ` [PATCH 07/13] locking/qspinlock: remove arch qspinlock_paravirt.h includes Nicholas Piggin
2022-07-04 14:38 ` [PATCH 08/13] locking/qspinlock: stop renaming queued_spin_lock_slowpath to native_queued_spin_lock_slowpath Nicholas Piggin
2022-07-05 17:28   ` Peter Zijlstra
2022-07-04 14:38 ` [PATCH 09/13] locking/qspinlock: rename __pv_init_lock_hash to pv_spinlocks_init Nicholas Piggin
2022-07-04 14:38 ` [PATCH 10/13] locking/qspinlock: paravirt use simple trylock in case idx overflows Nicholas Piggin
2022-07-04 14:38 ` [PATCH 11/13] locking/qspinlock: Use queued_spin_trylock in pv_hybrid_queued_unfair_trylock Nicholas Piggin
2022-07-05 17:31   ` Peter Zijlstra
2022-07-05 20:15   ` Waiman Long
2022-07-12  0:48     ` Nicholas Piggin
2022-07-04 14:38 ` [PATCH 12/13] locking/qspinlock: separate pv_wait_node from the non-paravirt path Nicholas Piggin
2022-07-05 17:34   ` Peter Zijlstra
2022-07-12  0:50     ` Nicholas Piggin
2022-07-04 14:38 ` [PATCH 13/13] locking/qspinlock: simplify pv_wait_head_or_lock calling scheme Nicholas Piggin
2022-07-05 17:59 ` [PATCH 00/13] locking/qspinlock: simplify code generation Peter Zijlstra
2022-07-12  0:56   ` Nicholas Piggin

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:42911c8340c dfblob:d21cf7b3173 dfblob:3c1afa524b9
dfblob:5ae4337a930 dfblob:529d2cf4d06 dfblob:5c8ee5a541d
dfblob:57e9ad366d2 dfblob:3ece3c93fe0 dfblob:37bbf99f66d
dfblob:f994c1daf9d dfblob:a0198c12e33 dfblob:1b720299deb
dfblob:8dbef73ce01 dfblob:a055f5dbe00 dfblob:7615a27e085
dfblob:dee17271658 dfblob:65cedca0877 dfblob:7fe7437555f
dfblob:5718eee9665 dfblob:e6e7f74c8ac dfblob:1f0c28d74c8
dfblob:bcf95ce0964 dfblob:813a8c3405a dfblob:1a18d7b82f8
dfblob:8b036a4ee2c dfblob:fc44d9c88b4 dfblob:3192f19bcf8
dfblob:0b9d98ced34 dfblob:f0b913f7ba0 dfblob:b2d834a29f3
dfblob:04080c0c1ae dfblob:1e51650b79d dfblob:beb7683f7b8
dfblob:fa07c686cbc dfblob:29ae65cb68c dfblob:10cd4ffc6ba
dfblob:85251d8771d dfblob:a0fc21d9919 dfblob:32f401e966a
dfblob:b6a175155f3 dfblob:cce3d3dde21 )
 OR (
bs:"[PATCH 02/13] locking/qspinlock: inline mcs_spinlock functions into qspinlock" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220704143820.3071004-3-npiggin@gmail.com \
    --to=npiggin@gmail.com \
    --cc=boqun.feng@gmail.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=longman@redhat.com \
    --cc=mingo@redhat.com \
    --cc=peterz@infradead.org \
    --cc=will@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.