From: Nicholas Piggin <npiggin@gmail.com>
To: linuxppc-dev@lists.ozlabs.org
Cc: Jordan Niethe <jniethe5@gmail.com>,
Laurent Dufour <ldufour@linux.ibm.com>,
Nicholas Piggin <npiggin@gmail.com>
Subject: [PATCH v3 10/17] powerpc/qspinlock: allow propagation of yield CPU down the queue
Date: Sat, 26 Nov 2022 19:59:25 +1000
Message-ID: <20221126095932.1234527-11-npiggin@gmail.com>
In-Reply-To: <20221126095932.1234527-1-npiggin@gmail.com>
Having all CPUs poll the lock word for the owner CPU that should be
yielded to defeats most of the purpose of using MCS queueing for
scalability. Yet it may be desirable for queued waiters to to yield
to a preempted owner.
With this change, queue waiters never sample the owner CPU directly
from the lock word. The queue head (which is spinning on the lock)
propagates the owner CPU back to the next waiter if it finds the owner
has been preempted. That waiter then propagates the owner CPU back to
the next waiter, and so on.
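To make the handover concrete, here is a minimal single-threaded sketch
of the idea (illustrative only, not the kernel code: the qnode layout is
reduced, vcpu_is_preempted() is a hardcoded stub, the helper names are
invented for the example, and the READ_ONCE/barrier discipline of the
real implementation is omitted):

  #include <stdio.h>
  #include <stdbool.h>

  struct qnode {
  	struct qnode *next;
  	int yield_cpu;		/* -1: no owner to yield to */
  };

  /* Stand-in for the hypervisor query; hardcoded for this demo. */
  static bool vcpu_is_preempted(int cpu)
  {
  	return cpu == 3;	/* pretend vCPU 3 lost its physical CPU */
  }

  /* Queue head: publish a preempted owner to the next waiter. */
  static void head_propagate(struct qnode *node, int owner)
  {
  	if (node->next && vcpu_is_preempted(owner))
  		node->next->yield_cpu = owner;
  }

  /* Queued waiter: yield to the published owner and pass it on. */
  static void waiter_relay(struct qnode *node)
  {
  	int yield_cpu = node->yield_cpu;

  	if (yield_cpu == -1)
  		return;		/* nothing propagated yet */
  	if (node->next && node->next->yield_cpu != yield_cpu)
  		node->next->yield_cpu = yield_cpu;
  	printf("waiter yields to preempted vCPU %d\n", yield_cpu);
  }

  int main(void)
  {
  	struct qnode c = { NULL, -1 }, b = { &c, -1 }, a = { &b, -1 };

  	head_propagate(&a, 3);	/* head sees owner vCPU 3 preempted */
  	waiter_relay(&b);	/* b yields and tells c */
  	waiter_relay(&c);	/* c yields; no successor to tell */
  	return 0;
  }

In the real patch the head does the equivalent of head_propagate() while
it spins on the lock word, and each queued waiter performs the relay
step inside yield_to_prev().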
s390 addresses this problem differently, by having queued waiters sample
the lock word to find the owner at a low frequency. That approach has
the advantage of being simpler; the advantage of propagation is that the
lock word never has to be accessed by queued waiters, and the transfer
of cache lines to transmit the owner data is only required when lock
holder vCPU preemption occurs.
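Propagation can be switched off at run time via the new
qspl_pv_yield_propagate_owner debugfs file added below. Assuming debugfs
is mounted at /sys/kernel/debug, that is
/sys/kernel/debug/powerpc/qspl_pv_yield_propagate_owner on powerpc
(write 0 to disable, 1 to re-enable).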
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/lib/qspinlock.c | 79 ++++++++++++++++++++++++++++++++++++
1 file changed, 79 insertions(+)
diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
index 1aafafc701da..b044760a05e9 100644
--- a/arch/powerpc/lib/qspinlock.c
+++ b/arch/powerpc/lib/qspinlock.c
@@ -12,6 +12,7 @@
 struct qnode {
 	struct qnode *next;
 	struct qspinlock *lock;
+	int yield_cpu;
 	u8 locked; /* 1 if lock acquired */
 };
 
@@ -28,6 +29,7 @@ static int head_spins __read_mostly = (1<<8);
 static bool pv_yield_owner __read_mostly = true;
 static bool pv_yield_allow_steal __read_mostly = false;
 static bool pv_yield_prev __read_mostly = true;
+static bool pv_yield_propagate_owner __read_mostly = true;
 
 static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes);
 
@@ -232,14 +234,67 @@ static __always_inline void yield_head_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt)
 	__yield_to_locked_owner(lock, val, paravirt, mustq);
 }
 
+static __always_inline void propagate_yield_cpu(struct qnode *node, u32 val, int *set_yield_cpu, bool paravirt)
+{
+	struct qnode *next;
+	int owner;
+
+	if (!paravirt)
+		return;
+	if (!pv_yield_propagate_owner)
+		return;
+
+	owner = get_owner_cpu(val);
+	if (*set_yield_cpu == owner)
+		return;
+
+	next = READ_ONCE(node->next);
+	if (!next)
+		return;
+
+	if (vcpu_is_preempted(owner)) {
+		next->yield_cpu = owner;
+		*set_yield_cpu = owner;
+	} else if (*set_yield_cpu != -1) {
+		next->yield_cpu = owner;
+		*set_yield_cpu = owner;
+	}
+}
+
 static __always_inline void yield_to_prev(struct qspinlock *lock, struct qnode *node, u32 val, bool paravirt)
 {
 	int prev_cpu = decode_tail_cpu(val);
 	u32 yield_count;
+	int yield_cpu;
 
 	if (!paravirt)
 		goto relax;
 
+	if (!pv_yield_propagate_owner)
+		goto yield_prev;
+
+	yield_cpu = READ_ONCE(node->yield_cpu);
+	if (yield_cpu == -1) {
+		/* Propagate back the -1 CPU */
+		if (node->next && node->next->yield_cpu != -1)
+			node->next->yield_cpu = yield_cpu;
+		goto yield_prev;
+	}
+
+	yield_count = yield_count_of(yield_cpu);
+	if ((yield_count & 1) == 0)
+		goto yield_prev; /* owner vcpu is running */
+
+	smp_rmb();
+
+	if (yield_cpu == node->yield_cpu) {
+		if (node->next && node->next->yield_cpu != yield_cpu)
+			node->next->yield_cpu = yield_cpu;
+		yield_to_preempted(yield_cpu, yield_count);
+		return;
+	}
+
+yield_prev:
 	if (!pv_yield_prev)
 		goto relax;
 
@@ -293,6 +348,7 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, bool paravirt)
 	u32 val, old, tail;
 	bool mustq = false;
 	int idx;
+	int set_yield_cpu = -1;
 	int iters = 0;
 
 	BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));
@@ -314,6 +370,7 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, bool paravirt)
 	node = &qnodesp->nodes[idx];
 	node->next = NULL;
 	node->lock = lock;
+	node->yield_cpu = -1;
 	node->locked = 0;
 
 	tail = encode_tail_cpu(smp_processor_id());
@@ -334,6 +391,10 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, bool paravirt)
 		while (!node->locked)
 			yield_to_prev(lock, node, old, paravirt);
 
+		/* Clear out stale propagated yield_cpu */
+		if (paravirt && pv_yield_propagate_owner && node->yield_cpu != -1)
+			node->yield_cpu = -1;
+
 		smp_rmb(); /* acquire barrier for the mcs lock */
 	}
 
@@ -344,6 +405,7 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, bool paravirt)
 		if (!(val & _Q_LOCKED_VAL))
 			break;
 
+		propagate_yield_cpu(node, val, &set_yield_cpu, paravirt);
 		yield_head_to_locked_owner(lock, val, paravirt);
 		if (!maybe_stealers)
 			continue;
@@ -512,6 +574,22 @@ static int pv_yield_prev_get(void *data, u64 *val)
 
 DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_prev, pv_yield_prev_get, pv_yield_prev_set, "%llu\n");
 
+static int pv_yield_propagate_owner_set(void *data, u64 val)
+{
+	pv_yield_propagate_owner = !!val;
+
+	return 0;
+}
+
+static int pv_yield_propagate_owner_get(void *data, u64 *val)
+{
+	*val = pv_yield_propagate_owner;
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_propagate_owner, pv_yield_propagate_owner_get, pv_yield_propagate_owner_set, "%llu\n");
+
 static __init int spinlock_debugfs_init(void)
 {
 	debugfs_create_file("qspl_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_steal_spins);
@@ -520,6 +598,7 @@ static __init int spinlock_debugfs_init(void)
 		debugfs_create_file("qspl_pv_yield_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_owner);
 		debugfs_create_file("qspl_pv_yield_allow_steal", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_allow_steal);
 		debugfs_create_file("qspl_pv_yield_prev", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_prev);
+		debugfs_create_file("qspl_pv_yield_propagate_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_propagate_owner);
 	}
 
 	return 0;
-- 
2.37.2
Thread overview: 21+ messages
2022-11-26 9:59 [PATCH v3 00/17] powerpc: alternate queued spinlock implementation Nicholas Piggin
2022-11-26 9:59 ` [PATCH v3 01/17] powerpc/qspinlock: add mcs queueing for contended waiters Nicholas Piggin
2022-11-26 9:59 ` [PATCH v3 02/17] powerpc/qspinlock: use a half-word store to unlock to avoid larx/stcx Nicholas Piggin
2022-11-26 9:59 ` [PATCH v3 03/17] powerpc/qspinlock: convert atomic operations to assembly Nicholas Piggin
2022-11-26 9:59 ` [PATCH v3 04/17] powerpc/qspinlock: allow new waiters to steal the lock before queueing Nicholas Piggin
2022-11-26 9:59 ` [PATCH v3 05/17] powerpc/qspinlock: theft prevention to control latency Nicholas Piggin
2022-11-26 9:59 ` [PATCH v3 06/17] powerpc/qspinlock: store owner CPU in lock word Nicholas Piggin
2022-11-26 9:59 ` [PATCH v3 07/17] powerpc/qspinlock: paravirt yield to lock owner Nicholas Piggin
2022-11-26 9:59 ` [PATCH v3 08/17] powerpc/qspinlock: implement option to yield to previous node Nicholas Piggin
2022-11-26 9:59 ` [PATCH v3 09/17] powerpc/qspinlock: allow stealing when head of queue yields Nicholas Piggin
2022-11-26 9:59 ` [PATCH v3 10/17] powerpc/qspinlock: allow propagation of yield CPU down the queue Nicholas Piggin [this message]
2022-11-26 9:59 ` [PATCH v3 11/17] powerpc/qspinlock: add ability to prod new queue head CPU Nicholas Piggin
2022-11-26 9:59 ` [PATCH v3 12/17] powerpc/qspinlock: allow lock stealing in trylock and lock fastpath Nicholas Piggin
2022-11-26 9:59 ` [PATCH v3 13/17] powerpc/qspinlock: use spin_begin/end API Nicholas Piggin
2022-11-26 9:59 ` [PATCH v3 14/17] powerpc/qspinlock: reduce remote node steal spins Nicholas Piggin
2022-11-26 9:59 ` [PATCH v3 15/17] powerpc/qspinlock: allow indefinite spinning on a preempted owner Nicholas Piggin
2022-11-26 9:59 ` [PATCH v3 16/17] powerpc/qspinlock: provide accounting and options for sleepy locks Nicholas Piggin
2022-11-26 9:59 ` [PATCH v3 17/17] powerpc/qspinlock: add compile-time tuning adjustments Nicholas Piggin
2022-11-28 3:11 ` [PATCH v3 real 01/17] powerpc/qspinlock: powerpc qspinlock implementation Nicholas Piggin
2022-12-08 12:39 ` (subset) [PATCH v3 00/17] powerpc: alternate queued spinlock implementation Michael Ellerman
2023-04-13 10:58 ` Shrikanth Hegde