* [PATCH v7 2/3] locking/percpu-rwsem: Extract __percpu_up_read()
2026-06-04 7:15 [PATCH v7 0/3] locking: contended_release tracepoint instrumentation Dmitry Ilvokhin
2026-06-04 7:15 ` [PATCH v7 1/3] tracing/lock: Remove unnecessary linux/sched.h include Dmitry Ilvokhin
@ 2026-06-04 7:15 ` Dmitry Ilvokhin
2026-06-04 7:15 ` [PATCH v7 3/3] locking: Add contended_release tracepoint to sleepable locks Dmitry Ilvokhin
2 siblings, 0 replies; 5+ messages in thread
From: Dmitry Ilvokhin @ 2026-06-04 7:15 UTC (permalink / raw)
To: Peter Zijlstra, Dennis Zhou, Tejun Heo, Christoph Lameter,
Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers, Ingo Molnar,
Will Deacon, Boqun Feng, Waiman Long
Cc: linux-mm, linux-kernel, linux-trace-kernel, kernel-team,
Dmitry Ilvokhin, Usama Arif
Move the percpu_up_read() slowpath out of the inline function into a new
__percpu_up_read() to avoid binary size increase from adding a
tracepoint to an inlined function.
Signed-off-by: Dmitry Ilvokhin <d@ilvokhin.com>
Acked-by: Usama Arif <usama.arif@linux.dev>
---
include/linux/percpu-rwsem.h | 15 +++------------
kernel/locking/percpu-rwsem.c | 18 ++++++++++++++++++
2 files changed, 21 insertions(+), 12 deletions(-)
diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h
index c8cb010d655e..39d5bf8e6562 100644
--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -107,6 +107,8 @@ static inline bool percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
return ret;
}
+extern void __percpu_up_read(struct percpu_rw_semaphore *sem);
+
static inline void percpu_up_read(struct percpu_rw_semaphore *sem)
{
rwsem_release(&sem->dep_map, _RET_IP_);
@@ -118,18 +120,7 @@ static inline void percpu_up_read(struct percpu_rw_semaphore *sem)
if (likely(rcu_sync_is_idle(&sem->rss))) {
this_cpu_dec(*sem->read_count);
} else {
- /*
- * slowpath; reader will only ever wake a single blocked
- * writer.
- */
- smp_mb(); /* B matches C */
- /*
- * In other words, if they see our decrement (presumably to
- * aggregate zero, as that is the only time it matters) they
- * will also see our critical section.
- */
- this_cpu_dec(*sem->read_count);
- rcuwait_wake_up(&sem->writer);
+ __percpu_up_read(sem);
}
preempt_enable();
}
diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index ef234469baac..f3ee7a0d6047 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -288,3 +288,21 @@ void percpu_up_write(struct percpu_rw_semaphore *sem)
rcu_sync_exit(&sem->rss);
}
EXPORT_SYMBOL_GPL(percpu_up_write);
+
+void __percpu_up_read(struct percpu_rw_semaphore *sem)
+{
+ lockdep_assert_preemption_disabled();
+ /*
+ * slowpath; reader will only ever wake a single blocked
+ * writer.
+ */
+ smp_mb(); /* B matches C */
+ /*
+ * In other words, if they see our decrement (presumably to
+ * aggregate zero, as that is the only time it matters) they
+ * will also see our critical section.
+ */
+ this_cpu_dec(*sem->read_count);
+ rcuwait_wake_up(&sem->writer);
+}
+EXPORT_SYMBOL_GPL(__percpu_up_read);
--
2.53.0-Meta
^ permalink raw reply related [flat|nested] 5+ messages in thread* [PATCH v7 3/3] locking: Add contended_release tracepoint to sleepable locks
2026-06-04 7:15 [PATCH v7 0/3] locking: contended_release tracepoint instrumentation Dmitry Ilvokhin
2026-06-04 7:15 ` [PATCH v7 1/3] tracing/lock: Remove unnecessary linux/sched.h include Dmitry Ilvokhin
2026-06-04 7:15 ` [PATCH v7 2/3] locking/percpu-rwsem: Extract __percpu_up_read() Dmitry Ilvokhin
@ 2026-06-04 7:15 ` Dmitry Ilvokhin
2026-06-04 14:45 ` Usama Arif
2 siblings, 1 reply; 5+ messages in thread
From: Dmitry Ilvokhin @ 2026-06-04 7:15 UTC (permalink / raw)
To: Peter Zijlstra, Dennis Zhou, Tejun Heo, Christoph Lameter,
Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers, Ingo Molnar,
Will Deacon, Boqun Feng, Waiman Long
Cc: linux-mm, linux-kernel, linux-trace-kernel, kernel-team,
Dmitry Ilvokhin, Paul E. McKenney
Add the contended_release trace event. This tracepoint fires on the
holder side when a contended lock is released, complementing the
existing contention_begin/contention_end tracepoints which fire on the
waiter side.
This enables correlating lock hold time under contention with waiter
events by lock address.
Add trace_contended_release()/trace_call__contended_release() calls to
the slowpath unlock paths of sleepable locks: mutex, rtmutex, semaphore,
rwsem, percpu-rwsem, and RT-specific rwbase locks.
Where possible, trace_contended_release() fires before the lock is
released and before the waiter is woken. For some lock types, the
tracepoint fires after the release but before the wake. Making the
placement consistent across all lock types is not worth the added
complexity.
For reader/writer locks, the tracepoint fires for every reader releasing
while a writer is waiting, not only for the last reader.
Signed-off-by: Dmitry Ilvokhin <d@ilvokhin.com>
Acked-by: Paul E. McKenney <paulmck@kernel.org>
---
include/trace/events/lock.h | 17 +++++++++++++++++
kernel/locking/mutex.c | 4 ++++
kernel/locking/percpu-rwsem.c | 11 +++++++++++
kernel/locking/rtmutex.c | 1 +
kernel/locking/rwbase_rt.c | 6 ++++++
kernel/locking/rwsem.c | 10 ++++++++--
kernel/locking/semaphore.c | 4 ++++
7 files changed, 51 insertions(+), 2 deletions(-)
diff --git a/include/trace/events/lock.h b/include/trace/events/lock.h
index da978f2afb45..1ded869cd619 100644
--- a/include/trace/events/lock.h
+++ b/include/trace/events/lock.h
@@ -137,6 +137,23 @@ TRACE_EVENT(contention_end,
TP_printk("%p (ret=%d)", __entry->lock_addr, __entry->ret)
);
+TRACE_EVENT(contended_release,
+
+ TP_PROTO(void *lock),
+
+ TP_ARGS(lock),
+
+ TP_STRUCT__entry(
+ __field(void *, lock_addr)
+ ),
+
+ TP_fast_assign(
+ __entry->lock_addr = lock;
+ ),
+
+ TP_printk("%p", __entry->lock_addr)
+);
+
#endif /* _TRACE_LOCK_H */
/* This part must be outside protection */
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 09534628dc01..43b7f7e281a0 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -1023,6 +1023,9 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne
wake_q_add(&wake_q, next);
}
+ if (trace_contended_release_enabled() && waiter)
+ trace_call__contended_release(lock);
+
if (owner & MUTEX_FLAG_HANDOFF)
__mutex_handoff(lock, next);
@@ -1220,6 +1223,7 @@ EXPORT_SYMBOL(ww_mutex_lock_interruptible);
EXPORT_TRACEPOINT_SYMBOL_GPL(contention_begin);
EXPORT_TRACEPOINT_SYMBOL_GPL(contention_end);
+EXPORT_TRACEPOINT_SYMBOL_GPL(contended_release);
/**
* atomic_dec_and_mutex_lock - return holding mutex if we dec to 0
diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index f3ee7a0d6047..f7e152c40d6d 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -263,6 +263,9 @@ void percpu_up_write(struct percpu_rw_semaphore *sem)
{
rwsem_release(&sem->dep_map, _RET_IP_);
+ if (trace_contended_release_enabled() && wq_has_sleeper(&sem->waiters))
+ trace_call__contended_release(sem);
+
/*
* Signal the writer is done, no fast path yet.
*
@@ -292,6 +295,14 @@ EXPORT_SYMBOL_GPL(percpu_up_write);
void __percpu_up_read(struct percpu_rw_semaphore *sem)
{
lockdep_assert_preemption_disabled();
+ /*
+ * After percpu_up_write() completes, rcu_sync_is_idle() can still
+ * return false during the grace period, forcing readers into this
+ * slowpath. Only trace when a writer is actually waiting for
+ * readers to drain.
+ */
+ if (trace_contended_release_enabled() && rcuwait_active(&sem->writer))
+ trace_call__contended_release(sem);
/*
* slowpath; reader will only ever wake a single blocked
* writer.
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 9147d6a31b78..28beae7d21fe 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -1470,6 +1470,7 @@ static void __sched rt_mutex_slowunlock(struct rt_mutex_base *lock)
raw_spin_lock_irqsave(&lock->wait_lock, flags);
}
+ trace_contended_release(lock);
/*
* The wakeup next waiter path does not suffer from the above
* race. See the comments there.
diff --git a/kernel/locking/rwbase_rt.c b/kernel/locking/rwbase_rt.c
index 82e078c0665a..2835c9ef9b3f 100644
--- a/kernel/locking/rwbase_rt.c
+++ b/kernel/locking/rwbase_rt.c
@@ -174,6 +174,8 @@ static void __sched __rwbase_read_unlock(struct rwbase_rt *rwb,
static __always_inline void rwbase_read_unlock(struct rwbase_rt *rwb,
unsigned int state)
{
+ if (trace_contended_release_enabled() && rt_mutex_owner(&rwb->rtmutex))
+ trace_call__contended_release(rwb);
/*
* rwb->readers can only hit 0 when a writer is waiting for the
* active readers to leave the critical section.
@@ -205,6 +207,8 @@ static inline void rwbase_write_unlock(struct rwbase_rt *rwb)
unsigned long flags;
raw_spin_lock_irqsave(&rtm->wait_lock, flags);
+ if (trace_contended_release_enabled() && rt_mutex_has_waiters(rtm))
+ trace_call__contended_release(rwb);
__rwbase_write_unlock(rwb, WRITER_BIAS, flags);
}
@@ -214,6 +218,8 @@ static inline void rwbase_write_downgrade(struct rwbase_rt *rwb)
unsigned long flags;
raw_spin_lock_irqsave(&rtm->wait_lock, flags);
+ if (trace_contended_release_enabled() && rt_mutex_has_waiters(rtm))
+ trace_call__contended_release(rwb);
/* Release it and account current as reader */
__rwbase_write_unlock(rwb, WRITER_BIAS - 1, flags);
}
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index bf647097369c..b9c180ac1eee 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -1387,6 +1387,8 @@ static inline void __up_read(struct rw_semaphore *sem)
rwsem_clear_reader_owned(sem);
tmp = atomic_long_add_return_release(-RWSEM_READER_BIAS, &sem->count);
DEBUG_RWSEMS_WARN_ON(tmp < 0, sem);
+ if (trace_contended_release_enabled() && (tmp & RWSEM_FLAG_WAITERS))
+ trace_call__contended_release(sem);
if (unlikely((tmp & (RWSEM_LOCK_MASK|RWSEM_FLAG_WAITERS)) ==
RWSEM_FLAG_WAITERS)) {
clear_nonspinnable(sem);
@@ -1413,8 +1415,10 @@ static inline void __up_write(struct rw_semaphore *sem)
preempt_disable();
rwsem_clear_owner(sem);
tmp = atomic_long_fetch_add_release(-RWSEM_WRITER_LOCKED, &sem->count);
- if (unlikely(tmp & RWSEM_FLAG_WAITERS))
+ if (unlikely(tmp & RWSEM_FLAG_WAITERS)) {
+ trace_contended_release(sem);
rwsem_wake(sem);
+ }
preempt_enable();
}
@@ -1437,8 +1441,10 @@ static inline void __downgrade_write(struct rw_semaphore *sem)
tmp = atomic_long_fetch_add_release(
-RWSEM_WRITER_LOCKED+RWSEM_READER_BIAS, &sem->count);
rwsem_set_reader_owned(sem);
- if (tmp & RWSEM_FLAG_WAITERS)
+ if (tmp & RWSEM_FLAG_WAITERS) {
+ trace_contended_release(sem);
rwsem_downgrade_wake(sem);
+ }
preempt_enable();
}
diff --git a/kernel/locking/semaphore.c b/kernel/locking/semaphore.c
index 74d41433ba13..233730c25933 100644
--- a/kernel/locking/semaphore.c
+++ b/kernel/locking/semaphore.c
@@ -230,6 +230,10 @@ void __sched up(struct semaphore *sem)
sem->count++;
else
__up(sem, &wake_q);
+
+ if (trace_contended_release_enabled() && !wake_q_empty(&wake_q))
+ trace_call__contended_release(sem);
+
raw_spin_unlock_irqrestore(&sem->lock, flags);
if (!wake_q_empty(&wake_q))
wake_up_q(&wake_q);
--
2.53.0-Meta
^ permalink raw reply related [flat|nested] 5+ messages in thread