* [RFC PATCH v3 4/4] locking: Add contended_release tracepoint to spinning locks
From: Dmitry Ilvokhin @ 2026-03-18 18:45 UTC (permalink / raw)
To: Arnd Bergmann, Dennis Zhou, Tejun Heo, Christoph Lameter,
Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
Peter Zijlstra, Ingo Molnar, Will Deacon, Boqun Feng, Waiman Long
Cc: linux-arch, linux-kernel, linux-mm, linux-trace-kernel,
kernel-team, Dmitry Ilvokhin
In-Reply-To: <cover.1773858853.git.d@ilvokhin.com>
Extend the contended_release tracepoint to queued spinlocks and queued
rwlocks.
When the tracepoint is disabled, the only addition to the hot path is a
single NOP instruction (the static branch). When enabled, the contention
check, trace call, and unlock are combined in an out-of-line function to
minimize hot path impact: the compiler then does not need to preserve
the lock pointer in a callee-saved register across the trace call.
Binary size impact (x86_64, defconfig):
uninlined unlock (common case): +983 bytes (+0.00%)
inlined unlock (worst case): +71554 bytes (+0.30%)
The inlined unlock case cannot be reached through Kconfig options on
x86_64, because PREEMPT_BUILD unconditionally selects
UNINLINE_SPIN_UNLOCK there. To estimate the worst-case binary size
increase, the UNINLINE_SPIN_UNLOCK guards were manually inverted to
force the unlock path to be inlined.
Signed-off-by: Dmitry Ilvokhin <d@ilvokhin.com>
---
include/asm-generic/qrwlock.h | 48 +++++++++++++++++++++++++++------
include/asm-generic/qspinlock.h | 25 +++++++++++++++--
kernel/locking/qrwlock.c | 16 +++++++++++
kernel/locking/qspinlock.c | 8 ++++++
4 files changed, 87 insertions(+), 10 deletions(-)
diff --git a/include/asm-generic/qrwlock.h b/include/asm-generic/qrwlock.h
index 75b8f4601b28..e24dc537fd66 100644
--- a/include/asm-generic/qrwlock.h
+++ b/include/asm-generic/qrwlock.h
@@ -14,6 +14,7 @@
#define __ASM_GENERIC_QRWLOCK_H
#include <linux/atomic.h>
+#include <linux/tracepoint-defs.h>
#include <asm/barrier.h>
#include <asm/processor.h>
@@ -35,6 +36,10 @@
*/
extern void queued_read_lock_slowpath(struct qrwlock *lock);
extern void queued_write_lock_slowpath(struct qrwlock *lock);
+extern void queued_read_unlock_traced(struct qrwlock *lock);
+extern void queued_write_unlock_traced(struct qrwlock *lock);
+
+DECLARE_TRACEPOINT(contended_release);
/**
* queued_read_trylock - try to acquire read lock of a queued rwlock
@@ -102,10 +107,16 @@ static inline void queued_write_lock(struct qrwlock *lock)
}
/**
- * queued_read_unlock - release read lock of a queued rwlock
+ * queued_rwlock_is_contended - check if the lock is contended
* @lock : Pointer to queued rwlock structure
+ * Return: 1 if lock contended, 0 otherwise
*/
-static inline void queued_read_unlock(struct qrwlock *lock)
+static inline int queued_rwlock_is_contended(struct qrwlock *lock)
+{
+ return arch_spin_is_locked(&lock->wait_lock);
+}
+
+static __always_inline void __queued_read_unlock(struct qrwlock *lock)
{
/*
* Atomically decrement the reader count
@@ -114,22 +125,43 @@ static inline void queued_read_unlock(struct qrwlock *lock)
}
/**
- * queued_write_unlock - release write lock of a queued rwlock
+ * queued_read_unlock - release read lock of a queued rwlock
* @lock : Pointer to queued rwlock structure
*/
-static inline void queued_write_unlock(struct qrwlock *lock)
+static inline void queued_read_unlock(struct qrwlock *lock)
+{
+ /*
+ * Trace and unlock are combined in the traced unlock variant so
+ * the compiler does not need to preserve the lock pointer across
+ * the function call, avoiding callee-saved register save/restore
+ * on the hot path.
+ */
+ if (tracepoint_enabled(contended_release)) {
+ queued_read_unlock_traced(lock);
+ return;
+ }
+
+ __queued_read_unlock(lock);
+}
+
+static __always_inline void __queued_write_unlock(struct qrwlock *lock)
{
smp_store_release(&lock->wlocked, 0);
}
/**
- * queued_rwlock_is_contended - check if the lock is contended
+ * queued_write_unlock - release write lock of a queued rwlock
* @lock : Pointer to queued rwlock structure
- * Return: 1 if lock contended, 0 otherwise
*/
-static inline int queued_rwlock_is_contended(struct qrwlock *lock)
+static inline void queued_write_unlock(struct qrwlock *lock)
{
- return arch_spin_is_locked(&lock->wait_lock);
+ /* See comment in queued_read_unlock(). */
+ if (tracepoint_enabled(contended_release)) {
+ queued_write_unlock_traced(lock);
+ return;
+ }
+
+ __queued_write_unlock(lock);
}
/*
diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
index bf47cca2c375..8ba463a3b891 100644
--- a/include/asm-generic/qspinlock.h
+++ b/include/asm-generic/qspinlock.h
@@ -41,6 +41,7 @@
#include <asm-generic/qspinlock_types.h>
#include <linux/atomic.h>
+#include <linux/tracepoint-defs.h>
#ifndef queued_spin_is_locked
/**
@@ -116,6 +117,19 @@ static __always_inline void queued_spin_lock(struct qspinlock *lock)
#endif
#ifndef queued_spin_unlock
+
+DECLARE_TRACEPOINT(contended_release);
+
+extern void queued_spin_unlock_traced(struct qspinlock *lock);
+
+static __always_inline void __queued_spin_unlock(struct qspinlock *lock)
+{
+ /*
+ * unlock() needs release semantics:
+ */
+ smp_store_release(&lock->locked, 0);
+}
+
/**
* queued_spin_unlock - release a queued spinlock
* @lock : Pointer to queued spinlock structure
@@ -123,9 +137,16 @@ static __always_inline void queued_spin_lock(struct qspinlock *lock)
static __always_inline void queued_spin_unlock(struct qspinlock *lock)
{
/*
- * unlock() needs release semantics:
+ * Trace and unlock are combined in queued_spin_unlock_traced()
+ * so the compiler does not need to preserve the lock pointer
+ * across the function call, avoiding callee-saved register
+ * save/restore on the hot path.
*/
- smp_store_release(&lock->locked, 0);
+ if (tracepoint_enabled(contended_release)) {
+ queued_spin_unlock_traced(lock);
+ return;
+ }
+ __queued_spin_unlock(lock);
}
#endif
diff --git a/kernel/locking/qrwlock.c b/kernel/locking/qrwlock.c
index d2ef312a8611..5f7a0fc2b27a 100644
--- a/kernel/locking/qrwlock.c
+++ b/kernel/locking/qrwlock.c
@@ -90,3 +90,19 @@ void __lockfunc queued_write_lock_slowpath(struct qrwlock *lock)
trace_contention_end(lock, 0);
}
EXPORT_SYMBOL(queued_write_lock_slowpath);
+
+void __lockfunc queued_read_unlock_traced(struct qrwlock *lock)
+{
+ if (queued_rwlock_is_contended(lock))
+ trace_contended_release(lock);
+ __queued_read_unlock(lock);
+}
+EXPORT_SYMBOL(queued_read_unlock_traced);
+
+void __lockfunc queued_write_unlock_traced(struct qrwlock *lock)
+{
+ if (queued_rwlock_is_contended(lock))
+ trace_contended_release(lock);
+ __queued_write_unlock(lock);
+}
+EXPORT_SYMBOL(queued_write_unlock_traced);
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index af8d122bb649..1544dcec65fa 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -104,6 +104,14 @@ static __always_inline u32 __pv_wait_head_or_lock(struct qspinlock *lock,
#define queued_spin_lock_slowpath native_queued_spin_lock_slowpath
#endif
+void __lockfunc queued_spin_unlock_traced(struct qspinlock *lock)
+{
+ if (queued_spin_is_contended(lock))
+ trace_contended_release(lock);
+ __queued_spin_unlock(lock);
+}
+EXPORT_SYMBOL(queued_spin_unlock_traced);
+
#endif /* _GEN_PV_LOCK_SLOWPATH */
/**
--
2.52.0
^ permalink raw reply related
* [PATCH v3 3/4] locking: Add contended_release tracepoint to sleepable locks
From: Dmitry Ilvokhin @ 2026-03-18 18:45 UTC (permalink / raw)
To: Arnd Bergmann, Dennis Zhou, Tejun Heo, Christoph Lameter,
Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
Peter Zijlstra, Ingo Molnar, Will Deacon, Boqun Feng, Waiman Long
Cc: linux-arch, linux-kernel, linux-mm, linux-trace-kernel,
kernel-team, Dmitry Ilvokhin
In-Reply-To: <cover.1773858853.git.d@ilvokhin.com>
Add the contended_release trace event. This tracepoint fires on the
holder side when a contended lock is released, complementing the
existing contention_begin/contention_end tracepoints which fire on the
waiter side.
This enables correlating lock hold time under contention with waiter
events by lock address.
Add trace_contended_release() calls to the slowpath unlock paths of
sleepable locks: mutex, rtmutex, semaphore, rwsem, percpu-rwsem, and
RT-specific rwbase locks.
Where possible, trace_contended_release() fires before the lock is
released and before the waiter is woken. For rwsem, semaphore, and
rwbase_rt read unlock, the tracepoint fires after the release but before
the wake, as contention is determined by the return value of the atomic
release operation.
Signed-off-by: Dmitry Ilvokhin <d@ilvokhin.com>
---
include/trace/events/lock.h | 17 +++++++++++++++++
kernel/locking/mutex.c | 3 +++
kernel/locking/percpu-rwsem.c | 4 ++++
kernel/locking/rtmutex.c | 1 +
kernel/locking/rwbase_rt.c | 8 +++++++-
kernel/locking/rwsem.c | 9 +++++++--
kernel/locking/semaphore.c | 4 +++-
7 files changed, 42 insertions(+), 4 deletions(-)
diff --git a/include/trace/events/lock.h b/include/trace/events/lock.h
index da978f2afb45..1ded869cd619 100644
--- a/include/trace/events/lock.h
+++ b/include/trace/events/lock.h
@@ -137,6 +137,23 @@ TRACE_EVENT(contention_end,
TP_printk("%p (ret=%d)", __entry->lock_addr, __entry->ret)
);
+TRACE_EVENT(contended_release,
+
+ TP_PROTO(void *lock),
+
+ TP_ARGS(lock),
+
+ TP_STRUCT__entry(
+ __field(void *, lock_addr)
+ ),
+
+ TP_fast_assign(
+ __entry->lock_addr = lock;
+ ),
+
+ TP_printk("%p", __entry->lock_addr)
+);
+
#endif /* _TRACE_LOCK_H */
/* This part must be outside protection */
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 427187ff02db..bb25741c2768 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -997,6 +997,9 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne
wake_q_add(&wake_q, next);
}
+ if (trace_contended_release_enabled() && waiter)
+ trace_contended_release(lock);
+
if (owner & MUTEX_FLAG_HANDOFF)
__mutex_handoff(lock, next);
diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index f3ee7a0d6047..6bf2ccddd42d 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -263,6 +263,9 @@ void percpu_up_write(struct percpu_rw_semaphore *sem)
{
rwsem_release(&sem->dep_map, _RET_IP_);
+ if (trace_contended_release_enabled() && wq_has_sleeper(&sem->waiters))
+ trace_contended_release(sem);
+
/*
* Signal the writer is done, no fast path yet.
*
@@ -292,6 +295,7 @@ EXPORT_SYMBOL_GPL(percpu_up_write);
void __percpu_up_read(struct percpu_rw_semaphore *sem)
{
lockdep_assert_preemption_disabled();
+ trace_contended_release(sem);
/*
* slowpath; reader will only ever wake a single blocked
* writer.
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index ccaba6148b61..3db8a840b4e8 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -1466,6 +1466,7 @@ static void __sched rt_mutex_slowunlock(struct rt_mutex_base *lock)
raw_spin_lock_irqsave(&lock->wait_lock, flags);
}
+ trace_contended_release(lock);
/*
* The wakeup next waiter path does not suffer from the above
* race. See the comments there.
diff --git a/kernel/locking/rwbase_rt.c b/kernel/locking/rwbase_rt.c
index 82e078c0665a..081778934b13 100644
--- a/kernel/locking/rwbase_rt.c
+++ b/kernel/locking/rwbase_rt.c
@@ -162,8 +162,10 @@ static void __sched __rwbase_read_unlock(struct rwbase_rt *rwb,
* worst case which can happen is a spurious wakeup.
*/
owner = rt_mutex_owner(rtm);
- if (owner)
+ if (owner) {
+ trace_contended_release(rwb);
rt_mutex_wake_q_add_task(&wqh, owner, state);
+ }
/* Pairs with the preempt_enable in rt_mutex_wake_up_q() */
preempt_disable();
@@ -205,6 +207,8 @@ static inline void rwbase_write_unlock(struct rwbase_rt *rwb)
unsigned long flags;
raw_spin_lock_irqsave(&rtm->wait_lock, flags);
+ if (trace_contended_release_enabled() && rt_mutex_has_waiters(rtm))
+ trace_contended_release(rwb);
__rwbase_write_unlock(rwb, WRITER_BIAS, flags);
}
@@ -214,6 +218,8 @@ static inline void rwbase_write_downgrade(struct rwbase_rt *rwb)
unsigned long flags;
raw_spin_lock_irqsave(&rtm->wait_lock, flags);
+ if (trace_contended_release_enabled() && rt_mutex_has_waiters(rtm))
+ trace_contended_release(rwb);
/* Release it and account current as reader */
__rwbase_write_unlock(rwb, WRITER_BIAS - 1, flags);
}
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index bf647097369c..767a1a2b7d8c 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -1390,6 +1390,7 @@ static inline void __up_read(struct rw_semaphore *sem)
if (unlikely((tmp & (RWSEM_LOCK_MASK|RWSEM_FLAG_WAITERS)) ==
RWSEM_FLAG_WAITERS)) {
clear_nonspinnable(sem);
+ trace_contended_release(sem);
rwsem_wake(sem);
}
preempt_enable();
@@ -1413,8 +1414,10 @@ static inline void __up_write(struct rw_semaphore *sem)
preempt_disable();
rwsem_clear_owner(sem);
tmp = atomic_long_fetch_add_release(-RWSEM_WRITER_LOCKED, &sem->count);
- if (unlikely(tmp & RWSEM_FLAG_WAITERS))
+ if (unlikely(tmp & RWSEM_FLAG_WAITERS)) {
+ trace_contended_release(sem);
rwsem_wake(sem);
+ }
preempt_enable();
}
@@ -1437,8 +1440,10 @@ static inline void __downgrade_write(struct rw_semaphore *sem)
tmp = atomic_long_fetch_add_release(
-RWSEM_WRITER_LOCKED+RWSEM_READER_BIAS, &sem->count);
rwsem_set_reader_owned(sem);
- if (tmp & RWSEM_FLAG_WAITERS)
+ if (tmp & RWSEM_FLAG_WAITERS) {
+ trace_contended_release(sem);
rwsem_downgrade_wake(sem);
+ }
preempt_enable();
}
diff --git a/kernel/locking/semaphore.c b/kernel/locking/semaphore.c
index 74d41433ba13..d46415095dd6 100644
--- a/kernel/locking/semaphore.c
+++ b/kernel/locking/semaphore.c
@@ -231,8 +231,10 @@ void __sched up(struct semaphore *sem)
else
__up(sem, &wake_q);
raw_spin_unlock_irqrestore(&sem->lock, flags);
- if (!wake_q_empty(&wake_q))
+ if (!wake_q_empty(&wake_q)) {
+ trace_contended_release(sem);
wake_up_q(&wake_q);
+ }
}
EXPORT_SYMBOL(up);
--
2.52.0
^ permalink raw reply related
* [PATCH v3 2/4] locking/percpu-rwsem: Extract __percpu_up_read()
From: Dmitry Ilvokhin @ 2026-03-18 18:45 UTC (permalink / raw)
To: Arnd Bergmann, Dennis Zhou, Tejun Heo, Christoph Lameter,
Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
Peter Zijlstra, Ingo Molnar, Will Deacon, Boqun Feng, Waiman Long
Cc: linux-arch, linux-kernel, linux-mm, linux-trace-kernel,
kernel-team, Dmitry Ilvokhin, Usama Arif
In-Reply-To: <cover.1773858853.git.d@ilvokhin.com>
Move the percpu_up_read() slowpath out of the inline function into a new
__percpu_up_read() to avoid binary size increase from adding a
tracepoint to an inlined function.
Signed-off-by: Dmitry Ilvokhin <d@ilvokhin.com>
Acked-by: Usama Arif <usama.arif@linux.dev>
---
include/linux/percpu-rwsem.h | 15 +++------------
kernel/locking/percpu-rwsem.c | 18 ++++++++++++++++++
2 files changed, 21 insertions(+), 12 deletions(-)
diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h
index c8cb010d655e..39d5bf8e6562 100644
--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -107,6 +107,8 @@ static inline bool percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
return ret;
}
+extern void __percpu_up_read(struct percpu_rw_semaphore *sem);
+
static inline void percpu_up_read(struct percpu_rw_semaphore *sem)
{
rwsem_release(&sem->dep_map, _RET_IP_);
@@ -118,18 +120,7 @@ static inline void percpu_up_read(struct percpu_rw_semaphore *sem)
if (likely(rcu_sync_is_idle(&sem->rss))) {
this_cpu_dec(*sem->read_count);
} else {
- /*
- * slowpath; reader will only ever wake a single blocked
- * writer.
- */
- smp_mb(); /* B matches C */
- /*
- * In other words, if they see our decrement (presumably to
- * aggregate zero, as that is the only time it matters) they
- * will also see our critical section.
- */
- this_cpu_dec(*sem->read_count);
- rcuwait_wake_up(&sem->writer);
+ __percpu_up_read(sem);
}
preempt_enable();
}
diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index ef234469baac..f3ee7a0d6047 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -288,3 +288,21 @@ void percpu_up_write(struct percpu_rw_semaphore *sem)
rcu_sync_exit(&sem->rss);
}
EXPORT_SYMBOL_GPL(percpu_up_write);
+
+void __percpu_up_read(struct percpu_rw_semaphore *sem)
+{
+ lockdep_assert_preemption_disabled();
+ /*
+ * slowpath; reader will only ever wake a single blocked
+ * writer.
+ */
+ smp_mb(); /* B matches C */
+ /*
+ * In other words, if they see our decrement (presumably to
+ * aggregate zero, as that is the only time it matters) they
+ * will also see our critical section.
+ */
+ this_cpu_dec(*sem->read_count);
+ rcuwait_wake_up(&sem->writer);
+}
+EXPORT_SYMBOL_GPL(__percpu_up_read);
--
2.52.0
^ permalink raw reply related
* [PATCH v3 0/4] locking: contended_release tracepoint instrumentation
From: Dmitry Ilvokhin @ 2026-03-18 18:45 UTC (permalink / raw)
To: Arnd Bergmann, Dennis Zhou, Tejun Heo, Christoph Lameter,
Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
Peter Zijlstra, Ingo Molnar, Will Deacon, Boqun Feng, Waiman Long
Cc: linux-arch, linux-kernel, linux-mm, linux-trace-kernel,
kernel-team, Dmitry Ilvokhin
The existing contention_begin/contention_end tracepoints fire on the
waiter side. The lock holder's identity and stack can be captured at
contention_begin time (e.g. perf lock contention --lock-owner), but
this reflects the holder's state when a waiter arrives, not when the
lock is actually released.
This series adds a contended_release tracepoint that fires on the
holder side when a lock with waiters is released. This provides:
- Hold time estimation: when the holder's own acquisition was
contended, its contention_end (acquisition) and contended_release
can be correlated to measure how long the lock was held under
contention.
- The holder's stack at release time, which may differ from what perf lock
contention --lock-owner captures if the holder does significant work between
the waiter's arrival and the unlock.
The series is structured as follows:
1. Remove unnecessary linux/sched.h include from trace/events/lock.h.
2. Extract __percpu_up_read() out of the inline percpu_up_read() to
avoid binary size increase from adding a tracepoint.
3. Add contended_release tracepoint and instrument sleepable locks:
mutex, rtmutex, semaphore, rwsem, percpu-rwsem, and rwbase_rt.
4. (RFC) Extend contended_release to queued spinlocks and queued rwlocks.
v2 -> v3:
- Added new patch: extend contended_release tracepoint to queued spinlocks
and queued rwlocks (marked as RFC, requesting feedback). This is prompted by
Matthew Wilcox's suggestion to try to come up with generic instrumentation,
instead of instrumenting each "special" lock manually. See [1] for the
discussion.
- Reworked tracepoint placement to fire before the lock is released and
before the waiter is woken where possible, for consistency with
spinning locks where there is no explicit wake (inspired by Usama Arif's
suggestion).
- Removed the unnecessary linux/sched.h include from trace/events/lock.h.
RFC -> v2:
- Add trace_contended_release_enabled() guard before waiter checks that
exist only for the tracepoint (Steven Rostedt).
- Rename __percpu_up_read_slowpath() to __percpu_up_read() (Peter
Zijlstra).
- Add extern for __percpu_up_read() (Peter Zijlstra).
- Squashed tracepoint introduction and usage commits (Masami Hiramatsu).
v2: https://lore.kernel.org/all/cover.1773164180.git.d@ilvokhin.com/
RFC: https://lore.kernel.org/all/cover.1772642407.git.d@ilvokhin.com/
[1]: https://lore.kernel.org/all/aa7G1nD7Rd9F4eBH@casper.infradead.org/
Dmitry Ilvokhin (4):
tracing/lock: Remove unnecessary linux/sched.h include
locking/percpu-rwsem: Extract __percpu_up_read()
locking: Add contended_release tracepoint to sleepable locks
locking: Add contended_release tracepoint to spinning locks
include/asm-generic/qrwlock.h | 48 +++++++++++++++++++++++++++------
include/asm-generic/qspinlock.h | 25 +++++++++++++++--
include/linux/percpu-rwsem.h | 15 +++--------
include/trace/events/lock.h | 18 ++++++++++++-
kernel/locking/mutex.c | 3 +++
kernel/locking/percpu-rwsem.c | 22 +++++++++++++++
kernel/locking/qrwlock.c | 16 +++++++++++
kernel/locking/qspinlock.c | 8 ++++++
kernel/locking/rtmutex.c | 1 +
kernel/locking/rwbase_rt.c | 8 +++++-
kernel/locking/rwsem.c | 9 +++++--
kernel/locking/semaphore.c | 4 ++-
12 files changed, 150 insertions(+), 27 deletions(-)
--
2.52.0
^ permalink raw reply
* [PATCH v3 1/4] tracing/lock: Remove unnecessary linux/sched.h include
From: Dmitry Ilvokhin @ 2026-03-18 18:45 UTC (permalink / raw)
To: Arnd Bergmann, Dennis Zhou, Tejun Heo, Christoph Lameter,
Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
Peter Zijlstra, Ingo Molnar, Will Deacon, Boqun Feng, Waiman Long
Cc: linux-arch, linux-kernel, linux-mm, linux-trace-kernel,
kernel-team, Dmitry Ilvokhin
In-Reply-To: <cover.1773858853.git.d@ilvokhin.com>
None of the trace events in lock.h reference anything from
linux/sched.h. Remove the unnecessary include.
Signed-off-by: Dmitry Ilvokhin <d@ilvokhin.com>
---
include/trace/events/lock.h | 1 -
1 file changed, 1 deletion(-)
diff --git a/include/trace/events/lock.h b/include/trace/events/lock.h
index 8e89baa3775f..da978f2afb45 100644
--- a/include/trace/events/lock.h
+++ b/include/trace/events/lock.h
@@ -5,7 +5,6 @@
#if !defined(_TRACE_LOCK_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_LOCK_H
-#include <linux/sched.h>
#include <linux/tracepoint.h>
/* flags for lock:contention_begin */
--
2.52.0
^ permalink raw reply related
* [PATCH v8 13/13] lib/bootconfig: change xbc_node_index() return type to uint16_t
From: Josh Law @ 2026-03-18 15:59 UTC (permalink / raw)
To: Masami Hiramatsu, Andrew Morton
Cc: linux-kernel, linux-trace-kernel, Josh Law
In-Reply-To: <20260318155919.78168-1-objecting@objecting.org>
lib/bootconfig.c:136:21: warning: conversion from 'long int' to
'int' may change value [-Wconversion]
lib/bootconfig.c:308:33: warning: conversion from 'int' to 'uint16_t'
may change value [-Wconversion]
lib/bootconfig.c:467:37: warning: conversion from 'int' to 'uint16_t'
may change value [-Wconversion]
lib/bootconfig.c:469:40: warning: conversion from 'int' to 'uint16_t'
may change value [-Wconversion]
lib/bootconfig.c:472:54: warning: conversion from 'int' to 'uint16_t'
may change value [-Wconversion]
lib/bootconfig.c:476:45: warning: conversion from 'int' to 'uint16_t'
may change value [-Wconversion]
xbc_node_index() returns the position of a node in the xbc_nodes array,
which has at most XBC_NODE_MAX (8192) entries, well within uint16_t
range. Every caller stores the result in a uint16_t field (node->parent,
node->child, node->next, or the keys[] array in compose_key_after), so
the int return type causes narrowing warnings at all five call sites, in
addition to the long-to-int narrowing of the pointer subtraction inside
xbc_node_index() itself.
Change the return type to uint16_t and add an explicit cast on the
pointer subtraction to match the storage width and eliminate the
warnings.
Signed-off-by: Josh Law <objecting@objecting.org>
---
include/linux/bootconfig.h | 2 +-
lib/bootconfig.c | 4 ++--
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/include/linux/bootconfig.h b/include/linux/bootconfig.h
index 23a96c5edcf3..692a5acc2ffc 100644
--- a/include/linux/bootconfig.h
+++ b/include/linux/bootconfig.h
@@ -66,7 +66,7 @@ struct xbc_node {
/* Node tree access raw APIs */
struct xbc_node * __init xbc_root_node(void);
-int __init xbc_node_index(struct xbc_node *node);
+uint16_t __init xbc_node_index(struct xbc_node *node);
struct xbc_node * __init xbc_node_get_parent(struct xbc_node *node);
struct xbc_node * __init xbc_node_get_child(struct xbc_node *node);
struct xbc_node * __init xbc_node_get_next(struct xbc_node *node);
diff --git a/lib/bootconfig.c b/lib/bootconfig.c
index 272d9427e879..57f07e33868e 100644
--- a/lib/bootconfig.c
+++ b/lib/bootconfig.c
@@ -131,9 +131,9 @@ struct xbc_node * __init xbc_root_node(void)
*
* Return the index number of @node in XBC node list.
*/
-int __init xbc_node_index(struct xbc_node *node)
+uint16_t __init xbc_node_index(struct xbc_node *node)
{
- return node - &xbc_nodes[0];
+ return (uint16_t)(node - &xbc_nodes[0]);
}
/**
--
2.34.1
^ permalink raw reply related
* [PATCH v8 10/13] lib/bootconfig: use size_t for strlen result in xbc_node_match_prefix()
From: Josh Law @ 2026-03-18 15:59 UTC (permalink / raw)
To: Masami Hiramatsu, Andrew Morton
Cc: linux-kernel, linux-trace-kernel, Josh Law
In-Reply-To: <20260318155919.78168-1-objecting@objecting.org>
lib/bootconfig.c:198:19: warning: conversion from 'size_t' to 'int'
may change value [-Wconversion]
lib/bootconfig.c:200:33: warning: conversion to '__kernel_size_t'
from 'int' may change the sign of the result [-Wsign-conversion]
strlen() returns size_t but the result was stored in an int. The value
is then passed back to strncmp() which expects size_t, causing a second
sign-conversion warning on the round-trip. Use size_t throughout to
match the API types.
Signed-off-by: Josh Law <objecting@objecting.org>
---
lib/bootconfig.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lib/bootconfig.c b/lib/bootconfig.c
index dd639046912c..3c5b6ad32a54 100644
--- a/lib/bootconfig.c
+++ b/lib/bootconfig.c
@@ -195,7 +195,7 @@ static bool __init
xbc_node_match_prefix(struct xbc_node *node, const char **prefix)
{
const char *p = xbc_node_get_data(node);
- int len = strlen(p);
+ size_t len = strlen(p);
if (strncmp(*prefix, p, len))
return false;
--
2.34.1
^ permalink raw reply related
* [PATCH v8 12/13] lib/bootconfig: use size_t for key length tracking in xbc_verify_tree()
From: Josh Law @ 2026-03-18 15:59 UTC (permalink / raw)
To: Masami Hiramatsu, Andrew Morton
Cc: linux-kernel, linux-trace-kernel, Josh Law
In-Reply-To: <20260318155919.78168-1-objecting@objecting.org>
lib/bootconfig.c:839:24: warning: conversion from 'size_t' to 'int'
may change value [-Wconversion]
lib/bootconfig.c:860:32: warning: conversion from 'size_t' to 'int'
may change value [-Wconversion]
lib/bootconfig.c:860:29: warning: conversion to 'size_t' from 'int'
may change the sign of the result [-Wsign-conversion]
The key length variables len and wlen accumulate strlen() results but
were declared as int, causing truncation and sign-conversion warnings.
Change both to size_t to match the strlen() return type and avoid
mixed-sign arithmetic.
Signed-off-by: Josh Law <objecting@objecting.org>
---
lib/bootconfig.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/lib/bootconfig.c b/lib/bootconfig.c
index 119c3d429c1f..272d9427e879 100644
--- a/lib/bootconfig.c
+++ b/lib/bootconfig.c
@@ -803,7 +803,8 @@ static int __init xbc_close_brace(char **k, char *n)
static int __init xbc_verify_tree(void)
{
- int i, depth, len, wlen;
+ int i, depth;
+ size_t len, wlen;
struct xbc_node *n, *m;
/* Brace closing */
--
2.34.1
^ permalink raw reply related
* [PATCH v8 09/13] lib/bootconfig: fix signed comparison in xbc_node_get_data()
From: Josh Law @ 2026-03-18 15:59 UTC (permalink / raw)
To: Masami Hiramatsu, Andrew Morton
Cc: linux-kernel, linux-trace-kernel, Josh Law
In-Reply-To: <20260318155919.78168-1-objecting@objecting.org>
lib/bootconfig.c:188:28: warning: comparison of integer expressions
of different signedness: 'int' and 'size_t' [-Wsign-compare]
The local variable 'offset' is declared as int, but xbc_data_size is
size_t. Change the type to size_t to match and eliminate the warning.
Signed-off-by: Josh Law <objecting@objecting.org>
---
lib/bootconfig.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lib/bootconfig.c b/lib/bootconfig.c
index ecc4e8d93547..dd639046912c 100644
--- a/lib/bootconfig.c
+++ b/lib/bootconfig.c
@@ -183,7 +183,7 @@ struct xbc_node * __init xbc_node_get_next(struct xbc_node *node)
*/
const char * __init xbc_node_get_data(struct xbc_node *node)
{
- int offset = node->data & ~XBC_VALUE;
+ size_t offset = node->data & ~XBC_VALUE;
if (WARN_ON(offset >= xbc_data_size))
return NULL;
--
2.34.1
^ permalink raw reply related
* [PATCH v8 11/13] lib/bootconfig: use signed type for offset in xbc_init_node()
From: Josh Law @ 2026-03-18 15:59 UTC (permalink / raw)
To: Masami Hiramatsu, Andrew Morton
Cc: linux-kernel, linux-trace-kernel, Josh Law
In-Reply-To: <20260318155919.78168-1-objecting@objecting.org>
lib/bootconfig.c:415:32: warning: conversion to 'long unsigned int'
from 'long int' may change the sign of the result [-Wsign-conversion]
Pointer subtraction yields ptrdiff_t (signed), which was stored in
unsigned long. The original unsigned type implicitly caught a negative
offset (data < xbc_data) because the wrapped value would exceed
XBC_DATA_MAX. Make this intent explicit by using a signed long and
adding an offset < 0 check to the WARN_ON condition.
Signed-off-by: Josh Law <objecting@objecting.org>
---
lib/bootconfig.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/lib/bootconfig.c b/lib/bootconfig.c
index 3c5b6ad32a54..119c3d429c1f 100644
--- a/lib/bootconfig.c
+++ b/lib/bootconfig.c
@@ -412,9 +412,9 @@ const char * __init xbc_node_find_next_key_value(struct xbc_node *root,
static int __init xbc_init_node(struct xbc_node *node, char *data, uint16_t flag)
{
- unsigned long offset = data - xbc_data;
+ long offset = data - xbc_data;
- if (WARN_ON(offset >= XBC_DATA_MAX))
+ if (WARN_ON(offset < 0 || offset >= XBC_DATA_MAX))
return -EINVAL;
node->data = (uint16_t)offset | flag;
--
2.34.1
^ permalink raw reply related
* [PATCH v8 08/13] lib/bootconfig: validate child node index in xbc_verify_tree()
From: Josh Law @ 2026-03-18 15:59 UTC (permalink / raw)
To: Masami Hiramatsu, Andrew Morton
Cc: linux-kernel, linux-trace-kernel, Josh Law
In-Reply-To: <20260318155919.78168-1-objecting@objecting.org>
xbc_verify_tree() validates that each node's next index is within
bounds, but does not check the child index. Add the same bounds
check for the child field.
Without this check, a corrupt bootconfig that passes next-index
validation could still trigger an out-of-bounds memory access via an
invalid child index when xbc_node_get_child() is called during tree
traversal at boot time.
Signed-off-by: Josh Law <objecting@objecting.org>
---
lib/bootconfig.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/lib/bootconfig.c b/lib/bootconfig.c
index d7e2395b7e83..1adf592cc038 100644
--- a/lib/bootconfig.c
+++ b/lib/bootconfig.c
@@ -823,6 +823,10 @@ static int __init xbc_verify_tree(void)
return xbc_parse_error("No closing brace",
xbc_node_get_data(xbc_nodes + i));
}
+ if (xbc_nodes[i].child >= xbc_node_num) {
+ return xbc_parse_error("Broken child node",
+ xbc_node_get_data(xbc_nodes + i));
+ }
}
/* Key tree limitation check */
--
2.34.1
^ permalink raw reply related
* [PATCH v8 07/13] lib/bootconfig: replace linux/kernel.h with specific includes
From: Josh Law @ 2026-03-18 15:59 UTC (permalink / raw)
To: Masami Hiramatsu, Andrew Morton
Cc: linux-kernel, linux-trace-kernel, Josh Law
In-Reply-To: <20260318155919.78168-1-objecting@objecting.org>
linux/kernel.h is a legacy catch-all header. Replace it with the
specific headers actually needed: linux/cache.h for SMP_CACHE_BYTES,
linux/compiler.h for unlikely(), and linux/sprintf.h for snprintf().
Signed-off-by: Josh Law <objecting@objecting.org>
---
lib/bootconfig.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/lib/bootconfig.c b/lib/bootconfig.c
index 69486bd56fea..d7e2395b7e83 100644
--- a/lib/bootconfig.c
+++ b/lib/bootconfig.c
@@ -17,7 +17,9 @@
#include <linux/bug.h>
#include <linux/ctype.h>
#include <linux/errno.h>
-#include <linux/kernel.h>
+#include <linux/cache.h>
+#include <linux/compiler.h>
+#include <linux/sprintf.h>
#include <linux/memblock.h>
#include <linux/string.h>
--
2.34.1
^ permalink raw reply related
* [PATCH v8 02/13] lib/bootconfig: narrow flag parameter type from uint32_t to uint16_t
From: Josh Law @ 2026-03-18 15:59 UTC (permalink / raw)
To: Masami Hiramatsu, Andrew Morton
Cc: linux-kernel, linux-trace-kernel, Josh Law
In-Reply-To: <20260318155919.78168-1-objecting@objecting.org>
The flag parameter in the node creation helpers only ever carries
XBC_KEY (0) or XBC_VALUE (0x8000), both of which fit in uint16_t.
Using uint16_t matches the width of xbc_node.data where the flag is
ultimately stored.
Signed-off-by: Josh Law <objecting@objecting.org>
---
lib/bootconfig.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/lib/bootconfig.c b/lib/bootconfig.c
index d77b6533ac00..d69ec95d6062 100644
--- a/lib/bootconfig.c
+++ b/lib/bootconfig.c
@@ -408,7 +408,7 @@ const char * __init xbc_node_find_next_key_value(struct xbc_node *root,
/* XBC parse and tree build */
-static int __init xbc_init_node(struct xbc_node *node, char *data, uint32_t flag)
+static int __init xbc_init_node(struct xbc_node *node, char *data, uint16_t flag)
{
unsigned long offset = data - xbc_data;
@@ -422,7 +422,7 @@ static int __init xbc_init_node(struct xbc_node *node, char *data, uint32_t flag
return 0;
}
-static struct xbc_node * __init xbc_add_node(char *data, uint32_t flag)
+static struct xbc_node * __init xbc_add_node(char *data, uint16_t flag)
{
struct xbc_node *node;
@@ -452,7 +452,7 @@ static inline __init struct xbc_node *xbc_last_child(struct xbc_node *node)
return node;
}
-static struct xbc_node * __init __xbc_add_sibling(char *data, uint32_t flag, bool head)
+static struct xbc_node * __init __xbc_add_sibling(char *data, uint16_t flag, bool head)
{
struct xbc_node *sib, *node = xbc_add_node(data, flag);
@@ -480,17 +480,17 @@ static struct xbc_node * __init __xbc_add_sibling(char *data, uint32_t flag, boo
return node;
}
-static inline struct xbc_node * __init xbc_add_sibling(char *data, uint32_t flag)
+static inline struct xbc_node * __init xbc_add_sibling(char *data, uint16_t flag)
{
return __xbc_add_sibling(data, flag, false);
}
-static inline struct xbc_node * __init xbc_add_head_sibling(char *data, uint32_t flag)
+static inline struct xbc_node * __init xbc_add_head_sibling(char *data, uint16_t flag)
{
return __xbc_add_sibling(data, flag, true);
}
-static inline __init struct xbc_node *xbc_add_child(char *data, uint32_t flag)
+static inline __init struct xbc_node *xbc_add_child(char *data, uint16_t flag)
{
struct xbc_node *node = xbc_add_sibling(data, flag);
--
2.34.1
^ permalink raw reply related
* [PATCH v8 03/13] lib/bootconfig: fix off-by-one in xbc_verify_tree() next node check
From: Josh Law @ 2026-03-18 15:59 UTC (permalink / raw)
To: Masami Hiramatsu, Andrew Morton
Cc: linux-kernel, linux-trace-kernel, Josh Law
In-Reply-To: <20260318155919.78168-1-objecting@objecting.org>
Valid node indices are 0 to xbc_node_num-1, so a next value equal to
xbc_node_num is out of bounds. Use >= instead of > to catch this.
A malformed or corrupt bootconfig could pass tree verification with
an out-of-bounds next index. On subsequent tree traversal at boot
time, xbc_node_get_next() would return a pointer past the allocated
xbc_nodes array, causing an out-of-bounds read of kernel memory.
Signed-off-by: Josh Law <objecting@objecting.org>
---
lib/bootconfig.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lib/bootconfig.c b/lib/bootconfig.c
index d69ec95d6062..ca668ead1db6 100644
--- a/lib/bootconfig.c
+++ b/lib/bootconfig.c
@@ -816,7 +816,7 @@ static int __init xbc_verify_tree(void)
}
for (i = 0; i < xbc_node_num; i++) {
- if (xbc_nodes[i].next > xbc_node_num) {
+ if (xbc_nodes[i].next >= xbc_node_num) {
return xbc_parse_error("No closing brace",
xbc_node_get_data(xbc_nodes + i));
}
--
2.34.1
^ permalink raw reply related
* [PATCH v8 06/13] bootconfig: constify xbc_calc_checksum() data parameter
From: Josh Law @ 2026-03-18 15:59 UTC (permalink / raw)
To: Masami Hiramatsu, Andrew Morton
Cc: linux-kernel, linux-trace-kernel, Josh Law
In-Reply-To: <20260318155919.78168-1-objecting@objecting.org>
xbc_calc_checksum() only reads the data buffer, so mark the parameter
as const void * and the internal pointer as const unsigned char *.
Signed-off-by: Josh Law <objecting@objecting.org>
---
include/linux/bootconfig.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/include/linux/bootconfig.h b/include/linux/bootconfig.h
index 25df9260d206..23a96c5edcf3 100644
--- a/include/linux/bootconfig.h
+++ b/include/linux/bootconfig.h
@@ -36,9 +36,9 @@ bool __init cmdline_has_extra_options(void);
* The checksum will be used with the BOOTCONFIG_MAGIC and the size for
* embedding the bootconfig in the initrd image.
*/
-static inline __init uint32_t xbc_calc_checksum(void *data, uint32_t size)
+static inline __init uint32_t xbc_calc_checksum(const void *data, uint32_t size)
{
- unsigned char *p = data;
+ const unsigned char *p = data;
uint32_t ret = 0;
while (size--)
--
2.34.1
^ permalink raw reply related
* [PATCH v8 04/13] lib/bootconfig: increment xbc_node_num after node init succeeds
From: Josh Law @ 2026-03-18 15:59 UTC (permalink / raw)
To: Masami Hiramatsu, Andrew Morton
Cc: linux-kernel, linux-trace-kernel, Josh Law
In-Reply-To: <20260318155919.78168-1-objecting@objecting.org>
Move the xbc_node_num increment to after xbc_init_node() so a failed
init does not leave a partially initialized node counted in the array.
If xbc_init_node() fails on a data offset at the boundary of a
maximum-size bootconfig, the pre-incremented count causes subsequent
tree verification and traversal to consider the uninitialized node as
valid, potentially leading to an out-of-bounds read or unpredictable
boot behavior.
Signed-off-by: Josh Law <objecting@objecting.org>
---
lib/bootconfig.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/lib/bootconfig.c b/lib/bootconfig.c
index ca668ead1db6..6b899e24189c 100644
--- a/lib/bootconfig.c
+++ b/lib/bootconfig.c
@@ -429,9 +429,10 @@ static struct xbc_node * __init xbc_add_node(char *data, uint16_t flag)
if (xbc_node_num == XBC_NODE_MAX)
return NULL;
- node = &xbc_nodes[xbc_node_num++];
+ node = &xbc_nodes[xbc_node_num];
if (xbc_init_node(node, data, flag) < 0)
return NULL;
+ xbc_node_num++;
return node;
}
--
2.34.1
^ permalink raw reply related
* [PATCH v8 05/13] lib/bootconfig: drop redundant memset of xbc_nodes
From: Josh Law @ 2026-03-18 15:59 UTC (permalink / raw)
To: Masami Hiramatsu, Andrew Morton
Cc: linux-kernel, linux-trace-kernel, Josh Law
In-Reply-To: <20260318155919.78168-1-objecting@objecting.org>
memblock_alloc() already returns zeroed memory, so the explicit memset
in xbc_init() is redundant. Switch the userspace xbc_alloc_mem() from
malloc() to calloc() so both paths return zeroed memory, and remove
the separate memset call.
Signed-off-by: Josh Law <objecting@objecting.org>
---
lib/bootconfig.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/lib/bootconfig.c b/lib/bootconfig.c
index 6b899e24189c..69486bd56fea 100644
--- a/lib/bootconfig.c
+++ b/lib/bootconfig.c
@@ -71,7 +71,7 @@ static inline void __init xbc_free_mem(void *addr, size_t size, bool early)
static inline void *xbc_alloc_mem(size_t size)
{
- return malloc(size);
+ return calloc(1, size);
}
static inline void xbc_free_mem(void *addr, size_t size, bool early)
@@ -982,7 +982,6 @@ int __init xbc_init(const char *data, size_t size, const char **emsg, int *epos)
_xbc_exit(true);
return -ENOMEM;
}
- memset(xbc_nodes, 0, sizeof(struct xbc_node) * XBC_NODE_MAX);
ret = xbc_parse_tree();
if (!ret)
--
2.34.1
^ permalink raw reply related
* [PATCH v8 00/13] bootconfig: cleanups, correctness, and modernization
From: Josh Law @ 2026-03-18 15:59 UTC (permalink / raw)
To: Masami Hiramatsu, Andrew Morton
Cc: linux-kernel, linux-trace-kernel, Josh Law
This series addresses a collection of issues found during a review of
lib/bootconfig.c, include/linux/bootconfig.h, and tools/bootconfig,
covering off-by-one errors, coding style, signedness/type cleanup, and
API modernization.
The two patches with Fixes tags (xbc_init_node() override check and
load_xbc_file() fd leak) have been split into a separate series for
bootconfig/fixes per Masami's request.
Changes since v7:
- Split fixes from cleanups/improvements into separate series per
maintainer request, so fixes can go into bootconfig/fixes and the
rest (this series) into bootconfig/for-next.
Bug fixes:
- Fix off-by-one in xbc_verify_tree() where a next-node index equal
to xbc_node_num passes the bounds check despite being out of range;
a malformed bootconfig could cause an out-of-bounds read of kernel
memory during tree traversal at boot time (patch 3).
- Move xbc_node_num increment to after xbc_init_node() validation so
a failed init does not leave a partially initialized node counted
in the array; on a maximum-size bootconfig, the uninitialized node
could be traversed leading to unpredictable boot behavior (patch 4).
- Validate child node indices in xbc_verify_tree() alongside the
existing next-node check; without this, a corrupt bootconfig could
trigger an out-of-bounds memory access via an invalid child index
during tree traversal (patch 8).
Correctness:
- Narrow the flag parameter in node creation helpers from uint32_t to
uint16_t to match the xbc_node.data field width (patch 2).
- Constify the xbc_calc_checksum() data parameter since it only reads
the buffer (patch 6).
- Fix strict-GCC signedness and narrowing warnings by aligning local
types with strlen() APIs and the node index/data storage in
xbc_node_get_data(), xbc_node_match_prefix(), xbc_init_node(),
xbc_verify_tree(), and xbc_node_index() (patches 9-13).
Cleanups:
- Fix comment typos, missing blank line before kerneldoc, and
inconsistent if/else bracing (patch 1).
- Drop redundant memset after memblock_alloc which already returns
zeroed memory; switch the userspace path from malloc to calloc to
match (patch 5).
Modernization:
- Replace the catch-all linux/kernel.h include with the specific
headers needed: linux/cache.h, linux/compiler.h, and
linux/sprintf.h (patch 7).
Build-tested with both the in-kernel build (lib/bootconfig.o,
init/main.o) and the userspace tools/bootconfig build. All 70
tools/bootconfig test cases pass.
Josh Law (13):
lib/bootconfig: clean up comment typos and bracing
lib/bootconfig: narrow flag parameter type from uint32_t to uint16_t
lib/bootconfig: fix off-by-one in xbc_verify_tree() next node check
lib/bootconfig: increment xbc_node_num after node init succeeds
lib/bootconfig: drop redundant memset of xbc_nodes
bootconfig: constify xbc_calc_checksum() data parameter
lib/bootconfig: replace linux/kernel.h with specific includes
lib/bootconfig: validate child node index in xbc_verify_tree()
lib/bootconfig: fix signed comparison in xbc_node_get_data()
lib/bootconfig: use size_t for strlen result in
xbc_node_match_prefix()
lib/bootconfig: use signed type for offset in xbc_init_node()
lib/bootconfig: use size_t for key length tracking in
xbc_verify_tree()
lib/bootconfig: change xbc_node_index() return type to uint16_t
include/linux/bootconfig.h | 6 ++--
lib/bootconfig.c | 62 ++++++++++++++++++++++----------------
2 files changed, 39 insertions(+), 29 deletions(-)
--
2.34.1
^ permalink raw reply
* [PATCH v8 01/13] lib/bootconfig: clean up comment typos and bracing
From: Josh Law @ 2026-03-18 15:59 UTC (permalink / raw)
To: Masami Hiramatsu, Andrew Morton
Cc: linux-kernel, linux-trace-kernel, Josh Law
In-Reply-To: <20260318155919.78168-1-objecting@objecting.org>
Fix comment typos ("initiized" and "uder") and add a missing blank line
before a kerneldoc comment. Also fix inconsistent if/else bracing in
__xbc_add_key() and elsewhere.
Signed-off-by: Josh Law <objecting@objecting.org>
---
lib/bootconfig.c | 19 +++++++++++--------
1 file changed, 11 insertions(+), 8 deletions(-)
diff --git a/lib/bootconfig.c b/lib/bootconfig.c
index b0ef1e74e98a..d77b6533ac00 100644
--- a/lib/bootconfig.c
+++ b/lib/bootconfig.c
@@ -79,6 +79,7 @@ static inline void xbc_free_mem(void *addr, size_t size, bool early)
free(addr);
}
#endif
+
/**
* xbc_get_info() - Get the information of loaded boot config
* @node_size: A pointer to store the number of nodes.
@@ -112,7 +113,7 @@ static int __init xbc_parse_error(const char *msg, const char *p)
* xbc_root_node() - Get the root node of extended boot config
*
* Return the address of root node of extended boot config. If the
- * extended boot config is not initiized, return NULL.
+ * extended boot config is not initialized, return NULL.
*/
struct xbc_node * __init xbc_root_node(void)
{
@@ -364,7 +365,7 @@ struct xbc_node * __init xbc_node_find_next_leaf(struct xbc_node *root,
node = xbc_node_get_parent(node);
if (node == root)
return NULL;
- /* User passed a node which is not uder parent */
+ /* User passed a node which is not under parent */
if (WARN_ON(!node))
return NULL;
}
@@ -472,8 +473,9 @@ static struct xbc_node * __init __xbc_add_sibling(char *data, uint32_t flag, boo
sib->next = xbc_node_index(node);
}
}
- } else
+ } else {
xbc_parse_error("Too many nodes", data);
+ }
return node;
}
@@ -655,9 +657,9 @@ static int __init __xbc_add_key(char *k)
if (unlikely(xbc_node_num == 0))
goto add_node;
- if (!last_parent) /* the first level */
+ if (!last_parent) { /* the first level */
node = find_match_node(xbc_nodes, k);
- else {
+ } else {
child = xbc_node_get_child(last_parent);
/* Since the value node is the first child, skip it. */
if (child && xbc_node_is_value(child))
@@ -665,9 +667,9 @@ static int __init __xbc_add_key(char *k)
node = find_match_node(child, k);
}
- if (node)
+ if (node) {
last_parent = node;
- else {
+ } else {
add_node:
node = xbc_add_child(k, XBC_KEY);
if (!node)
@@ -991,8 +993,9 @@ int __init xbc_init(const char *data, size_t size, const char **emsg, int *epos)
if (emsg)
*emsg = xbc_err_msg;
_xbc_exit(true);
- } else
+ } else {
ret = xbc_node_num;
+ }
return ret;
}
--
2.34.1
^ permalink raw reply related
* [PATCH v8 2/2] tools/bootconfig: fix fd leak in load_xbc_file() on fstat failure
From: Josh Law @ 2026-03-18 15:58 UTC (permalink / raw)
To: Masami Hiramatsu, Andrew Morton
Cc: Steven Rostedt, linux-kernel, linux-trace-kernel, Josh Law
In-Reply-To: <20260318155847.78065-1-objecting@objecting.org>
If fstat() fails after open() succeeds, the function returns without
closing the file descriptor. Also preserve errno across close(), since
close() may overwrite it before the error is returned.
Fixes: 950313ebf79c ("tools: bootconfig: Add bootconfig command")
Signed-off-by: Josh Law <objecting@objecting.org>
---
tools/bootconfig/main.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c
index 55d59ed507d5..643f707b8f1d 100644
--- a/tools/bootconfig/main.c
+++ b/tools/bootconfig/main.c
@@ -162,8 +162,11 @@ static int load_xbc_file(const char *path, char **buf)
if (fd < 0)
return -errno;
ret = fstat(fd, &stat);
- if (ret < 0)
- return -errno;
+ if (ret < 0) {
+ ret = -errno;
+ close(fd);
+ return ret;
+ }
ret = load_xbc_fd(fd, buf, stat.st_size);
--
2.34.1
^ permalink raw reply related
* [PATCH v8 1/2] lib/bootconfig: check xbc_init_node() return in override path
From: Josh Law @ 2026-03-18 15:58 UTC (permalink / raw)
To: Masami Hiramatsu, Andrew Morton
Cc: Steven Rostedt, linux-kernel, linux-trace-kernel, Josh Law
In-Reply-To: <20260318155847.78065-1-objecting@objecting.org>
The ':=' override path in xbc_parse_kv() calls xbc_init_node() to
re-initialize an existing value node but does not check the return
value. If xbc_init_node() fails (data offset out of range), parsing
silently continues with stale node data.
Add the missing error check to match the xbc_add_node() call path
which already checks for failure.
In practice, a bootconfig using ':=' to override a value near the
32KB data limit could silently retain the old value, meaning a
security-relevant boot parameter override (e.g., a trace filter or
debug setting) would not take effect as intended.
Fixes: e5efaeb8a8f5 ("bootconfig: Support mixing a value and subkeys under a key")
Signed-off-by: Josh Law <objecting@objecting.org>
---
lib/bootconfig.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/lib/bootconfig.c b/lib/bootconfig.c
index 1adf592cc038..ecc4e8d93547 100644
--- a/lib/bootconfig.c
+++ b/lib/bootconfig.c
@@ -728,7 +728,8 @@ static int __init xbc_parse_kv(char **k, char *v, int op)
if (op == ':') {
unsigned short nidx = child->next;
- xbc_init_node(child, v, XBC_VALUE);
+ if (xbc_init_node(child, v, XBC_VALUE) < 0)
+ return xbc_parse_error("Failed to override value", v);
child->next = nidx; /* keep subkeys */
goto array;
}
--
2.34.1
^ permalink raw reply related
* [PATCH v8 0/2] bootconfig: targeted fixes for stable
From: Josh Law @ 2026-03-18 15:58 UTC (permalink / raw)
To: Masami Hiramatsu, Andrew Morton
Cc: Steven Rostedt, linux-kernel, linux-trace-kernel, Josh Law
Two bug fixes with Fixes tags, split out from the larger cleanup series
(v7 patches 9/15 and 10/15) per Masami's request so they can go
cleanly into bootconfig/fixes and from there into stable.
Changes since v7:
- Split fixes (this series) from cleanups/improvements into separate
series per maintainer request, so fixes can go into bootconfig/fixes
and the rest into bootconfig/for-next.
Bug fixes:
- Check xbc_init_node() return value in the ':=' override path; a
bootconfig using ':=' near the 32KB data limit could silently
retain the old value, meaning a security-relevant boot parameter
override would not take effect (patch 1).
- Fix file descriptor leak in tools/bootconfig load_xbc_file() when
fstat() fails, and preserve errno across close() on that error path
(patch 2).
Build-tested with both the in-kernel build (lib/bootconfig.o,
init/main.o) and the userspace tools/bootconfig build. All 70
tools/bootconfig test cases pass.
Josh Law (2):
lib/bootconfig: check xbc_init_node() return in override path
tools/bootconfig: fix fd leak in load_xbc_file() on fstat failure
lib/bootconfig.c | 3 ++-
tools/bootconfig/main.c | 7 +++++--
2 files changed, 7 insertions(+), 3 deletions(-)
--
2.34.1
^ permalink raw reply
* Re: [PATCH v9 2/4] ring-buffer: Flush and stop persistent ring buffer on panic
From: Mathieu Desnoyers @ 2026-03-18 15:51 UTC (permalink / raw)
To: Masami Hiramatsu (Google)
Cc: Steven Rostedt, linux-kernel, linux-trace-kernel, Ian Rogers
In-Reply-To: <20260319002918.5768a8ee053dd35373e215f2@kernel.org>
On 2026-03-18 11:29, Masami Hiramatsu (Google) wrote:
>>
>> - AFAIU, you are not trying to evict cache lines after creation
>> of a new virtual mapping (which is the documented intent of
>> flush_cache_vmap).
>
> Ah, OK. That's a good point!
> (anyway I will replace it with do { } while (0) in the next version.)
>
>>
>> - AFAIU flush_cache_vmap maps to no-code on arm64 (asm-generic), what am
>> I missing ? It makes sense to be a no-op because AFAIR arm64 does not
>> have to deal with virtually aliasing caches.
>
> Yeah, so my patch also introduces arm64 specific implementation.
Just make sure to call this something other than "flush_cache_vmap",
because you don't want to slow down vmap on arm64, which does not
need to evict, and certainly not write back, cache lines after
creation of a new virtual mapping.
Thanks,
Mathieu
--
Mathieu Desnoyers
EfficiOS Inc.
https://www.efficios.com
^ permalink raw reply
* Re: [PATCH v7 08/15] sched: Add task enqueue/dequeue trace points
From: gmonaco @ 2026-03-18 15:44 UTC (permalink / raw)
To: Peter Zijlstra, linux-trace-kernel, linux-kernel
Cc: Steven Rostedt, Nam Cao, Juri Lelli, Masami Hiramatsu,
Ingo Molnar, K Prateek Nayak, Tomas Glozar, Clark Williams,
John Kacur
In-Reply-To: <20260310105627.332044-9-gmonaco@redhat.com>
Hi Peter,
On Tue, 2026-03-10 at 11:56 +0100, Gabriele Monaco wrote:
> From: Nam Cao <namcao@linutronix.de>
>
> Add trace points into enqueue_task() and dequeue_task().
>
> Signed-off-by: Nam Cao <namcao@linutronix.de>
> Suggested-by: Peter Zijlstra <peterz@infradead.org>
> Reviewed-by: K Prateek Nayak <kprateek.nayak@amd.com>
> Co-developed-by: Gabriele Monaco <gmonaco@redhat.com>
> Signed-off-by: Gabriele Monaco <gmonaco@redhat.com>
> ---
Could I get an Ack for this?
Thanks,
Gabriele
>
> Notes:
> V7:
> * Use cpu_of(rq) helper in tracepoint calls
> V5:
> * Do not fire enqueue tracepoint for delayed enqueues
>
> include/trace/events/sched.h | 8 ++++++++
> kernel/sched/core.c | 12 +++++++++++-
> kernel/sched/sched.h | 2 ++
> 3 files changed, 21 insertions(+), 1 deletion(-)
>
> diff --git a/include/trace/events/sched.h
> b/include/trace/events/sched.h
> index 7b2645b50e78..5844147ec5fd 100644
> --- a/include/trace/events/sched.h
> +++ b/include/trace/events/sched.h
> @@ -896,6 +896,14 @@ DECLARE_TRACE(sched_set_need_resched,
> TP_PROTO(struct task_struct *tsk, int cpu, int tif),
> TP_ARGS(tsk, cpu, tif));
>
> +DECLARE_TRACE(sched_enqueue,
> + TP_PROTO(struct task_struct *tsk, int cpu),
> + TP_ARGS(tsk, cpu));
> +
> +DECLARE_TRACE(sched_dequeue,
> + TP_PROTO(struct task_struct *tsk, int cpu),
> + TP_ARGS(tsk, cpu));
> +
> #endif /* _TRACE_SCHED_H */
>
> /* This part must be outside protection */
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index b7f77c165a6e..c9ca1e048612 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -122,6 +122,8 @@
> EXPORT_TRACEPOINT_SYMBOL_GPL(sched_compute_energy_tp);
> EXPORT_TRACEPOINT_SYMBOL_GPL(sched_entry_tp);
> EXPORT_TRACEPOINT_SYMBOL_GPL(sched_exit_tp);
> EXPORT_TRACEPOINT_SYMBOL_GPL(sched_set_need_resched_tp);
> +EXPORT_TRACEPOINT_SYMBOL_GPL(sched_enqueue_tp);
> +EXPORT_TRACEPOINT_SYMBOL_GPL(sched_dequeue_tp);
>
> DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
> DEFINE_PER_CPU(struct rnd_state, sched_rnd_state);
> @@ -2094,6 +2096,9 @@ unsigned long get_wchan(struct task_struct *p)
>
> void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
> {
> + if (trace_sched_enqueue_tp_enabled() && !(flags &
> ENQUEUE_DELAYED))
> + trace_sched_enqueue_tp(p, cpu_of(rq));
> +
> if (!(flags & ENQUEUE_NOCLOCK))
> update_rq_clock(rq);
>
> @@ -2120,6 +2125,8 @@ void enqueue_task(struct rq *rq, struct
> task_struct *p, int flags)
> */
> inline bool dequeue_task(struct rq *rq, struct task_struct *p, int
> flags)
> {
> + bool ret;
> +
> if (sched_core_enabled(rq))
> sched_core_dequeue(rq, p, flags);
>
> @@ -2136,7 +2143,10 @@ inline bool dequeue_task(struct rq *rq, struct
> task_struct *p, int flags)
> * and mark the task ->sched_delayed.
> */
> uclamp_rq_dec(rq, p);
> - return p->sched_class->dequeue_task(rq, p, flags);
> + ret = p->sched_class->dequeue_task(rq, p, flags);
> + if (trace_sched_dequeue_tp_enabled() && !(flags &
> DEQUEUE_SLEEP))
> + trace_sched_dequeue_tp(p, cpu_of(rq));
> + return ret;
> }
>
> void activate_task(struct rq *rq, struct task_struct *p, int flags)
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index 43bbf0693cca..d7ab0aa58a70 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -2955,6 +2955,8 @@ static inline void sub_nr_running(struct rq
> *rq, unsigned count)
>
> static inline void __block_task(struct rq *rq, struct task_struct
> *p)
> {
> + trace_sched_dequeue_tp(p, cpu_of(rq));
> +
> if (p->sched_contributes_to_load)
> rq->nr_uninterruptible++;
>
^ permalink raw reply
* Re: [PATCH v7 00/15] bootconfig: fixes, cleanups, and modernization
From: Masami Hiramatsu @ 2026-03-18 15:34 UTC (permalink / raw)
To: Josh Law; +Cc: Andrew Morton, Steven Rostedt, linux-kernel, linux-trace-kernel
In-Reply-To: <20260317160916.33576-1-objecting@objecting.org>
Hi Josh,
It is mostly OK, but can you reorder this series so that the fixes (which
have a "Fixes" tag, [9/15] and [10/15]) come first and the others second,
so that I can cleanly merge them into bootconfig/fixes and
bootconfig/for-next?
The patches which have a Fixes tag should go into the stable tree, but the
other improvements/cleanups should go to the next merge window.
Thank you,
On Tue, 17 Mar 2026 16:09:01 +0000
Josh Law <objecting@objecting.org> wrote:
> This series addresses a collection of issues found during a review of
> lib/bootconfig.c, include/linux/bootconfig.h, and tools/bootconfig,
> ranging from off-by-one errors and unchecked return values to coding
> style, signedness/type cleanup, and API modernization.
>
> Changes since v6:
> - Dropped "add missing __init annotations to static helpers"
> (v6 patch 1).
> - Dropped "fix sign-compare in xbc_node_compose_key_after()"
> (v6 patch 16).
> - Updated "fix fd leak in load_xbc_file() on fstat failure" to save
> errno before close(), since close() may overwrite it before the
> error is returned (patch 10).
> - Updated "fix signed comparison in xbc_node_get_data()" to use
> size_t for the local offset variable, matching the warning and
> xbc_data_size (patch 11).
> - Updated "use signed type for offset in xbc_init_node()" to use a
> signed long and explicitly check offset < 0 in WARN_ON(), making
> the pre-base pointer case explicit instead of relying on unsigned
> wraparound (patch 13).
>
> Changes since v5:
> - Folded typo fixes, kerneldoc blank line, and inconsistent bracing
> patches (v5 02-05, 07) into a single patch (patch 1).
> - Dropped "use __packed macro for struct xbc_node" (v5 11) and
> "add __packed definition to tools/bootconfig shim header" (v5 14)
> per review feedback.
> - Added Fixes: tag to "check xbc_init_node() return in override
> path" (patch 9).
> - Added Fixes: tag to "fix fd leak in load_xbc_file() on fstat
> failure" (patch 10).
>
> Changes since v4:
> - Added six follow-up patches found via static analysis with strict
> GCC warnings (patches 11-15, plus the now-dropped v6 patch 16).
> - Added "fix signed comparison in xbc_node_get_data()" to match the
> local offset type to xbc_data_size and eliminate the sign-compare
> warning (patch 11).
> - Added "use size_t for strlen result in xbc_node_match_prefix()"
> and "use size_t for key length tracking in xbc_verify_tree()" to
> match strlen() return types (patches 12, 14).
> - Added "use signed type for offset in xbc_init_node()" to make the
> offset bounds check explicit and avoid sign-conversion warnings
> from pointer subtraction (patch 13).
> - Added "change xbc_node_index() return type to uint16_t" to match
> the 16-bit storage fields and XBC_NODE_MAX bounds (patch 15).
>
> Changes since v3:
> - Added commit descriptions to all patches that were missing them.
> - Added real-world impact statements to all bug-fix patches.
>
> Changes since v2:
> - Added "validate child node index in xbc_verify_tree()" (patch 8).
> - Added "check xbc_init_node() return in override path" (patch 9).
> - Added "fix fd leak in load_xbc_file() on fstat failure" (patch 10).
>
> Changes since v1:
> - Dropped "return empty string instead of NULL from
> xbc_node_get_data()" -- returning "" causes false matches in
> xbc_node_match_prefix() because strncmp(..., "", 0) always
> returns 0.
>
> Bug fixes:
> - Fix off-by-one in xbc_verify_tree() where a next-node index equal
> to xbc_node_num passes the bounds check despite being out of range;
> a malformed bootconfig could cause an out-of-bounds read of kernel
> memory during tree traversal at boot time (patch 3).
> - Move xbc_node_num increment to after xbc_init_node() validation so
> a failed init does not leave a partially initialized node counted
> in the array; on a maximum-size bootconfig, the uninitialized node
> could be traversed leading to unpredictable boot behavior (patch 4).
> - Validate child node indices in xbc_verify_tree() alongside the
> existing next-node check; without this, a corrupt bootconfig could
> trigger an out-of-bounds memory access via an invalid child index
> during tree traversal (patch 8).
> - Check xbc_init_node() return value in the ':=' override path; a
> bootconfig using ':=' near the 32KB data limit could silently
> retain the old value, meaning a security-relevant boot parameter
> override would not take effect (patch 9).
> - Fix file descriptor leak in tools/bootconfig load_xbc_file() when
> fstat() fails, and preserve errno across close() on that error path
> (patch 10).
>
> Correctness:
> - Narrow the flag parameter in node creation helpers from uint32_t to
> uint16_t to match the xbc_node.data field width (patch 2).
> - Constify the xbc_calc_checksum() data parameter since it only reads
> the buffer (patch 6).
> - Fix strict-GCC signedness and narrowing warnings by aligning local
> types with strlen() APIs and the node index/data storage in
> xbc_node_get_data(), xbc_node_match_prefix(), xbc_init_node(),
> xbc_verify_tree(), and xbc_node_index() (patches 11-15).
>
> Cleanups:
> - Fix comment typos, missing blank line before kerneldoc, and
> inconsistent if/else bracing (patch 1).
> - Drop redundant memset after memblock_alloc which already returns
> zeroed memory; switch the userspace path from malloc to calloc to
> match (patch 5).
>
> Modernization:
> - Replace the catch-all linux/kernel.h include with the specific
> headers needed: linux/cache.h, linux/compiler.h, and
> linux/sprintf.h (patch 7).
>
> Build-tested with both the in-kernel build (lib/bootconfig.o,
> init/main.o) and the userspace tools/bootconfig build. All 70
> tools/bootconfig test cases pass.
>
> Josh Law (15):
> lib/bootconfig: clean up comment typos and bracing
> lib/bootconfig: narrow flag parameter type from uint32_t to uint16_t
> lib/bootconfig: fix off-by-one in xbc_verify_tree() next node check
> lib/bootconfig: increment xbc_node_num after node init succeeds
> lib/bootconfig: drop redundant memset of xbc_nodes
> bootconfig: constify xbc_calc_checksum() data parameter
> lib/bootconfig: replace linux/kernel.h with specific includes
> lib/bootconfig: validate child node index in xbc_verify_tree()
> lib/bootconfig: check xbc_init_node() return in override path
> tools/bootconfig: fix fd leak in load_xbc_file() on fstat failure
> lib/bootconfig: fix signed comparison in xbc_node_get_data()
> lib/bootconfig: use size_t for strlen result in
> xbc_node_match_prefix()
> lib/bootconfig: use signed type for offset in xbc_init_node()
> lib/bootconfig: use size_t for key length tracking in
> xbc_verify_tree()
> lib/bootconfig: change xbc_node_index() return type to uint16_t
>
> include/linux/bootconfig.h | 6 ++--
> lib/bootconfig.c | 65 ++++++++++++++++++++++----------------
> tools/bootconfig/main.c | 7 ++--
> 3 files changed, 46 insertions(+), 32 deletions(-)
>
> --
> 2.34.1
--
Masami Hiramatsu (Google) <mhiramat@kernel.org>
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox