* [PATCH v3 1/9] rv/da: introduce DA_MON_ALLOCATION_STRATEGY
2026-06-07 16:13 [PATCH v3 0/9] rv/tlob: Add task latency over budget RV monitor wen.yang
@ 2026-06-07 16:13 ` wen.yang
2026-06-07 16:13 ` [PATCH v3 2/9] rv: add generic uprobe infrastructure for RV monitors wen.yang
` (7 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: wen.yang @ 2026-06-07 16:13 UTC (permalink / raw)
To: Gabriele Monaco
Cc: Steven Rostedt, linux-trace-kernel, linux-kernel, Wen Yang
From: Wen Yang <wen.yang@linux.dev>
Consolidate per-object DA monitor storage allocation under a
single compile-time selector, replacing the ad-hoc
da_monitor_init_prealloc() API.
Three strategies are provided:
DA_ALLOC_AUTO (default) - lock-free kmalloc_nolock on the hot path;
unbounded capacity. Preserves the existing
behaviour for all monitors that do not set
DA_MON_ALLOCATION_STRATEGY.
DA_ALLOC_POOL - pre-allocated fixed-size pool. Requires the
monitor to define DA_MON_POOL_SIZE; enforced
with #error. da_prepare_storage() acquires
spinlock_t (O(1), irqsave); must be called
from task context on PREEMPT_RT where
spinlock_t is a sleeping lock.
DA_ALLOC_MANUAL - caller pre-inserts storage via
da_create_empty_storage() before the first
da_handle_start_event(); the framework only
links the target field. Useful for monitors
that allocate storage from known-safe task
context (e.g. a syscall path) and then hand
it to a tracepoint handler on the hot path.
da_handle_start_event() and da_handle_start_run_event() both call
da_prepare_storage() which resolves at compile time to the correct
allocation function, so no runtime dispatch is needed.
da_monitor_init_prealloc() is removed; da_monitor_init() selects pool
or kmalloc initialisation internally based on the strategy.
A da_extra_cleanup() hook macro is added: the default is a no-op; a
monitor may define it as a function called by da_monitor_destroy() on
each remaining entry before hash_del_rcu().
nomiss is updated to DA_ALLOC_MANUAL: it calls da_create_empty_storage()
from handle_sys_enter() (the sched_setscheduler syscall path, safe
task context), then da_fill_empty_storage() links the sched_dl_entity
target on the first da_handle_start_run_event() call in
handle_sched_switch().
Suggested-by: Gabriele Monaco <gmonaco@redhat.com>
Signed-off-by: Wen Yang <wen.yang@linux.dev>
---
include/rv/da_monitor.h | 276 +++++++++++++++++++++--
kernel/trace/rv/monitors/nomiss/nomiss.c | 6 +-
2 files changed, 254 insertions(+), 28 deletions(-)
diff --git a/include/rv/da_monitor.h b/include/rv/da_monitor.h
index 34b8fba9ecd4..eb7fc02ecb8a 100644
--- a/include/rv/da_monitor.h
+++ b/include/rv/da_monitor.h
@@ -14,6 +14,26 @@
#ifndef _RV_DA_MONITOR_H
#define _RV_DA_MONITOR_H
+/*
+ * Allocation strategies for RV_MON_PER_OBJ monitors.
+ *
+ * Define DA_MON_ALLOCATION_STRATEGY before including this header.
+ * DA_ALLOC_AUTO - lock-free kmalloc on the hot path; unbounded capacity.
+ * DA_ALLOC_POOL - pre-allocated fixed-size pool; requires DA_MON_POOL_SIZE.
+ * da_prepare_storage() acquires spinlock_t (O(1), irqsave);
+ * must be called from task context on PREEMPT_RT where
+ * spinlock_t is a sleeping lock.
+ * DA_ALLOC_MANUAL - caller inserts storage before da_handle_start_event();
+ * the framework only links the target field.
+ */
+#define DA_ALLOC_AUTO 0
+#define DA_ALLOC_POOL 1
+#define DA_ALLOC_MANUAL 2
+
+#ifndef DA_MON_ALLOCATION_STRATEGY
+# define DA_MON_ALLOCATION_STRATEGY DA_ALLOC_AUTO
+#endif
+
#include <rv/automata.h>
#include <linux/rv.h>
#include <linux/stringify.h>
@@ -66,6 +86,19 @@ static struct rv_monitor rv_this;
#define da_monitor_sync_hook()
#endif
+/*
+ * Hook for per-object teardown during da_monitor_destroy().
+ *
+ * Called for each entry still in the hash table when the monitor is
+ * destroyed. Invoked before da_monitor_reset() and hash_del_rcu(), so
+ * it is safe to call ha_cancel_timer_sync() here.
+ *
+ * Define before including this header. Default is a no-op.
+ */
+#ifndef da_extra_cleanup
+#define da_extra_cleanup(da_mon)
+#endif
+
/*
* Type for the target id, default to int but can be overridden.
* A long type can work as hash table key (PER_OBJ) but will be downgraded to
@@ -398,6 +431,16 @@ static inline void da_monitor_destroy(void)
* Functions to define, init and get a per-object monitor.
*/
+/*
+ * DA_MON_POOL_SIZE must be defined before this header is included (directly or
+ * transitively via ha_monitor.h) when DA_ALLOC_POOL is selected. In practice
+ * this means defining it after the monitor's model header (which supplies the
+ * capacity constant) and before the ha_monitor.h include.
+ */
+#if DA_MON_ALLOCATION_STRATEGY == DA_ALLOC_POOL && !defined(DA_MON_POOL_SIZE)
+# error "DA_ALLOC_POOL requires DA_MON_POOL_SIZE to be defined before including this header"
+#endif
+
struct da_monitor_storage {
da_id_type id;
monitor_target target;
@@ -495,18 +538,6 @@ static inline da_id_type da_get_id(struct da_monitor *da_mon)
return container_of(da_mon, struct da_monitor_storage, rv.da_mon)->id;
}
-/*
- * da_create_or_get - create the per-object storage if not already there
- *
- * This needs a lookup so should be guarded by RCU, the condition is checked
- * directly in da_create_storage()
- */
-static inline void da_create_or_get(da_id_type id, monitor_target target)
-{
- guard(rcu)();
- da_create_storage(id, target, da_get_monitor(id, target));
-}
-
/*
* da_fill_empty_storage - store the target in a pre-allocated storage
*
@@ -537,15 +568,96 @@ static inline monitor_target da_get_target_by_id(da_id_type id)
return mon_storage->target;
}
+/*
+ * Per-object pool state.
+ *
+ * Zero-initialised by default and only populated in pool mode. A monitor
+ * opts into pool mode by defining DA_MON_ALLOCATION_STRATEGY DA_ALLOC_POOL
+ * and DA_MON_POOL_SIZE before including this header; da_monitor_init() then
+ * pre-allocates the pool internally. The mode is chosen at compile time.
+ *
+ * Because every field is wrapped in this struct and the struct itself is a
+ * per-TU static, each monitor that includes this header gets a completely
+ * independent pool. A kmalloc monitor (e.g. nomiss) and a pool monitor
+ * (e.g. tlob) therefore coexist without any interference.
+ *
+ * da_pool_return_cb runs from softirq (non-PREEMPT_RT) or rcuc kthread
+ * (PREEMPT_RT); spin_lock_irqsave handles both.
+ */
+struct da_per_obj_pool {
+ struct da_monitor_storage *storage; /* non-NULL ⟹ pool mode */
+ struct da_monitor_storage **free; /* kmalloc'd pointer stack */
+ unsigned int free_top;
+ unsigned int capacity; /* total number of slots */
+ spinlock_t lock;
+};
+
+static struct da_per_obj_pool da_pool = {
+ .lock = __SPIN_LOCK_UNLOCKED(da_pool.lock),
+};
+
+static void da_pool_return_cb(struct rcu_head *head)
+{
+ struct da_monitor_storage *ms =
+ container_of(head, struct da_monitor_storage, rcu);
+ unsigned long flags;
+
+ spin_lock_irqsave(&da_pool.lock, flags);
+ if (!WARN_ON_ONCE(!da_pool.free || da_pool.free_top >= da_pool.capacity))
+ da_pool.free[da_pool.free_top++] = ms;
+ spin_unlock_irqrestore(&da_pool.lock, flags);
+}
+
+/*
+ * da_create_or_get_pool - pop a slot and insert it into the hash.
+ *
+ * Returns the new da_monitor on success, NULL if the pool is exhausted, or
+ * the existing da_monitor if a concurrent caller already inserted the same id
+ * (in which case the popped slot is returned to the free stack).
+ *
+ * Must be called inside an RCU read-side critical section (guard(rcu)()).
+ */
+static inline struct da_monitor *
+da_create_or_get_pool(da_id_type id, monitor_target target)
+{
+ struct da_monitor_storage *mon_storage, *existing;
+ unsigned long flags;
+
+ spin_lock_irqsave(&da_pool.lock, flags);
+ if (!da_pool.free_top) {
+ spin_unlock_irqrestore(&da_pool.lock, flags);
+ return NULL;
+ }
+ mon_storage = da_pool.free[--da_pool.free_top];
+ spin_unlock_irqrestore(&da_pool.lock, flags);
+
+ mon_storage->id = id;
+ mon_storage->target = target;
+
+ /*
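+ * A concurrent caller may have inserted the same id after we popped the
+ * slot above. If so, return the slot to the pool and yield to the winner.
+ * NOTE(review): this lookup and the hash_add_rcu() below are not done
+ * under da_pool.lock, so the check narrows the race but does not close
+ * it - confirm that callers serialise writers for a given id.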
+ */
+ existing = __da_get_mon_storage(id);
+ if (unlikely(existing)) {
+ spin_lock_irqsave(&da_pool.lock, flags);
+ da_pool.free[da_pool.free_top++] = mon_storage;
+ spin_unlock_irqrestore(&da_pool.lock, flags);
+ return &existing->rv.da_mon;
+ }
+ hash_add_rcu(da_monitor_ht, &mon_storage->node, id);
+ return &mon_storage->rv.da_mon;
+}
+
+
/*
* da_destroy_storage - destroy the per-object storage
*
- * The caller is responsible to synchronise writers, either with locks or
- * implicitly. For instance, if da_destroy_storage is called at sched_exit and
- * da_create_storage can never occur after that, it's safe to call this without
- * locks.
- * This function includes an RCU read-side critical section to synchronise
- * against da_monitor_destroy().
+ * Pool mode: removes from hash and returns the slot via call_rcu().
+ * Kmalloc mode: removes from hash and frees via kfree_rcu().
+ *
+ * Includes an RCU read-side critical section to synchronise against
+ * da_monitor_destroy().
*/
static inline void da_destroy_storage(da_id_type id)
{
@@ -558,7 +670,11 @@ static inline void da_destroy_storage(da_id_type id)
return;
da_monitor_reset_hook(&mon_storage->rv.da_mon);
hash_del_rcu(&mon_storage->node);
+#if DA_MON_ALLOCATION_STRATEGY == DA_ALLOC_POOL
+ call_rcu(&mon_storage->rcu, da_pool_return_cb);
+#else
kfree_rcu(mon_storage, rcu);
+#endif
}
static void __da_monitor_reset_all(void (*reset)(struct da_monitor *))
@@ -581,13 +697,87 @@ static inline void da_monitor_reset_state_all(void)
__da_monitor_reset_all(da_monitor_reset_state);
}
+/* Not part of the public API; called by da_monitor_init() for DA_ALLOC_POOL. */
+static inline int __da_monitor_init_pool(unsigned int prealloc_count)
+{
+ da_pool.storage = kcalloc(prealloc_count, sizeof(*da_pool.storage),
+ GFP_KERNEL);
+ if (!da_pool.storage)
+ return -ENOMEM;
+
+ da_pool.free = kmalloc_array(prealloc_count, sizeof(*da_pool.free),
+ GFP_KERNEL);
+ if (!da_pool.free) {
+ kfree(da_pool.storage);
+ da_pool.storage = NULL;
+ return -ENOMEM;
+ }
+
+ da_pool.capacity = prealloc_count;
+ da_pool.free_top = 0;
+ for (unsigned int i = 0; i < prealloc_count; i++)
+ da_pool.free[da_pool.free_top++] = &da_pool.storage[i];
+ return 0;
+}
+
+/*
+ * da_monitor_init - initialise the per-object monitor
+ *
+ * Selects the allocation path at compile time based on DA_MON_ALLOCATION_STRATEGY:
+ * DA_ALLOC_POOL - pre-allocates DA_MON_POOL_SIZE storage slots.
+ * DA_ALLOC_AUTO / DA_ALLOC_MANUAL - initialises the hash table only.
+ */
static inline int da_monitor_init(void)
{
hash_init(da_monitor_ht);
+#if DA_MON_ALLOCATION_STRATEGY == DA_ALLOC_POOL
+ return __da_monitor_init_pool(DA_MON_POOL_SIZE);
+#else
return 0;
+#endif
}
-static inline void da_monitor_destroy(void)
+static inline void da_monitor_destroy_pool(void)
+{
+ struct da_monitor_storage *ms;
+ struct hlist_node *tmp;
+ int bkt;
+
+ /*
+ * Ensure all in-flight tracepoint handlers that may hold a raw pointer
+ * to a pool slot (e.g. tlob_stop_task after its RCU guard exits) have
+ * completed before we begin tearing down the pool. Mirrors the same
+ * call in da_monitor_destroy_kmalloc().
+ */
+ tracepoint_synchronize_unregister();
+
+ /*
+ * Drain any entries that were not stopped before destroy (e.g.
+ * uprobe-started sessions whose stop probe never fired). Call
+ * da_extra_cleanup() before hash_del_rcu() so the hook may safely
+ * call ha_cancel_timer_sync() while the monitor is still reachable.
+ */
+ hash_for_each_safe(da_monitor_ht, bkt, tmp, ms, node) {
+ da_extra_cleanup(&ms->rv.da_mon);
+ hash_del_rcu(&ms->node);
+ call_rcu(&ms->rcu, da_pool_return_cb);
+ }
+
+ /*
+ * rcu_barrier() drains every pending call_rcu() callback, including
+ * both da_pool_return_cb() and any monitor-specific free callbacks
+ * (e.g. tlob_free_rcu) enqueued by da_extra_cleanup().
+ */
+ rcu_barrier();
+ kfree(da_pool.storage);
+ da_pool.storage = NULL;
+ kfree(da_pool.free);
+ da_pool.free = NULL;
+ da_pool.free_top = 0;
+ da_pool.capacity = 0;
+}
+
+static inline void da_monitor_destroy_kmalloc(void)
{
struct da_monitor_storage *mon_storage;
struct hlist_node *tmp;
@@ -607,15 +797,51 @@ static inline void da_monitor_destroy(void)
}
/*
- * Allow the per-object monitors to run allocation manually, necessary if the
- * start condition is in a context problematic for allocation (e.g. scheduling).
- * In such case, if the storage was pre-allocated without a target, set it now.
+ * da_monitor_destroy - tear down the per-object monitor
+ *
+ * DA_ALLOC_POOL: calls tracepoint_synchronize_unregister() to drain any
+ * in-flight handlers, then iterates the hash draining remaining entries via
+ * da_extra_cleanup() + hash_del_rcu() + call_rcu(), then rcu_barrier() to
+ * wait for all pending da_pool_return_cb() callbacks before freeing the pool.
+ * DA_ALLOC_AUTO / DA_ALLOC_MANUAL: drains remaining entries after
+ * tracepoint_synchronize_unregister() + synchronize_rcu().
*/
-#ifdef DA_SKIP_AUTO_ALLOC
-#define da_prepare_storage da_fill_empty_storage
+static inline void da_monitor_destroy(void)
+{
+#if DA_MON_ALLOCATION_STRATEGY == DA_ALLOC_POOL
+ da_monitor_destroy_pool();
#else
+ da_monitor_destroy_kmalloc();
+#endif
+}
+
+/*
+ * da_prepare_storage - obtain (or create) the da_monitor for (id, target)
+ *
+ * The implementation is selected at compile time by DA_MON_ALLOCATION_STRATEGY:
+ *
+ * DA_ALLOC_AUTO - calls da_create_storage() (lock-free kmalloc_nolock).
+ * DA_ALLOC_POOL - if an entry already exists, returns it; otherwise pops a
+ * slot from the pre-allocated pool, inserts it into the hash
+ * and returns it. Returns NULL if the pool is exhausted.
+ * DA_ALLOC_MANUAL - caller has already inserted storage via da_create_empty_storage();
+ * only fills in the target field if it was left NULL.
+ */
+#if DA_MON_ALLOCATION_STRATEGY == DA_ALLOC_POOL
+static inline struct da_monitor *da_prepare_storage(da_id_type id,
+ monitor_target target,
+ struct da_monitor *da_mon)
+{
+ if (da_mon)
+ return da_mon;
+ /* da_create_or_get_pool() returns the da_monitor directly; no re-lookup needed. */
+ return da_create_or_get_pool(id, target);
+}
+#elif DA_MON_ALLOCATION_STRATEGY == DA_ALLOC_MANUAL
+#define da_prepare_storage da_fill_empty_storage
+#else /* DA_ALLOC_AUTO */
#define da_prepare_storage da_create_storage
-#endif /* DA_SKIP_AUTO_ALLOC */
+#endif
#endif /* RV_MON_TYPE */
diff --git a/kernel/trace/rv/monitors/nomiss/nomiss.c b/kernel/trace/rv/monitors/nomiss/nomiss.c
index 8ead8783c29f..ac4d334e757f 100644
--- a/kernel/trace/rv/monitors/nomiss/nomiss.c
+++ b/kernel/trace/rv/monitors/nomiss/nomiss.c
@@ -17,8 +17,8 @@
#define RV_MON_TYPE RV_MON_PER_OBJ
#define HA_TIMER_TYPE HA_TIMER_WHEEL
-/* The start condition is on sched_switch, it's dangerous to allocate there */
-#define DA_SKIP_AUTO_ALLOC
+/* Allocate storage in the sched_setscheduler syscall path; allocating in sched_switch is unsafe. */
+#define DA_MON_ALLOCATION_STRATEGY DA_ALLOC_MANUAL
typedef struct sched_dl_entity *monitor_target;
#include "nomiss.h"
#include <rv/ha_monitor.h>
@@ -214,7 +214,7 @@ static void handle_sys_enter(void *data, struct pt_regs *regs, long id)
if (p->policy == SCHED_DEADLINE)
da_reset(EXPAND_ID_TASK(p));
else if (new_policy == SCHED_DEADLINE)
- da_create_or_get(EXPAND_ID_TASK(p));
+ da_create_empty_storage(get_entity_id(&p->dl, task_cpu(p), DL_TASK));
}
static void handle_sched_wakeup(void *data, struct task_struct *tsk)
--
2.43.0
^ permalink raw reply related [flat|nested] 10+ messages in thread* [PATCH v3 2/9] rv: add generic uprobe infrastructure for RV monitors
2026-06-07 16:13 [PATCH v3 0/9] rv/tlob: Add task latency over budget RV monitor wen.yang
2026-06-07 16:13 ` [PATCH v3 1/9] rv/da: introduce DA_MON_ALLOCATION_STRATEGY wen.yang
@ 2026-06-07 16:13 ` wen.yang
2026-06-07 16:13 ` [PATCH v3 3/9] rv/tlob: add tlob model DOT file wen.yang
` (6 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: wen.yang @ 2026-06-07 16:13 UTC (permalink / raw)
To: Gabriele Monaco
Cc: Steven Rostedt, linux-trace-kernel, linux-kernel, Wen Yang
From: Wen Yang <wen.yang@linux.dev>
Introduce rv_uprobe, a thin wrapper around uprobe_consumer providing
rv_uprobe_attach_path(), rv_uprobe_attach(), and rv_uprobe_detach()
for RV monitors. An opaque priv pointer is forwarded unchanged to
entry/return handlers so monitors can carry per-binding state (e.g. a
latency threshold) to the hot path without any global lookup.
rv_uprobe_detach() is fully synchronous (nosync + sync + path_put +
kfree), closing the use-after-free window present in open-coded
patterns where kfree() precedes uprobe_unregister_sync().
Suggested-by: Gabriele Monaco <gmonaco@redhat.com>
Signed-off-by: Wen Yang <wen.yang@linux.dev>
---
include/rv/rv_uprobe.h | 119 +++++++++++++++++++++++
kernel/trace/rv/Kconfig | 4 +
kernel/trace/rv/Makefile | 1 +
kernel/trace/rv/rv_uprobe.c | 182 ++++++++++++++++++++++++++++++++++++
4 files changed, 306 insertions(+)
create mode 100644 include/rv/rv_uprobe.h
create mode 100644 kernel/trace/rv/rv_uprobe.c
diff --git a/include/rv/rv_uprobe.h b/include/rv/rv_uprobe.h
new file mode 100644
index 000000000000..9106c5c9275e
--- /dev/null
+++ b/include/rv/rv_uprobe.h
@@ -0,0 +1,119 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Generic uprobe infrastructure for RV monitors.
+ *
+ */
+
+#ifndef _RV_UPROBE_H
+#define _RV_UPROBE_H
+
+#include <linux/path.h>
+#include <linux/types.h>
+
+struct pt_regs;
+
+/**
+ * struct rv_uprobe - a single uprobe registered on behalf of an RV monitor
+ *
+ * @offset: byte offset within the ELF binary where the probe is installed
+ * @priv: monitor-private pointer; set at attach time, never touched by
+ * this layer; passed unchanged to entry_fn / ret_fn
+ * @path: resolved path of the probed binary (read-only after attach);
+ * callers may use path.dentry for identity comparisons
+ *
+ * The implementation fields (uprobe_consumer, uprobe handle, callbacks) are
+ * private to rv_uprobe.c and are not exposed here; monitors must not access
+ * them directly.
+ */
+struct rv_uprobe {
+ /* public: read-only after rv_uprobe_attach*() */
+ loff_t offset;
+ void *priv;
+ struct path path;
+};
+
+/**
+ * rv_uprobe_attach_path - register an uprobe given an already-resolved path
+ * @path: path of the target binary; rv_uprobe takes its own reference
+ * @offset: byte offset within the binary
+ * @entry_fn: called on probe hit (entry); may be NULL
+ * @ret_fn: called on function return (uretprobe); may be NULL
+ * @priv: opaque pointer forwarded to callbacks unchanged
+ *
+ * Use this variant when the caller has already resolved the path (e.g. to
+ * register multiple probes on the same binary with a single kern_path call).
+ * The inode is derived internally via d_real_inode(), so inode and path are
+ * always consistent. Unlike rv_uprobe_attach(), this variant performs no
+ * d_is_reg() check of its own on @path.
+ *
+ * Returns a pointer to the new rv_uprobe on success, ERR_PTR on failure.
+ */
+struct rv_uprobe *rv_uprobe_attach_path(struct path *path, loff_t offset,
+ int (*entry_fn)(struct rv_uprobe *p, struct pt_regs *regs, __u64 *data),
+ int (*ret_fn)(struct rv_uprobe *p, unsigned long func,
+ struct pt_regs *regs, __u64 *data),
+ void *priv);
+
+/**
+ * rv_uprobe_attach - resolve binpath and register an uprobe
+ * @binpath: absolute path to the target binary
+ * @offset: byte offset within the binary
+ * @entry_fn: called on probe hit (entry); may be NULL
+ * @ret_fn: called on function return (uretprobe); may be NULL
+ * @priv: opaque pointer forwarded to callbacks unchanged
+ *
+ * Resolves binpath via kern_path(), then delegates to rv_uprobe_attach_path().
+ *
+ * Returns a pointer to the new rv_uprobe on success, ERR_PTR on failure.
+ */
+struct rv_uprobe *rv_uprobe_attach(const char *binpath, loff_t offset,
+ int (*entry_fn)(struct rv_uprobe *p, struct pt_regs *regs, __u64 *data),
+ int (*ret_fn)(struct rv_uprobe *p, unsigned long func,
+ struct pt_regs *regs, __u64 *data),
+ void *priv);
+
+/**
+ * rv_uprobe_detach - synchronously unregister an uprobe and free it
+ * @p: probe to detach; may be NULL (no-op)
+ *
+ * Calls uprobe_unregister_nosync(), then uprobe_unregister_sync() to wait
+ * for any in-progress handler to finish, then releases the path reference
+ * and frees the rv_uprobe struct. The caller's priv data is NOT freed.
+ *
+ * When removing a single probe, prefer this over the three-phase API.
+ * Safe to call from process context only (uprobe_unregister_sync() may
+ * schedule).
+ */
+void rv_uprobe_detach(struct rv_uprobe *p);
+
+/**
+ * rv_uprobe_unregister_nosync - dequeue an uprobe without waiting
+ * @p: probe to dequeue; may be NULL (no-op)
+ *
+ * Removes the uprobe from the uprobe subsystem but does NOT wait for
+ * in-flight handlers to complete. The caller must call rv_uprobe_sync()
+ * before calling rv_uprobe_free() on the same probe.
+ *
+ * Use this to batch multiple deregistrations before a single rv_uprobe_sync().
+ */
+void rv_uprobe_unregister_nosync(struct rv_uprobe *p);
+
+/**
+ * rv_uprobe_sync - wait for all in-flight uprobe handlers to complete
+ *
+ * Global barrier: waits for every in-flight uprobe handler across the system
+ * to finish. Call once after a batch of rv_uprobe_unregister_nosync() calls
+ * and before any rv_uprobe_free() call.
+ */
+void rv_uprobe_sync(void);
+
+/**
+ * rv_uprobe_free - release resources of a previously deregistered probe
+ * @p: probe to free; may be NULL (no-op)
+ *
+ * Releases the path reference and frees the rv_uprobe struct. Must only
+ * be called after rv_uprobe_sync() has returned. The caller's priv data
+ * is NOT freed.
+ */
+void rv_uprobe_free(struct rv_uprobe *p);
+
+#endif /* _RV_UPROBE_H */
diff --git a/kernel/trace/rv/Kconfig b/kernel/trace/rv/Kconfig
index 3884b14df375..e2e0033a00b9 100644
--- a/kernel/trace/rv/Kconfig
+++ b/kernel/trace/rv/Kconfig
@@ -59,6 +59,10 @@ config RV_PER_TASK_MONITORS
This option configures the maximum number of per-task RV monitors that can run
simultaneously.
+config RV_UPROBE
+ bool
+ depends on RV && UPROBES
+
source "kernel/trace/rv/monitors/wip/Kconfig"
source "kernel/trace/rv/monitors/wwnr/Kconfig"
diff --git a/kernel/trace/rv/Makefile b/kernel/trace/rv/Makefile
index 94498da35b37..f139b904bea3 100644
--- a/kernel/trace/rv/Makefile
+++ b/kernel/trace/rv/Makefile
@@ -21,6 +21,7 @@ obj-$(CONFIG_RV_MON_STALL) += monitors/stall/stall.o
obj-$(CONFIG_RV_MON_DEADLINE) += monitors/deadline/deadline.o
obj-$(CONFIG_RV_MON_NOMISS) += monitors/nomiss/nomiss.o
# Add new monitors here
+obj-$(CONFIG_RV_UPROBE) += rv_uprobe.o
obj-$(CONFIG_RV_REACTORS) += rv_reactors.o
obj-$(CONFIG_RV_REACT_PRINTK) += reactor_printk.o
obj-$(CONFIG_RV_REACT_PANIC) += reactor_panic.o
diff --git a/kernel/trace/rv/rv_uprobe.c b/kernel/trace/rv/rv_uprobe.c
new file mode 100644
index 000000000000..3d8b764dded3
--- /dev/null
+++ b/kernel/trace/rv/rv_uprobe.c
@@ -0,0 +1,182 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Generic uprobe infrastructure for RV monitors.
+ *
+ */
+#include <linux/dcache.h>
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/slab.h>
+#include <linux/uprobes.h>
+#include <rv/rv_uprobe.h>
+
+/*
+ * Private extension of struct rv_uprobe. Allocated by rv_uprobe_attach*()
+ * and returned to callers as &impl->pub.
+ */
+struct rv_uprobe_impl {
+ struct rv_uprobe pub; /* must be first; callers hold &pub */
+ struct uprobe_consumer uc;
+ struct uprobe *uprobe;
+ int (*entry_fn)(struct rv_uprobe *p, struct pt_regs *regs, __u64 *data);
+ int (*ret_fn)(struct rv_uprobe *p, unsigned long func,
+ struct pt_regs *regs, __u64 *data);
+};
+
+static int rv_uprobe_handler(struct uprobe_consumer *uc,
+ struct pt_regs *regs, __u64 *data)
+{
+ struct rv_uprobe_impl *impl = container_of(uc, struct rv_uprobe_impl, uc);
+
+ if (impl->entry_fn)
+ return impl->entry_fn(&impl->pub, regs, data);
+ return 0;
+}
+
+static int rv_uprobe_ret_handler(struct uprobe_consumer *uc,
+ unsigned long func,
+ struct pt_regs *regs, __u64 *data)
+{
+ struct rv_uprobe_impl *impl = container_of(uc, struct rv_uprobe_impl, uc);
+
+ if (impl->ret_fn)
+ return impl->ret_fn(&impl->pub, func, regs, data);
+ return 0;
+}
+
+static struct rv_uprobe *
+__rv_uprobe_attach(struct inode *inode, struct path *path, loff_t offset,
+ int (*entry_fn)(struct rv_uprobe *p, struct pt_regs *regs, __u64 *data),
+ int (*ret_fn)(struct rv_uprobe *p, unsigned long func,
+ struct pt_regs *regs, __u64 *data),
+ void *priv)
+{
+ struct rv_uprobe_impl *impl;
+ int ret;
+
+ if (!entry_fn && !ret_fn)
+ return ERR_PTR(-EINVAL);
+
+ impl = kzalloc_obj(*impl, GFP_KERNEL);
+ if (!impl)
+ return ERR_PTR(-ENOMEM);
+
+ impl->pub.offset = offset;
+ impl->pub.priv = priv;
+ impl->entry_fn = entry_fn;
+ impl->ret_fn = ret_fn;
+ path_get(path);
+ impl->pub.path = *path;
+
+ if (entry_fn)
+ impl->uc.handler = rv_uprobe_handler;
+ if (ret_fn)
+ impl->uc.ret_handler = rv_uprobe_ret_handler;
+
+ impl->uprobe = uprobe_register(inode, offset, 0, &impl->uc);
+ if (IS_ERR(impl->uprobe)) {
+ ret = PTR_ERR(impl->uprobe);
+ path_put(&impl->pub.path);
+ kfree(impl);
+ return ERR_PTR(ret);
+ }
+
+ return &impl->pub;
+}
+
+/**
+ * rv_uprobe_attach_path - register an uprobe given an already-resolved path
+ */
+struct rv_uprobe *rv_uprobe_attach_path(struct path *path, loff_t offset,
+ int (*entry_fn)(struct rv_uprobe *p, struct pt_regs *regs, __u64 *data),
+ int (*ret_fn)(struct rv_uprobe *p, unsigned long func,
+ struct pt_regs *regs, __u64 *data),
+ void *priv)
+{
+ struct inode *inode = d_real_inode(path->dentry);
+
+ return __rv_uprobe_attach(inode, path, offset, entry_fn, ret_fn, priv);
+}
+EXPORT_SYMBOL_GPL(rv_uprobe_attach_path);
+
+/**
+ * rv_uprobe_attach - resolve binpath and register an uprobe
+ */
+struct rv_uprobe *rv_uprobe_attach(const char *binpath, loff_t offset,
+ int (*entry_fn)(struct rv_uprobe *p, struct pt_regs *regs, __u64 *data),
+ int (*ret_fn)(struct rv_uprobe *p, unsigned long func,
+ struct pt_regs *regs, __u64 *data),
+ void *priv)
+{
+ struct rv_uprobe *p;
+ struct path path;
+ int ret;
+
+ ret = kern_path(binpath, LOOKUP_FOLLOW, &path);
+ if (ret)
+ return ERR_PTR(ret);
+
+ if (!d_is_reg(path.dentry)) {
+ path_put(&path);
+ return ERR_PTR(-EINVAL);
+ }
+
+ p = rv_uprobe_attach_path(&path, offset, entry_fn, ret_fn, priv);
+ path_put(&path);
+ return p;
+}
+EXPORT_SYMBOL_GPL(rv_uprobe_attach);
+
+/**
+ * rv_uprobe_detach - synchronously unregister an uprobe and free it
+ */
+void rv_uprobe_detach(struct rv_uprobe *p)
+{
+ if (!p)
+ return;
+
+ rv_uprobe_unregister_nosync(p);
+ rv_uprobe_sync();
+ rv_uprobe_free(p);
+}
+EXPORT_SYMBOL_GPL(rv_uprobe_detach);
+
+/**
+ * rv_uprobe_unregister_nosync - dequeue an uprobe without waiting
+ */
+void rv_uprobe_unregister_nosync(struct rv_uprobe *p)
+{
+ struct rv_uprobe_impl *impl;
+
+ if (!p)
+ return;
+
+ impl = container_of(p, struct rv_uprobe_impl, pub);
+ uprobe_unregister_nosync(impl->uprobe, &impl->uc);
+}
+EXPORT_SYMBOL_GPL(rv_uprobe_unregister_nosync);
+
+/**
+ * rv_uprobe_sync - wait for all in-flight uprobe handlers to complete
+ */
+void rv_uprobe_sync(void)
+{
+ uprobe_unregister_sync();
+}
+EXPORT_SYMBOL_GPL(rv_uprobe_sync);
+
+/**
+ * rv_uprobe_free - release resources of a previously deregistered probe
+ */
+void rv_uprobe_free(struct rv_uprobe *p)
+{
+ struct rv_uprobe_impl *impl;
+
+ if (!p)
+ return;
+
+ impl = container_of(p, struct rv_uprobe_impl, pub);
+ path_put(&p->path);
+ kfree(impl);
+}
+EXPORT_SYMBOL_GPL(rv_uprobe_free);
--
2.43.0
^ permalink raw reply related [flat|nested] 10+ messages in thread* [PATCH v3 3/9] rv/tlob: add tlob model DOT file
2026-06-07 16:13 [PATCH v3 0/9] rv/tlob: Add task latency over budget RV monitor wen.yang
2026-06-07 16:13 ` [PATCH v3 1/9] rv/da: introduce DA_MON_ALLOCATION_STRATEGY wen.yang
2026-06-07 16:13 ` [PATCH v3 2/9] rv: add generic uprobe infrastructure for RV monitors wen.yang
@ 2026-06-07 16:13 ` wen.yang
2026-06-07 16:13 ` [PATCH v3 4/9] rv/ha: fix ha_invariant_passed_ns silent bypass of invariant check wen.yang
` (5 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: wen.yang @ 2026-06-07 16:13 UTC (permalink / raw)
To: Gabriele Monaco
Cc: Steven Rostedt, linux-trace-kernel, linux-kernel, Wen Yang
From: Wen Yang <wen.yang@linux.dev>
Add tools/verification/models/tlob.dot, the Graphviz specification of
the tlob hybrid automaton. The model has three states (running,
waiting, sleeping) connected by four transitions (switch_in, preempt,
wakeup, sleep), plus a start self-loop on running that resets
clk_elapsed. A single clock invariant clk_elapsed < BUDGET_NS() is
active in all states.
Suggested-by: Gabriele Monaco <gmonaco@redhat.com>
Signed-off-by: Wen Yang <wen.yang@linux.dev>
---
tools/verification/models/tlob.dot | 22 ++++++++++++++++++++++
1 file changed, 22 insertions(+)
create mode 100644 tools/verification/models/tlob.dot
diff --git a/tools/verification/models/tlob.dot b/tools/verification/models/tlob.dot
new file mode 100644
index 000000000000..a1834daff2ed
--- /dev/null
+++ b/tools/verification/models/tlob.dot
@@ -0,0 +1,22 @@
+digraph state_automaton {
+ center = true;
+ size = "7,11";
+ {node [shape = plaintext, style=invis, label=""] "__init_running"};
+ {node [shape = ellipse] "running"};
+ {node [shape = plaintext] "running"};
+ {node [shape = plaintext] "waiting"};
+ {node [shape = plaintext] "sleeping"};
+ "__init_running" -> "running";
+ "running" -> "running" [ label = "start;reset(clk_elapsed)" ];
+ "running" [label = "running\nclk_elapsed < BUDGET_NS()", color = green3];
+ "waiting" [label = "waiting\nclk_elapsed < BUDGET_NS()"];
+ "sleeping" [label = "sleeping\nclk_elapsed < BUDGET_NS()"];
+ "running" -> "sleeping" [ label = "sleep" ];
+ "running" -> "waiting" [ label = "preempt" ];
+ "waiting" -> "running" [ label = "switch_in" ];
+ "sleeping" -> "waiting" [ label = "wakeup" ];
+ { rank = min ;
+ "__init_running";
+ "running";
+ }
+}
--
2.43.0
^ permalink raw reply related [flat|nested] 10+ messages in thread* [PATCH v3 4/9] rv/ha: fix ha_invariant_passed_ns silent bypass of invariant check
2026-06-07 16:13 [PATCH v3 0/9] rv/tlob: Add task latency over budget RV monitor wen.yang
` (2 preceding siblings ...)
2026-06-07 16:13 ` [PATCH v3 3/9] rv/tlob: add tlob model DOT file wen.yang
@ 2026-06-07 16:13 ` wen.yang
2026-06-07 16:13 ` [PATCH v3 5/9] rv/ha: make da_monitor_reset_hook and EVENT_NONE_LBL overridable wen.yang
` (4 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: wen.yang @ 2026-06-07 16:13 UTC (permalink / raw)
To: Gabriele Monaco
Cc: Steven Rostedt, linux-trace-kernel, linux-kernel, Wen Yang
From: Wen Yang <wen.yang@linux.dev>
The function is documented as "prepare the invariant and return the time
since reset", but on the first call (env_store == U64_MAX) it exits
early without calling ha_set_invariant_ns():
if (ha_monitor_env_invalid(ha_mon, env)) /* env_store == U64_MAX */
return 0; /* ha_set_invariant_ns skipped, env_store stays U64_MAX */
...
ha_set_invariant_ns(ha_mon, env, expire - passed, time_ns);
This leaves env_store == U64_MAX, so ha_check_invariant_ns() always
passes on the first activation regardless of elapsed time:
return READ_ONCE(ha_mon->env_store[env]) >= time_ns; /* U64_MAX >= any */
Fix: establish the guard before converting to the invariant:
if (ha_monitor_env_invalid(ha_mon, env))
ha_reset_clk_ns(ha_mon, env, time_ns); /* guard: env_store = time_ns */
passed = ha_get_env(ha_mon, env, time_ns);
ha_set_invariant_ns(ha_mon, env, expire - passed, time_ns);
/* invariant: env_store = time_ns + expire */
Apply the same fix to ha_invariant_passed_jiffy().
Signed-off-by: Wen Yang <wen.yang@linux.dev>
---
include/rv/ha_monitor.h | 17 +++++++++++++----
1 file changed, 13 insertions(+), 4 deletions(-)
diff --git a/include/rv/ha_monitor.h b/include/rv/ha_monitor.h
index 28d3c74cabfc..e5860900a337 100644
--- a/include/rv/ha_monitor.h
+++ b/include/rv/ha_monitor.h
@@ -365,16 +365,22 @@ static inline bool ha_check_invariant_ns(struct ha_monitor *ha_mon,
}
/*
* ha_invariant_passed_ns - prepare the invariant and return the time since reset
+ *
+ * If the env has not been initialised yet (first entry into a state with an
+ * invariant), anchor the guard clock at the current time so that the full
+ * budget is available from this point. This preserves the documented
+ * guard→invariant ordering: ha_set_invariant_ns() is always preceded by a
+ * valid guard representation in env_store.
*/
static inline u64 ha_invariant_passed_ns(struct ha_monitor *ha_mon, enum envs env,
u64 expire, u64 time_ns)
{
- u64 passed = 0;
+ u64 passed;
if (env < 0 || env >= ENV_MAX_STORED)
return 0;
if (ha_monitor_env_invalid(ha_mon, env))
- return 0;
+ ha_reset_clk_ns(ha_mon, env, time_ns);
passed = ha_get_env(ha_mon, env, time_ns);
ha_set_invariant_ns(ha_mon, env, expire - passed, time_ns);
return passed;
@@ -404,16 +410,19 @@ static inline bool ha_check_invariant_jiffy(struct ha_monitor *ha_mon,
}
/*
* ha_invariant_passed_jiffy - prepare the invariant and return the time since reset
+ *
+ * Same first-use semantics as ha_invariant_passed_ns(): anchor the guard clock
+ * now if the env has not been initialised.
*/
static inline u64 ha_invariant_passed_jiffy(struct ha_monitor *ha_mon, enum envs env,
u64 expire, u64 time_ns)
{
- u64 passed = 0;
+ u64 passed;
if (env < 0 || env >= ENV_MAX_STORED)
return 0;
if (ha_monitor_env_invalid(ha_mon, env))
- return 0;
+ ha_reset_clk_jiffy(ha_mon, env);
passed = ha_get_env(ha_mon, env, time_ns);
ha_set_invariant_jiffy(ha_mon, env, expire - passed);
return passed;
--
2.43.0
^ permalink raw reply related [flat|nested] 10+ messages in thread* [PATCH v3 5/9] rv/ha: make da_monitor_reset_hook and EVENT_NONE_LBL overridable
2026-06-07 16:13 [PATCH v3 0/9] rv/tlob: Add task latency over budget RV monitor wen.yang
` (3 preceding siblings ...)
2026-06-07 16:13 ` [PATCH v3 4/9] rv/ha: fix ha_invariant_passed_ns silent bypass of invariant check wen.yang
@ 2026-06-07 16:13 ` wen.yang
2026-06-07 16:13 ` [PATCH v3 6/9] rv/tlob: add tlob hybrid automaton monitor wen.yang
` (3 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: wen.yang @ 2026-06-07 16:13 UTC (permalink / raw)
To: Gabriele Monaco
Cc: Steven Rostedt, linux-trace-kernel, linux-kernel, Wen Yang
From: Wen Yang <wen.yang@linux.dev>
Wrap the two definitions with #ifndef guards so that HA-based monitors
can substitute their own implementations before including this header:
/* in monitor.c, before #include <rv/ha_monitor.h> */
#define da_monitor_reset_hook my_monitor_reset_env
#define EVENT_NONE_LBL "idle"
No behaviour change for monitors that do not override either macro.
Signed-off-by: Wen Yang <wen.yang@linux.dev>
---
include/rv/ha_monitor.h | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/include/rv/ha_monitor.h b/include/rv/ha_monitor.h
index e5860900a337..610da54c111f 100644
--- a/include/rv/ha_monitor.h
+++ b/include/rv/ha_monitor.h
@@ -36,7 +36,10 @@ static bool ha_monitor_handle_constraint(struct da_monitor *da_mon,
da_id_type id);
#define da_monitor_event_hook ha_monitor_handle_constraint
#define da_monitor_init_hook ha_monitor_init_env
+/* Allow monitors to override da_monitor_reset_hook before including this header. */
+#ifndef da_monitor_reset_hook
#define da_monitor_reset_hook ha_monitor_reset_env
+#endif
#define da_monitor_sync_hook() synchronize_rcu()
#if !defined(HA_SKIP_AUTO_CLEANUP) && RV_MON_TYPE == RV_MON_PER_TASK
@@ -75,7 +78,9 @@ _Static_assert(offsetof(struct ha_monitor, da_mon) == 0,
#define ENV_INVALID_VALUE U64_MAX
/* Error with no event occurs only on timeouts */
#define EVENT_NONE EVENT_MAX
+#ifndef EVENT_NONE_LBL
#define EVENT_NONE_LBL "none"
+#endif
#define ENV_BUFFER_SIZE 64
#ifdef CONFIG_RV_REACTORS
--
2.43.0
^ permalink raw reply related [flat|nested] 10+ messages in thread* [PATCH v3 6/9] rv/tlob: add tlob hybrid automaton monitor
2026-06-07 16:13 [PATCH v3 0/9] rv/tlob: Add task latency over budget RV monitor wen.yang
` (4 preceding siblings ...)
2026-06-07 16:13 ` [PATCH v3 5/9] rv/ha: make da_monitor_reset_hook and EVENT_NONE_LBL overridable wen.yang
@ 2026-06-07 16:13 ` wen.yang
2026-06-07 16:13 ` [PATCH v3 7/9] rv/tlob: add KUnit tests for the tlob monitor wen.yang
` (2 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: wen.yang @ 2026-06-07 16:13 UTC (permalink / raw)
To: Gabriele Monaco
Cc: Steven Rostedt, linux-trace-kernel, linux-kernel, Wen Yang
From: Wen Yang <wen.yang@linux.dev>
Add tlob (task latency over budget), a per-task hybrid automaton RV
monitor that tracks elapsed wall-clock time across a user-delimited
code section and emits error_env_tlob when the elapsed time exceeds a
configurable budget.
The monitor uses RV_MON_PER_OBJ with three states (running, waiting,
sleeping) driven by sched_switch and sched_wakeup tracepoints, and a
single clock invariant clk_elapsed < budget enforced by an hrtimer
(HRTIMER_MODE_REL_HARD). On violation, detail_env_tlob provides a
per-state time breakdown (running_ns, waiting_ns, sleeping_ns).
Per-task state is managed via DA_ALLOC_POOL to avoid allocation on the
scheduler tracepoint path. Uprobe pairs are registered through the
tracefs monitor file as "p PATH:OFFSET_START OFFSET_STOP threshold=NS".
Also adds ha_cancel_timer_sync() to ha_monitor.h, a blocking cancel
variant needed by tlob's stop_task path to ensure the hrtimer callback
has completed before the per-task monitor state is freed.
Suggested-by: Gabriele Monaco <gmonaco@redhat.com>
Signed-off-by: Wen Yang <wen.yang@linux.dev>
---
Documentation/trace/rv/index.rst | 1 +
Documentation/trace/rv/monitor_tlob.rst | 177 ++++
kernel/trace/rv/Kconfig | 1 +
kernel/trace/rv/Makefile | 1 +
kernel/trace/rv/monitors/tlob/Kconfig | 12 +
kernel/trace/rv/monitors/tlob/tlob.c | 968 +++++++++++++++++++++
kernel/trace/rv/monitors/tlob/tlob.h | 148 ++++
kernel/trace/rv/monitors/tlob/tlob_trace.h | 49 ++
kernel/trace/rv/rv_trace.h | 1 +
9 files changed, 1358 insertions(+)
create mode 100644 Documentation/trace/rv/monitor_tlob.rst
create mode 100644 kernel/trace/rv/monitors/tlob/Kconfig
create mode 100644 kernel/trace/rv/monitors/tlob/tlob.c
create mode 100644 kernel/trace/rv/monitors/tlob/tlob.h
create mode 100644 kernel/trace/rv/monitors/tlob/tlob_trace.h
diff --git a/Documentation/trace/rv/index.rst b/Documentation/trace/rv/index.rst
index 29769f06bb0f..1501545b5f08 100644
--- a/Documentation/trace/rv/index.rst
+++ b/Documentation/trace/rv/index.rst
@@ -16,5 +16,6 @@ Runtime Verification
monitor_wwnr.rst
monitor_sched.rst
monitor_rtapp.rst
+ monitor_tlob.rst
monitor_stall.rst
monitor_deadline.rst
diff --git a/Documentation/trace/rv/monitor_tlob.rst b/Documentation/trace/rv/monitor_tlob.rst
new file mode 100644
index 000000000000..c651272eab89
--- /dev/null
+++ b/Documentation/trace/rv/monitor_tlob.rst
@@ -0,0 +1,177 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Monitor tlob
+============
+
+- Name: tlob - task latency over budget
+- Type: per-object hybrid automaton (RV_MON_PER_OBJ)
+- Author: Wen Yang <wen.yang@linux.dev>
+
+Description
+-----------
+
+The tlob monitor tracks per-task elapsed wall-clock time (CLOCK_MONOTONIC,
+spanning running, waiting, and sleeping states) and reports a violation when
+the monitored task exceeds a configurable per-invocation budget threshold.
+
+The monitor implements a three-state hybrid automaton with a single clock
+environment variable ``clk_elapsed``. The clock invariant
+``clk_elapsed < BUDGET_NS()`` is active in all three states; when it is
+violated the HA timer fires and the framework emits ``error_env_tlob``
+then calls ``da_monitor_reset()`` automatically::
+
+ | (initial, via task_start)
+ v
+ +--------------+
+ | running | <-----------+
+ +--------------+ |
+ | | |
+ sleep preempt switch_in
+ | | |
+ v v |
+ +---------+ +---------+ |
+ | sleeping| | waiting | -------+
+ +---------+ +---------+
+ | ^
+ +---wakeup---+
+
+ Key transitions:
+ running --(sleep)------> sleeping (task blocks waiting for a resource)
+ running --(preempt)----> waiting (task preempted, back in runqueue)
+ sleeping --(wakeup)-----> waiting (resource available, enters runqueue)
+ waiting --(switch_in)--> running (scheduler picks task, back on CPU)
+
+ ``tlob_start_task()`` calls ``da_handle_start_run_event(task->pid, ws, start_tlob)``.
+ The ``start_tlob`` self-loop on the ``running`` state triggers
+ ``ha_setup_invariants()``, which resets ``clk_elapsed`` and arms the budget
+ timer automatically. ``tlob_stop_task()`` cancels the HA timer synchronously
+ via ``ha_cancel_timer_sync()``, then calls ``da_monitor_reset()``.
+
+The non-running condition (monitor not yet started or reset after a
+stop/violation) is handled implicitly by the RV framework
+(``da_mon->monitoring == 0``) — it is not an explicit DA state.
+
+Per-task state lives in ``struct tlob_task_state`` which is stored as
+``monitor_target`` in the framework's ``da_monitor_storage``, indexed by
+pid. The per-invocation ``threshold_ns`` is read via
+``ha_get_target(ha_mon)->threshold_ns`` inside the HA constraint functions,
+following the same pattern as the ``nomiss`` monitor.
+
+Usage
+-----
+
+tracefs interface (uprobe-based external monitoring)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The ``monitor`` tracefs file instruments an unmodified binary via uprobes.
+The format follows the ftrace ``uprobe_events`` convention (``PATH:OFFSET``
+for the probe location, ``key=value`` for configuration parameters)::
+
+ p PATH:OFFSET_START OFFSET_STOP threshold=NS
+
+The uprobe at ``OFFSET_START`` fires ``tlob_start_task()``; the uprobe at
+``OFFSET_STOP`` fires ``tlob_stop_task()``. Both offsets are ELF file
+offsets of entry points in ``PATH``. ``PATH`` may contain ``:``; the last
+``:`` in the ``PATH:OFFSET_START`` token is the separator.
+
+To remove a binding, write ``-PATH:OFFSET_START``.  Full example session::
+
+ echo 1 > /sys/kernel/tracing/rv/monitors/tlob/enable
+
+ echo "p /usr/bin/myapp:0x12a0 0x12f0 threshold=5000000" \
+ > /sys/kernel/tracing/rv/monitors/tlob/monitor
+
+ # Remove a binding
+ echo "-/usr/bin/myapp:0x12a0" > /sys/kernel/tracing/rv/monitors/tlob/monitor
+
+ # List registered bindings
+ cat /sys/kernel/tracing/rv/monitors/tlob/monitor
+
+ # Read violations from the trace buffer
+ cat /sys/kernel/tracing/trace
+
+Violation tracepoints
+~~~~~~~~~~~~~~~~~~~~~
+
+Two tracepoints are emitted together on a budget violation:
+
+``error_env_tlob``
+ Standard HA clock-invariant tracepoint (emitted by the RV framework).
+ Fields: ``id`` (task pid), ``state``, ``event`` (``"budget_exceeded"``),
+ ``env`` (``"clk_elapsed"``).
+
+``detail_env_tlob``
+ Tlob-specific breakdown of elapsed time per DA state.
+ Fields: ``id`` (task pid), ``threshold_ns``, ``running_ns``,
+ ``waiting_ns``, ``sleeping_ns``.
+
+ Use ``detail_env_tlob`` to diagnose *which phase* consumed the budget:
+ high ``sleeping_ns`` indicates I/O latency; high ``waiting_ns`` indicates
+ scheduler pressure; high ``running_ns`` indicates a compute overrun.
+
+Example: correlate the two tracepoints to see the breakdown::
+
+ trace-cmd record -e error_env_tlob -e detail_env_tlob &
+ # ... run workload ...
+ trace-cmd report
+
+tracefs files
+~~~~~~~~~~~~~
+
+The following files are specific to tlob under
+``/sys/kernel/tracing/rv/monitors/tlob/``:
+
+``monitor`` (rw)
+ Write ``p PATH:OFFSET_START OFFSET_STOP threshold=NS``
+ to bind two entry uprobes. Write ``-PATH:OFFSET_START`` to remove a
+ binding. Read to list registered bindings in the same format.
+ See the `tracefs interface (uprobe-based external monitoring)`_ section above.
+
+Kernel API
+----------
+
+``tlob_start_task`` and ``tlob_stop_task`` are the implementation-level
+functions called by the start and stop uprobe handlers; the interface is
+driven from userspace.
+
+.. kernel-doc:: kernel/trace/rv/monitors/tlob/tlob.c
+ :functions: tlob_start_task tlob_stop_task
+
+``tlob_start_task(task, threshold_ns)``
+ Begin monitoring *task* with a total latency budget of *threshold_ns*
+ nanoseconds. Allocates per-task state, sets initial DA state to
+ ``running``, resets ``clk_elapsed``, and arms the HA budget timer.
+ Returns 0, -ENODEV (monitor disabled), -ERANGE (threshold out of range),
+ -EALREADY (already monitoring), -ENOSPC (at capacity), or -ENOMEM.
+
+``tlob_stop_task(task)``
+ Stop monitoring *task*. Synchronously cancels the HA timer via
+ ``ha_cancel_timer_sync()``, checks ``da_monitoring()`` to determine outcome.
+ Returns 0 (clean stop, within budget), -EOVERFLOW (budget was exceeded),
+ -ESRCH (not monitored), or -EAGAIN (concurrent stop racing).
+
+Design notes
+------------
+
+Limitations:
+
+- The initial DA state is always ``running``, set by feeding the synthetic
+ event ``start_tlob`` to ``da_handle_start_run_event()``. Monitoring a non-current
+ task that is already in waiting or sleeping state at call time misclassifies
+ the first interval as ``running_ns``.
+- ``TASK_STOPPED`` and ``TASK_TRACED`` carry ``prev_state != 0`` and are
+ therefore counted as ``sleeping_ns``, indistinguishable from
+ I/O-blocked time.
+- ``sched_wakeup_new`` is not hooked. In practice this is not an issue
+ because ``tlob_start_task`` is always called from a running context.
+
+Specification
+-------------
+
+Graphviz DOT file in tools/verification/models/tlob.dot.
+
+KUnit tests under ``kernel/trace/rv/monitors/tlob/tlob_kunit.c``
+(CONFIG_TLOB_KUNIT_TEST).
+
+User-space integration tests under ``tools/testing/selftests/verification/``
+(requires CONFIG_RV_MON_TLOB=y and root).
diff --git a/kernel/trace/rv/Kconfig b/kernel/trace/rv/Kconfig
index e2e0033a00b9..ed2de31d0312 100644
--- a/kernel/trace/rv/Kconfig
+++ b/kernel/trace/rv/Kconfig
@@ -85,6 +85,7 @@ source "kernel/trace/rv/monitors/sleep/Kconfig"
source "kernel/trace/rv/monitors/stall/Kconfig"
source "kernel/trace/rv/monitors/deadline/Kconfig"
source "kernel/trace/rv/monitors/nomiss/Kconfig"
+source "kernel/trace/rv/monitors/tlob/Kconfig"
# Add new deadline monitors here
# Add new monitors here
diff --git a/kernel/trace/rv/Makefile b/kernel/trace/rv/Makefile
index f139b904bea3..ae59e97f8682 100644
--- a/kernel/trace/rv/Makefile
+++ b/kernel/trace/rv/Makefile
@@ -20,6 +20,7 @@ obj-$(CONFIG_RV_MON_OPID) += monitors/opid/opid.o
obj-$(CONFIG_RV_MON_STALL) += monitors/stall/stall.o
obj-$(CONFIG_RV_MON_DEADLINE) += monitors/deadline/deadline.o
obj-$(CONFIG_RV_MON_NOMISS) += monitors/nomiss/nomiss.o
+obj-$(CONFIG_RV_MON_TLOB) += monitors/tlob/tlob.o
# Add new monitors here
obj-$(CONFIG_RV_UPROBE) += rv_uprobe.o
obj-$(CONFIG_RV_REACTORS) += rv_reactors.o
diff --git a/kernel/trace/rv/monitors/tlob/Kconfig b/kernel/trace/rv/monitors/tlob/Kconfig
new file mode 100644
index 000000000000..b29a375de228
--- /dev/null
+++ b/kernel/trace/rv/monitors/tlob/Kconfig
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+config RV_MON_TLOB
+ depends on RV && UPROBES && HIGH_RES_TIMERS
+ select HA_MON_EVENTS_ID
+ select RV_UPROBE
+ bool "tlob monitor"
+ help
+ Enable the tlob (task latency over budget) hybrid-automaton RV
+ monitor. tlob tracks per-task elapsed wall-clock time across a
+ user-delimited code section and emits error_env_tlob when the
+ elapsed time exceeds a configurable per-invocation budget.
diff --git a/kernel/trace/rv/monitors/tlob/tlob.c b/kernel/trace/rv/monitors/tlob/tlob.c
new file mode 100644
index 000000000000..d8e0c4794720
--- /dev/null
+++ b/kernel/trace/rv/monitors/tlob/tlob.c
@@ -0,0 +1,968 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * tlob: task latency over budget monitor
+ *
+ * Track the elapsed wall-clock time of a marked code path and detect when
+ * a monitored task exceeds its per-task latency budget. CLOCK_MONOTONIC
+ * is used so both on-CPU and off-CPU time count toward the budget.
+ *
+ * On a budget violation, two tracepoints are emitted from the hrtimer
+ * callback: error_env_tlob signals the violation, and detail_env_tlob
+ * provides a per-state time breakdown (running_ns, waiting_ns, sleeping_ns)
+ * that pinpoints whether the overrun occurred in running, waiting, or sleeping state.
+ *
+ * The monitor uses RV_MON_PER_OBJ: per-task state (struct tlob_task_state)
+ * is stored as monitor_target in the framework's hash table.
+ *
+ * One HA clock invariant is enforced:
+ * clk_elapsed < BUDGET_NS() (active in all states)
+ *
+ * tlob_start_task() uses da_handle_start_run_event(start_tlob) to initialise
+ * the monitor: the DA framework sets the initial state and processes the start
+ * event, which resets clk_elapsed and arms the budget hrtimer via
+ * ha_setup_invariants(). The HA timer is cancelled synchronously by
+ * ha_cancel_timer_sync() in tlob_stop_task().
+ *
+ * Copyright (C) 2026 Wen Yang <wen.yang@linux.dev>
+ */
+#include <linux/hrtimer.h>
+#include <linux/kernel.h>
+#include <linux/ktime.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/namei.h>
+#include <linux/rv.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/tracefs.h>
+#include <kunit/visibility.h>
+#include <rv/instrumentation.h>
+#include <rv/rv_uprobe.h>
+#include "../../rv.h"
+
+#define MODULE_NAME "tlob"
+
+#include <trace/events/sched.h>
+#include <rv_trace.h>
+
+/*
+ * Per-task latency monitoring state. One instance per monitoring window.
+ * Stored as monitor_target in da_monitor_storage; freed via call_rcu.
+ */
+struct tlob_task_state {
+ struct task_struct *task; /* monitored task; reference held via get_task_struct */
+ u64 threshold_ns; /* budget in nanoseconds */
+
+ /* 1 = cleanup claimed; ha_setup_invariants won't restart the timer. */
+ atomic_t stopping;
+
+ /* Serialises the ns accumulators and last_ts; held briefly (hardirq-safe). */
+ raw_spinlock_t entry_lock;
+ u64 running_ns; /* time in running state */
+ u64 waiting_ns; /* time in waiting state */
+ u64 sleeping_ns; /* time in sleeping state */
+ ktime_t last_ts; /* start of the interval not yet added to a *_ns counter */
+
+ struct rcu_head rcu; /* for call_rcu() teardown */
+};
+
+#define RV_MON_TYPE RV_MON_PER_OBJ
+#define HA_TIMER_TYPE HA_TIMER_HRTIMER
+#define DA_MON_ALLOCATION_STRATEGY DA_ALLOC_POOL
+
+/* Type for da_monitor_storage.target; must be defined before the includes. */
+typedef struct tlob_task_state *monitor_target;
+
+/* Forward-declared so the da_monitor_reset_hook override precedes ha_monitor.h's #ifndef. */
+static inline void tlob_reset_notify(struct da_monitor *da_mon);
+#define da_monitor_reset_hook tlob_reset_notify
+
+/* Override EVENT_NONE_LBL (default "none") so a timer-fired violation shows "budget_exceeded". */
+#define EVENT_NONE_LBL "budget_exceeded"
+
+#include "tlob.h"
+
+/*
+ * DA_MON_POOL_SIZE must be defined HERE: after tlob.h (which defines
+ * TLOB_MAX_MONITORED) and before #include <rv/ha_monitor.h> (which
+ * transitively includes da_monitor.h and expands __da_monitor_init_pool
+ * using this macro). Placing the define before tlob.h or after
+ * ha_monitor.h causes a build error in either case.
+ */
+#define DA_MON_POOL_SIZE TLOB_MAX_MONITORED
+
+/*
+ * Forward-declare tlob_extra_cleanup so the #define below is valid when
+ * da_monitor.h (included via ha_monitor.h) expands da_extra_cleanup inside
+ * da_monitor_destroy(). The full definition follows after ha_monitor.h.
+ */
+static inline void tlob_extra_cleanup(struct da_monitor *da_mon);
+#define da_extra_cleanup tlob_extra_cleanup
+
+#include <rv/ha_monitor.h>
+
+/*
+ * Called from da_monitor_reset() on both normal stop and hrtimer expiry.
+ * Runs ha_monitor_reset_env() first; if stopping==0, emits detail_env_tlob.
+ */
+static inline void tlob_reset_notify(struct da_monitor *da_mon)
+{
+ struct ha_monitor *ha_mon = to_ha_monitor(da_mon);
+ struct tlob_task_state *ws;
+
+ ha_monitor_reset_env(da_mon); /* keep the default reset behaviour this hook replaces */
+
+ ws = ha_get_target(ha_mon);
+ if (!ws)
+ return;
+
+ /*
+ * Emit per-state breakdown on budget violation only.
+ * stopping==0: timer callback owns this path (genuine overrun).
+ * stopping==1: normal stop claimed ownership first; skip.
+ */
+ if (!atomic_read(&ws->stopping)) {
+ unsigned int curr_state = READ_ONCE(da_mon->curr_state);
+ u64 running_ns, waiting_ns, sleeping_ns, partial_ns;
+ unsigned long flags;
+
+ /*
+ * Snapshot accumulators under entry_lock; partial_ns is the time since
+ * last_ts, credited to curr_state only (not yet folded into ws).
+ */
+ raw_spin_lock_irqsave(&ws->entry_lock, flags);
+ partial_ns = ktime_get_ns() - ktime_to_ns(ws->last_ts);
+ running_ns = ws->running_ns +
+ (curr_state == running_tlob ? partial_ns : 0);
+ waiting_ns = ws->waiting_ns +
+ (curr_state == waiting_tlob ? partial_ns : 0);
+ sleeping_ns = ws->sleeping_ns +
+ (curr_state == sleeping_tlob ? partial_ns : 0);
+ raw_spin_unlock_irqrestore(&ws->entry_lock, flags);
+
+ trace_detail_env_tlob(da_get_id(da_mon), ws->threshold_ns,
+ running_ns, waiting_ns, sleeping_ns);
+ }
+}
+
+#define BUDGET_NS(ha_mon) (ha_get_target(ha_mon)->threshold_ns) /* target is dereferenced unchecked */
+
+/* HA constraint functions (called by ha_monitor_handle_constraint); ha_get_env reads one env value. */
+
+static u64 ha_get_env(struct ha_monitor *ha_mon, enum envs_tlob env, u64 time_ns)
+{
+ if (env == clk_elapsed_tlob)
+ return ha_get_clk_ns(ha_mon, env, time_ns);
+ return ENV_INVALID_VALUE; /* any env other than clk_elapsed */
+}
+
+/*
+ * ha_verify_invariants - clk_elapsed < BUDGET_NS must hold in all states.
+ *
+ * The invariant is uniform across running/waiting/sleeping, so curr_state,
+ * event and next_state are ignored and the check is made unconditionally.
+ */
+static inline bool ha_verify_invariants(struct ha_monitor *ha_mon,
+ enum states curr_state, enum events event,
+ enum states next_state, u64 time_ns)
+{
+ return ha_check_invariant_ns(ha_mon, clk_elapsed_tlob, time_ns);
+}
+
+/*
+ * Convert invariant (deadline) to guard (reset anchor) on state transitions.
+ *
+ * Same conversion for every departing state; only self-loops are skipped.
+ */
+static inline void ha_convert_inv_guard(struct ha_monitor *ha_mon,
+ enum states curr_state, enum events event,
+ enum states next_state, u64 time_ns)
+{
+ if (curr_state != next_state)
+ ha_inv_to_guard(ha_mon, clk_elapsed_tlob, BUDGET_NS(ha_mon), time_ns);
+}
+
+/* tlob defines no per-event guard conditions (invariants suffice); always returns true. */
+static inline bool ha_verify_guards(struct ha_monitor *ha_mon,
+ enum states curr_state, enum events event,
+ enum states next_state, u64 time_ns)
+{
+ return true;
+}
+
+/*
+ * Arm or cancel the HA budget timer on state transitions.
+ *
+ * The timer must run in every monitored state (running/waiting/sleeping),
+ * so arm it whenever next_state is any of the three. On a self-loop caused
+ * by a non-start event the timer is already running; skip the redundant
+ * restart. On a true state change the old timer is implicitly superseded by
+ * the new ha_start_timer_ns() call.
+ *
+ * Guard on stopping: sched_switch events can arrive after ha_cancel_timer_sync,
+ * restarting the timer and triggering an ODEBUG "activate active" splat. The
+ * _acquire pairs with cmpxchg_release in tlob_stop_task/tlob_extra_cleanup.
+ */
+static inline void ha_setup_invariants(struct ha_monitor *ha_mon,
+ enum states curr_state, enum events event,
+ enum states next_state, u64 time_ns)
+{
+ if (next_state == curr_state && event != start_tlob)
+ return;
+
+ if (next_state < state_max_tlob) {
+ if (!atomic_read_acquire(&ha_get_target(ha_mon)->stopping))
+ ha_start_timer_ns(ha_mon, clk_elapsed_tlob, BUDGET_NS(ha_mon), time_ns);
+ } else {
+ ha_cancel_timer(ha_mon);
+ }
+}
+
+static bool ha_verify_constraint(struct ha_monitor *ha_mon,
+ enum states curr_state, enum events event,
+ enum states next_state, u64 time_ns)
+{
+ if (!ha_verify_invariants(ha_mon, curr_state, event, next_state, time_ns))
+ return false; /* clk_elapsed invariant does not hold */
+
+ ha_convert_inv_guard(ha_mon, curr_state, event, next_state, time_ns);
+
+ if (!ha_verify_guards(ha_mon, curr_state, event, next_state, time_ns))
+ return false; /* not taken in practice: ha_verify_guards() always returns true */
+
+ ha_setup_invariants(ha_mon, curr_state, event, next_state, time_ns);
+
+ return true; /* invariant held; timer handling for next_state done */
+}
+
+static struct kmem_cache *tlob_state_cache; /* slab cache for struct tlob_task_state */
+
+/* Uprobe binding list; protected by tlob_uprobe_mutex. */
+static LIST_HEAD(tlob_uprobe_list);
+static DEFINE_MUTEX(tlob_uprobe_mutex);
+
+/* Serialises duplicate-check + da_handle_start_run_event() in tlob_start_task() (one lock, all pids). */
+static DEFINE_MUTEX(tlob_start_mutex);
+
+
+/* Per-uprobe-binding state: a start + stop probe pair for one binary region. */
+struct tlob_uprobe_binding {
+ struct list_head list; /* link in tlob_uprobe_list */
+ u64 threshold_ns; /* budget handed to tlob_start_task() by the start probe */
+ char binpath[TLOB_MAX_PATH]; /* canonical path as returned by d_path() */
+ loff_t offset_start; /* offset of the start probe */
+ loff_t offset_stop; /* offset of the stop probe */
+ struct rv_uprobe *start_probe; /* handler: tlob_uprobe_entry_handler */
+ struct rv_uprobe *stop_probe; /* handler: tlob_uprobe_stop_handler */
+};
+
+/* call_rcu() callback: return the tlob_task_state embedding @head to tlob_state_cache. */
+static void tlob_free_rcu(struct rcu_head *head)
+{
+ struct tlob_task_state *ws =
+ container_of(head, struct tlob_task_state, rcu);
+ kmem_cache_free(tlob_state_cache, ws);
+}
+
+/*
+ * da_extra_cleanup - per-task teardown called by da_monitor_destroy().
+ *
+ * Claims cleanup ownership via CAS; cancels the budget timer; drops the task
+ * reference taken in tlob_start_task(); and schedules the slab free via
+ * call_rcu(). Must run before da_monitor_reset() (i.e. before hash_del_rcu())
+ * so ha_cancel_timer_sync() can safely access the still-registered ha_monitor.
+ */
+static inline void tlob_extra_cleanup(struct da_monitor *da_mon)
+{
+ struct ha_monitor *ha_mon = to_ha_monitor(da_mon);
+ struct tlob_task_state *ws = ha_get_target(ha_mon);
+
+ if (!ws)
+ return;
+
+ if (atomic_cmpxchg_release(&ws->stopping, 0, 1) != 0)
+ return; /* tlob_stop_task() already owns cleanup of this entry */
+
+ ha_cancel_timer_sync(ha_mon);
+ put_task_struct(ws->task);
+ call_rcu(&ws->rcu, tlob_free_rcu);
+}
+
+/*
+ * __tlob_acc - accumulate elapsed ns into one per-state counter.
+ *
+ * Looks up the task's tlob_task_state under RCU, adds the interval
+ * [ws->last_ts, now] to the u64 field at @offset (an offsetof() of one of the
+ * *_ns members) and updates last_ts. Returns true if the task is monitored.
+ *
+ * entry_lock is a raw spinlock so this is safe from hardirq context.
+ */
+static inline bool __tlob_acc(struct task_struct *task, ktime_t now,
+ size_t offset)
+{
+ struct tlob_task_state *ws;
+ unsigned long flags;
+
+ scoped_guard(rcu) {
+ ws = da_get_target_by_id(task->pid);
+ if (!ws)
+ return false;
+ raw_spin_lock_irqsave(&ws->entry_lock, flags);
+ *(u64 *)((char *)ws + offset) += ktime_to_ns(ktime_sub(now, ws->last_ts));
+ ws->last_ts = now;
+ raw_spin_unlock_irqrestore(&ws->entry_lock, flags);
+ }
+ return true;
+}
+
+/* Add the time since last_ts to @task's running_ns; returns true if @task is monitored. */
+static inline bool tlob_acc_running(struct task_struct *task, ktime_t now)
+{
+ return __tlob_acc(task, now, offsetof(struct tlob_task_state, running_ns));
+}
+
+/* Add the time since last_ts to @task's waiting_ns; returns true if @task is monitored. */
+static inline bool tlob_acc_waiting(struct task_struct *task, ktime_t now)
+{
+ return __tlob_acc(task, now, offsetof(struct tlob_task_state, waiting_ns));
+}
+
+/*
+ * handle_sched_switch - advance the DA on every context switch.
+ *
+ * Generates three DA events:
+ * prev, prev_state != 0 -> sleep_tlob (running -> sleeping)
+ * prev, prev_state == 0 -> preempt_tlob (running -> waiting)
+ * next -> switch_in_tlob (waiting -> running)
+ *
+ * A single ktime_get() at handler entry is shared by both acc calls so that
+ * prev's running_ns and next's waiting_ns share the same context-switch
+ * timestamp; neither absorbs handler overhead into its accumulator.
+ *
+ * No waiting->sleeping edge exists: a task can only block voluntarily
+ * (call schedule()) while it is executing on CPU, which corresponds to
+ * the running DA state. A task in the waiting state is TASK_RUNNING in
+ * kernel terms (on the runqueue) and cannot block itself.
+ *
+ * da_handle_event() is only called for tasks with a monitor entry: the
+ * tlob_acc_*() hash lookup returns false for unmonitored tasks.
+ */
+static void handle_sched_switch(void *data, bool preempt_unused,
+ struct task_struct *prev,
+ struct task_struct *next,
+ unsigned int prev_state)
+{
+ ktime_t now = ktime_get();
+ bool prev_preempted = (prev_state == 0);
+
+ /*
+ * No global "anything monitored?" counter is consulted here: the hash
+ * lookup in tlob_acc_*() (da_get_target_by_id()) returns NULL for
+ * unmonitored tasks, so da_handle_event() is skipped for them.
+ * da_handle_event() additionally checks rv_monitoring_on() and
+ * da_monitoring(da_mon) via da_monitor_handling_event(). Omitting a
+ * counter avoids touching a shared cacheline on every context switch.
+ */
+ if (tlob_acc_running(prev, now))
+ da_handle_event(prev->pid, NULL,
+ prev_preempted ? preempt_tlob : sleep_tlob);
+ if (tlob_acc_waiting(next, now))
+ da_handle_event(next->pid, NULL, switch_in_tlob);
+}
+
+/* Add the time since last_ts to @task's sleeping_ns; returns true if @task is monitored. */
+static inline bool tlob_acc_sleeping(struct task_struct *task, ktime_t now)
+{
+ return __tlob_acc(task, now, offsetof(struct tlob_task_state, sleeping_ns));
+}
+
+/*
+ * handle_sched_wakeup - sleeping -> waiting transition.
+ *
+ * try_to_wake_up() skips TASK_RUNNING tasks, so this never fires for a
+ * task already in running or waiting state.
+ */
+static void handle_sched_wakeup(void *data, struct task_struct *p)
+{
+ ktime_t now = ktime_get();
+
+ /* tlob_acc_sleeping() is false for unmonitored tasks, so they skip da_handle_event(). */
+ if (tlob_acc_sleeping(p, now))
+ da_handle_event(p->pid, NULL, wakeup_tlob);
+}
+
+/*
+ * handle_sched_process_exit - clean up if a task exits while still monitored.
+ *
+ * Called in do_exit() context; the task still has a valid pid here.
+ * tlob_stop_task() returns -ESRCH if the task is not monitored, which is fine.
+ */
+static void handle_sched_process_exit(void *data, struct task_struct *p,
+ bool group_dead)
+{
+ tlob_stop_task(p); /* return value deliberately unused */
+}
+
+
+
+/**
+ * tlob_start_task - begin monitoring @task with budget @threshold_ns ns.
+ * @task: Task to monitor; may be current or another task.
+ * @threshold_ns: Latency budget in nanoseconds (wall-clock; running + waiting + sleeping).
+ * Must be in [1000, TLOB_MAX_THRESHOLD_NS].
+ *
+ * Returns 0, -ENODEV (monitor disabled), -ERANGE, -EALREADY, -ENOMEM, or -ENOSPC (pool full).
+ */
+int tlob_start_task(struct task_struct *task, u64 threshold_ns)
+{
+ struct tlob_task_state *ws;
+
+ if (!da_monitor_enabled())
+ return -ENODEV;
+
+ if (threshold_ns < 1000 || threshold_ns > TLOB_MAX_THRESHOLD_NS)
+ return -ERANGE;
+
+ /* Serialise duplicate-check + pool-slot claim; held until return (scope guard). */
+ guard(mutex)(&tlob_start_mutex);
+
+ if (da_get_target_by_id(task->pid)) /* NOTE(review): no rcu_read_lock() here, unlike tlob_stop_task() - confirm */
+ return -EALREADY;
+
+ ws = kmem_cache_zalloc(tlob_state_cache, GFP_KERNEL);
+ if (!ws)
+ return -ENOMEM;
+
+ ws->task = task;
+ get_task_struct(task);
+ ws->threshold_ns = threshold_ns;
+ ws->last_ts = ktime_get();
+ raw_spin_lock_init(&ws->entry_lock);
+
+ /*
+ * da_handle_start_run_event() claims a pool slot via da_prepare_storage(),
+ * initialises the monitor, and delivers start_tlob in one step: the
+ * generated ha_setup_invariants() resets clk_elapsed and arms the timer.
+ * A zero return means no slot was available; undo and report -ENOSPC.
+ */
+ if (!da_handle_start_run_event(task->pid, ws, start_tlob)) {
+ put_task_struct(task);
+ kmem_cache_free(tlob_state_cache, ws);
+ return -ENOSPC;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(tlob_start_task);
+
+/**
+ * tlob_stop_task - stop monitoring @task.
+ * @task: Task to stop.
+ *
+ * CAS on ws->stopping (0->1) under RCU claims cleanup ownership;
+ * the winner cancels the timer synchronously and frees all resources.
+ *
+ * Returns 0, -EOVERFLOW (budget exceeded), -ESRCH (not monitored),
+ * or -EAGAIN (another stop or tlob_extra_cleanup() already claimed cleanup).
+ */
+int tlob_stop_task(struct task_struct *task)
+{
+ struct da_monitor *da_mon;
+ struct ha_monitor *ha_mon;
+ struct tlob_task_state *ws;
+ bool budget_exceeded;
+
+ scoped_guard(rcu) {
+ ws = da_get_target_by_id(task->pid);
+ if (!ws)
+ return -ESRCH;
+
+ da_mon = da_get_monitor(task->pid, NULL);
+ if (unlikely(!da_mon)) {
+ /* ws in hash but da_mon gone; internal inconsistency. */
+ WARN_ON_ONCE(1);
+ return -ESRCH;
+ }
+
+ ha_mon = to_ha_monitor(da_mon);
+
+ /*
+ * CAS (0->1) claims cleanup ownership under RCU (ws guaranteed valid).
+ * _release pairs with atomic_read_acquire in ha_setup_invariants.
+ */
+ if (atomic_cmpxchg_release(&ws->stopping, 0, 1) != 0)
+ return -EAGAIN;
+ }
+
+ /* Wait for in-flight timer callback before reading da_monitoring. */
+ ha_cancel_timer_sync(ha_mon);
+
+ /* Timer fired first -> monitoring already cleared -> budget exceeded; else reset now. */
+ scoped_guard(rcu) {
+ budget_exceeded = !da_monitoring(da_mon);
+ if (!budget_exceeded)
+ da_monitor_reset(da_mon);
+ }
+ da_destroy_storage(task->pid);
+
+ put_task_struct(ws->task); /* pairs with get_task_struct() in tlob_start_task() */
+ call_rcu(&ws->rcu, tlob_free_rcu);
+ return budget_exceeded ? -EOVERFLOW : 0;
+}
+EXPORT_SYMBOL_GPL(tlob_stop_task);
+
+
+static int tlob_uprobe_entry_handler(struct rv_uprobe *p, struct pt_regs *regs,
+ __u64 *data)
+{
+ struct tlob_uprobe_binding *b = p->priv; /* binding passed as priv at attach time */
+
+ tlob_start_task(current, b->threshold_ns); /* errors (e.g. -EALREADY, -ENOSPC) are not propagated */
+ return 0;
+}
+
+/*
+ * Uprobe handler for a binding's stop offset: stop monitoring current.
+ * The result of tlob_stop_task() (including -EOVERFLOW for an exceeded
+ * budget) is not checked; the handler always returns 0.
+ */
+static int tlob_uprobe_stop_handler(struct rv_uprobe *p, struct pt_regs *regs,
+ __u64 *data)
+{
+ tlob_stop_task(current);
+ return 0;
+}
+
+/*
+ * Register start + stop entry uprobes for a binding.
+ * Called with tlob_uprobe_mutex held.
+ *
+ * @threshold_ns: budget stored in the binding; the entry handler passes it
+ *                to tlob_start_task().
+ * @binpath:      path of the binary; must be absolute and name a regular file.
+ * @offset_start: offset at which the start probe is attached.
+ * @offset_stop:  offset at which the stop probe is attached.
+ *
+ * Returns 0 on success; -EINVAL for a relative path or a non-regular file;
+ * -ENOMEM if the binding cannot be allocated; -EEXIST if a binding with the
+ * same dentry and start offset is already listed; otherwise the error from
+ * kern_path(), d_path() or rv_uprobe_attach_path().
+ */
+static int tlob_add_uprobe(u64 threshold_ns, const char *binpath,
+ loff_t offset_start, loff_t offset_stop)
+{
+ struct tlob_uprobe_binding *b, *tmp_b;
+ char pathbuf[TLOB_MAX_PATH];
+ struct path path;
+ char *canon;
+ int ret;
+
+ if (binpath[0] != '/')
+ return -EINVAL;
+
+ b = kzalloc_obj(*b, GFP_KERNEL);
+ if (!b)
+ return -ENOMEM;
+
+ b->threshold_ns = threshold_ns;
+ b->offset_start = offset_start;
+ b->offset_stop = offset_stop;
+
+ /* Resolve the path (following symlinks); released with path_put() below. */
+ ret = kern_path(binpath, LOOKUP_FOLLOW, &path);
+ if (ret)
+ goto err_free;
+
+ if (!d_is_reg(path.dentry)) {
+ ret = -EINVAL;
+ goto err_path;
+ }
+
+ /* Reject duplicate start offset for the same binary. */
+ list_for_each_entry(tmp_b, &tlob_uprobe_list, list) {
+ if (tmp_b->offset_start == offset_start &&
+ tmp_b->start_probe->path.dentry == path.dentry) {
+ ret = -EEXIST;
+ goto err_path;
+ }
+ }
+
+ /* Store the d_path() form of the path; it is what the read handler prints. */
+ canon = d_path(&path, pathbuf, sizeof(pathbuf));
+ if (IS_ERR(canon)) {
+ ret = PTR_ERR(canon);
+ goto err_path;
+ }
+ strscpy(b->binpath, canon, sizeof(b->binpath));
+
+ /* Both probes share b (priv) and path; attach_path refs path itself. */
+ b->start_probe = rv_uprobe_attach_path(&path, offset_start,
+ tlob_uprobe_entry_handler, NULL, b);
+ if (IS_ERR(b->start_probe)) {
+ ret = PTR_ERR(b->start_probe);
+ b->start_probe = NULL;
+ goto err_path;
+ }
+
+ b->stop_probe = rv_uprobe_attach_path(&path, offset_stop,
+ tlob_uprobe_stop_handler, NULL, b);
+ if (IS_ERR(b->stop_probe)) {
+ ret = PTR_ERR(b->stop_probe);
+ b->stop_probe = NULL;
+ goto err_start;
+ }
+
+ /* Success: drop the lookup reference and publish the binding. */
+ path_put(&path);
+ list_add_tail(&b->list, &tlob_uprobe_list);
+ return 0;
+
+ /* Unwind in reverse order of acquisition. */
+err_start:
+ rv_uprobe_detach(b->start_probe);
+err_path:
+ path_put(&path);
+err_free:
+ kfree(b);
+ return ret;
+}
+
+/*
+ * Remove the binding whose start offset equals @offset_start and whose
+ * start probe refers to the dentry that @binpath resolves to.
+ * The caller in this file holds tlob_uprobe_mutex around the call.
+ *
+ * Returns 0 on success, -ENOENT if no binding matches, or the kern_path()
+ * error if @binpath cannot be resolved.
+ */
+static int tlob_remove_uprobe_by_key(loff_t offset_start, const char *binpath)
+{
+	struct tlob_uprobe_binding *cur, *victim = NULL;
+	struct path target;
+	int err;
+
+	err = kern_path(binpath, LOOKUP_FOLLOW, &target);
+	if (err)
+		return err;
+
+	/* Find the first binding matching both the offset and the dentry. */
+	list_for_each_entry(cur, &tlob_uprobe_list, list) {
+		if (cur->offset_start == offset_start &&
+		    cur->start_probe->path.dentry == target.dentry) {
+			victim = cur;
+			break;
+		}
+	}
+
+	err = -ENOENT;
+	if (victim) {
+		list_del(&victim->list);
+		rv_uprobe_detach(victim->start_probe);
+		rv_uprobe_detach(victim->stop_probe);
+		kfree(victim);
+		err = 0;
+	}
+
+	path_put(&target);
+	return err;
+}
+
+/*
+ * Remove every registered binding in three phases:
+ *  1. under tlob_uprobe_mutex, move each binding to a private list and
+ *     unregister both of its probes without waiting;
+ *  2. wait once for all of them with rv_uprobe_sync();
+ *  3. free the probes and the bindings.
+ */
+static void tlob_remove_all_uprobes(void)
+{
+ struct tlob_uprobe_binding *b, *tmp;
+ LIST_HEAD(pending);
+
+ mutex_lock(&tlob_uprobe_mutex);
+ list_for_each_entry_safe(b, tmp, &tlob_uprobe_list, list) {
+ list_move(&b->list, &pending);
+ rv_uprobe_unregister_nosync(b->start_probe);
+ rv_uprobe_unregister_nosync(b->stop_probe);
+ }
+ mutex_unlock(&tlob_uprobe_mutex);
+
+ /* Nothing was registered: skip the global barrier. */
+ if (list_empty(&pending))
+ return;
+
+ /*
+ * One global barrier for all probes dequeued above; no new handlers
+ * for any of them can fire after this returns.
+ */
+ rv_uprobe_sync();
+
+ /* pending is local to this call, so entries are freed without unlinking. */
+ list_for_each_entry_safe(b, tmp, &pending, list) {
+ rv_uprobe_free(b->start_probe);
+ rv_uprobe_free(b->stop_probe);
+ kfree(b);
+ }
+}
+
+/*
+ * Read handler for the "monitor" file: format every registered binding as
+ * one "p PATH:0xSTART 0xSTOP threshold=NS" line into a temporary buffer
+ * and hand the window selected by @ppos/@count to user space.
+ *
+ * Returns the number of bytes copied, -ENOMEM if the buffer cannot be
+ * allocated, or the error from simple_read_from_buffer().
+ */
+static ssize_t tlob_monitor_read(struct file *file,
+				 char __user *ubuf,
+				 size_t count, loff_t *ppos)
+{
+	const int line_sz = TLOB_MAX_PATH + 128;
+	struct tlob_uprobe_binding *b;
+	int nr = 0, len = 0, buf_sz;
+	ssize_t ret;
+	char *buf;
+
+	mutex_lock(&tlob_uprobe_mutex);
+
+	/* First pass: count the bindings to size the buffer. */
+	list_for_each_entry(b, &tlob_uprobe_list, list)
+		nr++;
+
+	/* Reserve room for at least one line, plus the terminating NUL. */
+	buf_sz = line_sz * (nr ? nr : 1) + 1;
+	buf = kmalloc(buf_sz, GFP_KERNEL);
+	if (!buf) {
+		mutex_unlock(&tlob_uprobe_mutex);
+		return -ENOMEM;
+	}
+
+	/* Second pass: append one line per binding. */
+	list_for_each_entry(b, &tlob_uprobe_list, list)
+		len += scnprintf(buf + len, buf_sz - len,
+				 "p %s:0x%llx 0x%llx threshold=%llu\n",
+				 b->binpath,
+				 (unsigned long long)b->offset_start,
+				 (unsigned long long)b->offset_stop,
+				 b->threshold_ns);
+
+	mutex_unlock(&tlob_uprobe_mutex);
+
+	ret = simple_read_from_buffer(ubuf, count, ppos, buf, len);
+	kfree(buf);
+	return ret;
+}
+
+/*
+ * Parse "p PATH:OFFSET_START OFFSET_STOP threshold=NS".
+ * PATH may contain ':'; the last ':' separates path from offset.
+ *
+ * @buf is modified in place (separators are overwritten with NUL) and
+ * *@path_out points into @buf on success.
+ *
+ * Offsets are parsed with kstrtoll() (base auto-detected from the prefix),
+ * so the whole token must be a number: trailing garbage such as "0x100zz"
+ * and values that overflow long long are rejected instead of being
+ * silently accepted.  Offsets must be non-negative and must differ.
+ *
+ * Returns 0, -EINVAL (malformed line), or -ERANGE (threshold outside
+ * [1000, TLOB_MAX_THRESHOLD_NS]).
+ */
+static int tlob_parse_uprobe_line(char *buf, u64 *thr_out,
+				  char **path_out,
+				  loff_t *start_out, loff_t *stop_out)
+{
+	unsigned long long thr = 0;
+	long long start_val, stop_val = 0;
+	char *p, *path_token, *token, *colon;
+	bool got_stop = false, got_thr = false;
+
+	/* Must start with "p " */
+	if (buf[0] != 'p' || buf[1] != ' ')
+		return -EINVAL;
+
+	p = buf + 2;
+	while (*p == ' ')
+		p++;
+
+	/* First space-delimited token is PATH:OFFSET_START */
+	path_token = strsep(&p, " \t");
+	if (!path_token || !*path_token)
+		return -EINVAL;
+
+	/*
+	 * Split at last ':' to handle paths that contain ':'.
+	 * The path part must be at least two characters long.
+	 */
+	colon = strrchr(path_token, ':');
+	if (!colon || colon - path_token < 2)
+		return -EINVAL;
+	*colon = '\0';
+
+	if (path_token[0] != '/')
+		return -EINVAL;
+
+	/* Any kstrtoll() failure (syntax or overflow) is a malformed line. */
+	if (kstrtoll(colon + 1, 0, &start_val) || start_val < 0)
+		return -EINVAL;
+
+	/* Remaining tokens: OFFSET_STOP threshold=NS */
+	while (p && (token = strsep(&p, " \t")) != NULL) {
+		/* Empty token: consecutive separators. */
+		if (!*token)
+			continue;
+		if (strncmp(token, "threshold=", 10) == 0) {
+			if (kstrtoull(token + 10, 0, &thr))
+				return -EINVAL;
+			if (thr < 1000 || thr > TLOB_MAX_THRESHOLD_NS)
+				return -ERANGE;
+			got_thr = true;
+		} else if (!got_stop) {
+			if (kstrtoll(token, 0, &stop_val) || stop_val < 0)
+				return -EINVAL;
+			got_stop = true;
+		} else {
+			/* A second positional token is not allowed. */
+			return -EINVAL;
+		}
+	}
+
+	if (!got_stop || !got_thr)
+		return -EINVAL;
+	if (start_val == stop_val)
+		return -EINVAL;
+
+	*thr_out = thr;
+	*path_out = path_token;
+	*start_out = (loff_t)start_val;
+	*stop_out = (loff_t)stop_val;
+	return 0;
+}
+
+/*
+ * Parse "-PATH:OFFSET_START" (ftrace uprobe_events removal convention).
+ *
+ * @buf is modified in place (the last ':' is overwritten with NUL) and
+ * *@path_out points into @buf on success.  PATH must be absolute and at
+ * least two characters long.
+ *
+ * The offset is parsed with kstrtoll(), so trailing garbage and overflow
+ * are rejected, and it must be non-negative, matching the rules applied
+ * to the start offset when a binding is added.
+ *
+ * Returns 0 or -EINVAL.
+ */
+static int tlob_parse_remove_line(char *buf, char **path_out, loff_t *start_out)
+{
+	char *binpath, *colon;
+	long long off;
+
+	if (buf[0] != '-')
+		return -EINVAL;
+	binpath = buf + 1;
+	if (binpath[0] != '/')
+		return -EINVAL;
+	colon = strrchr(binpath, ':');
+	if (!colon || colon - binpath < 2)
+		return -EINVAL;
+	*colon = '\0';
+	if (kstrtoll(colon + 1, 0, &off) || off < 0)
+		return -EINVAL;
+	*path_out = binpath;
+	*start_out = (loff_t)off;
+	return 0;
+}
+
+/*
+ * Execute one command line: a line starting with '-' removes a binding,
+ * anything else is parsed as an add ("p ...") line.  @buf is modified by
+ * the parsers.  The list operation runs under tlob_uprobe_mutex.
+ *
+ * Returns 0 on success or the negative error from the parser or from the
+ * add/remove helper.
+ */
+VISIBLE_IF_KUNIT int tlob_create_or_delete_uprobe(char *buf)
+{
+	loff_t start_off, stop_off;
+	u64 budget_ns;
+	char *path;
+	int err;
+
+	if (buf[0] != '-') {
+		err = tlob_parse_uprobe_line(buf, &budget_ns, &path,
+					     &start_off, &stop_off);
+		if (err)
+			return err;
+
+		mutex_lock(&tlob_uprobe_mutex);
+		err = tlob_add_uprobe(budget_ns, path, start_off, stop_off);
+		mutex_unlock(&tlob_uprobe_mutex);
+		return err;
+	}
+
+	err = tlob_parse_remove_line(buf, &path, &start_off);
+	if (err)
+		return err;
+
+	mutex_lock(&tlob_uprobe_mutex);
+	err = tlob_remove_uprobe_by_key(start_off, path);
+	mutex_unlock(&tlob_uprobe_mutex);
+	return err;
+}
+EXPORT_SYMBOL_IF_KUNIT(tlob_create_or_delete_uprobe);
+
+/*
+ * Write handler for the "monitor" file: copy one command line from user
+ * space, strip a single trailing newline if present, and execute it.
+ *
+ * Returns @count on success, -EINVAL if the line does not fit the local
+ * buffer, -EFAULT on a failed copy, or the error from
+ * tlob_create_or_delete_uprobe().
+ */
+static ssize_t tlob_monitor_write(struct file *file,
+				  const char __user *ubuf,
+				  size_t count, loff_t *ppos)
+{
+	char line[TLOB_MAX_PATH + 128];
+	int err;
+
+	/* Keep one byte free for the terminating NUL. */
+	if (count >= sizeof(line))
+		return -EINVAL;
+	if (copy_from_user(line, ubuf, count))
+		return -EFAULT;
+
+	line[count] = '\0';
+	if (count > 0 && line[count - 1] == '\n')
+		line[count - 1] = '\0';
+
+	err = tlob_create_or_delete_uprobe(line);
+	if (err)
+		return err;
+
+	return (ssize_t)count;
+}
+
+/*
+ * File operations of the "monitor" file created in register_tlob():
+ * reads list the registered bindings, writes add or remove one binding.
+ */
+static const struct file_operations tlob_monitor_fops = {
+ .open = simple_open,
+ .read = tlob_monitor_read,
+ .write = tlob_monitor_write,
+ .llseek = noop_llseek,
+};
+
+/*
+ * Create the slab cache for struct tlob_task_state, initialise the HA
+ * monitor and mark the monitor enabled.
+ *
+ * Returns 0 on success, -ENOMEM if the cache cannot be created, or the
+ * error from ha_monitor_init() (the cache is destroyed again in that case).
+ */
+static int __tlob_init_monitor(void)
+{
+	int err;
+
+	tlob_state_cache = kmem_cache_create("tlob_task_state",
+					     sizeof(struct tlob_task_state),
+					     0, 0, NULL);
+	if (!tlob_state_cache)
+		return -ENOMEM;
+
+	err = ha_monitor_init();
+	if (!err) {
+		rv_this.enabled = 1;
+		return 0;
+	}
+
+	/* Monitor initialisation failed: release the cache created above. */
+	kmem_cache_destroy(tlob_state_cache);
+	tlob_state_cache = NULL;
+	return err;
+}
+
+/*
+ * Tear the monitor down: mark it disabled, remove all uprobe bindings,
+ * destroy the HA monitor storage, then destroy the slab cache.
+ */
+static void __tlob_destroy_monitor(void)
+{
+ rv_this.enabled = 0;
+ /*
+ * Remove uprobes first; rv_uprobe_sync() inside ensures all in-flight
+ * handlers have finished before we proceed.
+ */
+ tlob_remove_all_uprobes();
+
+ /*
+ * da_monitor_destroy() iterates any remaining entries via da_extra_cleanup
+ * (tlob_extra_cleanup), cancels their timers, and frees their state.
+ * rcu_barrier() inside drains both da_pool_return_cb and tlob_free_rcu
+ * callbacks before the pool arrays are freed.
+ */
+ ha_monitor_destroy();
+ /* The cache is destroyed last, after the state objects have been freed. */
+ kmem_cache_destroy(tlob_state_cache);
+ tlob_state_cache = NULL;
+}
+
+/*
+ * Attach the sched_switch, sched_wakeup and sched_process_exit tracepoint
+ * handlers.  Returns 0; no attach result is propagated by this function.
+ */
+static int tlob_enable_hooks(void)
+{
+ rv_attach_trace_probe("tlob", sched_switch, handle_sched_switch);
+ rv_attach_trace_probe("tlob", sched_wakeup, handle_sched_wakeup);
+ rv_attach_trace_probe("tlob", sched_process_exit, handle_sched_process_exit);
+ return 0;
+}
+
+/* Detach the three tracepoint handlers attached by tlob_enable_hooks(). */
+static void tlob_disable_hooks(void)
+{
+ rv_detach_trace_probe("tlob", sched_switch, handle_sched_switch);
+ rv_detach_trace_probe("tlob", sched_wakeup, handle_sched_wakeup);
+ rv_detach_trace_probe("tlob", sched_process_exit, handle_sched_process_exit);
+}
+
+/*
+ * rv_monitor .enable callback: initialise the monitor, then attach the
+ * tracepoint handlers.  Returns the initialisation error, if any,
+ * otherwise the result of tlob_enable_hooks().
+ */
+static int enable_tlob(void)
+{
+	int err = __tlob_init_monitor();
+
+	return err ? err : tlob_enable_hooks();
+}
+
+/*
+ * rv_monitor .disable callback: detach the tracepoint handlers before
+ * tearing the monitor down.
+ */
+static void disable_tlob(void)
+{
+ tlob_disable_hooks();
+ __tlob_destroy_monitor();
+}
+
+/*
+ * Monitor descriptor passed to rv_register_monitor().  .enabled starts at 0
+ * and is set by __tlob_init_monitor() / cleared by __tlob_destroy_monitor().
+ */
+static struct rv_monitor rv_this = {
+ .name = "tlob",
+ .description = "Per-task latency-over-budget monitor.",
+ .enable = enable_tlob,
+ .disable = disable_tlob,
+ .reset = da_monitor_reset_all,
+ .enabled = 0,
+};
+
+/*
+ * Module init: register the monitor and, when it has a tracefs directory
+ * (rv_this.root_d), create the "monitor" control file in it.
+ *
+ * Returns 0 on success, the rv_register_monitor() error, or -ENOMEM if the
+ * file cannot be created (the monitor is unregistered again in that case).
+ */
+static int __init register_tlob(void)
+{
+	int err;
+
+	err = rv_register_monitor(&rv_this, NULL);
+	if (err)
+		return err;
+
+	/* No directory for this monitor: there is nowhere to put the file. */
+	if (!rv_this.root_d)
+		return 0;
+
+	if (tracefs_create_file("monitor", 0644, rv_this.root_d, NULL,
+				&tlob_monitor_fops))
+		return 0;
+
+	rv_unregister_monitor(&rv_this);
+	return -ENOMEM;
+}
+
+/*
+ * Module exit: unregister the monitor.
+ * NOTE(review): the "monitor" tracefs file is not removed explicitly here;
+ * presumably rv_unregister_monitor() removes the monitor directory - confirm.
+ */
+static void __exit unregister_tlob(void)
+{
+ rv_unregister_monitor(&rv_this);
+}
+
+module_init(register_tlob);
+module_exit(unregister_tlob);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Wen Yang <wen.yang@linux.dev>");
+MODULE_DESCRIPTION("tlob: task latency over budget per-task monitor.");
diff --git a/kernel/trace/rv/monitors/tlob/tlob.h b/kernel/trace/rv/monitors/tlob/tlob.h
new file mode 100644
index 000000000000..b6724e629c69
--- /dev/null
+++ b/kernel/trace/rv/monitors/tlob/tlob.h
@@ -0,0 +1,148 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _RV_TLOB_H
+#define _RV_TLOB_H
+
+/*
+ * C representation of the tlob hybrid automaton.
+ *
+ * Three-state HA following sched_stat / wwnr monitor naming conventions:
+ *
+ * running (initial) - task is executing on CPU [sched_stat: runtime]
+ * waiting - task is in runqueue, awaiting CPU [sched_stat: wait ]
+ * sleeping - task is blocked, awaiting resource[sched_stat: sleep ]
+ *
+ * Events (derived from sched_switch / sched_wakeup tracepoints):
+ * start - tlob_start_task() running → running (resets clock, arms timer)
+ * sleep - sched_switch, prev_state != 0 running → sleeping
+ * preempt - sched_switch, prev_state == 0 running → waiting
+ * wakeup - sched_wakeup sleeping → waiting
+ * switch_in - sched_switch, next == task waiting → running
+ *
+ * One HA clock invariant:
+ * clk_elapsed < BUDGET_NS() active in all states (total latency budget)
+ *
+ * tlob_start_task() uses da_handle_start_run_event(start_tlob) to initialise
+ * the monitor: the DA framework sets the initial state and then processes the
+ * start event, which resets clk_elapsed and arms the budget hrtimer via the
+ * generated ha_setup_invariants().
+ * tlob_stop_task() calls ha_cancel_timer_sync() + da_monitor_reset() directly.
+ *
+ * For the format description see:
+ * Documentation/trace/rv/deterministic_automata.rst
+ */
+
+#include <linux/rv.h>
+#include <linux/sched.h>
+
+#define MONITOR_NAME tlob
+
+/*
+ * Automaton states.  state_max_tlob is the number of states and is also
+ * used as INVALID_STATE.
+ */
+enum states_tlob {
+ running_tlob, /* task is executing on CPU (initial state) */
+ waiting_tlob, /* task is in runqueue, awaiting CPU */
+ sleeping_tlob, /* task is blocked, awaiting resource */
+ state_max_tlob,
+};
+
+#define INVALID_STATE state_max_tlob
+
+/* Automaton events; event_max_tlob is the number of events. */
+enum events_tlob {
+ start_tlob, /* tlob_start_task(): resets clock, arms timer */
+ sleep_tlob, /* sched_switch, prev_state != 0 */
+ preempt_tlob, /* sched_switch, prev_state == 0 */
+ wakeup_tlob, /* sched_wakeup */
+ switch_in_tlob, /* sched_switch, next == task */
+ event_max_tlob,
+};
+
+/*
+ * HA environment variable: clk_elapsed is the only clock.
+ * It measures wall-clock time since the start event and is active in all
+ * states.
+ */
+enum envs_tlob {
+ clk_elapsed_tlob,
+ env_max_tlob, /* number of environment variables */
+ env_max_stored_tlob = env_max_tlob, /* checked against MAX_HA_ENV_LEN below */
+};
+
+_Static_assert(env_max_stored_tlob <= MAX_HA_ENV_LEN, "Not enough slots");
+#define HA_CLK_NS
+
+/* Static description of the automaton: names, transition table, flags. */
+struct automaton_tlob {
+ char *state_names[state_max_tlob]; /* indexed by enum states_tlob */
+ char *event_names[event_max_tlob]; /* indexed by enum events_tlob */
+ char *env_names[env_max_tlob]; /* indexed by enum envs_tlob */
+ /* function[state][event] is the next state, or INVALID_STATE. */
+ unsigned char function[state_max_tlob][event_max_tlob];
+ unsigned char initial_state;
+ bool final_states[state_max_tlob]; /* indexed by enum states_tlob */
+};
+
+/*
+ * The tlob automaton.  Rows of .function follow enum states_tlob and
+ * columns follow enum events_tlob; INVALID_STATE marks a transition the
+ * model does not allow.  running is both the initial and the only final
+ * state.
+ */
+static const struct automaton_tlob automaton_tlob = {
+ .state_names = {
+ "running",
+ "waiting",
+ "sleeping",
+ },
+ .event_names = {
+ "start",
+ "sleep",
+ "preempt",
+ "wakeup",
+ "switch_in",
+ },
+ .env_names = {
+ "clk_elapsed",
+ },
+ .function = {
+ /* running */
+ {
+ running_tlob, /* start (tlob_start_task, resets clock) */
+ sleeping_tlob, /* sleep (sched_switch, prev_state != 0) */
+ waiting_tlob, /* preempt (sched_switch, prev_state == 0) */
+ INVALID_STATE, /* wakeup (TASK_RUNNING can't be woken) */
+ INVALID_STATE, /* switch_in (already on CPU) */
+ },
+ /* waiting */
+ {
+ INVALID_STATE, /* start (not in running state) */
+ INVALID_STATE, /* sleep (not on CPU) */
+ INVALID_STATE, /* preempt (not on CPU) */
+ INVALID_STATE, /* wakeup (already TASK_RUNNING) */
+ running_tlob, /* switch_in */
+ },
+ /* sleeping */
+ {
+ INVALID_STATE, /* start (not in running state) */
+ INVALID_STATE, /* sleep (already sleeping) */
+ INVALID_STATE, /* preempt (not on CPU) */
+ waiting_tlob, /* wakeup */
+ INVALID_STATE, /* switch_in (must go through waiting first) */
+ },
+ },
+ .initial_state = running_tlob,
+ .final_states = { 1, 0, 0 },
+};
+
+/* Maximum number of concurrently monitored tasks. */
+#define TLOB_MAX_MONITORED 64U
+
+/* Maximum binary path length for uprobe binding. */
+#define TLOB_MAX_PATH 256
+
+/*
+ * Upper bound on the monitoring budget (1 hour = 3 600 000 000 000 ns).
+ * The ns-resolution accumulators (running_ns, waiting_ns, sleeping_ns)
+ * are u64; keeping the window below this limit ensures they stay well
+ * clear of u64 overflow and covers every realistic latency-monitoring
+ * use case.
+ */
+#define TLOB_MAX_THRESHOLD_NS 3600000000000ULL
+
+/* Exported to ioctl/uprobe layers and KUnit */
+/*
+ * tlob_start_task(): start monitoring @task with a budget of @threshold_ns
+ * nanoseconds; returns 0 or a negative errno (-ENOSPC when the pool is
+ * exhausted).
+ * tlob_stop_task(): stop monitoring @task; returns 0, -EOVERFLOW (budget
+ * exceeded), -ESRCH (not monitored) or -EAGAIN (concurrent stop).
+ */
+int tlob_start_task(struct task_struct *task, u64 threshold_ns);
+int tlob_stop_task(struct task_struct *task);
+
+#if IS_ENABLED(CONFIG_KUNIT)
+int tlob_create_or_delete_uprobe(char *buf);
+#endif /* CONFIG_KUNIT */
+
+#endif /* _RV_TLOB_H */
diff --git a/kernel/trace/rv/monitors/tlob/tlob_trace.h b/kernel/trace/rv/monitors/tlob/tlob_trace.h
new file mode 100644
index 000000000000..1ac4900d38e8
--- /dev/null
+++ b/kernel/trace/rv/monitors/tlob/tlob_trace.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * Snippet to be included in rv_trace.h
+ */
+
+#ifdef CONFIG_RV_MON_TLOB
+/*
+ * tlob instances of the per-id DA event classes:
+ *   event_tlob     - id, state, event, next state, final-state flag
+ *   error_tlob     - id, state, event
+ *   error_env_tlob - id, state, event, environment variable name
+ */
+DEFINE_EVENT(event_da_monitor_id, event_tlob,
+ TP_PROTO(int id, char *state, char *event, char *next_state, bool final_state),
+ TP_ARGS(id, state, event, next_state, final_state));
+
+DEFINE_EVENT(error_da_monitor_id, error_tlob,
+ TP_PROTO(int id, char *state, char *event),
+ TP_ARGS(id, state, event));
+
+DEFINE_EVENT(error_env_da_monitor_id, error_env_tlob,
+ TP_PROTO(int id, char *state, char *event, char *env),
+ TP_ARGS(id, state, event, env));
+
+/*
+ * detail_env_tlob - per-state latency breakdown emitted on budget violation.
+ *
+ * Fired immediately after error_env_tlob from the hrtimer callback.
+ * Fields show how much time was spent in each DA state since tlob_start_task().
+ * running_ns + waiting_ns + sleeping_ns ≈ total elapsed time (threshold_ns exceeded).
+ *
+ * The id argument is printed as "pid=" in the trace output.
+ */
+TRACE_EVENT(detail_env_tlob,
+ TP_PROTO(int id, u64 threshold_ns,
+ u64 running_ns, u64 waiting_ns, u64 sleeping_ns),
+ TP_ARGS(id, threshold_ns, running_ns, waiting_ns, sleeping_ns),
+ TP_STRUCT__entry(
+ __field(int, id)
+ __field(u64, threshold_ns)
+ __field(u64, running_ns)
+ __field(u64, waiting_ns)
+ __field(u64, sleeping_ns)
+ ),
+ TP_fast_assign(
+ __entry->id = id;
+ __entry->threshold_ns = threshold_ns;
+ __entry->running_ns = running_ns;
+ __entry->waiting_ns = waiting_ns;
+ __entry->sleeping_ns = sleeping_ns;
+ ),
+ TP_printk("pid=%d threshold_ns=%llu running_ns=%llu waiting_ns=%llu sleeping_ns=%llu",
+ __entry->id, __entry->threshold_ns,
+ __entry->running_ns, __entry->waiting_ns, __entry->sleeping_ns)
+);
+#endif /* CONFIG_RV_MON_TLOB */
diff --git a/kernel/trace/rv/rv_trace.h b/kernel/trace/rv/rv_trace.h
index 9622c269789c..a4bc215c1f15 100644
--- a/kernel/trace/rv/rv_trace.h
+++ b/kernel/trace/rv/rv_trace.h
@@ -189,6 +189,7 @@ DECLARE_EVENT_CLASS(error_env_da_monitor_id,
#include <monitors/stall/stall_trace.h>
#include <monitors/nomiss/nomiss_trace.h>
+#include <monitors/tlob/tlob_trace.h>
// Add new monitors based on CONFIG_HA_MON_EVENTS_ID here
#endif
--
2.43.0
^ permalink raw reply related [flat|nested] 10+ messages in thread* [PATCH v3 7/9] rv/tlob: add KUnit tests for the tlob monitor
2026-06-07 16:13 [PATCH v3 0/9] rv/tlob: Add task latency over budget RV monitor wen.yang
` (5 preceding siblings ...)
2026-06-07 16:13 ` [PATCH v3 6/9] rv/tlob: add tlob hybrid automaton monitor wen.yang
@ 2026-06-07 16:13 ` wen.yang
2026-06-07 16:13 ` [PATCH v3 8/9] selftests/verification: fix verificationtest-ktap for out-of-tree execution wen.yang
2026-06-07 16:13 ` [PATCH v3 9/9] selftests/verification: add tlob selftests wen.yang
8 siblings, 0 replies; 10+ messages in thread
From: wen.yang @ 2026-06-07 16:13 UTC (permalink / raw)
To: Gabriele Monaco
Cc: Steven Rostedt, linux-trace-kernel, linux-kernel, Wen Yang
From: Wen Yang <wen.yang@linux.dev>
Add CONFIG_TLOB_KUNIT_TEST (tristate, depends on RV_MON_TLOB && KUNIT,
default KUNIT_ALL_TESTS) with a single test suite covering the uprobe
line parser: valid bindings are accepted, malformed ones return -EINVAL,
and out-of-range thresholds return -ERANGE.
Signed-off-by: Wen Yang <wen.yang@linux.dev>
---
kernel/trace/rv/Makefile | 1 +
kernel/trace/rv/monitors/tlob/.kunitconfig | 6 ++
kernel/trace/rv/monitors/tlob/Kconfig | 7 ++
kernel/trace/rv/monitors/tlob/tlob_kunit.c | 92 ++++++++++++++++++++++
4 files changed, 106 insertions(+)
create mode 100644 kernel/trace/rv/monitors/tlob/.kunitconfig
create mode 100644 kernel/trace/rv/monitors/tlob/tlob_kunit.c
diff --git a/kernel/trace/rv/Makefile b/kernel/trace/rv/Makefile
index ae59e97f8682..316d53398345 100644
--- a/kernel/trace/rv/Makefile
+++ b/kernel/trace/rv/Makefile
@@ -21,6 +21,7 @@ obj-$(CONFIG_RV_MON_STALL) += monitors/stall/stall.o
obj-$(CONFIG_RV_MON_DEADLINE) += monitors/deadline/deadline.o
obj-$(CONFIG_RV_MON_NOMISS) += monitors/nomiss/nomiss.o
obj-$(CONFIG_RV_MON_TLOB) += monitors/tlob/tlob.o
+obj-$(CONFIG_TLOB_KUNIT_TEST) += monitors/tlob/tlob_kunit.o
# Add new monitors here
obj-$(CONFIG_RV_UPROBE) += rv_uprobe.o
obj-$(CONFIG_RV_REACTORS) += rv_reactors.o
diff --git a/kernel/trace/rv/monitors/tlob/.kunitconfig b/kernel/trace/rv/monitors/tlob/.kunitconfig
new file mode 100644
index 000000000000..35d313dfc20d
--- /dev/null
+++ b/kernel/trace/rv/monitors/tlob/.kunitconfig
@@ -0,0 +1,6 @@
+CONFIG_FTRACE=y
+CONFIG_KUNIT=y
+CONFIG_MODULES=y
+CONFIG_RV=y
+CONFIG_RV_MON_TLOB=y
+CONFIG_TLOB_KUNIT_TEST=y
diff --git a/kernel/trace/rv/monitors/tlob/Kconfig b/kernel/trace/rv/monitors/tlob/Kconfig
index b29a375de228..7ec3326640c2 100644
--- a/kernel/trace/rv/monitors/tlob/Kconfig
+++ b/kernel/trace/rv/monitors/tlob/Kconfig
@@ -10,3 +10,10 @@ config RV_MON_TLOB
monitor. tlob tracks per-task elapsed wall-clock time across a
user-delimited code section and emits error_env_tlob when the
elapsed time exceeds a configurable per-invocation budget.
+
+config TLOB_KUNIT_TEST
+ tristate "KUnit tests for tlob monitor" if !KUNIT_ALL_TESTS
+ depends on RV_MON_TLOB && KUNIT
+ default KUNIT_ALL_TESTS
+ help
+ Enable KUnit unit tests for the tlob RV monitor.
diff --git a/kernel/trace/rv/monitors/tlob/tlob_kunit.c b/kernel/trace/rv/monitors/tlob/tlob_kunit.c
new file mode 100644
index 000000000000..6450d61b26c3
--- /dev/null
+++ b/kernel/trace/rv/monitors/tlob/tlob_kunit.c
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KUnit tests for the tlob RV monitor.
+ *
+ */
+#include <kunit/test.h>
+
+#include "tlob.h"
+
+MODULE_IMPORT_NS("EXPORTED_FOR_KUNIT_TESTING");
+
+/* Well-formed add lines: decimal offsets, hex offsets, and a path containing ':'. */
+static const char * const tlob_parse_valid[] = {
+ "p /usr/bin/myapp:4768 4848 threshold=5000000",
+ "p /usr/bin/myapp:0x12a0 0x12f0 threshold=10000000",
+ "p /opt/my:app/bin:0x100 0x200 threshold=1000000",
+};
+
+/* Lines the parsers must reject with -EINVAL. */
+static const char * const tlob_parse_invalid[] = {
+ /* add: malformed */
+ "p :0x100 0x200 threshold=5000", /* empty path */
+ "p /usr/bin/myapp:0x100 threshold=5000", /* missing stop offset */
+ "p /usr/bin/myapp:-1 0x200 threshold=5000", /* negative start offset */
+ "p /usr/bin/myapp:0x100 0x200", /* missing threshold */
+ "p /usr/bin/myapp:0x100 0x100 threshold=5000", /* start == stop */
+ /* remove: malformed */
+ "-usr/bin/myapp:0x100", /* path not absolute */
+ "-/usr/bin/myapp", /* no ':' separator */
+ "-/:0x100", /* path shorter than two characters */
+ "-/usr/bin/myapp:abc", /* offset is not a number */
+};
+
+/* threshold_ns < 1000 or > TLOB_MAX_THRESHOLD_NS return -ERANGE, not -EINVAL. */
+static const char * const tlob_parse_out_of_range[] = {
+ "p /usr/bin/myapp:0x100 0x200 threshold=0", /* below the minimum */
+ "p /usr/bin/myapp:0x100 0x200 threshold=999", /* minimum - 1 */
+ "p /usr/bin/myapp:0x100 0x200 threshold=3600000000001", /* TLOB_MAX_THRESHOLD_NS + 1 */
+};
+
+/*
+ * Valid add lines return -ENOENT (kern_path() finds no such file in the test
+ * environment) rather than 0.  The parser itself can only fail with -EINVAL
+ * or -ERANGE, so a result that is neither of them confirms the format was
+ * accepted by the parser.
+ *
+ * NOTE(review): if one of the paths exists on the test system the result
+ * differs (e.g. -EINVAL for a non-regular file, or 0 with a binding left
+ * registered) - confirm the paths cannot exist where the suite runs.
+ */
+static void tlob_parse_valid_accepted(struct kunit *test)
+{
+	char buf[128];
+	int i, ret;
+
+	for (i = 0; i < ARRAY_SIZE(tlob_parse_valid); i++) {
+		/* The parser modifies its input, so work on a copy. */
+		strscpy(buf, tlob_parse_valid[i], sizeof(buf));
+		ret = tlob_create_or_delete_uprobe(buf);
+		KUNIT_EXPECT_NE(test, ret, -EINVAL);
+		KUNIT_EXPECT_NE(test, ret, -ERANGE);
+	}
+}
+
+/* Every malformed add/remove line must be rejected with -EINVAL. */
+static void tlob_parse_invalid_rejected(struct kunit *test)
+{
+	char line[128];
+	int idx = 0;
+
+	while (idx < ARRAY_SIZE(tlob_parse_invalid)) {
+		/* The parser modifies its input, so work on a copy. */
+		strscpy(line, tlob_parse_invalid[idx], sizeof(line));
+		KUNIT_EXPECT_EQ(test, tlob_create_or_delete_uprobe(line), -EINVAL);
+		idx++;
+	}
+}
+
+/* Thresholds outside the accepted range must be rejected with -ERANGE. */
+static void tlob_parse_out_of_range_rejected(struct kunit *test)
+{
+	char line[128];
+	int idx = 0;
+
+	while (idx < ARRAY_SIZE(tlob_parse_out_of_range)) {
+		/* The parser modifies its input, so work on a copy. */
+		strscpy(line, tlob_parse_out_of_range[idx], sizeof(line));
+		KUNIT_EXPECT_EQ(test, tlob_create_or_delete_uprobe(line), -ERANGE);
+		idx++;
+	}
+}
+
+/* Test cases of the "tlob_parse" suite; the empty entry terminates the list. */
+static struct kunit_case tlob_parse_cases[] = {
+ KUNIT_CASE(tlob_parse_valid_accepted),
+ KUNIT_CASE(tlob_parse_invalid_rejected),
+ KUNIT_CASE(tlob_parse_out_of_range_rejected),
+ {}
+};
+
+/* Suite covering the uprobe line parser via tlob_create_or_delete_uprobe(). */
+static struct kunit_suite tlob_parse_suite = {
+ .name = "tlob_parse",
+ .test_cases = tlob_parse_cases,
+};
+
+kunit_test_suite(tlob_parse_suite);
+
+MODULE_DESCRIPTION("KUnit tests for the tlob RV monitor");
+MODULE_LICENSE("GPL");
--
2.43.0
^ permalink raw reply related [flat|nested] 10+ messages in thread* [PATCH v3 8/9] selftests/verification: fix verificationtest-ktap for out-of-tree execution
2026-06-07 16:13 [PATCH v3 0/9] rv/tlob: Add task latency over budget RV monitor wen.yang
` (6 preceding siblings ...)
2026-06-07 16:13 ` [PATCH v3 7/9] rv/tlob: add KUnit tests for the tlob monitor wen.yang
@ 2026-06-07 16:13 ` wen.yang
2026-06-07 16:13 ` [PATCH v3 9/9] selftests/verification: add tlob selftests wen.yang
8 siblings, 0 replies; 10+ messages in thread
From: wen.yang @ 2026-06-07 16:13 UTC (permalink / raw)
To: Gabriele Monaco
Cc: Steven Rostedt, linux-trace-kernel, linux-kernel, Wen Yang
From: Wen Yang <wen.yang@linux.dev>
verificationtest-ktap used CWD-relative paths which broke when
invoked outside the verification directory (e.g. via vng).
Resolve paths via realpath "$(dirname "$0")" so the script works
from any working directory. Accept an optional subdirectory argument
interpreted relative to the script's directory.
Suggested-by: Gabriele Monaco <gmonaco@redhat.com>
Signed-off-by: Wen Yang <wen.yang@linux.dev>
---
tools/testing/selftests/verification/verificationtest-ktap | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/tools/testing/selftests/verification/verificationtest-ktap b/tools/testing/selftests/verification/verificationtest-ktap
index 18f7fe324e2f..055747cef38a 100755
--- a/tools/testing/selftests/verification/verificationtest-ktap
+++ b/tools/testing/selftests/verification/verificationtest-ktap
@@ -5,4 +5,6 @@
#
# Copyright (C) Arm Ltd., 2023
-../ftrace/ftracetest -K -v --rv ../verification
+dir=$(realpath "$(dirname "$0")")
+testdir=$(cd "$dir" && realpath "${1:-.}")
+"$dir/../ftrace/ftracetest" -K -v --rv "$testdir"
--
2.43.0
^ permalink raw reply related [flat|nested] 10+ messages in thread* [PATCH v3 9/9] selftests/verification: add tlob selftests
2026-06-07 16:13 [PATCH v3 0/9] rv/tlob: Add task latency over budget RV monitor wen.yang
` (7 preceding siblings ...)
2026-06-07 16:13 ` [PATCH v3 8/9] selftests/verification: fix verificationtest-ktap for out-of-tree execution wen.yang
@ 2026-06-07 16:13 ` wen.yang
8 siblings, 0 replies; 10+ messages in thread
From: wen.yang @ 2026-06-07 16:13 UTC (permalink / raw)
To: Gabriele Monaco
Cc: Steven Rostedt, linux-trace-kernel, linux-kernel, Wen Yang
From: Wen Yang <wen.yang@linux.dev>
Add selftest coverage for the tlob uprobe monitoring interface under
tools/testing/selftests/verification/.
test.d/tlob/ contains both the helper sources (tlob_target, tlob_sym)
and the seven test scripts so the test suite is self-contained.
tlob_target provides busy-spin, sleep, and preempt workloads; tlob_sym
resolves ELF symbol offsets for uprobe registration.
Seven test scripts exercise uprobe binding management, budget violation
detection, and per-state time accounting (running_ns, waiting_ns,
sleeping_ns).
Signed-off-by: Wen Yang <wen.yang@linux.dev>
---
.../testing/selftests/verification/.gitignore | 2 +
tools/testing/selftests/verification/Makefile | 19 +-
.../verification/test.d/tlob/Makefile | 20 ++
.../verification/test.d/tlob/test.d/functions | 1 +
.../verification/test.d/tlob/tlob_sym.c | 189 ++++++++++++++++++
.../verification/test.d/tlob/tlob_target.c | 138 +++++++++++++
.../verification/test.d/tlob/uprobe_bind.tc | 37 ++++
.../test.d/tlob/uprobe_detail_running.tc | 51 +++++
.../test.d/tlob/uprobe_detail_sleeping.tc | 50 +++++
.../test.d/tlob/uprobe_detail_waiting.tc | 66 ++++++
.../verification/test.d/tlob/uprobe_multi.tc | 64 ++++++
.../test.d/tlob/uprobe_no_event.tc | 19 ++
.../test.d/tlob/uprobe_violation.tc | 67 +++++++
13 files changed, 722 insertions(+), 1 deletion(-)
create mode 100644 tools/testing/selftests/verification/test.d/tlob/Makefile
create mode 100644 tools/testing/selftests/verification/test.d/tlob/test.d/functions
create mode 100644 tools/testing/selftests/verification/test.d/tlob/tlob_sym.c
create mode 100644 tools/testing/selftests/verification/test.d/tlob/tlob_target.c
create mode 100644 tools/testing/selftests/verification/test.d/tlob/uprobe_bind.tc
create mode 100644 tools/testing/selftests/verification/test.d/tlob/uprobe_detail_running.tc
create mode 100644 tools/testing/selftests/verification/test.d/tlob/uprobe_detail_sleeping.tc
create mode 100644 tools/testing/selftests/verification/test.d/tlob/uprobe_detail_waiting.tc
create mode 100644 tools/testing/selftests/verification/test.d/tlob/uprobe_multi.tc
create mode 100644 tools/testing/selftests/verification/test.d/tlob/uprobe_no_event.tc
create mode 100644 tools/testing/selftests/verification/test.d/tlob/uprobe_violation.tc
diff --git a/tools/testing/selftests/verification/.gitignore b/tools/testing/selftests/verification/.gitignore
index 2659417cb2c7..cbbd03ee16c7 100644
--- a/tools/testing/selftests/verification/.gitignore
+++ b/tools/testing/selftests/verification/.gitignore
@@ -1,2 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
logs
+test.d/tlob/tlob_sym
+test.d/tlob/tlob_target
diff --git a/tools/testing/selftests/verification/Makefile b/tools/testing/selftests/verification/Makefile
index aa8790c22a71..0b32bdfdb8db 100644
--- a/tools/testing/selftests/verification/Makefile
+++ b/tools/testing/selftests/verification/Makefile
@@ -1,8 +1,25 @@
# SPDX-License-Identifier: GPL-2.0
-all:
TEST_PROGS := verificationtest-ktap
TEST_FILES := test.d settings
EXTRA_CLEAN := $(OUTPUT)/logs/*
+# Subdirectories that provide binaries used by the test runner.
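+# Each entry must contain a Makefile; it is invoked with OUTDIR= and
+# TOOLS_INCLUDES=.  NOTE: test.d/tlob/Makefile does not use OUTDIR and
+# builds its binaries in its own directory, next to the .tc scripts.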
+BUILD_SUBDIRS := test.d/tlob
+
include ../lib.mk
+
+all: $(patsubst %,_build_%,$(BUILD_SUBDIRS))
+
+clean: $(patsubst %,_clean_%,$(BUILD_SUBDIRS))
+
+.PHONY: $(patsubst %,_build_%,$(BUILD_SUBDIRS)) \
+ $(patsubst %,_clean_%,$(BUILD_SUBDIRS))
+
+$(patsubst %,_build_%,$(BUILD_SUBDIRS)): _build_%:
+ $(MAKE) -C $* OUTDIR="$(OUTPUT)" TOOLS_INCLUDES="$(TOOLS_INCLUDES)"
+
+$(patsubst %,_clean_%,$(BUILD_SUBDIRS)): _clean_%:
+ $(MAKE) -C $* OUTDIR="$(OUTPUT)" clean
diff --git a/tools/testing/selftests/verification/test.d/tlob/Makefile b/tools/testing/selftests/verification/test.d/tlob/Makefile
new file mode 100644
index 000000000000..29b3519b255f
--- /dev/null
+++ b/tools/testing/selftests/verification/test.d/tlob/Makefile
@@ -0,0 +1,20 @@
+# SPDX-License-Identifier: GPL-2.0
+# Builds tlob selftest helper binaries in the directory of this Makefile.
+#
+# Invoked by ../../Makefile via BUILD_SUBDIRS; outputs tlob_sym and
+# tlob_target alongside the .tc scripts so they are self-contained.
+
+CFLAGS += $(TOOLS_INCLUDES)
+
+.PHONY: all
+all: tlob_sym tlob_target
+
+tlob_sym: tlob_sym.c
+ $(CC) $(CFLAGS) -o $@ $<
+
+tlob_target: tlob_target.c
+ $(CC) $(CFLAGS) -o $@ $<
+
+.PHONY: clean
+clean:
+ $(RM) tlob_sym tlob_target
diff --git a/tools/testing/selftests/verification/test.d/tlob/test.d/functions b/tools/testing/selftests/verification/test.d/tlob/test.d/functions
new file mode 100644
index 000000000000..0b4c5e4344d2
--- /dev/null
+++ b/tools/testing/selftests/verification/test.d/tlob/test.d/functions
@@ -0,0 +1 @@
+. "${TOP_DIR%/*}/functions"
diff --git a/tools/testing/selftests/verification/test.d/tlob/tlob_sym.c b/tools/testing/selftests/verification/test.d/tlob/tlob_sym.c
new file mode 100644
index 000000000000..1b7ba1c6d95b
--- /dev/null
+++ b/tools/testing/selftests/verification/test.d/tlob/tlob_sym.c
@@ -0,0 +1,189 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * tlob_sym.c - ELF symbol-to-file-offset utility for tlob selftests
+ *
+ * Usage: tlob_sym sym_offset <binary> <symbol>
+ *
+ * Prints the ELF file offset of <symbol> in <binary> to stdout.
+ *
+ * Exit: 0 = found, 1 = error / not found.
+ */
+#include <elf.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+/*
+ * sym_offset - print the ELF file offset of @symname in @binary.
+ *
+ * Searches .symtab, then .dynsym, skipping undefined (SHN_UNDEF) entries,
+ * and translates the address through the PT_LOAD segment containing it.
+ *
+ * Returns 0 after printing "0x<offset>" to stdout, 1 on any error.
+ */
+static int sym_offset(const char *binary, const char *symname)
+{
+	struct stat st;
+	void *map;
+	uint64_t sym_vaddr = 0, file_offset = 0;
+	int fd, is64, found = 0, mapped = 0;
+
+	fd = open(binary, O_RDONLY);
+	if (fd < 0) {
+		fprintf(stderr, "open %s: %s\n", binary, strerror(errno));
+		return 1;
+	}
+	if (fstat(fd, &st) < 0) {
+		fprintf(stderr, "fstat %s: %s\n", binary, strerror(errno));
+		close(fd);
+		return 1;
+	}
+	map = mmap(NULL, (size_t)st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+	close(fd);
+	if (map == MAP_FAILED) {
+		fprintf(stderr, "mmap: %s\n", strerror(errno));
+		return 1;
+	}
+
+	Elf64_Ehdr *ehdr = map;
+	Elf32_Ehdr *ehdr32 = map;
+
+	/* the smaller (32-bit) header must fit before any field is read */
+	if (st.st_size < (off_t)sizeof(Elf32_Ehdr) ||
+	    ehdr->e_ident[EI_MAG0] != ELFMAG0 ||
+	    ehdr->e_ident[EI_MAG1] != ELFMAG1 ||
+	    ehdr->e_ident[EI_MAG2] != ELFMAG2 ||
+	    ehdr->e_ident[EI_MAG3] != ELFMAG3) {
+		fprintf(stderr, "%s: not an ELF file\n", binary);
+		munmap(map, (size_t)st.st_size);
+		return 1;
+	}
+	is64 = (ehdr->e_ident[EI_CLASS] == ELFCLASS64);
+
+	if (is64) {
+		Elf64_Shdr *shdrs = (Elf64_Shdr *)((char *)map + ehdr->e_shoff);
+		Elf64_Shdr *shstrtab_hdr = &shdrs[ehdr->e_shstrndx];
+		const char *shstrtab = (char *)map + shstrtab_hdr->sh_offset;
+
+		for (int pass = 0; pass < 2 && !found; pass++) {
+			const char *target = pass ? ".dynsym" : ".symtab";
+
+			for (int si = 0; si < ehdr->e_shnum && !found; si++) {
+				Elf64_Shdr *sh = &shdrs[si];
+
+				if (strcmp(shstrtab + sh->sh_name, target) != 0)
+					continue;
+
+				Elf64_Shdr *strtab_sh = &shdrs[sh->sh_link];
+				const char *strtab = (char *)map + strtab_sh->sh_offset;
+				Elf64_Sym *syms = (Elf64_Sym *)((char *)map + sh->sh_offset);
+				uint64_t nsyms = sh->sh_size / sizeof(Elf64_Sym);
+
+				for (uint64_t j = 0; j < nsyms; j++) {
+					/* undefined entries carry no address */
+					if (syms[j].st_shndx != SHN_UNDEF &&
+					    strcmp(strtab + syms[j].st_name, symname) == 0) {
+						sym_vaddr = syms[j].st_value;
+						found = 1;
+						break;
+					}
+				}
+			}
+		}
+
+		if (!found) {
+			fprintf(stderr, "symbol '%s' not found in %s\n", symname, binary);
+			munmap(map, (size_t)st.st_size);
+			return 1;
+		}
+
+		Elf64_Phdr *phdrs = (Elf64_Phdr *)((char *)map + ehdr->e_phoff);
+
+		for (int pi = 0; pi < ehdr->e_phnum; pi++) {
+			Elf64_Phdr *ph = &phdrs[pi];
+
+			if (ph->p_type != PT_LOAD)
+				continue;
+			/* subtract first so p_vaddr + p_filesz cannot wrap */
+			if (sym_vaddr >= ph->p_vaddr &&
+			    sym_vaddr - ph->p_vaddr < ph->p_filesz) {
+				file_offset = sym_vaddr - ph->p_vaddr + ph->p_offset;
+				mapped = 1;
+				break;
+			}
+		}
+	} else {
+		Elf32_Shdr *shdrs = (Elf32_Shdr *)((char *)map + ehdr32->e_shoff);
+		Elf32_Shdr *shstrtab_hdr = &shdrs[ehdr32->e_shstrndx];
+		const char *shstrtab = (char *)map + shstrtab_hdr->sh_offset;
+
+		for (int pass = 0; pass < 2 && !found; pass++) {
+			const char *target = pass ? ".dynsym" : ".symtab";
+
+			for (int si = 0; si < ehdr32->e_shnum && !found; si++) {
+				Elf32_Shdr *sh = &shdrs[si];
+
+				if (strcmp(shstrtab + sh->sh_name, target) != 0)
+					continue;
+
+				Elf32_Shdr *strtab_sh = &shdrs[sh->sh_link];
+				const char *strtab = (char *)map + strtab_sh->sh_offset;
+				Elf32_Sym *syms = (Elf32_Sym *)((char *)map + sh->sh_offset);
+				uint32_t nsyms = sh->sh_size / sizeof(Elf32_Sym);
+
+				for (uint32_t j = 0; j < nsyms; j++) {
+					if (syms[j].st_shndx != SHN_UNDEF &&
+					    strcmp(strtab + syms[j].st_name, symname) == 0) {
+						sym_vaddr = syms[j].st_value;
+						found = 1;
+						break;
+					}
+				}
+			}
+		}
+
+		if (!found) {
+			fprintf(stderr, "symbol '%s' not found in %s\n", symname, binary);
+			munmap(map, (size_t)st.st_size);
+			return 1;
+		}
+
+		Elf32_Phdr *phdrs = (Elf32_Phdr *)((char *)map + ehdr32->e_phoff);
+
+		for (int pi = 0; pi < ehdr32->e_phnum; pi++) {
+			Elf32_Phdr *ph = &phdrs[pi];
+
+			if (ph->p_type != PT_LOAD)
+				continue;
+			if (sym_vaddr >= ph->p_vaddr &&
+			    sym_vaddr - ph->p_vaddr < ph->p_filesz) {
+				file_offset = sym_vaddr - ph->p_vaddr + ph->p_offset;
+				mapped = 1;
+				break;
+			}
+		}
+	}
+
+	munmap(map, (size_t)st.st_size);
+	if (!mapped) {
+		fprintf(stderr, "could not map vaddr 0x%lx to file offset\n", (unsigned long)sym_vaddr);
+		return 1;
+	}
+
+	printf("0x%lx\n", (unsigned long)file_offset);
+	return 0;
+}
+
+int main(int argc, char *argv[])
+{
+	/* the only accepted form is: <prog> sym_offset <binary> <symbol> */
+	if (argc == 4 && !strcmp(argv[1], "sym_offset"))
+		return sym_offset(argv[2], argv[3]);
+	fprintf(stderr, "Usage: %s sym_offset <binary> <symbol>\n", argv[0]);
+	return 1;
+}
diff --git a/tools/testing/selftests/verification/test.d/tlob/tlob_target.c b/tools/testing/selftests/verification/test.d/tlob/tlob_target.c
new file mode 100644
index 000000000000..0fdbc575d71d
--- /dev/null
+++ b/tools/testing/selftests/verification/test.d/tlob/tlob_target.c
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * tlob_target.c - uprobe target binary for tlob selftests.
+ *
+ * Provides three start/stop probe pairs, each designed to exercise a
+ * different dominant component of the detail_env_tlob ns breakdown:
+ *
+ * tlob_busy_work / tlob_busy_work_done - busy-spin: running_ns dominates
+ * tlob_sleep_work / tlob_sleep_work_done - nanosleep: sleeping_ns dominates
+ * tlob_preempt_work / tlob_preempt_work_done - busy-spin: waiting_ns dominates
+ * (needs an RT competitor on the same CPU)
+ *
+ * Usage: tlob_target <duration_ms> [mode]
+ *
+ * mode is one of: busy (default), sleep, preempt.
+ * Loops in 200 ms iterations until <duration_ms> has elapsed
+ * (0 = run for ~24 hours).
+ */
+#define _GNU_SOURCE
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#ifndef noinline
+#define noinline __attribute__((noinline))
+#endif
+
+/* Return non-zero when *a is strictly earlier than *b. */
+static inline int timespec_before(const struct timespec *a, const struct timespec *b)
+{
+	return a->tv_sec != b->tv_sec ? a->tv_sec < b->tv_sec
+				      : a->tv_nsec < b->tv_nsec;
+}
+
+/* Advance *ts by ms milliseconds, keeping tv_nsec below one second. */
+static void timespec_add_ms(struct timespec *ts, unsigned long ms)
+{
+	long nsec = ts->tv_nsec + (long)(ms % 1000) * 1000000L;
+	int carry = nsec >= 1000000000L;
+
+	ts->tv_sec += ms / 1000 + carry;
+	ts->tv_nsec = carry ? nsec - 1000000000L : nsec;
+}
+
+/* stop probe: the uprobe fires on entry, so the body only needs to exist */
+noinline void tlob_busy_work_done(void)
+{
+	__asm__ __volatile__("");	/* keeps optimized builds from dropping calls */
+}
+
+/* start probe; spins on CLOCK_MONOTONIC so running_ns dominates */
+noinline void tlob_busy_work(unsigned long duration_ns)
+{
+	struct timespec t0, t1;
+	unsigned long spent_ns;
+
+	clock_gettime(CLOCK_MONOTONIC, &t0);
+	for (;;) {
+		clock_gettime(CLOCK_MONOTONIC, &t1);
+		spent_ns = (unsigned long)(t1.tv_sec - t0.tv_sec) * 1000000000UL;
+		spent_ns += (unsigned long)(t1.tv_nsec - t0.tv_nsec);
+		if (spent_ns >= duration_ns)
+			break;
+	}
+	tlob_busy_work_done();	/* stop probe fires here */
+}
+
+/* stop probe: the uprobe fires on entry, so the body only needs to exist */
+noinline void tlob_sleep_work_done(void)
+{
+	__asm__ __volatile__("");	/* keeps optimized builds from dropping calls */
+}
+
+/* start probe; blocks in nanosleep() so sleeping_ns dominates */
+noinline void tlob_sleep_work(unsigned long duration_ms)
+{
+	struct timespec req = { 0 };
+
+	req.tv_sec = duration_ms / 1000;
+	req.tv_nsec = (long)(duration_ms % 1000) * 1000000L;
+	nanosleep(&req, NULL);
+	tlob_sleep_work_done();	/* stop probe fires here */
+}
+
+/* stop probe: the uprobe fires on entry, so the body only needs to exist */
+noinline void tlob_preempt_work_done(void)
+{
+	__asm__ __volatile__("");	/* keeps optimized builds from dropping calls */
+}
+
+/*
+ * start probe; busy-spins so that an RT competitor pinned to the same CPU
+ * drives waiting_ns (prev_state==0 -> preempt event, task runnable off-CPU).
+ */
+noinline void tlob_preempt_work(unsigned long duration_ms)
+{
+	struct timespec t0, t1;
+	unsigned long spent_ns, budget_ns = duration_ms * 1000000UL;
+
+	clock_gettime(CLOCK_MONOTONIC, &t0);
+	for (;;) {
+		clock_gettime(CLOCK_MONOTONIC, &t1);
+		spent_ns = (unsigned long)(t1.tv_sec - t0.tv_sec) * 1000000000UL;
+		spent_ns += (unsigned long)(t1.tv_nsec - t0.tv_nsec);
+		if (spent_ns >= budget_ns)
+			break;
+	}
+	tlob_preempt_work_done();	/* stop probe fires here */
+}
+
+/* Loop the selected start/stop probe pair until the deadline passes. */
+int main(int argc, char *argv[])
+{
+	/* argv[1]: total run time in ms (0 or absent = ~24 h); argv[2]: mode */
+	unsigned long duration_ms = argc >= 2 ? strtoul(argv[1], NULL, 10) : 0;
+	const char *mode = argc >= 3 ? argv[2] : "busy";
+	int do_sleep = !strcmp(mode, "sleep");
+	int do_preempt = !do_sleep && !strcmp(mode, "preempt");
+	struct timespec deadline, now;
+
+	clock_gettime(CLOCK_MONOTONIC, &deadline);
+	timespec_add_ms(&deadline, duration_ms ? duration_ms : 86400000UL);
+
+	/* one 200 ms probe pair per iteration; any other mode means busy */
+	do {
+		if (do_sleep)
+			tlob_sleep_work(200);
+		else if (do_preempt)
+			tlob_preempt_work(200);
+		else
+			tlob_busy_work(200 * 1000000UL);
+		clock_gettime(CLOCK_MONOTONIC, &now);
+	} while (timespec_before(&now, &deadline));
+
+	return 0;
+}
diff --git a/tools/testing/selftests/verification/test.d/tlob/uprobe_bind.tc b/tools/testing/selftests/verification/test.d/tlob/uprobe_bind.tc
new file mode 100644
index 000000000000..1ac3db6ca7bb
--- /dev/null
+++ b/tools/testing/selftests/verification/test.d/tlob/uprobe_bind.tc
@@ -0,0 +1,37 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-or-later
+# description: Test tlob monitor uprobe binding (visible in monitor file, removable, duplicate rejected)
+# requires: tlob:monitor
+
+RV_BINDIR="${RV_BINDIR:-$(realpath "$(dirname "${1:-$0}")")}"
+UPROBE_TARGET="${RV_BINDIR}/tlob_target"
+TLOB_SYM="${RV_BINDIR}/tlob_sym"
+[ -x "$UPROBE_TARGET" ] || exit_unsupported
+[ -x "$TLOB_SYM" ] || exit_unsupported
+TLOB_MONITOR=monitors/tlob/monitor
+
+busy_offset=$("$TLOB_SYM" sym_offset "$UPROBE_TARGET" tlob_busy_work 2>/dev/null)
+stop_offset=$("$TLOB_SYM" sym_offset "$UPROBE_TARGET" tlob_busy_work_done 2>/dev/null)
+[ -n "$busy_offset" ] || exit_unsupported
+[ -n "$stop_offset" ] || exit_unsupported
+
+"$UPROBE_TARGET" 30000 &
+busy_pid=$!
+sleep 0.05
+
+echo 1 > monitors/tlob/enable
+echo "p ${UPROBE_TARGET}:${busy_offset} ${stop_offset} threshold=5000000000" > "$TLOB_MONITOR"
+
+# Binding must appear in monitor file with canonical hex-offset format.
+grep -qE "^p ${UPROBE_TARGET}:0x[0-9a-f]+ 0x[0-9a-f]+ threshold=[0-9]+$" "$TLOB_MONITOR"
+grep -q "threshold=5000000000" "$TLOB_MONITOR"
+
+# Duplicate offset_start must be rejected ("!" is exempt from errexit, so fail explicitly).
+if echo "p ${UPROBE_TARGET}:${busy_offset} ${stop_offset} threshold=9999000" > "$TLOB_MONITOR" 2>/dev/null; then exit_fail; fi
+
+# Remove the binding; it must no longer appear (explicit failure for the same reason).
+echo "-${UPROBE_TARGET}:${busy_offset}" > "$TLOB_MONITOR"
+if grep -q "^p .*:0x${busy_offset#0x} " "$TLOB_MONITOR"; then exit_fail; fi
+
+kill "$busy_pid" 2>/dev/null || true; wait "$busy_pid" 2>/dev/null || true
+echo 0 > monitors/tlob/enable
diff --git a/tools/testing/selftests/verification/test.d/tlob/uprobe_detail_running.tc b/tools/testing/selftests/verification/test.d/tlob/uprobe_detail_running.tc
new file mode 100644
index 000000000000..2814caa34902
--- /dev/null
+++ b/tools/testing/selftests/verification/test.d/tlob/uprobe_detail_running.tc
@@ -0,0 +1,51 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-or-later
+# description: Test tlob monitor detail running (running_ns dominates when task busy-spins between probes)
+# requires: tlob:monitor
+
+RV_BINDIR="${RV_BINDIR:-$(realpath "$(dirname "${1:-$0}")")}"
+UPROBE_TARGET="${RV_BINDIR}/tlob_target"
+TLOB_SYM="${RV_BINDIR}/tlob_sym"
+[ -x "$UPROBE_TARGET" ] || exit_unsupported
+[ -x "$TLOB_SYM" ] || exit_unsupported
+TLOB_MONITOR=monitors/tlob/monitor
+# Resolve probe offsets with tlob_sym; an empty result ends the test as unsupported.
+start_offset=$("$TLOB_SYM" sym_offset "$UPROBE_TARGET" tlob_busy_work 2>/dev/null)
+stop_offset=$("$TLOB_SYM" sym_offset "$UPROBE_TARGET" tlob_busy_work_done 2>/dev/null)
+[ -n "$start_offset" ] || exit_unsupported
+[ -n "$stop_offset" ] || exit_unsupported
+# Run the target in its default (busy) mode for 5000 ms in the background.
+"$UPROBE_TARGET" 5000 &
+busy_pid=$!
+sleep 0.05
+# Enable the detail event, tracing and the monitor, then clear the trace.
+echo 1 > /sys/kernel/tracing/events/rv/detail_env_tlob/enable
+echo 1 > /sys/kernel/tracing/tracing_on
+echo 1 > monitors/tlob/enable
+echo > /sys/kernel/tracing/trace
+
+# 10 µs budget; task busy-spins 200 ms per iteration -> running_ns dominates.
+echo "p ${UPROBE_TARGET}:${start_offset} ${stop_offset} threshold=10000" > "$TLOB_MONITOR"
+# Poll the trace up to 30 times, 0.1 s apart, for a detail_env_tlob line.
+found=0; i=0
+while [ "$i" -lt 30 ]; do
+ sleep 0.1
+ grep -q "detail_env_tlob" /sys/kernel/tracing/trace && { found=1; break; }
+ i=$((i+1))
+done
+# Remove the binding and stop the target before checking the result.
+echo "-${UPROBE_TARGET}:${start_offset}" > "$TLOB_MONITOR" 2>/dev/null
+kill "$busy_pid" 2>/dev/null || true; wait "$busy_pid" 2>/dev/null || true
+echo 0 > /sys/kernel/tracing/events/rv/detail_env_tlob/enable
+echo 0 > monitors/tlob/enable
+
+[ "$found" = "1" ]
+# Pull the three *_ns counters out of the first detail_env_tlob line.
+line=$(grep "detail_env_tlob" /sys/kernel/tracing/trace | head -n 1)
+running=$(echo "$line" | sed 's/.*running_ns=\([0-9]*\).*/\1/')
+waiting=$(echo "$line" | sed 's/.*waiting_ns=\([0-9]*\).*/\1/')
+sleeping=$(echo "$line" | sed 's/.*sleeping_ns=\([0-9]*\).*/\1/')
+# Busy-spin keeps the task on-CPU: running_ns must exceed sleeping_ns.
+[ "$running" -gt "$sleeping" ]
+
+echo > /sys/kernel/tracing/trace
diff --git a/tools/testing/selftests/verification/test.d/tlob/uprobe_detail_sleeping.tc b/tools/testing/selftests/verification/test.d/tlob/uprobe_detail_sleeping.tc
new file mode 100644
index 000000000000..0a6470b4cadb
--- /dev/null
+++ b/tools/testing/selftests/verification/test.d/tlob/uprobe_detail_sleeping.tc
@@ -0,0 +1,50 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-or-later
+# description: Test tlob monitor detail sleeping (sleeping_ns dominates when task blocks between probes)
+# requires: tlob:monitor
+
+RV_BINDIR="${RV_BINDIR:-$(realpath "$(dirname "${1:-$0}")")}"
+UPROBE_TARGET="${RV_BINDIR}/tlob_target"
+TLOB_SYM="${RV_BINDIR}/tlob_sym"
+[ -x "$UPROBE_TARGET" ] || exit_unsupported
+[ -x "$TLOB_SYM" ] || exit_unsupported
+TLOB_MONITOR=monitors/tlob/monitor
+# Resolve probe offsets with tlob_sym; an empty result ends the test as unsupported.
+start_offset=$("$TLOB_SYM" sym_offset "$UPROBE_TARGET" tlob_sleep_work 2>/dev/null)
+stop_offset=$("$TLOB_SYM" sym_offset "$UPROBE_TARGET" tlob_sleep_work_done 2>/dev/null)
+[ -n "$start_offset" ] || exit_unsupported
+[ -n "$stop_offset" ] || exit_unsupported
+# Run the target in sleep mode for 5000 ms in the background.
+"$UPROBE_TARGET" 5000 sleep &
+busy_pid=$!
+sleep 0.05
+# Enable the detail event, tracing and the monitor, then clear the trace.
+echo 1 > /sys/kernel/tracing/events/rv/detail_env_tlob/enable
+echo 1 > /sys/kernel/tracing/tracing_on
+echo 1 > monitors/tlob/enable
+echo > /sys/kernel/tracing/trace
+
+# 50 ms budget; task sleeps 200 ms per iteration -> sleeping_ns dominates.
+echo "p ${UPROBE_TARGET}:${start_offset} ${stop_offset} threshold=50000000" > "$TLOB_MONITOR"
+# Poll the trace up to 30 times, 0.1 s apart, for a detail_env_tlob line.
+found=0; i=0
+while [ "$i" -lt 30 ]; do
+ sleep 0.1
+ grep -q "detail_env_tlob" /sys/kernel/tracing/trace && { found=1; break; }
+ i=$((i+1))
+done
+# Remove the binding and stop the target before checking the result.
+echo "-${UPROBE_TARGET}:${start_offset}" > "$TLOB_MONITOR" 2>/dev/null
+kill "$busy_pid" 2>/dev/null || true; wait "$busy_pid" 2>/dev/null || true
+echo 0 > /sys/kernel/tracing/events/rv/detail_env_tlob/enable
+echo 0 > monitors/tlob/enable
+
+[ "$found" = "1" ]
+# sleeping_ns of the first detail line must exceed running_ns + waiting_ns.
+line=$(grep "detail_env_tlob" /sys/kernel/tracing/trace | head -n 1)
+running=$(echo "$line" | sed 's/.*running_ns=\([0-9]*\).*/\1/')
+waiting=$(echo "$line" | sed 's/.*waiting_ns=\([0-9]*\).*/\1/')
+sleeping=$(echo "$line" | sed 's/.*sleeping_ns=\([0-9]*\).*/\1/')
+[ "$sleeping" -gt "$((running + waiting))" ]
+
+echo > /sys/kernel/tracing/trace
diff --git a/tools/testing/selftests/verification/test.d/tlob/uprobe_detail_waiting.tc b/tools/testing/selftests/verification/test.d/tlob/uprobe_detail_waiting.tc
new file mode 100644
index 000000000000..ef22fce700fc
--- /dev/null
+++ b/tools/testing/selftests/verification/test.d/tlob/uprobe_detail_waiting.tc
@@ -0,0 +1,66 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-or-later
+# description: Test tlob monitor detail waiting (waiting_ns dominates when task is preempted between probes)
+# requires: tlob:monitor
+
+RV_BINDIR="${RV_BINDIR:-$(realpath "$(dirname "${1:-$0}")")}"
+UPROBE_TARGET="${RV_BINDIR}/tlob_target"
+TLOB_SYM="${RV_BINDIR}/tlob_sym"
+[ -x "$UPROBE_TARGET" ] || exit_unsupported
+[ -x "$TLOB_SYM" ] || exit_unsupported
+TLOB_MONITOR=monitors/tlob/monitor
+# chrt and taskset start and pin the target and the RT hog below.
+command -v chrt > /dev/null || exit_unsupported
+command -v taskset > /dev/null || exit_unsupported
+# Resolve probe offsets with tlob_sym; an empty result ends the test as unsupported.
+start_offset=$("$TLOB_SYM" sym_offset "$UPROBE_TARGET" tlob_preempt_work 2>/dev/null)
+stop_offset=$("$TLOB_SYM" sym_offset "$UPROBE_TARGET" tlob_preempt_work_done 2>/dev/null)
+[ -n "$start_offset" ] || exit_unsupported
+[ -n "$stop_offset" ] || exit_unsupported
+# Both the target and the RT hog are pinned to this CPU.
+cpu=0
+# Enable the detail event, tracing and the monitor, then clear the trace.
+echo 1 > /sys/kernel/tracing/events/rv/detail_env_tlob/enable
+echo 1 > /sys/kernel/tracing/tracing_on
+echo 1 > monitors/tlob/enable
+echo > /sys/kernel/tracing/trace
+
+# Register probe before the target starts so the start uprobe fires on the
+# first entry to tlob_preempt_work. Budget: 500 ms.
+echo "p ${UPROBE_TARGET}:${start_offset} ${stop_offset} threshold=500000000" > "$TLOB_MONITOR"
+
+# Target starts; start probe fires on tlob_preempt_work entry.
+taskset -c "$cpu" "$UPROBE_TARGET" 5000 preempt &
+busy_pid=$!
+sleep 0.05
+
+# RT hog on the same CPU preempts the target; target stays in waiting state
+# (runnable, off-CPU) until the budget expires -> waiting_ns dominates.
+chrt -f 99 taskset -c "$cpu" sh -c 'while true; do :; done' 2>/dev/null &
+hog_pid=$!
+# Poll the trace up to 30 times, 0.1 s apart, for a detail_env_tlob line.
+found=0; i=0
+while [ "$i" -lt 30 ]; do
+ sleep 0.1
+ grep -q "detail_env_tlob" /sys/kernel/tracing/trace && { found=1; break; }
+ i=$((i+1))
+done
+
+# Kill the RT hog first so tlob_target can release any in-flight SRCU read
+# section from uprobe_notify_resume; otherwise probe removal blocks in
+# synchronize_srcu with the hog monopolising the CPU at FIFO-99.
+kill "$hog_pid" 2>/dev/null || true; wait "$hog_pid" 2>/dev/null || true
+kill "$busy_pid" 2>/dev/null || true; wait "$busy_pid" 2>/dev/null || true
+echo "-${UPROBE_TARGET}:${start_offset}" > "$TLOB_MONITOR" 2>/dev/null
+echo 0 > /sys/kernel/tracing/events/rv/detail_env_tlob/enable
+echo 0 > monitors/tlob/enable
+
+[ "$found" = "1" ]
+# waiting_ns of the first detail line must exceed running_ns + sleeping_ns.
+line=$(grep "detail_env_tlob" /sys/kernel/tracing/trace | head -n 1)
+running=$(echo "$line" | sed 's/.*running_ns=\([0-9]*\).*/\1/')
+sleeping=$(echo "$line" | sed 's/.*sleeping_ns=\([0-9]*\).*/\1/')
+waiting=$(echo "$line" | sed 's/.*waiting_ns=\([0-9]*\).*/\1/')
+[ "$waiting" -gt "$((running + sleeping))" ]
+
+echo > /sys/kernel/tracing/trace
diff --git a/tools/testing/selftests/verification/test.d/tlob/uprobe_multi.tc b/tools/testing/selftests/verification/test.d/tlob/uprobe_multi.tc
new file mode 100644
index 000000000000..f1bd6c955f1d
--- /dev/null
+++ b/tools/testing/selftests/verification/test.d/tlob/uprobe_multi.tc
@@ -0,0 +1,64 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-or-later
+# description: Test tlob monitor multiple uprobe bindings (different offsets fire independently)
+# requires: tlob:monitor
+
+RV_BINDIR="${RV_BINDIR:-$(realpath "$(dirname "${1:-$0}")")}"
+UPROBE_TARGET="${RV_BINDIR}/tlob_target"
+TLOB_SYM="${RV_BINDIR}/tlob_sym"
+[ -x "$UPROBE_TARGET" ] || exit_unsupported
+[ -x "$TLOB_SYM" ] || exit_unsupported
+TLOB_MONITOR=monitors/tlob/monitor
+# Resolve both probe pairs with tlob_sym; an empty result ends the test as unsupported.
+busy_offset=$("$TLOB_SYM" sym_offset "$UPROBE_TARGET" tlob_busy_work 2>/dev/null)
+busy_stop=$("$TLOB_SYM" sym_offset "$UPROBE_TARGET" tlob_busy_work_done 2>/dev/null)
+sleep_offset=$("$TLOB_SYM" sym_offset "$UPROBE_TARGET" tlob_sleep_work 2>/dev/null)
+sleep_stop=$("$TLOB_SYM" sym_offset "$UPROBE_TARGET" tlob_sleep_work_done 2>/dev/null)
+[ -n "$busy_offset" ] || exit_unsupported
+[ -n "$busy_stop" ] || exit_unsupported
+[ -n "$sleep_offset" ] || exit_unsupported
+[ -n "$sleep_stop" ] || exit_unsupported
+# Two background instances of the target, one per mode.
+"$UPROBE_TARGET" 30000 & # busy mode: tlob_busy_work fires every 200 ms
+busy_pid=$!
+"$UPROBE_TARGET" 30000 sleep & # sleep mode: tlob_sleep_work fires every 200 ms
+sleep_pid=$!
+sleep 0.05
+# Enable both events, tracing and the monitor, then clear the trace.
+echo 1 > /sys/kernel/tracing/events/rv/error_env_tlob/enable
+echo 1 > /sys/kernel/tracing/events/rv/detail_env_tlob/enable
+echo 1 > /sys/kernel/tracing/tracing_on
+echo 1 > monitors/tlob/enable
+echo > /sys/kernel/tracing/trace
+
+# Binding A: 5 s budget on the busy probe - must not fire in 200 ms loops.
+echo "p ${UPROBE_TARGET}:${busy_offset} ${busy_stop} threshold=5000000000" > "$TLOB_MONITOR"
+# Binding B: 10 µs budget on the sleep probe - fires on first invocation.
+echo "p ${UPROBE_TARGET}:${sleep_offset} ${sleep_stop} threshold=10000" > "$TLOB_MONITOR"
+
+# Wait up to 2 s for error_env_tlob from binding B.
+found=0; i=0
+while [ "$i" -lt 20 ]; do
+ sleep 0.1
+ grep -q "error_env_tlob" /sys/kernel/tracing/trace && { found=1; break; }
+ i=$((i+1))
+done
+# Remove both bindings and stop both targets before checking the result.
+echo "-${UPROBE_TARGET}:${busy_offset}" > "$TLOB_MONITOR" 2>/dev/null
+echo "-${UPROBE_TARGET}:${sleep_offset}" > "$TLOB_MONITOR" 2>/dev/null
+kill "$sleep_pid" 2>/dev/null || true; wait "$sleep_pid" 2>/dev/null || true
+kill "$busy_pid" 2>/dev/null || true; wait "$busy_pid" 2>/dev/null || true
+
+echo 0 > monitors/tlob/enable
+echo 0 > /sys/kernel/tracing/events/rv/error_env_tlob/enable
+echo 0 > /sys/kernel/tracing/events/rv/detail_env_tlob/enable
+
+[ "$found" = "1" ]
+# error_env_tlob payload: clock variable must be present.
+# The event field can be "budget_exceeded" (hrtimer path) or the DA event
+# name ("sleep", "preempt") depending on which fires first; don't constrain it.
+grep "error_env_tlob" /sys/kernel/tracing/trace | head -n 1 | grep -q "clk_elapsed="
+# detail_env_tlob must appear alongside the error.
+grep -q "detail_env_tlob" /sys/kernel/tracing/trace
+
+echo > /sys/kernel/tracing/trace
diff --git a/tools/testing/selftests/verification/test.d/tlob/uprobe_no_event.tc b/tools/testing/selftests/verification/test.d/tlob/uprobe_no_event.tc
new file mode 100644
index 000000000000..a143635a60ce
--- /dev/null
+++ b/tools/testing/selftests/verification/test.d/tlob/uprobe_no_event.tc
@@ -0,0 +1,19 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-or-later
+# description: Test tlob monitor no spurious events without active uprobe binding
+# requires: tlob:monitor
+
+echo 1 > /sys/kernel/tracing/events/rv/error_env_tlob/enable
+echo 1 > /sys/kernel/tracing/tracing_on
+echo 1 > monitors/tlob/enable
+echo > /sys/kernel/tracing/trace
+
+sleep 0.5
+
+# A "!" pipeline is exempt from errexit; record the result, clean up, then assert.
+spurious=0
+grep -q "error_env_tlob" /sys/kernel/tracing/trace && spurious=1
+echo 0 > monitors/tlob/enable
+echo 0 > /sys/kernel/tracing/events/rv/error_env_tlob/enable
+[ "$spurious" = "0" ]
+echo > /sys/kernel/tracing/trace
diff --git a/tools/testing/selftests/verification/test.d/tlob/uprobe_violation.tc b/tools/testing/selftests/verification/test.d/tlob/uprobe_violation.tc
new file mode 100644
index 000000000000..d210d9c3a92d
--- /dev/null
+++ b/tools/testing/selftests/verification/test.d/tlob/uprobe_violation.tc
@@ -0,0 +1,67 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-or-later
+# description: Test tlob monitor budget violation (error_env_tlob and detail_env_tlob fire with correct fields)
+# requires: tlob:monitor
+
+RV_BINDIR="${RV_BINDIR:-$(realpath "$(dirname "${1:-$0}")")}"
+UPROBE_TARGET="${RV_BINDIR}/tlob_target"
+TLOB_SYM="${RV_BINDIR}/tlob_sym"
+[ -x "$UPROBE_TARGET" ] || exit_unsupported
+[ -x "$TLOB_SYM" ] || exit_unsupported
+TLOB_MONITOR=monitors/tlob/monitor
+# Resolve probe offsets with tlob_sym; an empty result ends the test as unsupported.
+busy_offset=$("$TLOB_SYM" sym_offset "$UPROBE_TARGET" tlob_busy_work 2>/dev/null)
+stop_offset=$("$TLOB_SYM" sym_offset "$UPROBE_TARGET" tlob_busy_work_done 2>/dev/null)
+[ -n "$busy_offset" ] || exit_unsupported
+[ -n "$stop_offset" ] || exit_unsupported
+# Run the target in its default (busy) mode for 30000 ms in the background.
+"$UPROBE_TARGET" 30000 &
+busy_pid=$!
+sleep 0.05
+# Enable both events, tracing and the monitor, then clear the trace.
+echo 1 > /sys/kernel/tracing/events/rv/error_env_tlob/enable
+echo 1 > /sys/kernel/tracing/events/rv/detail_env_tlob/enable
+echo 1 > /sys/kernel/tracing/tracing_on
+echo 1 > monitors/tlob/enable
+echo > /sys/kernel/tracing/trace
+
+# 10 µs budget - fires almost immediately; task is busy-spinning on-CPU.
+echo "p ${UPROBE_TARGET}:${busy_offset} ${stop_offset} threshold=10000" > "$TLOB_MONITOR"
+
+# wait up to 2 s for detail_env_tlob
+found=0; i=0
+while [ "$i" -lt 20 ]; do
+ sleep 0.1
+ grep -q "detail_env_tlob" /sys/kernel/tracing/trace && { found=1; break; }
+ i=$((i+1))
+done
+# Remove the binding and stop the target before checking the result.
+echo "-${UPROBE_TARGET}:${busy_offset}" > "$TLOB_MONITOR" 2>/dev/null
+kill "$busy_pid" 2>/dev/null || true; wait "$busy_pid" 2>/dev/null || true
+echo 0 > /sys/kernel/tracing/events/rv/error_env_tlob/enable
+echo 0 > /sys/kernel/tracing/events/rv/detail_env_tlob/enable
+echo 0 > monitors/tlob/enable
+
+[ "$found" = "1" ]
+
+# error_env_tlob must carry the clk_elapsed environment field.
+# The event label is "budget_exceeded" when detected by the hrtimer callback,
+# or the triggering sched event name when detected by the constraint path on a
+# preemption that races with the timer (common on PREEMPT_RT / VM). Both are
+# valid detections; check the env field instead of the label.
+grep "error_env_tlob" /sys/kernel/tracing/trace | head -n 1 | grep -q "clk_elapsed="
+
+# detail_env_tlob must have all five fields with the correct threshold
+line=$(grep "detail_env_tlob" /sys/kernel/tracing/trace | head -n 1)
+echo "$line" | grep -q "pid="
+echo "$line" | grep -q "threshold_ns=10000"
+echo "$line" | grep -q "running_ns="
+echo "$line" | grep -q "waiting_ns="
+echo "$line" | grep -q "sleeping_ns="
+
+# Busy-spin keeps the task on-CPU: running_ns must exceed sleeping_ns.
+running=$(echo "$line" | sed 's/.*running_ns=\([0-9]*\).*/\1/')
+sleeping=$(echo "$line" | sed 's/.*sleeping_ns=\([0-9]*\).*/\1/')
+[ "$running" -gt "$sleeping" ]
+
+echo > /sys/kernel/tracing/trace
--
2.43.0
^ permalink raw reply related [flat|nested] 10+ messages in thread