public inbox for linux-trace-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: wen.yang@linux.dev
To: Steven Rostedt <rostedt@goodmis.org>,
	Gabriele Monaco <gmonaco@redhat.com>,
	Masami Hiramatsu <mhiramat@kernel.org>,
	Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: linux-trace-kernel@vger.kernel.org, linux-kernel@vger.kernel.org,
	Wen Yang <wen.yang@linux.dev>
Subject: [RFC PATCH 4/4] selftests/rv: Add selftest for the tlob monitor
Date: Mon, 13 Apr 2026 03:27:21 +0800	[thread overview]
Message-ID: <5bdd82dd8aeb1d3f955b727ae1fce9819b35c170.1776020428.git.wen.yang@linux.dev> (raw)
In-Reply-To: <cover.1776020428.git.wen.yang@linux.dev>

From: Wen Yang <wen.yang@linux.dev>

Add a kselftest suite (TAP output, 20 test points) for the tlob RV
monitor under tools/testing/selftests/rv/.

test_tlob.sh drives a compiled C helper (tlob_helper) and, for uprobe
tests, a target binary (tlob_uprobe_target). Coverage spans the
tracefs enable/disable path, uprobe-triggered violations, and the
ioctl interface (within-budget stop, CPU-bound and sleep violations,
duplicate start, ring buffer mmap and consumption).

Requires CONFIG_RV_MON_TLOB=y and CONFIG_RV_CHARDEV=y; must be run
as root.

Signed-off-by: Wen Yang <wen.yang@linux.dev>
---
 tools/include/uapi/linux/rv.h                 |  54 +
 tools/testing/selftests/rv/Makefile           |  18 +
 tools/testing/selftests/rv/test_tlob.sh       | 563 ++++++++++
 tools/testing/selftests/rv/tlob_helper.c      | 994 ++++++++++++++++++
 .../testing/selftests/rv/tlob_uprobe_target.c | 108 ++
 5 files changed, 1737 insertions(+)
 create mode 100644 tools/include/uapi/linux/rv.h
 create mode 100644 tools/testing/selftests/rv/Makefile
 create mode 100755 tools/testing/selftests/rv/test_tlob.sh
 create mode 100644 tools/testing/selftests/rv/tlob_helper.c
 create mode 100644 tools/testing/selftests/rv/tlob_uprobe_target.c

diff --git a/tools/include/uapi/linux/rv.h b/tools/include/uapi/linux/rv.h
new file mode 100644
index 000000000..bef07aded
--- /dev/null
+++ b/tools/include/uapi/linux/rv.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * UAPI definitions for Runtime Verification (RV) monitors.
+ *
+ * This is a tools-friendly copy of include/uapi/linux/rv.h.
+ * Keep in sync with the kernel header.
+ */
+
+#ifndef _UAPI_LINUX_RV_H
+#define _UAPI_LINUX_RV_H
+
+#include <linux/types.h>
+#include <sys/ioctl.h>
+
+/* Magic byte shared by all RV monitor ioctls. */
+#define RV_IOC_MAGIC	0xB9
+
+/* -----------------------------------------------------------------------
+ * tlob: task latency over budget monitor  (nr 0x01 - 0x1F)
+ * -----------------------------------------------------------------------
+ */
+
+struct tlob_start_args {
+	__u64 threshold_us;
+	__u64 tag;
+	__s32 notify_fd;
+	__u32 flags;
+};
+
+struct tlob_event {
+	__u32 tid;
+	__u32 pad;
+	__u64 threshold_us;
+	__u64 on_cpu_us;
+	__u64 off_cpu_us;
+	__u32 switches;
+	__u32 state;   /* 1 = on_cpu, 0 = off_cpu */
+	__u64 tag;
+};
+
+struct tlob_mmap_page {
+	__u32  data_head;
+	__u32  data_tail;
+	__u32  capacity;
+	__u32  version;
+	__u32  data_offset;
+	__u32  record_size;
+	__u64  dropped;
+};
+
+#define TLOB_IOCTL_TRACE_START	_IOW(RV_IOC_MAGIC, 0x01, struct tlob_start_args)
+#define TLOB_IOCTL_TRACE_STOP	_IO(RV_IOC_MAGIC,  0x02)
+
+#endif /* _UAPI_LINUX_RV_H */
diff --git a/tools/testing/selftests/rv/Makefile b/tools/testing/selftests/rv/Makefile
new file mode 100644
index 000000000..14e94a1ab
--- /dev/null
+++ b/tools/testing/selftests/rv/Makefile
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for rv selftests
+
+TEST_GEN_PROGS_EXTENDED := tlob_helper tlob_uprobe_target
+
+TEST_PROGS := \
+	test_tlob.sh \
+
+# The binaries are helpers driven by test_tlob.sh: TEST_GEN_PROGS_EXTENDED
+# builds and installs them without running them as tests on their own.
+# TOOLS_INCLUDES (from ../lib.mk) provides -isystem to tools/include/uapi so
+# that <linux/rv.h> resolves in-tree without make headers_install.  Both
+# flags are set globally, not per target, because lib.mk rewrites the
+# program lists to $(OUTPUT)/name before per-target rules are evaluated.
+CFLAGS += $(TOOLS_INCLUDES)
+LDLIBS += -lpthread
+
+include ../lib.mk
diff --git a/tools/testing/selftests/rv/test_tlob.sh b/tools/testing/selftests/rv/test_tlob.sh
new file mode 100755
index 000000000..3ba2125eb
--- /dev/null
+++ b/tools/testing/selftests/rv/test_tlob.sh
@@ -0,0 +1,563 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Selftest for the tlob (task latency over budget) RV monitor.
+#
+# Two interfaces are tested:
+#
+#   1. tracefs interface:
+#        enable/disable, presence of tracefs files,
+#        uprobe binding (threshold_us:offset_start:offset_stop:binary_path) and
+#        violation detection via the ftrace ring buffer.
+#
+#   2. /dev/rv ioctl self-instrumentation (via tlob_helper):
+#        within-budget, over-budget on-CPU, over-budget off-CPU (sleep),
+#        double-start, stop-without-start.
+#
+# Written to be POSIX sh compatible (no bash-specific extensions).
+
+ksft_skip=4
+t_pass=0; t_fail=0; t_skip=0; t_total=0
+
+tap_header() { echo "TAP version 13"; }
+tap_plan()   { echo "1..$1"; }
+tap_pass()   { t_pass=$((t_pass+1)); echo "ok $t_total - $1"; }
+tap_fail()   { t_fail=$((t_fail+1)); echo "not ok $t_total - $1"
+               [ -n "$2" ] && echo "  # $2"; }
+tap_skip()   { t_skip=$((t_skip+1)); echo "ok $t_total - $1 # SKIP $2"; }
+next_test()  { t_total=$((t_total+1)); }
+
+TRACEFS=$(grep -m1 tracefs /proc/mounts 2>/dev/null | awk '{print $2}')
+[ -z "$TRACEFS" ] && TRACEFS=/sys/kernel/tracing
+
+RV_DIR="${TRACEFS}/rv"
+TLOB_DIR="${RV_DIR}/monitors/tlob"
+TRACE_FILE="${TRACEFS}/trace"
+TRACING_ON="${TRACEFS}/tracing_on"
+TLOB_MONITOR="${TLOB_DIR}/monitor"
+BUDGET_EXCEEDED_ENABLE="${TRACEFS}/events/rv/tlob_budget_exceeded/enable"
+RV_DEV="/dev/rv"
+
+# tlob_helper and tlob_uprobe_target are looked up only in the directory
+# containing this script (PATH is not searched).
+SCRIPT_DIR=$(dirname "$0")
+IOCTL_HELPER="${SCRIPT_DIR}/tlob_helper"
+UPROBE_TARGET="${SCRIPT_DIR}/tlob_uprobe_target"
+
+check_root()     { [ "$(id -u)" = "0" ] || { echo "# Need root" >&2; exit $ksft_skip; }; }
+check_tracefs()  { [ -d "${TRACEFS}" ]   || { echo "# No tracefs" >&2; exit $ksft_skip; }; }
+check_rv_dir()   { [ -d "${RV_DIR}" ]    || { echo "# No RV infra" >&2; exit $ksft_skip; }; }
+check_tlob()     { [ -d "${TLOB_DIR}" ]  || { echo "# No tlob monitor" >&2; exit $ksft_skip; }; }
+
+tlob_enable()         { echo 1 > "${TLOB_DIR}/enable"; }
+tlob_disable()        { echo 0 > "${TLOB_DIR}/enable" 2>/dev/null; }
+tlob_is_enabled()     { [ "$(cat "${TLOB_DIR}/enable" 2>/dev/null)" = "1" ]; }
+trace_event_enable()  { echo 1 > "${BUDGET_EXCEEDED_ENABLE}" 2>/dev/null; }
+trace_event_disable() { echo 0 > "${BUDGET_EXCEEDED_ENABLE}" 2>/dev/null; }
+trace_on()            { echo 1 > "${TRACING_ON}" 2>/dev/null; }
+trace_clear()         { echo > "${TRACE_FILE}"; }
+trace_grep()          { grep -q "$1" "${TRACE_FILE}" 2>/dev/null; }
+
+cleanup() {
+	tlob_disable
+	trace_event_disable
+	trace_clear
+}
+
+# ---------------------------------------------------------------------------
+# Test 1: enable / disable
+# ---------------------------------------------------------------------------
+run_test_enable_disable() {
+	next_test; cleanup
+	tlob_enable
+	if ! tlob_is_enabled; then
+		tap_fail "enable_disable" "not enabled after echo 1"; cleanup; return
+	fi
+	tlob_disable
+	if tlob_is_enabled; then
+		tap_fail "enable_disable" "still enabled after echo 0"; cleanup; return
+	fi
+	tap_pass "enable_disable"; cleanup
+}
+
+# ---------------------------------------------------------------------------
+# Test 2: tracefs files present
+# ---------------------------------------------------------------------------
+run_test_tracefs_files() {
+	next_test; cleanup
+	missing=""
+	for f in enable desc monitor; do
+		[ ! -e "${TLOB_DIR}/${f}" ] && missing="${missing} ${f}"
+	done
+	[ -n "${missing}" ] \
+		&& tap_fail "tracefs_files" "missing:${missing}" \
+		|| tap_pass "tracefs_files"
+	cleanup
+}
+
+# ---------------------------------------------------------------------------
+# Helper: resolve file offset of a function inside a binary.
+#
+# Usage: resolve_offset <binary> <vaddr_hex>
+# Prints the hex file offset, or empty string on failure.
+# ---------------------------------------------------------------------------
+resolve_offset() {
+	bin=$1; vaddr=$2
+	# The address is loop-invariant: convert it once; print nothing if invalid.
+	v=$(printf "%d" "${vaddr}" 2>/dev/null) || return 0
+	# The redirection below reads the maps of the shell running this script.
+	# Each line: start-end perms offset dev inode [path], i.e. offset is
+	# field 3 and path is field 6.  Letting read split the fields (rather
+	# than 'set -- $line') also avoids glob expansion of names like [heap].
+	while read -r range _perms off _dev _inode path; do
+		# Only consider the mapping for our binary
+		[ -n "$path" ] && [ "$path" = "$bin" ] || continue
+		# Split range into start and end
+		start=${range%%-*}
+		end=${range#*-}
+		# Convert hex to decimal for comparison (use printf)
+		s=$(printf "%d" "0x${start}" 2>/dev/null) || continue
+		e=$(printf "%d" "0x${end}"   2>/dev/null) || continue
+		o=$(printf "%d" "0x${off}"   2>/dev/null) || continue
+		if [ "$v" -ge "$s" ] && [ "$v" -lt "$e" ]; then
+			# File offset = offset of the mapping + distance into it
+			printf "0x%x\n" $(( (v - s) + o ))
+			return
+		fi
+	done < /proc/self/maps
+}
+
+# ---------------------------------------------------------------------------
+# Test 3: uprobe binding - no false positive
+#
+# Enable the monitor and the budget_exceeded event without writing any
+# binding, idle for 0.5 s, and check that no event appears in the trace.
+# ---------------------------------------------------------------------------
+run_test_uprobe_no_false_positive() {
+	next_test; cleanup
+	if [ ! -e "${TLOB_MONITOR}" ]; then
+		tap_skip "uprobe_no_false_positive" "monitor file not available"
+		cleanup; return
+	fi
+	# Look up the sleep utility, which provides the idle period further
+	# down; the test is skipped when it is not found.  The resolved path
+	# (sleep_bin) is only used for this availability check - no uprobe is
+	# attached to the sleep binary.
+	sleep_bin=$(command -v sleep 2>/dev/null)
+	if [ -z "$sleep_bin" ]; then
+		tap_skip "uprobe_no_false_positive" "sleep not found"; cleanup; return
+	fi
+	pid=$$
+	# NOTE(review): pid (above) and exe (below) are assigned but never
+	# read again in this function; they look like leftovers from an
+	# earlier design that bound the shell itself with a generous budget.
+	#
+	# No uprobe offset is resolved and nothing is written to the monitor
+	# file here.
+	#
+	# The only visible effect of the readlink check below is that the
+	# test is skipped when /proc/self/exe cannot be read.
+	exe=$(readlink /proc/self/exe 2>/dev/null)
+	if [ -z "$exe" ]; then
+		tap_skip "uprobe_no_false_positive" "cannot read /proc/self/exe"
+		cleanup; return
+	fi
+	trace_event_enable
+	trace_on
+	tlob_enable
+	trace_clear
+	# Idle with the monitor enabled and no binding - expect no events
+	sleep 0.5
+	trace_grep "budget_exceeded" \
+		&& tap_fail "uprobe_no_false_positive" \
+			"spurious budget_exceeded without any binding" \
+		|| tap_pass "uprobe_no_false_positive"
+	cleanup
+}
+
+# ---------------------------------------------------------------------------
+# Helper: get_uprobe_offset <binary> <symbol>
+#
+# Use tlob_helper sym_offset to get the ELF file offset of <symbol>
+# in <binary>.  Prints the hex offset (e.g. "0x11d0") or empty string on
+# failure.
+# ---------------------------------------------------------------------------
+get_uprobe_offset() {
+	bin=$1; sym=$2
+	if [ ! -x "${IOCTL_HELPER}" ]; then
+		return
+	fi
+	"${IOCTL_HELPER}" sym_offset "${bin}" "${sym}" 2>/dev/null
+}
+
+# ---------------------------------------------------------------------------
+# Test 4: uprobe binding - violation detected
+#
+# Start tlob_uprobe_target (a busy-spin binary with a well-known symbol),
+# attach a uprobe on tlob_busy_work with a 10 us threshold, and verify
+# that a budget_exceeded event appears.
+# ---------------------------------------------------------------------------
+run_test_uprobe_violation() {
+	next_test; cleanup
+	if [ ! -e "${TLOB_MONITOR}" ]; then
+		tap_skip "uprobe_violation" "monitor file not available"
+		cleanup; return
+	fi
+	if [ ! -x "${UPROBE_TARGET}" ]; then
+		tap_skip "uprobe_violation" \
+			"tlob_uprobe_target not found or not executable"
+		cleanup; return
+	fi
+
+	# Get the file offsets of the start and stop probe symbols
+	busy_offset=$(get_uprobe_offset "${UPROBE_TARGET}" "tlob_busy_work")
+	if [ -z "${busy_offset}" ]; then
+		tap_skip "uprobe_violation" \
+			"cannot resolve tlob_busy_work offset in ${UPROBE_TARGET}"
+		cleanup; return
+	fi
+	stop_offset=$(get_uprobe_offset "${UPROBE_TARGET}" "tlob_busy_work_done")
+	if [ -z "${stop_offset}" ]; then
+		tap_skip "uprobe_violation" \
+			"cannot resolve tlob_busy_work_done offset in ${UPROBE_TARGET}"
+		cleanup; return
+	fi
+
+	# Start the busy-spin target (run for 30 s so the test can observe it)
+	"${UPROBE_TARGET}" 30000 &
+	busy_pid=$!
+	sleep 0.05
+
+	trace_event_enable
+	trace_on
+	tlob_enable
+	trace_clear
+
+	# Bind the target: 10 us budget; start=tlob_busy_work, stop=tlob_busy_work_done
+	binding="10:${busy_offset}:${stop_offset}:${UPROBE_TARGET}"
+	if ! echo "${binding}" > "${TLOB_MONITOR}" 2>/dev/null; then
+		kill "${busy_pid}" 2>/dev/null; wait "${busy_pid}" 2>/dev/null
+		tap_skip "uprobe_violation" \
+			"uprobe binding rejected (CONFIG_UPROBES=y needed)"
+		cleanup; return
+	fi
+
+	# Wait up to 2 s for a budget_exceeded event
+	found=0; i=0
+	while [ "$i" -lt 20 ]; do
+		sleep 0.1
+		trace_grep "budget_exceeded" && { found=1; break; }
+		i=$((i+1))
+	done
+
+	echo "-${busy_offset}:${UPROBE_TARGET}" > "${TLOB_MONITOR}" 2>/dev/null
+	kill "${busy_pid}" 2>/dev/null; wait "${busy_pid}" 2>/dev/null
+
+	if [ "${found}" != "1" ]; then
+		tap_fail "uprobe_violation" "no budget_exceeded within 2 s"
+		cleanup; return
+	fi
+
+	# Validate the event fields: threshold must match, on_cpu must be non-zero
+	# (CPU-bound violation), and state must be on_cpu.
+	ev=$(grep "budget_exceeded" "${TRACE_FILE}" | head -n 1)
+	if ! echo "${ev}" | grep -q "threshold=10 "; then
+		tap_fail "uprobe_violation" "threshold field mismatch: ${ev}"
+		cleanup; return
+	fi
+	on_cpu=$(echo "${ev}" | grep -o "on_cpu=[0-9]*" | cut -d= -f2)
+	if [ "${on_cpu:-0}" -eq 0 ]; then
+		tap_fail "uprobe_violation" "on_cpu=0 for a CPU-bound spin: ${ev}"
+		cleanup; return
+	fi
+	if ! echo "${ev}" | grep -q "state=on_cpu"; then
+		tap_fail "uprobe_violation" "state is not on_cpu: ${ev}"
+		cleanup; return
+	fi
+	tap_pass "uprobe_violation"
+	cleanup
+}
+
+# ---------------------------------------------------------------------------
+# Test 5: uprobe binding - remove binding stops monitoring
+#
+# Bind tlob_busy_work in tlob_uprobe_target, then immediately remove the
+# binding.  Verify that the monitor file no longer lists that offset.
+# ---------------------------------------------------------------------------
+run_test_uprobe_unbind() {
+	next_test; cleanup
+	if [ ! -e "${TLOB_MONITOR}" ]; then
+		tap_skip "uprobe_unbind" "monitor file not available"
+		cleanup; return
+	fi
+	if [ ! -x "${UPROBE_TARGET}" ]; then
+		tap_skip "uprobe_unbind" \
+			"tlob_uprobe_target not found or not executable"
+		cleanup; return
+	fi
+
+	busy_offset=$(get_uprobe_offset "${UPROBE_TARGET}" "tlob_busy_work")
+	stop_offset=$(get_uprobe_offset "${UPROBE_TARGET}" "tlob_busy_work_done")
+	if [ -z "${busy_offset}" ] || [ -z "${stop_offset}" ]; then
+		tap_skip "uprobe_unbind" \
+			"cannot resolve tlob_busy_work/tlob_busy_work_done offset"
+		cleanup; return
+	fi
+
+	"${UPROBE_TARGET}" 30000 &
+	busy_pid=$!
+	sleep 0.05
+
+	tlob_enable
+	# 5 s budget - should not fire during this quick test
+	binding="5000000:${busy_offset}:${stop_offset}:${UPROBE_TARGET}"
+	if ! echo "${binding}" > "${TLOB_MONITOR}" 2>/dev/null; then
+		kill "${busy_pid}" 2>/dev/null; wait "${busy_pid}" 2>/dev/null
+		tap_skip "uprobe_unbind" \
+			"uprobe binding rejected (CONFIG_UPROBES=y needed)"
+		cleanup; return
+	fi
+
+	# Remove the binding
+	echo "-${busy_offset}:${UPROBE_TARGET}" > "${TLOB_MONITOR}" 2>/dev/null
+
+	# The monitor file should no longer list the binding for this offset
+	if grep -q "^[0-9]*:0x${busy_offset#0x}:" "${TLOB_MONITOR}" 2>/dev/null; then
+		kill "${busy_pid}" 2>/dev/null; wait "${busy_pid}" 2>/dev/null
+		tap_fail "uprobe_unbind" "pid still listed after removal"
+		cleanup; return
+	fi
+
+	kill "${busy_pid}" 2>/dev/null; wait "${busy_pid}" 2>/dev/null
+	tap_pass "uprobe_unbind"
+	cleanup
+}
+
+# ---------------------------------------------------------------------------
+# Test 6: uprobe - duplicate offset_start rejected
+#
+# Registering a second binding with the same offset_start in the same binary
+# must be rejected with an error, since two entry uprobes at the same address
+# would cause double tlob_start_task() calls and undefined behaviour.
+# ---------------------------------------------------------------------------
+run_test_uprobe_duplicate_offset() {
+	next_test; cleanup
+	if [ ! -e "${TLOB_MONITOR}" ]; then
+		tap_skip "uprobe_duplicate_offset" "monitor file not available"
+		cleanup; return
+	fi
+	if [ ! -x "${UPROBE_TARGET}" ]; then
+		tap_skip "uprobe_duplicate_offset" \
+			"tlob_uprobe_target not found or not executable"
+		cleanup; return
+	fi
+
+	busy_offset=$(get_uprobe_offset "${UPROBE_TARGET}" "tlob_busy_work")
+	stop_offset=$(get_uprobe_offset "${UPROBE_TARGET}" "tlob_busy_work_done")
+	if [ -z "${busy_offset}" ] || [ -z "${stop_offset}" ]; then
+		tap_skip "uprobe_duplicate_offset" \
+			"cannot resolve tlob_busy_work/tlob_busy_work_done offset"
+		cleanup; return
+	fi
+
+	tlob_enable
+
+	# First binding: should succeed
+	if ! echo "5000000:${busy_offset}:${stop_offset}:${UPROBE_TARGET}" \
+	        > "${TLOB_MONITOR}" 2>/dev/null; then
+		tap_skip "uprobe_duplicate_offset" \
+			"uprobe binding rejected (CONFIG_UPROBES=y needed)"
+		cleanup; return
+	fi
+
+	# Second binding with same offset_start: must be rejected
+	if echo "9999:${busy_offset}:${stop_offset}:${UPROBE_TARGET}" \
+	        > "${TLOB_MONITOR}" 2>/dev/null; then
+		echo "-${busy_offset}:${UPROBE_TARGET}" > "${TLOB_MONITOR}" 2>/dev/null
+		tap_fail "uprobe_duplicate_offset" \
+			"duplicate offset_start was accepted (expected error)"
+		cleanup; return
+	fi
+
+	echo "-${busy_offset}:${UPROBE_TARGET}" > "${TLOB_MONITOR}" 2>/dev/null
+	tap_pass "uprobe_duplicate_offset"
+	cleanup
+}
+
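+# ---------------------------------------------------------------------------
+# Test 7: uprobe binding - independent thresholds per region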
+# Region A: tlob_busy_work with a 5 s budget - should NOT fire during the test.
+# Region B: tlob_busy_work_done with a 10 us budget - SHOULD fire quickly since
+#           tlob_uprobe_target calls tlob_busy_work_done after a busy spin.
+#
+# Verifies that independent bindings for different offsets in the same binary
+# are tracked separately and that only the tight-budget binding triggers a
+# budget_exceeded event.
+# ---------------------------------------------------------------------------
+run_test_uprobe_independent_thresholds() {
+	next_test; cleanup
+	if [ ! -e "${TLOB_MONITOR}" ]; then
+		tap_skip "uprobe_independent_thresholds" \
+			"monitor file not available"; cleanup; return
+	fi
+	if [ ! -x "${UPROBE_TARGET}" ]; then
+		tap_skip "uprobe_independent_thresholds" \
+			"tlob_uprobe_target not found or not executable"
+		cleanup; return
+	fi
+
+	busy_offset=$(get_uprobe_offset "${UPROBE_TARGET}" "tlob_busy_work")
+	busy_stop_offset=$(get_uprobe_offset "${UPROBE_TARGET}" "tlob_busy_work_done")
+	if [ -z "${busy_offset}" ] || [ -z "${busy_stop_offset}" ]; then
+		tap_skip "uprobe_independent_thresholds" \
+			"cannot resolve tlob_busy_work/tlob_busy_work_done offset"
+		cleanup; return
+	fi
+
+	"${UPROBE_TARGET}" 30000 &
+	busy_pid=$!
+	sleep 0.05
+
+	trace_event_enable
+	trace_on
+	tlob_enable
+	trace_clear
+
+	# Region A: generous 5 s budget on tlob_busy_work entry (should not fire)
+	if ! echo "5000000:${busy_offset}:${busy_stop_offset}:${UPROBE_TARGET}" \
+	        > "${TLOB_MONITOR}" 2>/dev/null; then
+		kill "${busy_pid}" 2>/dev/null; wait "${busy_pid}" 2>/dev/null
+		tap_skip "uprobe_independent_thresholds" \
+			"uprobe binding rejected (CONFIG_UPROBES=y needed)"
+		cleanup; return
+	fi
+	# Region B: tight 10 us budget on tlob_busy_work_done (fires quickly)
+	echo "10:${busy_stop_offset}:${busy_stop_offset}:${UPROBE_TARGET}" \
+		> "${TLOB_MONITOR}" 2>/dev/null
+
+	found=0; i=0
+	while [ "$i" -lt 20 ]; do
+		sleep 0.1
+		trace_grep "budget_exceeded" && { found=1; break; }
+		i=$((i+1))
+	done
+
+	echo "-${busy_offset}:${UPROBE_TARGET}" > "${TLOB_MONITOR}" 2>/dev/null
+	echo "-${busy_stop_offset}:${UPROBE_TARGET}" > "${TLOB_MONITOR}" 2>/dev/null
+	kill "${busy_pid}" 2>/dev/null; wait "${busy_pid}" 2>/dev/null
+
+	if [ "${found}" != "1" ]; then
+		tap_fail "uprobe_independent_thresholds" \
+			"budget_exceeded not raised for tight-budget region within 2 s"
+		cleanup; return
+	fi
+
+	# The violation must carry threshold=10 (Region B's budget).
+	ev=$(grep "budget_exceeded" "${TRACE_FILE}" | head -n 1)
+	if ! echo "${ev}" | grep -q "threshold=10 "; then
+		tap_fail "uprobe_independent_thresholds" \
+			"violation threshold is not Region B's 10 us: ${ev}"
+		cleanup; return
+	fi
+	tap_pass "uprobe_independent_thresholds"
+	cleanup
+}
+
+# ---------------------------------------------------------------------------
+# ioctl tests via tlob_helper
+#
+# Each test invokes the helper with a sub-test name.
+# Exit code: 0=pass, 1=fail, 2=skip.
+# ---------------------------------------------------------------------------
+run_ioctl_test() {
+	testname=$1
+	next_test
+
+	if [ ! -x "${IOCTL_HELPER}" ]; then
+		tap_skip "ioctl_${testname}" \
+			"tlob_helper not found or not executable"
+		return
+	fi
+	if [ ! -c "${RV_DEV}" ]; then
+		tap_skip "ioctl_${testname}" \
+			"${RV_DEV} not present (CONFIG_RV_CHARDEV=y needed)"
+		return
+	fi
+
+	tlob_enable
+	"${IOCTL_HELPER}" "${testname}"
+	rc=$?
+	tlob_disable
+
+	case "${rc}" in
+	0) tap_pass "ioctl_${testname}" ;;
+	2) tap_skip "ioctl_${testname}" "helper returned skip" ;;
+	*) tap_fail "ioctl_${testname}" "helper exited with code ${rc}" ;;
+	esac
+}
+
+# run_ioctl_test_not_enabled - like run_ioctl_test but deliberately does NOT
+# enable the tlob monitor before invoking the helper.  Used to verify that
+# ioctls issued against a disabled monitor return ENODEV rather than crashing
+# the kernel with a NULL pointer dereference.
+run_ioctl_test_not_enabled()
+{
+	next_test
+
+	if [ ! -x "${IOCTL_HELPER}" ]; then
+		tap_skip "ioctl_not_enabled" \
+			"tlob_helper not found or not executable"
+		return
+	fi
+	if [ ! -c "${RV_DEV}" ]; then
+		tap_skip "ioctl_not_enabled" \
+			"${RV_DEV} not present (CONFIG_RV_CHARDEV=y needed)"
+		return
+	fi
+
+	# Monitor intentionally left disabled.
+	tlob_disable
+	"${IOCTL_HELPER}" not_enabled
+	rc=$?
+
+	case "${rc}" in
+	0) tap_pass "ioctl_not_enabled" ;;
+	2) tap_skip "ioctl_not_enabled" "helper returned skip" ;;
+	*) tap_fail "ioctl_not_enabled" "helper exited with code ${rc}" ;;
+	esac
+}
+
+# ---------------------------------------------------------------------------
+# Main
+# ---------------------------------------------------------------------------
+check_root; check_tracefs; check_rv_dir; check_tlob
+tap_header; tap_plan 20
+
+# tracefs interface tests
+run_test_enable_disable
+run_test_tracefs_files
+
+# uprobe external monitoring tests
+run_test_uprobe_no_false_positive
+run_test_uprobe_violation
+run_test_uprobe_unbind
+run_test_uprobe_duplicate_offset
+run_test_uprobe_independent_thresholds
+
+# /dev/rv ioctl self-instrumentation tests
+run_ioctl_test_not_enabled
+run_ioctl_test within_budget
+run_ioctl_test over_budget_cpu
+run_ioctl_test over_budget_sleep
+run_ioctl_test double_start
+run_ioctl_test stop_no_start
+run_ioctl_test multi_thread
+run_ioctl_test self_watch
+run_ioctl_test invalid_flags
+run_ioctl_test notify_fd_bad
+run_ioctl_test mmap_basic
+run_ioctl_test mmap_errors
+run_ioctl_test mmap_consume
+
+echo "# Passed: ${t_pass} Failed: ${t_fail} Skipped: ${t_skip}"
+[ "${t_fail}" -gt 0 ] && exit 1 || exit 0
diff --git a/tools/testing/selftests/rv/tlob_helper.c b/tools/testing/selftests/rv/tlob_helper.c
new file mode 100644
index 000000000..cd76b56d1
--- /dev/null
+++ b/tools/testing/selftests/rv/tlob_helper.c
@@ -0,0 +1,994 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * tlob_helper.c - test helper and ELF utility for tlob selftests
+ *
+ * Called by test_tlob.sh to exercise the /dev/rv ioctl interface and to
+ * resolve ELF symbol offsets for uprobe bindings.  One subcommand per
+ * invocation so the shell script can report each as an independent TAP
+ * test case.
+ *
+ * Usage: tlob_helper <subcommand> [args...]
+ *
+ * Synchronous TRACE_START / TRACE_STOP tests:
+ *   not_enabled        - TRACE_START without tlob enabled -> ENODEV (no kernel crash)
+ *   within_budget      - start(50000 us), sleep 10 ms, stop -> expect 0
+ *   over_budget_cpu    - start(5000 us), busyspin 100 ms, stop -> EOVERFLOW
+ *   over_budget_sleep  - start(3000 us), sleep 50 ms, stop -> EOVERFLOW
+ *
+ * Error-handling tests:
+ *   double_start       - two starts without stop -> EEXIST on second
+ *   stop_no_start      - stop without start -> ESRCH
+ *
+ * Per-thread isolation test:
+ *   multi_thread       - two threads share one fd; one within budget, one over
+ *
+ * Asynchronous notification test (notify_fd + read()):
+ *   self_watch         - one worker exceeds budget; monitor fd receives one ntf via read()
+ *
+ * Input-validation tests (TRACE_START error paths):
+ *   invalid_flags      - TRACE_START with flags != 0 -> EINVAL
+ *   notify_fd_bad      - TRACE_START with notify_fd = stdout (non-rv fd) -> EINVAL
+ *
+ * mmap ring buffer tests (Scenario D):
+ *   mmap_basic         - mmap succeeds; verify tlob_mmap_page fields
+ *                        (version, capacity, data_offset, record_size)
+ *   mmap_errors        - MAP_PRIVATE, wrong size, and non-zero pgoff all
+ *                        return EINVAL
+ *   mmap_consume       - trigger a real violation via self-notification and
+ *                        consume the event through the mmap'd ring
+ *
+ * ELF utility (does not require /dev/rv):
+ *   sym_offset <binary> <symbol>
+ *                      - print the ELF file offset of <symbol> in <binary>
+ *                        (used by the shell script to build uprobe bindings)
+ *
+ * Exit code: 0 = pass, 1 = fail, 2 = skip (device not available).
+ */
+#define _GNU_SOURCE
+#include <elf.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <pthread.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <linux/rv.h>
+
+/* Default ring capacity allocated at open(); matches TLOB_RING_DEFAULT_CAP. */
+#define TLOB_RING_DEFAULT_CAP	64U
+
+static int rv_fd = -1;
+
+static int open_rv(void)
+{
+	rv_fd = open("/dev/rv", O_RDWR);
+	if (rv_fd < 0) {
+		fprintf(stderr, "open /dev/rv: %s\n", strerror(errno));
+		return -1;
+	}
+	return 0;
+}
+
+static void busy_spin_us(unsigned long us)
+{
+	struct timespec start, now;
+	unsigned long elapsed;
+
+	clock_gettime(CLOCK_MONOTONIC, &start);
+	do {
+		clock_gettime(CLOCK_MONOTONIC, &now);
+		elapsed = (unsigned long)(now.tv_sec - start.tv_sec)
+			  * 1000000000UL
+			+ (unsigned long)(now.tv_nsec - start.tv_nsec);
+	} while (elapsed < us * 1000UL);
+}
+
+static int do_start(uint64_t threshold_us)
+{
+	struct tlob_start_args args = {
+		.threshold_us = threshold_us,
+		.notify_fd    = -1,
+	};
+
+	return ioctl(rv_fd, TLOB_IOCTL_TRACE_START, &args);
+}
+
+static int do_stop(void)
+{
+	return ioctl(rv_fd, TLOB_IOCTL_TRACE_STOP, NULL);
+}
+
+/* -----------------------------------------------------------------------
+ * Synchronous TRACE_START / TRACE_STOP tests
+ * -----------------------------------------------------------------------
+ */
+
+/*
+ * test_not_enabled - TRACE_START must return ENODEV when the tlob monitor
+ * has not been enabled (tlob_state_cache is NULL).
+ *
+ * The shell wrapper deliberately does NOT call tlob_enable before invoking
+ * this subcommand, so the ioctl is expected to fail with ENODEV rather than
+ * crashing the kernel with a NULL pointer dereference in kmem_cache_alloc.
+ */
+static int test_not_enabled(void)
+{
+	int ret;
+
+	ret = do_start(1000);
+	if (ret == 0) {
+		fprintf(stderr, "TRACE_START: expected ENODEV, got success\n");
+		do_stop();
+		return 1;
+	}
+	if (errno != ENODEV) {
+		fprintf(stderr, "TRACE_START: expected ENODEV, got %s\n",
+			strerror(errno));
+		return 1;
+	}
+	return 0;
+}
+
+static int test_within_budget(void)
+{
+	int ret;
+
+	if (do_start(50000) < 0) {
+		fprintf(stderr, "TRACE_START: %s\n", strerror(errno));
+		return 1;
+	}
+	usleep(10000); /* 10 ms < 50 ms budget */
+	ret = do_stop();
+	if (ret != 0) {
+		fprintf(stderr, "TRACE_STOP: expected 0, got %d errno=%s\n",
+			ret, strerror(errno));
+		return 1;
+	}
+	return 0;
+}
+
+static int test_over_budget_cpu(void)
+{
+	int ret;
+
+	if (do_start(5000) < 0) {
+		fprintf(stderr, "TRACE_START: %s\n", strerror(errno));
+		return 1;
+	}
+	busy_spin_us(100000); /* 100 ms >> 5 ms budget */
+	ret = do_stop();
+	if (ret == 0) {
+		fprintf(stderr, "TRACE_STOP: expected EOVERFLOW, got 0\n");
+		return 1;
+	}
+	if (errno != EOVERFLOW) {
+		fprintf(stderr, "TRACE_STOP: expected EOVERFLOW, got %s\n",
+			strerror(errno));
+		return 1;
+	}
+	return 0;
+}
+
+static int test_over_budget_sleep(void)
+{
+	int ret;
+
+	if (do_start(3000) < 0) {
+		fprintf(stderr, "TRACE_START: %s\n", strerror(errno));
+		return 1;
+	}
+	usleep(50000); /* 50 ms >> 3 ms budget, off-CPU time counts */
+	ret = do_stop();
+	if (ret == 0) {
+		fprintf(stderr, "TRACE_STOP: expected EOVERFLOW, got 0\n");
+		return 1;
+	}
+	if (errno != EOVERFLOW) {
+		fprintf(stderr, "TRACE_STOP: expected EOVERFLOW, got %s\n",
+			strerror(errno));
+		return 1;
+	}
+	return 0;
+}
+
+/* -----------------------------------------------------------------------
+ * Error-handling tests
+ * -----------------------------------------------------------------------
+ */
+
+static int test_double_start(void)
+{
+	int ret;
+
+	if (do_start(10000000) < 0) {
+		fprintf(stderr, "first TRACE_START: %s\n", strerror(errno));
+		return 1;
+	}
+	ret = do_start(10000000);
+	if (ret == 0) {
+		fprintf(stderr, "second TRACE_START: expected EEXIST, got 0\n");
+		do_stop();
+		return 1;
+	}
+	if (errno != EEXIST) {
+		fprintf(stderr, "second TRACE_START: expected EEXIST, got %s\n",
+			strerror(errno));
+		do_stop();
+		return 1;
+	}
+	do_stop(); /* clean up */
+	return 0;
+}
+
+static int test_stop_no_start(void)
+{
+	int ret;
+
+	/* Ensure clean state: ignore error from a stale entry */
+	do_stop();
+
+	ret = do_stop();
+	if (ret == 0) {
+		fprintf(stderr, "TRACE_STOP: expected ESRCH, got 0\n");
+		return 1;
+	}
+	if (errno != ESRCH) {
+		fprintf(stderr, "TRACE_STOP: expected ESRCH, got %s\n",
+			strerror(errno));
+		return 1;
+	}
+	return 0;
+}
+
+/* -----------------------------------------------------------------------
+ * Per-thread isolation test
+ *
+ * Two threads share a single /dev/rv fd.  The monitor uses task_struct *
+ * as the key, so each thread gets an independent slot regardless of the
+ * shared fd.
+ * -----------------------------------------------------------------------
+ */
+
+struct mt_thread_args {
+	uint64_t      threshold_us;
+	unsigned long workload_us;
+	int           busy;
+	int           expect_eoverflow;
+	int           result;
+};
+
+static void *mt_thread_fn(void *arg)
+{
+	struct mt_thread_args *t = arg;
+	int stop_rc, ok;
+
+	/* Pessimistic default: only the success path at the end clears it. */
+	t->result = 1;
+
+	if (do_start(t->threshold_us) < 0) {
+		fprintf(stderr, "thread TRACE_START: %s\n", strerror(errno));
+		return NULL;
+	}
+
+	/* Spend the workload time either spinning on-CPU or asleep. */
+	if (t->busy)
+		busy_spin_us(t->workload_us);
+	else
+		usleep(t->workload_us);
+
+	stop_rc = do_stop();
+	if (t->expect_eoverflow) {
+		ok = stop_rc != 0 && errno == EOVERFLOW;
+		if (!ok)
+			fprintf(stderr, "thread: expected EOVERFLOW, got ret=%d errno=%s\n",
+				stop_rc, strerror(errno));
+	} else {
+		ok = stop_rc == 0;
+		if (!ok)
+			fprintf(stderr, "thread: expected 0, got ret=%d errno=%s\n",
+				stop_rc, strerror(errno));
+	}
+	if (ok)
+		t->result = 0;
+	return NULL;
+}
+
+static int test_multi_thread(void)
+{
+	struct mt_thread_args args[2] = {
+		/* A: 5 ms sleep against a 20 ms budget -> within budget */
+		{ .threshold_us = 20000, .workload_us = 5000, .result = 1 },
+		/* B: 30 ms spin against a 3 ms budget -> over budget */
+		{ .threshold_us = 3000, .workload_us = 30000, .busy = 1,
+		  .expect_eoverflow = 1, .result = 1 },
+	};
+	pthread_t tid[2];
+	int err[2], i, rc = 0;
+
+	for (i = 0; i < 2; i++)	/* launch both before joining so they overlap */
+		err[i] = pthread_create(&tid[i], NULL, mt_thread_fn, &args[i]);
+	for (i = 0; i < 2; i++) {
+		if (!err[i])	/* joining a never-created thread is undefined */
+			pthread_join(tid[i], NULL);
+		else
+			fprintf(stderr, "pthread_create: %s\n", strerror(err[i]));
+		rc |= args[i].result;	/* preset to 1: stays set if never run */
+	}
+	return rc ? 1 : 0;
+}
+
+/* -----------------------------------------------------------------------
+ * Asynchronous notification test (notify_fd + read())
+ *
+ * A dedicated monitor_fd is opened by the main thread.  Two worker threads
+ * each open their own work_fd and call TLOB_IOCTL_TRACE_START with
+ * notify_fd = monitor_fd, nominating it as the violation target.  Worker A
+ * stays within budget; worker B exceeds it.  The main thread reads from
+ * monitor_fd and expects exactly one tlob_event record.
+ * -----------------------------------------------------------------------
+ */
+
+struct sw_worker_args {
+	int           monitor_fd;	/* passed to TRACE_START as notify_fd */
+	uint64_t      threshold_us;	/* passed to TRACE_START as threshold_us */
+	unsigned long workload_us;	/* spin or sleep duration, in microseconds */
+	int           busy;		/* non-zero: busy_spin_us(); zero: usleep() */
+	int           result;		/* set by the worker: 1 if open/TRACE_START failed, else 0 */
+};
+
+static void *sw_worker_fn(void *arg)
+{
+	struct sw_worker_args *w = arg;
+	struct tlob_start_args start = {
+		.threshold_us = w->threshold_us,
+		.notify_fd    = w->monitor_fd,
+	};
+	int fd = open("/dev/rv", O_RDWR);
+
+	w->result = 1;	/* cleared once the monitored section has run */
+	if (fd < 0) {
+		fprintf(stderr, "worker open /dev/rv: %s\n", strerror(errno));
+		return NULL;
+	}
+
+	/*
+	 * Start on the worker's own fd; notify_fd nominates the monitor fd.
+	 */
+	if (ioctl(fd, TLOB_IOCTL_TRACE_START, &start) < 0) {
+		fprintf(stderr, "TRACE_START (notify): %s\n", strerror(errno));
+		goto out_close;
+	}
+
+	if (w->busy)
+		busy_spin_us(w->workload_us);
+	else
+		usleep(w->workload_us);
+
+	/* The status of the stop ioctl is not inspected here. */
+	ioctl(fd, TLOB_IOCTL_TRACE_STOP, NULL);
+	w->result = 0;
+out_close:
+	close(fd);
+	return NULL;
+}
+
+static int test_self_watch(void)
+{
+	int monitor_fd;
+	pthread_t ta, tb;
+	struct sw_worker_args a = {
+		.threshold_us = 50000,  /* 50 ms */
+		.workload_us  = 5000,   /* 5 ms sleep -> no violation */
+		.busy         = 0,
+	};
+	struct sw_worker_args b = {
+		.threshold_us = 3000,   /* 3 ms */
+		.workload_us  = 30000,  /* 30 ms spin -> violation */
+		.busy         = 1,
+	};
+	struct tlob_event ntfs[8];
+	int violations = 0, err_a, err_b;
+	ssize_t n;
+
+	/* O_NONBLOCK so that read() on an empty buffer fails instead of blocking. */
+	monitor_fd = open("/dev/rv", O_RDWR | O_NONBLOCK);
+	if (monitor_fd < 0) {
+		fprintf(stderr, "open /dev/rv (monitor_fd): %s\n", strerror(errno));
+		return 1;
+	}
+	a.monitor_fd = b.monitor_fd = monitor_fd;
+
+	err_a = pthread_create(&ta, NULL, sw_worker_fn, &a);
+	err_b = pthread_create(&tb, NULL, sw_worker_fn, &b);
+	if (!err_a)	/* joining a thread that was never created is undefined */
+		pthread_join(ta, NULL);
+	if (!err_b)
+		pthread_join(tb, NULL);
+	if (err_a || err_b)
+		fprintf(stderr, "self_watch: pthread_create failed\n");
+
+	if (err_a || err_b || a.result || b.result) {
+		close(monitor_fd);
+		return 1;
+	}
+
+	/*
+	 * Drain all available tlob_event records.  With O_NONBLOCK the final
+	 * read() returns -1 with errno set to EAGAIN when the buffer is empty.
+	 */
+	while ((n = read(monitor_fd, ntfs, sizeof(ntfs))) > 0)
+		violations += (int)(n / sizeof(struct tlob_event));
+
+	close(monitor_fd);
+
+	if (violations != 1) {
+		fprintf(stderr, "self_watch: expected 1 violation, got %d\n",
+			violations);
+		return 1;
+	}
+	return 0;
+}
+
+/* -----------------------------------------------------------------------
+ * Input-validation tests (TRACE_START error paths)
+ * -----------------------------------------------------------------------
+ */
+
+/*
+ * test_invalid_flags - TRACE_START with flags != 0 must return EINVAL.
+ *
+ * The flags field is reserved for future extensions and must be zero.
+ * Callers that set it to a non-zero value are rejected early so that a
+ * future kernel can assign meaning to those bits without silently
+ * ignoring them.
+ */
+static int test_invalid_flags(void)
+{
+	struct tlob_start_args bad = {
+		.threshold_us = 1000,
+		.notify_fd    = -1,
+		.flags        = 1,   /* non-zero: must be rejected */
+	};
+
+	if (ioctl(rv_fd, TLOB_IOCTL_TRACE_START, &bad) == 0) {
+		fprintf(stderr, "TRACE_START(flags=1): expected EINVAL, got success\n");
+		do_stop();	/* undo the start that was wrongly accepted */
+		return 1;
+	}
+
+	/* The call failed as required; the reason has to be EINVAL. */
+	if (errno == EINVAL)
+		return 0;
+
+	fprintf(stderr, "TRACE_START(flags=1): expected EINVAL, got %s\n",
+		strerror(errno));
+	return 1;
+}
+
+/*
+ * test_notify_fd_bad - TRACE_START with a non-/dev/rv notify_fd must return
+ * EINVAL.
+ *
+ * When notify_fd >= 0, the kernel resolves it to a struct file and checks
+ * that its private_data is non-NULL (i.e. it is a /dev/rv file descriptor).
+ * Passing stdout (fd 1) supplies a real, open fd whose private_data is NULL,
+ * so the kernel must reject it with EINVAL.
+ */
+static int test_notify_fd_bad(void)
+{
+	struct tlob_start_args req = {
+		.threshold_us = 1000,
+		.notify_fd    = STDOUT_FILENO,   /* open but not a /dev/rv fd */
+		.flags        = 0,
+	};
+	int failed = 1;
+
+	/* Only a rejection with errno == EINVAL counts as a pass. */
+	if (ioctl(rv_fd, TLOB_IOCTL_TRACE_START, &req) == 0) {
+		fprintf(stderr,
+			"TRACE_START(notify_fd=stdout): expected EINVAL, got success\n");
+		do_stop();	/* undo the start that was wrongly accepted */
+	} else if (errno != EINVAL) {
+		fprintf(stderr,
+			"TRACE_START(notify_fd=stdout): expected EINVAL, got %s\n",
+			strerror(errno));
+	} else {
+		failed = 0;
+	}
+
+	return failed;
+}
+
+/* -----------------------------------------------------------------------
+ * mmap ring buffer tests (Scenario D)
+ * -----------------------------------------------------------------------
+ */
+
+/*
+ * test_mmap_basic - mmap the ring buffer and verify the control page fields.
+ *
+ * The kernel allocates TLOB_RING_DEFAULT_CAP records at open().  A shared
+ * mmap of PAGE_SIZE + cap * record_size must succeed and the tlob_mmap_page
+ * header must contain consistent values.
+ */
+static int test_mmap_basic(void)
+{
+	long pagesize = sysconf(_SC_PAGESIZE);
+	size_t mmap_len = (size_t)pagesize +
+			  TLOB_RING_DEFAULT_CAP * sizeof(struct tlob_event);
+	struct tlob_mmap_page *page;
+	struct tlob_event *data;
+	void *map;
+	int ret = 0;
+
+	/* rv_mmap requires a page-aligned length */
+	mmap_len = (mmap_len + (size_t)(pagesize - 1)) & ~(size_t)(pagesize - 1);
+	map = mmap(NULL, mmap_len, PROT_READ | PROT_WRITE, MAP_SHARED, rv_fd, 0);
+	if (map == MAP_FAILED) {
+		fprintf(stderr, "mmap_basic: mmap: %s\n", strerror(errno));
+		return 1;
+	}
+
+	page = (struct tlob_mmap_page *)map;
+	data = (struct tlob_event *)((char *)map + page->data_offset);
+
+	if (page->version != 1) {
+		fprintf(stderr, "mmap_basic: expected version=1, got %u\n",
+			page->version);
+		ret = 1;
+		goto out;
+	}
+	if (page->capacity != TLOB_RING_DEFAULT_CAP) {
+		fprintf(stderr, "mmap_basic: expected capacity=%u, got %u\n",
+			TLOB_RING_DEFAULT_CAP, page->capacity);
+		ret = 1;
+		goto out;
+	}
+	if (page->data_offset != (uint32_t)pagesize) {
+		fprintf(stderr, "mmap_basic: expected data_offset=%ld, got %u\n",
+			pagesize, page->data_offset);
+		ret = 1;
+		goto out;
+	}
+	if (page->record_size != sizeof(struct tlob_event)) {
+		fprintf(stderr, "mmap_basic: expected record_size=%zu, got %u\n",
+			sizeof(struct tlob_event), page->record_size);
+		ret = 1;
+		goto out;
+	}
+	if (page->data_head != 0 || page->data_tail != 0) {
+		fprintf(stderr, "mmap_basic: ring not empty at open: head=%u tail=%u\n",
+			page->data_head, page->data_tail);
+		ret = 1;
+		goto out;
+	}
+	/* Touch the data area; volatile, since a plain (void) load is discarded. */
+	(void)*(volatile const char *)data;
+out:
+	munmap(map, mmap_len);
+	return ret;
+}
+
+/*
+ * test_mmap_errors - verify that rv_mmap() rejects invalid mmap parameters.
+ *
+ * Four cases are tested, each must return MAP_FAILED with errno == EINVAL:
+ *   1. size one page short of the correct ring length
+ *   2. size one page larger than the correct ring length
+ *   3. MAP_PRIVATE (only MAP_SHARED is permitted)
+ *   4. non-zero vm_pgoff (offset must be 0)
+ */
+static int test_mmap_errors(void)
+{
+	long pagesize = sysconf(_SC_PAGESIZE);
+	size_t correct_len = (size_t)pagesize +
+			     TLOB_RING_DEFAULT_CAP * sizeof(struct tlob_event);
+	/* Round up to whole pages: rv_mmap requires a page-aligned length */
+	correct_len = (correct_len + (size_t)(pagesize - 1)) & ~(size_t)(pagesize - 1);
+	void *map;
+	int ret = 0;	/* sticky: any failing case sets it, later cases still run */
+
+	/* Case 1: a whole page short (correct_len - 1 would round back up to correct_len) */
+	map = mmap(NULL, correct_len - (size_t)pagesize, PROT_READ | PROT_WRITE,
+		   MAP_SHARED, rv_fd, 0);
+	if (map != MAP_FAILED) {	/* unexpected success */
+		fprintf(stderr, "mmap_errors: short-size mmap succeeded (expected EINVAL)\n");
+		munmap(map, correct_len - (size_t)pagesize);
+		ret = 1;
+	} else if (errno != EINVAL) {	/* failed, but for the wrong reason */
+		fprintf(stderr, "mmap_errors: short-size: expected EINVAL, got %s\n",
+			strerror(errno));
+		ret = 1;
+	}
+
+	/* Case 2: size one page too large */
+	map = mmap(NULL, correct_len + (size_t)pagesize, PROT_READ | PROT_WRITE,
+		   MAP_SHARED, rv_fd, 0);
+	if (map != MAP_FAILED) {	/* unexpected success */
+		fprintf(stderr, "mmap_errors: oversized mmap succeeded (expected EINVAL)\n");
+		munmap(map, correct_len + (size_t)pagesize);
+		ret = 1;
+	} else if (errno != EINVAL) {	/* failed, but for the wrong reason */
+		fprintf(stderr, "mmap_errors: oversized: expected EINVAL, got %s\n",
+			strerror(errno));
+		ret = 1;
+	}
+
+	/* Case 3: correct length, but MAP_PRIVATE instead of MAP_SHARED */
+	map = mmap(NULL, correct_len, PROT_READ | PROT_WRITE,
+		   MAP_PRIVATE, rv_fd, 0);
+	if (map != MAP_FAILED) {	/* unexpected success */
+		fprintf(stderr, "mmap_errors: MAP_PRIVATE succeeded (expected EINVAL)\n");
+		munmap(map, correct_len);
+		ret = 1;
+	} else if (errno != EINVAL) {	/* failed, but for the wrong reason */
+		fprintf(stderr, "mmap_errors: MAP_PRIVATE: expected EINVAL, got %s\n",
+			strerror(errno));
+		ret = 1;
+	}
+
+	/* Case 4: correct length and flags, but a one-page file offset (pgoff = 1) */
+	map = mmap(NULL, correct_len, PROT_READ | PROT_WRITE,
+		   MAP_SHARED, rv_fd, (off_t)pagesize);
+	if (map != MAP_FAILED) {	/* unexpected success */
+		fprintf(stderr, "mmap_errors: non-zero pgoff mmap succeeded (expected EINVAL)\n");
+		munmap(map, correct_len);
+		ret = 1;
+	} else if (errno != EINVAL) {	/* failed, but for the wrong reason */
+		fprintf(stderr, "mmap_errors: non-zero pgoff: expected EINVAL, got %s\n",
+			strerror(errno));
+		ret = 1;
+	}
+
+	return ret;
+}
+
+/*
+ * test_mmap_consume - zero-copy consumption of a real violation event.
+ *
+ * Arms a 5 ms budget with self-notification (notify_fd = rv_fd), sleeps
+ * 50 ms (off-CPU violation), then reads the pushed event through the mmap'd
+ * ring without calling read().  Verifies:
+ *   - TRACE_STOP returns EOVERFLOW (budget was exceeded)
+ *   - data_head == 1 after the violation
+ *   - the event fields (threshold_us, tag, tid) are correct
+ *   - data_tail can be advanced to consume the record (ring empties)
+ */
+static int test_mmap_consume(void)
+{
+	long pagesize = sysconf(_SC_PAGESIZE);
+	size_t mmap_len = (size_t)pagesize +
+			  TLOB_RING_DEFAULT_CAP * sizeof(struct tlob_event);
+	/* Round up to whole pages: rv_mmap requires a page-aligned length */
+	mmap_len = (mmap_len + (size_t)(pagesize - 1)) & ~(size_t)(pagesize - 1);
+	struct tlob_start_args args = {
+		.threshold_us = 5000,		/* 5 ms */
+		.notify_fd    = rv_fd,		/* self-notification */
+		.tag          = 0xdeadbeefULL,
+		.flags        = 0,
+	};
+	struct tlob_mmap_page *page;
+	struct tlob_event *data;
+	void *map;
+	int stop_ret;
+	int ret = 0;
+
+	map = mmap(NULL, mmap_len, PROT_READ | PROT_WRITE, MAP_SHARED, rv_fd, 0);
+	if (map == MAP_FAILED) {
+		fprintf(stderr, "mmap_consume: mmap: %s\n", strerror(errno));
+		return 1;
+	}
+
+	page = (struct tlob_mmap_page *)map;
+	data = (struct tlob_event *)((char *)map + page->data_offset);
+
+	if (ioctl(rv_fd, TLOB_IOCTL_TRACE_START, &args) < 0) {
+		fprintf(stderr, "mmap_consume: TRACE_START: %s\n", strerror(errno));
+		ret = 1;
+		goto out;
+	}
+
+	usleep(50000); /* 50 ms >> 5 ms budget -> off-CPU violation */
+
+	stop_ret = ioctl(rv_fd, TLOB_IOCTL_TRACE_STOP, NULL);
+	if (stop_ret == 0) {	/* a clean stop means no violation was recorded */
+		fprintf(stderr, "mmap_consume: TRACE_STOP returned 0, expected EOVERFLOW\n");
+		ret = 1;
+		goto out;
+	}
+	if (errno != EOVERFLOW) {
+		fprintf(stderr, "mmap_consume: TRACE_STOP: expected EOVERFLOW, got %s\n",
+			strerror(errno));
+		ret = 1;
+		goto out;
+	}
+
+	/* Acquire load of data_head; pairs with smp_store_release in tlob_event_push. */
+	if (__atomic_load_n(&page->data_head, __ATOMIC_ACQUIRE) != 1) {
+		fprintf(stderr, "mmap_consume: expected data_head=1, got %u\n",
+			page->data_head);
+		ret = 1;
+		goto out;
+	}
+	if (page->data_tail != 0) {	/* nothing has been consumed yet */
+		fprintf(stderr, "mmap_consume: expected data_tail=0, got %u\n",
+			page->data_tail);
+		ret = 1;
+		goto out;
+	}
+
+	/* Verify record content: slot 0 must echo the threshold and tag given to TRACE_START */
+	if (data[0].threshold_us != 5000) {
+		fprintf(stderr, "mmap_consume: expected threshold_us=5000, got %llu\n",
+			(unsigned long long)data[0].threshold_us);
+		ret = 1;
+		goto out;
+	}
+	if (data[0].tag != 0xdeadbeefULL) {
+		fprintf(stderr, "mmap_consume: expected tag=0xdeadbeef, got %llx\n",
+			(unsigned long long)data[0].tag);
+		ret = 1;
+		goto out;
+	}
+	if (data[0].tid == 0) {	/* only non-zero is checked, not the exact tid */
+		fprintf(stderr, "mmap_consume: tid is 0\n");
+		ret = 1;
+		goto out;
+	}
+
+	/* Consume: publish data_tail = 1, then head == tail means the ring is empty */
+	__atomic_store_n(&page->data_tail, 1U, __ATOMIC_RELEASE);
+	if (__atomic_load_n(&page->data_head, __ATOMIC_ACQUIRE) !=
+	    __atomic_load_n(&page->data_tail, __ATOMIC_ACQUIRE)) {
+		fprintf(stderr, "mmap_consume: ring not empty after consume\n");
+		ret = 1;
+	}
+
+out:
+	munmap(map, mmap_len);
+	return ret;
+}
+
+/* -----------------------------------------------------------------------
+ * ELF utility: sym_offset
+ *
+ * Print the ELF file offset of a symbol in a binary.  Supports 32- and
+ * 64-bit ELF.  Walks the section headers to find .symtab (falling back to
+ * .dynsym), ignoring undefined (SHN_UNDEF) entries, then converts the
+ * symbol's virtual address to a file offset via the PT_LOAD program headers.
+ *
+ * Does not require /dev/rv; used by the shell script to build uprobe
+ * bindings of the form pid:threshold_us:offset_start:offset_stop:binary_path.
+ *
+ * Returns 0 on success (offset printed to stdout), 1 on failure.
+ * -----------------------------------------------------------------------
+ */
+static int sym_offset(const char *binary, const char *symname)
+{
+	int fd;
+	struct stat st;
+	void *map;
+	Elf64_Ehdr *ehdr;
+	Elf32_Ehdr *ehdr32;
+	int is64;
+	uint64_t sym_vaddr = 0;
+	int found = 0, mapped = 0;	/* symbol located / vaddr inside a PT_LOAD */
+	uint64_t file_offset = 0;
+
+	fd = open(binary, O_RDONLY);
+	if (fd < 0) {
+		fprintf(stderr, "open %s: %s\n", binary, strerror(errno));
+		return 1;
+	}
+	if (fstat(fd, &st) < 0) {
+		close(fd);
+		return 1;
+	}
+	map = mmap(NULL, (size_t)st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+	close(fd);
+	if (map == MAP_FAILED) {
+		fprintf(stderr, "mmap: %s\n", strerror(errno));
+		return 1;
+	}
+
+	/* Identify ELF class */
+	ehdr = (Elf64_Ehdr *)map;
+	ehdr32 = (Elf32_Ehdr *)map;
+	if (st.st_size < 4 ||
+	    ehdr->e_ident[EI_MAG0] != ELFMAG0 ||
+	    ehdr->e_ident[EI_MAG1] != ELFMAG1 ||
+	    ehdr->e_ident[EI_MAG2] != ELFMAG2 ||
+	    ehdr->e_ident[EI_MAG3] != ELFMAG3) {
+		fprintf(stderr, "%s: not an ELF file\n", binary);
+		munmap(map, (size_t)st.st_size);
+		return 1;
+	}
+	is64 = (ehdr->e_ident[EI_CLASS] == ELFCLASS64);
+
+	if (is64) {
+		/* Walk section headers to find .symtab or .dynsym */
+		Elf64_Shdr *shdrs = (Elf64_Shdr *)((char *)map + ehdr->e_shoff);
+		Elf64_Shdr *shstrtab_hdr = &shdrs[ehdr->e_shstrndx];
+		const char *shstrtab = (char *)map + shstrtab_hdr->sh_offset;
+		int si;
+
+		/* Prefer .symtab, then .dynsym; imports (SHN_UNDEF) carry no address */
+		for (int pass = 0; pass < 2 && !found; pass++) {
+			const char *target = pass ? ".dynsym" : ".symtab";
+
+			for (si = 0; si < ehdr->e_shnum && !found; si++) {
+				Elf64_Shdr *sh = &shdrs[si];
+				const char *name = shstrtab + sh->sh_name;
+
+				if (strcmp(name, target) != 0)
+					continue;
+
+				Elf64_Shdr *strtab_sh = &shdrs[sh->sh_link];
+				const char *strtab = (char *)map + strtab_sh->sh_offset;
+				Elf64_Sym *syms = (Elf64_Sym *)((char *)map + sh->sh_offset);
+				uint64_t nsyms = sh->sh_size / sizeof(Elf64_Sym);
+				uint64_t j;
+
+				for (j = 0; j < nsyms && !found; j++) {
+					if (syms[j].st_shndx == SHN_UNDEF ||
+					    strcmp(strtab + syms[j].st_name, symname) != 0)
+						continue;
+					sym_vaddr = syms[j].st_value;
+					found = 1;
+				}
+			}
+		}
+
+		if (!found) {
+			fprintf(stderr, "symbol '%s' not found in %s\n", symname, binary);
+			munmap(map, (size_t)st.st_size);
+			return 1;
+		}
+
+		/* Convert vaddr to file offset via PT_LOAD segments */
+		Elf64_Phdr *phdrs = (Elf64_Phdr *)((char *)map + ehdr->e_phoff);
+		int pi;
+
+		for (pi = 0; pi < ehdr->e_phnum && !mapped; pi++) {
+			Elf64_Phdr *ph = &phdrs[pi];
+
+			if (ph->p_type != PT_LOAD)
+				continue;
+			if (sym_vaddr >= ph->p_vaddr &&
+			    sym_vaddr < ph->p_vaddr + ph->p_filesz) {
+				file_offset = sym_vaddr - ph->p_vaddr + ph->p_offset;
+				mapped = 1;
+			}
+		}
+	} else {
+		/* 32-bit ELF */
+		Elf32_Shdr *shdrs = (Elf32_Shdr *)((char *)map + ehdr32->e_shoff);
+		Elf32_Shdr *shstrtab_hdr = &shdrs[ehdr32->e_shstrndx];
+		const char *shstrtab = (char *)map + shstrtab_hdr->sh_offset;
+		int si;
+		uint32_t sym_vaddr32 = 0;
+
+		for (int pass = 0; pass < 2 && !found; pass++) {
+			const char *target = pass ? ".dynsym" : ".symtab";
+
+			for (si = 0; si < ehdr32->e_shnum && !found; si++) {
+				Elf32_Shdr *sh = &shdrs[si];
+				const char *name = shstrtab + sh->sh_name;
+
+				if (strcmp(name, target) != 0)
+					continue;
+
+				Elf32_Shdr *strtab_sh = &shdrs[sh->sh_link];
+				const char *strtab = (char *)map + strtab_sh->sh_offset;
+				Elf32_Sym *syms = (Elf32_Sym *)((char *)map + sh->sh_offset);
+				uint32_t nsyms = sh->sh_size / sizeof(Elf32_Sym);
+				uint32_t j;
+
+				for (j = 0; j < nsyms && !found; j++) {
+					if (syms[j].st_shndx == SHN_UNDEF ||
+					    strcmp(strtab + syms[j].st_name, symname) != 0)
+						continue;
+					sym_vaddr32 = syms[j].st_value;
+					found = 1;
+				}
+			}
+		}
+
+		if (!found) {
+			fprintf(stderr, "symbol '%s' not found in %s\n", symname, binary);
+			munmap(map, (size_t)st.st_size);
+			return 1;
+		}
+
+		Elf32_Phdr *phdrs = (Elf32_Phdr *)((char *)map + ehdr32->e_phoff);
+		int pi;
+
+		for (pi = 0; pi < ehdr32->e_phnum && !mapped; pi++) {
+			Elf32_Phdr *ph = &phdrs[pi];
+
+			if (ph->p_type != PT_LOAD)
+				continue;
+			if (sym_vaddr32 >= ph->p_vaddr &&
+			    sym_vaddr32 < ph->p_vaddr + ph->p_filesz) {
+				file_offset = sym_vaddr32 - ph->p_vaddr + ph->p_offset;
+				mapped = 1;
+			}
+		}
+		sym_vaddr = sym_vaddr32;
+	}
+
+	munmap(map, (size_t)st.st_size);
+
+	if (!mapped) {
+		fprintf(stderr, "could not map vaddr 0x%llx to file offset\n",
+			(unsigned long long)sym_vaddr);
+		return 1;
+	}
+
+	printf("0x%llx\n", (unsigned long long)file_offset);
+	return 0;
+}
+
+int main(int argc, char *argv[])
+{
+	/* Subcommands that need /dev/rv, looked up by name. */
+	static const struct {
+		const char *name;
+		int (*run)(void);
+	} tests[] = {
+		{ "not_enabled",       test_not_enabled },
+		{ "within_budget",     test_within_budget },
+		{ "over_budget_cpu",   test_over_budget_cpu },
+		{ "over_budget_sleep", test_over_budget_sleep },
+		{ "double_start",      test_double_start },
+		{ "stop_no_start",     test_stop_no_start },
+		{ "multi_thread",      test_multi_thread },
+		{ "self_watch",        test_self_watch },
+		{ "invalid_flags",     test_invalid_flags },
+		{ "notify_fd_bad",     test_notify_fd_bad },
+		{ "mmap_basic",        test_mmap_basic },
+		{ "mmap_errors",       test_mmap_errors },
+		{ "mmap_consume",      test_mmap_consume },
+	};
+	const size_t ntests = sizeof(tests) / sizeof(tests[0]);
+	size_t idx;
+	int rc = 1;
+
+	if (argc < 2) {
+		fprintf(stderr, "Usage: %s <subcommand> [args...]\n", argv[0]);
+		return 1;
+	}
+
+	/* sym_offset is handled first because it does not need /dev/rv. */
+	if (strcmp(argv[1], "sym_offset") == 0) {
+		if (argc >= 4)
+			return sym_offset(argv[2], argv[3]);
+		fprintf(stderr, "Usage: %s sym_offset <binary> <symbol>\n",
+			argv[0]);
+		return 1;
+	}
+
+	if (open_rv() < 0)
+		return 2; /* skip */
+
+	/* rc keeps its initial 1 when no table entry matches. */
+	for (idx = 0; idx < ntests; idx++) {
+		if (strcmp(argv[1], tests[idx].name) == 0)
+			break;
+	}
+
+	if (idx < ntests)
+		rc = tests[idx].run();
+	else
+		fprintf(stderr, "Unknown test: %s\n", argv[1]);
+
+	close(rv_fd);
+	return rc;
+}
diff --git a/tools/testing/selftests/rv/tlob_uprobe_target.c b/tools/testing/selftests/rv/tlob_uprobe_target.c
new file mode 100644
index 000000000..6c895cb40
--- /dev/null
+++ b/tools/testing/selftests/rv/tlob_uprobe_target.c
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * tlob_uprobe_target.c - uprobe target binary for tlob selftests.
+ *
+ * Provides two well-known probe points:
+ *   tlob_busy_work()      - start probe: arms the tlob budget timer
+ *   tlob_busy_work_done() - stop  probe: cancels the timer on completion
+ *
+ * The tlob selftest writes a five-field uprobe binding:
+ *   pid:threshold_us:binary:offset_start:offset_stop
+ * where offset_start is the file offset of tlob_busy_work and offset_stop
+ * is the file offset of tlob_busy_work_done (resolved via tlob_helper
+ * sym_offset).
+ *
+ * Both probe points are plain entry uprobes (no uretprobe).  The busy loop
+ * keeps the task on-CPU so that either the stop probe fires cleanly (within
+ * budget) or the hrtimer fires first and emits tlob_budget_exceeded (over
+ * budget).
+ *
+ * Usage: tlob_uprobe_target <duration_ms>
+ *
+ * Loops calling tlob_busy_work() in 200 ms iterations until <duration_ms>
+ * has elapsed (0 = run for ~24 hours).  Short iterations ensure the uprobe
+ * entry fires on every call even if the uprobe is installed after the
+ * program has started.
+ */
+#define _GNU_SOURCE
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+
+#ifndef noinline
+#define noinline __attribute__((noinline))
+#endif
+
+static inline int timespec_before(const struct timespec *a,
+				   const struct timespec *b)
+{
+	/* Seconds decide unless they tie; then the nanoseconds break the tie. */
+	return a->tv_sec != b->tv_sec ? a->tv_sec < b->tv_sec : a->tv_nsec < b->tv_nsec;
+}
+
+static void timespec_add_ms(struct timespec *ts, unsigned long ms)
+{
+	long nsec = ts->tv_nsec + (long)(ms % 1000) * 1000000L;
+	int carry = nsec >= 1000000000L;	/* 1 when the nanoseconds spill over */
+
+	ts->tv_sec += ms / 1000;
+	ts->tv_sec += carry;
+	ts->tv_nsec = nsec - carry * 1000000000L;
+}
+
+/*
+ * tlob_busy_work_done - stop-probe target.
+ *
+ * Called by tlob_busy_work() after the busy loop.  The uprobe on this
+ * function's entry fires tlob_stop_task(), cancelling the budget timer.
+ * noinline alone is not enough: GCC may still drop calls to a function
+ * with no side effects, so the empty asm keeps the call (and probe) alive.
+ */
+noinline void tlob_busy_work_done(void)
+{
+	__asm__ __volatile__("" ::: "memory");
+}
+
+/*
+ * tlob_busy_work - start-probe target.
+ *
+ * The uprobe on this function's entry fires tlob_start_task(), arming the
+ * budget timer.  noinline prevents the compiler and linker (including LTO)
+ * from inlining this function into its callers, ensuring the entry uprobe
+ * fires on every call.
+ */
+noinline void tlob_busy_work(unsigned long duration_ns)
+{
+	struct timespec t0, t1;
+	unsigned long spent_ns;
+
+	clock_gettime(CLOCK_MONOTONIC, &t0);
+	for (;;) {
+		clock_gettime(CLOCK_MONOTONIC, &t1);
+		spent_ns = (unsigned long)(t1.tv_sec - t0.tv_sec) * 1000000000UL;
+		spent_ns += (unsigned long)(t1.tv_nsec - t0.tv_nsec);
+		if (spent_ns >= duration_ns)
+			break;
+	}
+	tlob_busy_work_done();	/* entering this callee is the stop-probe site */
+}
+
+int main(int argc, char *argv[])
+{
+	unsigned long run_ms = argc >= 2 ? strtoul(argv[1], NULL, 10) : 0;
+	struct timespec stop_at, cur;
+
+	if (!run_ms)
+		run_ms = 86400000UL;	/* 0 or no argument: run for ~24 hours */
+
+	clock_gettime(CLOCK_MONOTONIC, &stop_at);
+	timespec_add_ms(&stop_at, run_ms);
+
+	do {
+		tlob_busy_work(200 * 1000000UL); /* 200 ms per iteration */
+		clock_gettime(CLOCK_MONOTONIC, &cur);
+	} while (timespec_before(&cur, &stop_at));
+
+	return 0;
+}
-- 
2.43.0


  parent reply	other threads:[~2026-04-12 19:28 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-04-12 19:27 [RFC PATCH 0/4] rv/tlob: Add task latency over budget RV monitor wen.yang
2026-04-12 19:27 ` [RFC PATCH 1/4] rv/tlob: Add tlob model DOT file wen.yang
2026-04-13  8:19   ` Gabriele Monaco
2026-04-12 19:27 ` [RFC PATCH 2/4] rv/tlob: Add tlob deterministic automaton monitor wen.yang
2026-04-13  8:19   ` Gabriele Monaco
2026-04-16 15:09     ` Wen Yang
2026-04-16 15:35       ` Gabriele Monaco
2026-04-12 19:27 ` [RFC PATCH 3/4] rv/tlob: Add KUnit tests for the tlob monitor wen.yang
2026-04-16 12:09   ` Gabriele Monaco
2026-04-12 19:27 ` wen.yang [this message]
2026-04-16 12:00   ` [RFC PATCH 4/4] selftests/rv: Add selftest " Gabriele Monaco

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=5bdd82dd8aeb1d3f955b727ae1fce9819b35c170.1776020428.git.wen.yang@linux.dev \
    --to=wen.yang@linux.dev \
    --cc=gmonaco@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-trace-kernel@vger.kernel.org \
    --cc=mathieu.desnoyers@efficios.com \
    --cc=mhiramat@kernel.org \
    --cc=rostedt@goodmis.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox