* [PATCH 0/5] rv: LTL per-cpu monitor type and real-time scheduling monitor
@ 2025-07-30 12:45 Nam Cao
2025-07-30 12:45 ` [PATCH 1/5] rv/ltl: Prepare for other monitor types Nam Cao
` (4 more replies)
0 siblings, 5 replies; 32+ messages in thread
From: Nam Cao @ 2025-07-30 12:45 UTC (permalink / raw)
To: Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
Gabriele Monaco, linux-trace-kernel, linux-kernel
Cc: Nam Cao
Hi,
This series adds support for linear temporal logic per-cpu monitor type,
analogous to deterministic automaton per-cpu monitor. Then a new per-cpu
monitor is added which validates real-time scheduling.
Nam Cao (5):
rv/ltl: Prepare for other monitor types
rv/ltl: Support per-cpu monitors
verification/rvgen/ltl: Support per-cpu monitor generation
sched: Add rt task enqueue/dequeue trace points
rv: Add rts monitor
Documentation/trace/rv/monitor_sched.rst | 19 +++
include/linux/rv.h | 2 +
include/rv/ltl_monitor.h | 117 +++++++++++-----
include/trace/events/sched.h | 8 ++
kernel/sched/rt.c | 4 +
kernel/trace/rv/Kconfig | 5 +
kernel/trace/rv/Makefile | 1 +
.../trace/rv/monitors/pagefault/pagefault.h | 2 +
kernel/trace/rv/monitors/rts/Kconfig | 17 +++
kernel/trace/rv/monitors/rts/rts.c | 131 ++++++++++++++++++
kernel/trace/rv/monitors/rts/rts.h | 126 +++++++++++++++++
kernel/trace/rv/monitors/rts/rts_trace.h | 15 ++
kernel/trace/rv/monitors/sleep/sleep.h | 2 +
kernel/trace/rv/rv_trace.h | 47 +++++++
tools/verification/models/sched/rts.ltl | 5 +
tools/verification/rvgen/rvgen/ltl2k.py | 48 ++++++-
.../rvgen/rvgen/templates/ltl2k/main.c | 9 +-
.../rvgen/rvgen/templates/ltl2k/trace.h | 7 +-
18 files changed, 516 insertions(+), 49 deletions(-)
create mode 100644 kernel/trace/rv/monitors/rts/Kconfig
create mode 100644 kernel/trace/rv/monitors/rts/rts.c
create mode 100644 kernel/trace/rv/monitors/rts/rts.h
create mode 100644 kernel/trace/rv/monitors/rts/rts_trace.h
create mode 100644 tools/verification/models/sched/rts.ltl
--
2.39.5
^ permalink raw reply [flat|nested] 32+ messages in thread
* [PATCH 1/5] rv/ltl: Prepare for other monitor types
2025-07-30 12:45 [PATCH 0/5] rv: LTL per-cpu monitor type and real-time scheduling monitor Nam Cao
@ 2025-07-30 12:45 ` Nam Cao
2025-07-31 9:04 ` Gabriele Monaco
2025-07-30 12:45 ` [PATCH 2/5] rv/ltl: Support per-cpu monitors Nam Cao
` (3 subsequent siblings)
4 siblings, 1 reply; 32+ messages in thread
From: Nam Cao @ 2025-07-30 12:45 UTC (permalink / raw)
To: Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
Gabriele Monaco, linux-trace-kernel, linux-kernel
Cc: Nam Cao
rv/ltl_monitor.h is the template file used by all LTL monitors. But only
per-task monitor is supported.
Prepare to support other monitor types:
- Change the monitored target type into an opaque type which will be
defined differently depending on the monitor type. This type is only
defined as struct task_struct * for now.
- Separate out the per-task-specific printf format and arguments, so that
rv_cond_react() can be shared with other monitor types.
No functional change intended.
Signed-off-by: Nam Cao <namcao@linutronix.de>
---
include/linux/rv.h | 1 +
include/rv/ltl_monitor.h | 85 +++++++++++--------
.../trace/rv/monitors/pagefault/pagefault.h | 2 +
kernel/trace/rv/monitors/sleep/sleep.h | 2 +
4 files changed, 55 insertions(+), 35 deletions(-)
diff --git a/include/linux/rv.h b/include/linux/rv.h
index 14410a42faef..175438a22641 100644
--- a/include/linux/rv.h
+++ b/include/linux/rv.h
@@ -28,6 +28,7 @@ struct da_monitor {
#ifdef CONFIG_RV_LTL_MONITOR
+#define LTL_TASK_MONITOR 0
/*
* In the future, if the number of atomic propositions or the size of Buchi
* automaton is larger, we can switch to dynamic allocation. For now, the code
diff --git a/include/rv/ltl_monitor.h b/include/rv/ltl_monitor.h
index 67031a774e3d..466155fd53e6 100644
--- a/include/rv/ltl_monitor.h
+++ b/include/rv/ltl_monitor.h
@@ -16,49 +16,63 @@
#error "Please include $(MODEL_NAME).h generated by rvgen"
#endif
+#if LTL_MONITOR_TYPE == LTL_TASK_MONITOR
+
+#define TARGET_PRINT_FORMAT "%s[%d]"
+#define TARGET_PRINT_ARGS(task) task->comm, task->pid
+
+typedef struct task_struct *monitor_target;
+
+#endif
+
#ifdef CONFIG_RV_REACTORS
#define RV_MONITOR_NAME CONCATENATE(rv_, MONITOR_NAME)
static struct rv_monitor RV_MONITOR_NAME;
-static void rv_cond_react(struct task_struct *task)
+static void rv_cond_react(monitor_target target)
{
if (!rv_reacting_on() || !RV_MONITOR_NAME.react)
return;
- RV_MONITOR_NAME.react("rv: "__stringify(MONITOR_NAME)": %s[%d]: violation detected\n",
- task->comm, task->pid);
+
+ RV_MONITOR_NAME.react(
+ "rv: "__stringify(MONITOR_NAME)": "TARGET_PRINT_FORMAT": violation detected\n",
+ TARGET_PRINT_ARGS(target));
}
#else
-static void rv_cond_react(struct task_struct *task)
+static void rv_cond_react(monitor_target target)
{
}
#endif
-static int ltl_monitor_slot = RV_PER_TASK_MONITOR_INIT;
+static void ltl_atoms_fetch(monitor_target target, struct ltl_monitor *mon);
+static void ltl_atoms_init(monitor_target target, struct ltl_monitor *mon, bool target_creation);
-static void ltl_atoms_fetch(struct task_struct *task, struct ltl_monitor *mon);
-static void ltl_atoms_init(struct task_struct *task, struct ltl_monitor *mon, bool task_creation);
+#if LTL_MONITOR_TYPE == LTL_TASK_MONITOR
+static int ltl_monitor_slot = RV_PER_TASK_MONITOR_INIT;
-static struct ltl_monitor *ltl_get_monitor(struct task_struct *task)
+static struct ltl_monitor *ltl_get_monitor(monitor_target target)
{
- return &task->rv[ltl_monitor_slot].ltl_mon;
+ return &target->rv[ltl_monitor_slot].ltl_mon;
}
+#endif
-static void ltl_task_init(struct task_struct *task, bool task_creation)
+static void ltl_target_init(monitor_target target, bool target_creation)
{
- struct ltl_monitor *mon = ltl_get_monitor(task);
+ struct ltl_monitor *mon = ltl_get_monitor(target);
memset(&mon->states, 0, sizeof(mon->states));
for (int i = 0; i < LTL_NUM_ATOM; ++i)
__set_bit(i, mon->unknown_atoms);
- ltl_atoms_init(task, mon, task_creation);
- ltl_atoms_fetch(task, mon);
+ ltl_atoms_init(target, mon, target_creation);
+ ltl_atoms_fetch(target, mon);
}
+#if LTL_MONITOR_TYPE == LTL_TASK_MONITOR
static void handle_task_newtask(void *data, struct task_struct *task, unsigned long flags)
{
- ltl_task_init(task, true);
+ ltl_target_init(task, true);
}
static int ltl_monitor_init(void)
@@ -77,10 +91,10 @@ static int ltl_monitor_init(void)
read_lock(&tasklist_lock);
for_each_process_thread(g, p)
- ltl_task_init(p, false);
+ ltl_target_init(p, false);
for_each_present_cpu(cpu)
- ltl_task_init(idle_task(cpu), false);
+ ltl_target_init(idle_task(cpu), false);
read_unlock(&tasklist_lock);
@@ -94,17 +108,18 @@ static void ltl_monitor_destroy(void)
rv_put_task_monitor_slot(ltl_monitor_slot);
ltl_monitor_slot = RV_PER_TASK_MONITOR_INIT;
}
+#endif
-static void ltl_illegal_state(struct task_struct *task, struct ltl_monitor *mon)
+static void ltl_illegal_state(monitor_target target, struct ltl_monitor *mon)
{
- CONCATENATE(trace_error_, MONITOR_NAME)(task);
- rv_cond_react(task);
+ CONCATENATE(trace_error_, MONITOR_NAME)(target);
+ rv_cond_react(target);
}
-static void ltl_attempt_start(struct task_struct *task, struct ltl_monitor *mon)
+static void ltl_attempt_start(monitor_target target, struct ltl_monitor *mon)
{
if (rv_ltl_all_atoms_known(mon))
- ltl_start(task, mon);
+ ltl_start(target, mon);
}
static inline void ltl_atom_set(struct ltl_monitor *mon, enum ltl_atom atom, bool value)
@@ -117,7 +132,7 @@ static inline void ltl_atom_set(struct ltl_monitor *mon, enum ltl_atom atom, boo
}
static void
-ltl_trace_event(struct task_struct *task, struct ltl_monitor *mon, unsigned long *next_state)
+ltl_trace_event(monitor_target target, struct ltl_monitor *mon, unsigned long *next_state)
{
const char *format_str = "%s";
DECLARE_SEQ_BUF(atoms, 64);
@@ -137,10 +152,10 @@ ltl_trace_event(struct task_struct *task, struct ltl_monitor *mon, unsigned long
}
}
- CONCATENATE(trace_event_, MONITOR_NAME)(task, states, atoms.buffer, next);
+ CONCATENATE(trace_event_, MONITOR_NAME)(target, states, atoms.buffer, next);
}
-static void ltl_validate(struct task_struct *task, struct ltl_monitor *mon)
+static void ltl_validate(monitor_target target, struct ltl_monitor *mon)
{
DECLARE_BITMAP(next_states, RV_MAX_BA_STATES) = {0};
@@ -152,35 +167,35 @@ static void ltl_validate(struct task_struct *task, struct ltl_monitor *mon)
ltl_possible_next_states(mon, i, next_states);
}
- ltl_trace_event(task, mon, next_states);
+ ltl_trace_event(target, mon, next_states);
memcpy(mon->states, next_states, sizeof(next_states));
if (!rv_ltl_valid_state(mon))
- ltl_illegal_state(task, mon);
+ ltl_illegal_state(target, mon);
}
-static void ltl_atom_update(struct task_struct *task, enum ltl_atom atom, bool value)
+static void ltl_atom_update(monitor_target target, enum ltl_atom atom, bool value)
{
- struct ltl_monitor *mon = ltl_get_monitor(task);
+ struct ltl_monitor *mon = ltl_get_monitor(target);
ltl_atom_set(mon, atom, value);
- ltl_atoms_fetch(task, mon);
+ ltl_atoms_fetch(target, mon);
if (!rv_ltl_valid_state(mon)) {
- ltl_attempt_start(task, mon);
+ ltl_attempt_start(target, mon);
return;
}
- ltl_validate(task, mon);
+ ltl_validate(target, mon);
}
-static void __maybe_unused ltl_atom_pulse(struct task_struct *task, enum ltl_atom atom, bool value)
+static void __maybe_unused ltl_atom_pulse(monitor_target target, enum ltl_atom atom, bool value)
{
- struct ltl_monitor *mon = ltl_get_monitor(task);
+ struct ltl_monitor *mon = ltl_get_monitor(target);
- ltl_atom_update(task, atom, value);
+ ltl_atom_update(target, atom, value);
ltl_atom_set(mon, atom, !value);
- ltl_validate(task, mon);
+ ltl_validate(target, mon);
}
diff --git a/kernel/trace/rv/monitors/pagefault/pagefault.h b/kernel/trace/rv/monitors/pagefault/pagefault.h
index c580ec194009..cc825be51ffc 100644
--- a/kernel/trace/rv/monitors/pagefault/pagefault.h
+++ b/kernel/trace/rv/monitors/pagefault/pagefault.h
@@ -11,6 +11,8 @@
#define MONITOR_NAME pagefault
+#define LTL_MONITOR_TYPE LTL_TASK_MONITOR
+
enum ltl_atom {
LTL_PAGEFAULT,
LTL_RT,
diff --git a/kernel/trace/rv/monitors/sleep/sleep.h b/kernel/trace/rv/monitors/sleep/sleep.h
index 2ab46fd218d2..c3752bc9f93f 100644
--- a/kernel/trace/rv/monitors/sleep/sleep.h
+++ b/kernel/trace/rv/monitors/sleep/sleep.h
@@ -11,6 +11,8 @@
#define MONITOR_NAME sleep
+#define LTL_MONITOR_TYPE LTL_TASK_MONITOR
+
enum ltl_atom {
LTL_ABORT_SLEEP,
LTL_BLOCK_ON_RT_MUTEX,
--
2.39.5
^ permalink raw reply related [flat|nested] 32+ messages in thread
* [PATCH 2/5] rv/ltl: Support per-cpu monitors
2025-07-30 12:45 [PATCH 0/5] rv: LTL per-cpu monitor type and real-time scheduling monitor Nam Cao
2025-07-30 12:45 ` [PATCH 1/5] rv/ltl: Prepare for other monitor types Nam Cao
@ 2025-07-30 12:45 ` Nam Cao
2025-07-31 8:02 ` Gabriele Monaco
2025-07-30 12:45 ` [PATCH 3/5] verification/rvgen/ltl: Support per-cpu monitor generation Nam Cao
` (2 subsequent siblings)
4 siblings, 1 reply; 32+ messages in thread
From: Nam Cao @ 2025-07-30 12:45 UTC (permalink / raw)
To: Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
Gabriele Monaco, linux-trace-kernel, linux-kernel
Cc: Nam Cao
Add support for per-cpu run-time verification linear temporal logic
monitors. This is analogous to deterministic automaton per-cpu monitors.
Signed-off-by: Nam Cao <namcao@linutronix.de>
---
include/linux/rv.h | 1 +
include/rv/ltl_monitor.h | 32 ++++++++++++++++++++++++++
kernel/trace/rv/Kconfig | 4 ++++
kernel/trace/rv/rv_trace.h | 47 ++++++++++++++++++++++++++++++++++++++
4 files changed, 84 insertions(+)
diff --git a/include/linux/rv.h b/include/linux/rv.h
index 175438a22641..a41ae5980099 100644
--- a/include/linux/rv.h
+++ b/include/linux/rv.h
@@ -29,6 +29,7 @@ struct da_monitor {
#ifdef CONFIG_RV_LTL_MONITOR
#define LTL_TASK_MONITOR 0
+#define LTL_CPU_MONITOR 1
/*
* In the future, if the number of atomic propositions or the size of Buchi
* automaton is larger, we can switch to dynamic allocation. For now, the code
diff --git a/include/rv/ltl_monitor.h b/include/rv/ltl_monitor.h
index 466155fd53e6..29bbf86d1a52 100644
--- a/include/rv/ltl_monitor.h
+++ b/include/rv/ltl_monitor.h
@@ -23,12 +23,21 @@
typedef struct task_struct *monitor_target;
+#elif LTL_MONITOR_TYPE == LTL_CPU_MONITOR
+
+#define TARGET_PRINT_FORMAT "%u"
+#define TARGET_PRINT_ARGS(cpu) cpu
+
+typedef unsigned int monitor_target;
+
#endif
#ifdef CONFIG_RV_REACTORS
#define RV_MONITOR_NAME CONCATENATE(rv_, MONITOR_NAME)
static struct rv_monitor RV_MONITOR_NAME;
+static struct ltl_monitor *ltl_get_monitor(monitor_target target);
+
static void rv_cond_react(monitor_target target)
{
if (!rv_reacting_on() || !RV_MONITOR_NAME.react)
@@ -54,6 +63,13 @@ static struct ltl_monitor *ltl_get_monitor(monitor_target target)
{
return &target->rv[ltl_monitor_slot].ltl_mon;
}
+#elif LTL_MONITOR_TYPE == LTL_CPU_MONITOR
+static struct ltl_monitor *ltl_get_monitor(unsigned int cpu)
+{
+ static DEFINE_PER_CPU(struct ltl_monitor, ltl_monitor);
+
+ return per_cpu_ptr(<l_monitor, cpu);
+}
#endif
static void ltl_target_init(monitor_target target, bool target_creation)
@@ -108,6 +124,22 @@ static void ltl_monitor_destroy(void)
rv_put_task_monitor_slot(ltl_monitor_slot);
ltl_monitor_slot = RV_PER_TASK_MONITOR_INIT;
}
+
+#elif LTL_MONITOR_TYPE == LTL_CPU_MONITOR
+
+static int ltl_monitor_init(void)
+{
+ unsigned int cpu;
+
+ for_each_possible_cpu(cpu)
+ ltl_target_init(cpu, false);
+ return 0;
+}
+
+static void ltl_monitor_destroy(void)
+{
+}
+
#endif
static void ltl_illegal_state(monitor_target target, struct ltl_monitor *mon)
diff --git a/kernel/trace/rv/Kconfig b/kernel/trace/rv/Kconfig
index 5b4be87ba59d..951c2e0cda25 100644
--- a/kernel/trace/rv/Kconfig
+++ b/kernel/trace/rv/Kconfig
@@ -16,6 +16,10 @@ config DA_MON_EVENTS_ID
select RV_MON_MAINTENANCE_EVENTS
bool
+config LTL_MON_EVENTS_IMPLICIT
+ select RV_MON_EVENTS
+ bool
+
config LTL_MON_EVENTS_ID
select RV_MON_EVENTS
bool
diff --git a/kernel/trace/rv/rv_trace.h b/kernel/trace/rv/rv_trace.h
index 4a6faddac614..f9e5fd044c45 100644
--- a/kernel/trace/rv/rv_trace.h
+++ b/kernel/trace/rv/rv_trace.h
@@ -177,6 +177,53 @@ DECLARE_EVENT_CLASS(error_ltl_monitor_id,
#include <monitors/pagefault/pagefault_trace.h>
#include <monitors/sleep/sleep_trace.h>
// Add new monitors based on CONFIG_LTL_MON_EVENTS_ID here
+
+#ifdef CONFIG_LTL_MON_EVENTS_IMPLICIT
+DECLARE_EVENT_CLASS(event_ltl_monitor,
+
+ TP_PROTO(unsigned int cpu, char *states, char *atoms, char *next),
+
+ TP_ARGS(cpu, states, atoms, next),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, cpu)
+ __string(states, states)
+ __string(atoms, atoms)
+ __string(next, next)
+ ),
+
+ TP_fast_assign(
+ __entry->cpu = cpu;
+ __assign_str(states);
+ __assign_str(atoms);
+ __assign_str(next);
+ ),
+
+ TP_printk("cpu%u: (%s) x (%s) -> (%s)", __entry->cpu,
+ __get_str(states), __get_str(atoms), __get_str(next))
+);
+
+DECLARE_EVENT_CLASS(error_ltl_monitor,
+
+ TP_PROTO(unsigned int cpu),
+
+ TP_ARGS(cpu),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, cpu)
+ ),
+
+ TP_fast_assign(
+ __entry->cpu = cpu;
+ ),
+
+ TP_printk("cpu%u: violation detected", __entry->cpu)
+);
+#include <monitors/rts/rts_trace.h>
+// Add new monitors based on CONFIG_LTL_MON_EVENTS_IMPLICIT here
+
+#endif /* CONFIG_LTL_MON_EVENTS_IMPLICIT */
+
#endif /* CONFIG_LTL_MON_EVENTS_ID */
#ifdef CONFIG_RV_MON_MAINTENANCE_EVENTS
--
2.39.5
^ permalink raw reply related [flat|nested] 32+ messages in thread
* [PATCH 3/5] verification/rvgen/ltl: Support per-cpu monitor generation
2025-07-30 12:45 [PATCH 0/5] rv: LTL per-cpu monitor type and real-time scheduling monitor Nam Cao
2025-07-30 12:45 ` [PATCH 1/5] rv/ltl: Prepare for other monitor types Nam Cao
2025-07-30 12:45 ` [PATCH 2/5] rv/ltl: Support per-cpu monitors Nam Cao
@ 2025-07-30 12:45 ` Nam Cao
2025-07-30 12:45 ` [PATCH 4/5] sched: Add rt task enqueue/dequeue trace points Nam Cao
2025-07-30 12:45 ` [PATCH 5/5] rv: Add rts monitor Nam Cao
4 siblings, 0 replies; 32+ messages in thread
From: Nam Cao @ 2025-07-30 12:45 UTC (permalink / raw)
To: Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
Gabriele Monaco, linux-trace-kernel, linux-kernel
Cc: Nam Cao
Add support to generate per-cpu LTL monitors. Similar to generating per-cpu
monitors from .dot files, the "-t per_cpu" parameter can be used to
generate per-cpu monitors from .ltl files.
Signed-off-by: Nam Cao <namcao@linutronix.de>
---
tools/verification/rvgen/rvgen/ltl2k.py | 48 ++++++++++++++++---
.../rvgen/rvgen/templates/ltl2k/main.c | 9 ++--
.../rvgen/rvgen/templates/ltl2k/trace.h | 7 ++-
3 files changed, 50 insertions(+), 14 deletions(-)
diff --git a/tools/verification/rvgen/rvgen/ltl2k.py b/tools/verification/rvgen/rvgen/ltl2k.py
index b075f98d50c4..0fd3f9963a87 100644
--- a/tools/verification/rvgen/rvgen/ltl2k.py
+++ b/tools/verification/rvgen/rvgen/ltl2k.py
@@ -57,9 +57,16 @@ class ltl2k(generator.Monitor):
template_dir = "ltl2k"
def __init__(self, file_path, MonitorType, extra_params={}):
- if MonitorType != "per_task":
- raise NotImplementedError("Only per_task monitor is supported for LTL")
+ if MonitorType == "per_task":
+ self._target_arg = "struct task_struct *task"
+ self._target = "task"
+ elif MonitorType == "per_cpu":
+ self._target_arg = "unsigned int cpu"
+ self._target = "cpu"
+ else:
+ raise NotImplementedError(f"LTL does not support monitor type {MonitorType}")
super().__init__(extra_params)
+ self.monitor_type = MonitorType
with open(file_path) as f:
self.atoms, self.ba, self.ltl = ltl2ba.create_graph(f.read())
self.atoms_abbr = abbreviate_atoms(self.atoms)
@@ -67,6 +74,13 @@ class ltl2k(generator.Monitor):
if not self.name:
self.name = Path(file_path).stem
+ def _fill_monitor_type(self) -> str:
+ if self.monitor_type == "per_task":
+ return "#define LTL_MONITOR_TYPE LTL_TASK_MONITOR"
+ if self.monitor_type == "per_cpu":
+ return "#define LTL_MONITOR_TYPE LTL_CPU_MONITOR"
+ assert False
+
def _fill_states(self) -> str:
buf = [
"enum ltl_buchi_state {",
@@ -174,7 +188,7 @@ class ltl2k(generator.Monitor):
def _fill_start(self):
buf = [
- "static void ltl_start(struct task_struct *task, struct ltl_monitor *mon)",
+ "static void ltl_start(%s, struct ltl_monitor *mon)" % self._target_arg,
"{"
]
@@ -205,7 +219,7 @@ class ltl2k(generator.Monitor):
buff = []
buff.append("static void handle_example_event(void *data, /* XXX: fill header */)")
buff.append("{")
- buff.append("\tltl_atom_update(task, LTL_%s, true/false);" % self.atoms[0])
+ buff.append("\tltl_atom_update(%s, LTL_%s, true/false);" % (self._target, self.atoms[0]))
buff.append("}")
buff.append("")
return '\n'.join(buff)
@@ -241,6 +255,9 @@ class ltl2k(generator.Monitor):
""
]
+ buf.append(self._fill_monitor_type())
+ buf.append('')
+
buf.extend(self._fill_atoms())
buf.append('')
@@ -259,13 +276,32 @@ class ltl2k(generator.Monitor):
return '\n'.join(buf)
def fill_monitor_class_type(self):
- return "LTL_MON_EVENTS_ID"
+ if self.monitor_type == "per_task":
+ return "LTL_MON_EVENTS_ID"
+ if self.monitor_type == "per_cpu":
+ return "LTL_MON_EVENTS_IMPLICIT"
+ assert False
def fill_monitor_class(self):
- return "ltl_monitor_id"
+ if self.monitor_type == "per_task":
+ return "ltl_monitor_id"
+ if self.monitor_type == "per_cpu":
+ return "ltl_monitor"
+ assert False
+
+ def fill_tracepoint_args_skel(self, tp_type):
+ if tp_type == "event":
+ return \
+ ("\tTP_PROTO(%s, char *states, char *atoms, char *next),\n" % self._target_arg) + \
+ ("\tTP_ARGS(%s, states, atoms, next)" % self._target)
+ if tp_type == "error":
+ return \
+ ("\tTP_PROTO(%s),\n" % self._target_arg) + \
+ ("\tTP_ARGS(%s)" % self._target)
def fill_main_c(self):
main_c = super().fill_main_c()
main_c = main_c.replace("%%ATOMS_INIT%%", self.fill_atoms_init())
+ main_c = main_c.replace("%%TARGET_ARG%%", self._target_arg)
return main_c
diff --git a/tools/verification/rvgen/rvgen/templates/ltl2k/main.c b/tools/verification/rvgen/rvgen/templates/ltl2k/main.c
index f85d076fbf78..09167efbfbf0 100644
--- a/tools/verification/rvgen/rvgen/templates/ltl2k/main.c
+++ b/tools/verification/rvgen/rvgen/templates/ltl2k/main.c
@@ -23,7 +23,7 @@
#include "%%MODEL_NAME%%.h"
#include <rv/ltl_monitor.h>
-static void ltl_atoms_fetch(struct task_struct *task, struct ltl_monitor *mon)
+static void ltl_atoms_fetch(%%TARGET_ARG%%, struct ltl_monitor *mon)
{
/*
* This is called everytime the Buchi automaton is triggered.
@@ -36,13 +36,14 @@ static void ltl_atoms_fetch(struct task_struct *task, struct ltl_monitor *mon)
*/
}
-static void ltl_atoms_init(struct task_struct *task, struct ltl_monitor *mon, bool task_creation)
+static void ltl_atoms_init(%%TARGET_ARG%%, struct ltl_monitor *mon, bool target_creation)
{
/*
* This should initialize as many atomic propositions as possible.
*
- * @task_creation indicates whether the task is being created. This is
- * false if the task is already running before the monitor is enabled.
+ * @target_creation indicates whether the monitored target is being
+ * created. This is false if the monitor target is already online before
+ * the monitor is enabled.
*/
%%ATOMS_INIT%%
}
diff --git a/tools/verification/rvgen/rvgen/templates/ltl2k/trace.h b/tools/verification/rvgen/rvgen/templates/ltl2k/trace.h
index 49394c4b0f1c..87d3a1308926 100644
--- a/tools/verification/rvgen/rvgen/templates/ltl2k/trace.h
+++ b/tools/verification/rvgen/rvgen/templates/ltl2k/trace.h
@@ -6,9 +6,8 @@
#ifdef CONFIG_RV_MON_%%MODEL_NAME_UP%%
DEFINE_EVENT(event_%%MONITOR_CLASS%%, event_%%MODEL_NAME%%,
- TP_PROTO(struct task_struct *task, char *states, char *atoms, char *next),
- TP_ARGS(task, states, atoms, next));
+%%TRACEPOINT_ARGS_SKEL_EVENT%%);
+
DEFINE_EVENT(error_%%MONITOR_CLASS%%, error_%%MODEL_NAME%%,
- TP_PROTO(struct task_struct *task),
- TP_ARGS(task));
+%%TRACEPOINT_ARGS_SKEL_ERROR%%);
#endif /* CONFIG_RV_MON_%%MODEL_NAME_UP%% */
--
2.39.5
^ permalink raw reply related [flat|nested] 32+ messages in thread
* [PATCH 4/5] sched: Add rt task enqueue/dequeue trace points
2025-07-30 12:45 [PATCH 0/5] rv: LTL per-cpu monitor type and real-time scheduling monitor Nam Cao
` (2 preceding siblings ...)
2025-07-30 12:45 ` [PATCH 3/5] verification/rvgen/ltl: Support per-cpu monitor generation Nam Cao
@ 2025-07-30 12:45 ` Nam Cao
2025-07-30 13:53 ` Gabriele Monaco
2025-07-30 12:45 ` [PATCH 5/5] rv: Add rts monitor Nam Cao
4 siblings, 1 reply; 32+ messages in thread
From: Nam Cao @ 2025-07-30 12:45 UTC (permalink / raw)
To: Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
Gabriele Monaco, linux-trace-kernel, linux-kernel
Cc: Nam Cao, Ingo Molnar, Peter Zijlstra, Juri Lelli, Vincent Guittot,
Dietmar Eggemann, Ben Segall, Mel Gorman, Valentin Schneider
Add trace points into enqueue_task_rt() and dequeue_task_rt(). They are
useful to implement an RV monitor which validates RT scheduling.
Signed-off-by: Nam Cao <namcao@linutronix.de>
---
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Juri Lelli <juri.lelli@redhat.com>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Ben Segall <bsegall@google.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Valentin Schneider <vschneid@redhat.com>
---
include/trace/events/sched.h | 8 ++++++++
kernel/sched/rt.c | 4 ++++
2 files changed, 12 insertions(+)
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index c08893bde255..c38f12f7f903 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -898,6 +898,14 @@ DECLARE_TRACE(sched_set_need_resched,
TP_PROTO(struct task_struct *tsk, int cpu, int tif),
TP_ARGS(tsk, cpu, tif));
+DECLARE_TRACE(enqueue_task_rt,
+ TP_PROTO(int cpu, struct task_struct *task),
+ TP_ARGS(cpu, task));
+
+DECLARE_TRACE(dequeue_task_rt,
+ TP_PROTO(int cpu, struct task_struct *task),
+ TP_ARGS(cpu, task));
+
#endif /* _TRACE_SCHED_H */
/* This part must be outside protection */
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index e40422c37033..f4d3f5e7fbec 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1480,6 +1480,8 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
{
struct sched_rt_entity *rt_se = &p->rt;
+ trace_enqueue_task_rt_tp(rq->cpu, p);
+
if (flags & ENQUEUE_WAKEUP)
rt_se->timeout = 0;
@@ -1501,6 +1503,8 @@ static bool dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
dequeue_pushable_task(rq, p);
+ trace_dequeue_task_rt_tp(rq->cpu, p);
+
return true;
}
--
2.39.5
^ permalink raw reply related [flat|nested] 32+ messages in thread
* [PATCH 5/5] rv: Add rts monitor
2025-07-30 12:45 [PATCH 0/5] rv: LTL per-cpu monitor type and real-time scheduling monitor Nam Cao
` (3 preceding siblings ...)
2025-07-30 12:45 ` [PATCH 4/5] sched: Add rt task enqueue/dequeue trace points Nam Cao
@ 2025-07-30 12:45 ` Nam Cao
2025-07-31 7:47 ` Gabriele Monaco
2025-08-05 8:40 ` Gabriele Monaco
4 siblings, 2 replies; 32+ messages in thread
From: Nam Cao @ 2025-07-30 12:45 UTC (permalink / raw)
To: Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
Gabriele Monaco, linux-trace-kernel, linux-kernel
Cc: Nam Cao, Ingo Molnar, Peter Zijlstra, Juri Lelli, Vincent Guittot,
Dietmar Eggemann, Ben Segall, Mel Gorman, Valentin Schneider
Add "real-time scheduling" monitor, which validates that SCHED_RR and
SCHED_FIFO tasks are scheduled before tasks with normal and extensible
scheduling policies.
Signed-off-by: Nam Cao <namcao@linutronix.de>
---
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Juri Lelli <juri.lelli@redhat.com>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Ben Segall <bsegall@google.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Valentin Schneider <vschneid@redhat.com>
---
Documentation/trace/rv/monitor_sched.rst | 19 ++++
kernel/trace/rv/Kconfig | 1 +
kernel/trace/rv/Makefile | 1 +
kernel/trace/rv/monitors/rts/Kconfig | 17 +++
kernel/trace/rv/monitors/rts/rts.c | 131 +++++++++++++++++++++++
kernel/trace/rv/monitors/rts/rts.h | 126 ++++++++++++++++++++++
kernel/trace/rv/monitors/rts/rts_trace.h | 15 +++
tools/verification/models/sched/rts.ltl | 5 +
8 files changed, 315 insertions(+)
create mode 100644 kernel/trace/rv/monitors/rts/Kconfig
create mode 100644 kernel/trace/rv/monitors/rts/rts.c
create mode 100644 kernel/trace/rv/monitors/rts/rts.h
create mode 100644 kernel/trace/rv/monitors/rts/rts_trace.h
create mode 100644 tools/verification/models/sched/rts.ltl
diff --git a/Documentation/trace/rv/monitor_sched.rst b/Documentation/trace/rv/monitor_sched.rst
index 3f8381ad9ec7..2f9d62a1af1f 100644
--- a/Documentation/trace/rv/monitor_sched.rst
+++ b/Documentation/trace/rv/monitor_sched.rst
@@ -396,6 +396,25 @@ preemption is always disabled. On non- ``PREEMPT_RT`` kernels, the interrupts
might invoke a softirq to set ``need_resched`` and wake up a task. This is
another special case that is currently not supported by the monitor.
+Monitor rts
+-----------
+
+The real-time scheduling monitor validates that tasks with real-time scheduling
+policies (`SCHED_FIFO` and `SCHED_RR`) are always scheduled before tasks with
+normal and extensible scheduling policies (`SCHED_OTHER`, `SCHED_BATCH`,
+`SCHED_IDLE`, `SCHED_EXT`):
+
+.. literalinclude:: ../../../tools/verification/models/sched/rts.ltl
+
+Note that this monitor may report errors if real-time throttling or fair
+deadline server is enabled. These mechanisms prevent real-time tasks from
+monopolizing the CPU by giving tasks with normal and extensible scheduling
+policies a chance to run. They give system administrators a chance to kill a
+misbehaving real-time task. However, they violate the scheduling priorities and
+may cause latency to well-behaved real-time tasks. Thus, if you see errors from
+this monitor, consider disabling real-time throttling and the fair deadline
+server.
+
References
----------
diff --git a/kernel/trace/rv/Kconfig b/kernel/trace/rv/Kconfig
index 951c2e0cda25..3992ff6ac8b1 100644
--- a/kernel/trace/rv/Kconfig
+++ b/kernel/trace/rv/Kconfig
@@ -62,6 +62,7 @@ source "kernel/trace/rv/monitors/sts/Kconfig"
source "kernel/trace/rv/monitors/nrp/Kconfig"
source "kernel/trace/rv/monitors/sssw/Kconfig"
source "kernel/trace/rv/monitors/opid/Kconfig"
+source "kernel/trace/rv/monitors/rts/Kconfig"
# Add new sched monitors here
source "kernel/trace/rv/monitors/rtapp/Kconfig"
diff --git a/kernel/trace/rv/Makefile b/kernel/trace/rv/Makefile
index 750e4ad6fa0f..d7bfc7ae6677 100644
--- a/kernel/trace/rv/Makefile
+++ b/kernel/trace/rv/Makefile
@@ -17,6 +17,7 @@ obj-$(CONFIG_RV_MON_STS) += monitors/sts/sts.o
obj-$(CONFIG_RV_MON_NRP) += monitors/nrp/nrp.o
obj-$(CONFIG_RV_MON_SSSW) += monitors/sssw/sssw.o
obj-$(CONFIG_RV_MON_OPID) += monitors/opid/opid.o
+obj-$(CONFIG_RV_MON_RTS) += monitors/rts/rts.o
# Add new monitors here
obj-$(CONFIG_RV_REACTORS) += rv_reactors.o
obj-$(CONFIG_RV_REACT_PRINTK) += reactor_printk.o
diff --git a/kernel/trace/rv/monitors/rts/Kconfig b/kernel/trace/rv/monitors/rts/Kconfig
new file mode 100644
index 000000000000..1b780bce6133
--- /dev/null
+++ b/kernel/trace/rv/monitors/rts/Kconfig
@@ -0,0 +1,17 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+config RV_MON_RTS
+ depends on RV
+ select RV_LTL_MONITOR
+ depends on RV_MON_SCHED
+ default y
+ select LTL_MON_EVENTS_IMPLICIT
+ bool "rts monitor"
+ help
+ Add support for RTS (real-time scheduling) monitor which validates
+ that real-time-priority tasks are scheduled before SCHED_OTHER tasks.
+
+ This monitor may report an error if RT throttling or deadline server
+ is enabled.
+
+ Say Y if you are debugging or testing a real-time system.
diff --git a/kernel/trace/rv/monitors/rts/rts.c b/kernel/trace/rv/monitors/rts/rts.c
new file mode 100644
index 000000000000..e23563c47eb1
--- /dev/null
+++ b/kernel/trace/rv/monitors/rts/rts.c
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/ftrace.h>
+#include <linux/tracepoint.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/rv.h>
+#include <linux/sched/deadline.h>
+#include <linux/sched/rt.h>
+#include <rv/instrumentation.h>
+
+#define MODULE_NAME "rts"
+
+#include <trace/events/sched.h>
+#include <rv_trace.h>
+#include <monitors/sched/sched.h>
+
+#include "rts.h"
+#include <rv/ltl_monitor.h>
+
+static DEFINE_PER_CPU(unsigned int, nr_queued);
+
+static void ltl_atoms_fetch(unsigned int cpu, struct ltl_monitor *mon)
+{
+}
+
+static void ltl_atoms_init(unsigned int cpu, struct ltl_monitor *mon,
+ bool target_creation)
+{
+ ltl_atom_set(mon, LTL_SCHED_SWITCH, false);
+ ltl_atom_set(mon, LTL_SCHED_SWITCH_DL, false);
+ ltl_atom_set(mon, LTL_SCHED_SWITCH_RT, false);
+
+ /*
+ * This may not be accurate, there may be enqueued RT tasks. But that's
+ * okay, the worst we get is a false negative. It will be accurate as
+ * soon as the CPU no longer has any queued RT task.
+ */
+ ltl_atom_set(mon, LTL_RT_TASK_ENQUEUED, false);
+}
+
+static void handle_enqueue_task_rt(void *data, int cpu, struct task_struct *task)
+{
+ unsigned int *queued = per_cpu_ptr(&nr_queued, cpu);
+
+ (*queued)++;
+ ltl_atom_update(cpu, LTL_RT_TASK_ENQUEUED, true);
+}
+
+static void handle_dequeue_task_rt(void *data, int cpu, struct task_struct *task)
+{
+ unsigned int *queued = per_cpu_ptr(&nr_queued, cpu);
+
+ /*
+ * This may not be accurate for a short time after the monitor is
+ * enabled, because there may be enqueued RT tasks which are not counted
+ * torward nr_queued. But that's okay, the worst we get is a false
+ * negative. nr_queued will be accurate as soon as the CPU no longer has
+ * any queued RT task.
+ */
+ if (*queued)
+ (*queued)--;
+ if (!*queued)
+ ltl_atom_update(cpu, LTL_RT_TASK_ENQUEUED, false);
+}
+
+static void handle_sched_switch(void *data, bool preempt, struct task_struct *prev,
+ struct task_struct *next, unsigned int prev_state)
+{
+ unsigned int cpu = smp_processor_id();
+ struct ltl_monitor *mon = ltl_get_monitor(cpu);
+
+ ltl_atom_set(mon, LTL_SCHED_SWITCH_RT, rt_task(next));
+ ltl_atom_set(mon, LTL_SCHED_SWITCH_DL, dl_task(next));
+ ltl_atom_update(cpu, LTL_SCHED_SWITCH, true);
+
+ ltl_atom_set(mon, LTL_SCHED_SWITCH_RT, false);
+ ltl_atom_set(mon, LTL_SCHED_SWITCH_DL, false);
+ ltl_atom_update(cpu, LTL_SCHED_SWITCH, false);
+}
+
+static int enable_rts(void)
+{
+ int retval;
+
+ retval = ltl_monitor_init();
+ if (retval)
+ return retval;
+
+ rv_attach_trace_probe("rts", enqueue_task_rt_tp, handle_enqueue_task_rt);
+ rv_attach_trace_probe("rts", dequeue_task_rt_tp, handle_dequeue_task_rt);
+ rv_attach_trace_probe("rts", sched_switch, handle_sched_switch);
+
+ return 0;
+}
+
+static void disable_rts(void)
+{
+ rv_detach_trace_probe("rts", enqueue_task_rt_tp, handle_enqueue_task_rt);
+ rv_detach_trace_probe("rts", dequeue_task_rt_tp, handle_dequeue_task_rt);
+ rv_detach_trace_probe("rts", sched_switch, handle_sched_switch);
+
+ ltl_monitor_destroy();
+}
+
+/*
+ * This is the monitor register section.
+ */
+static struct rv_monitor rv_rts = {
+ .name = "rts",
+ .description = "Validate that real-time tasks are scheduled before lower-priority tasks",
+ .enable = enable_rts,
+ .disable = disable_rts,
+};
+
+static int __init register_rts(void)
+{
+ return rv_register_monitor(&rv_rts, &rv_sched);
+}
+
+static void __exit unregister_rts(void)
+{
+ rv_unregister_monitor(&rv_rts);
+}
+
+module_init(register_rts);
+module_exit(unregister_rts);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Nam Cao <namcao@linutronix.de>");
+MODULE_DESCRIPTION("rts: Validate that real-time tasks are scheduled before lower-priority tasks");
diff --git a/kernel/trace/rv/monitors/rts/rts.h b/kernel/trace/rv/monitors/rts/rts.h
new file mode 100644
index 000000000000..9fbf0d97c1a7
--- /dev/null
+++ b/kernel/trace/rv/monitors/rts/rts.h
@@ -0,0 +1,126 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * C implementation of Buchi automaton, automatically generated by
+ * tools/verification/rvgen from the linear temporal logic specification.
+ * For further information, see kernel documentation:
+ * Documentation/trace/rv/linear_temporal_logic.rst
+ */
+
+#include <linux/rv.h>
+
+#define MONITOR_NAME rts
+
+#define LTL_MONITOR_TYPE LTL_CPU_MONITOR
+
+enum ltl_atom {
+ LTL_RT_TASK_ENQUEUED,
+ LTL_SCHED_SWITCH,
+ LTL_SCHED_SWITCH_DL,
+ LTL_SCHED_SWITCH_RT,
+ LTL_NUM_ATOM
+};
+static_assert(LTL_NUM_ATOM <= RV_MAX_LTL_ATOM);
+
+static const char *ltl_atom_str(enum ltl_atom atom)
+{
+ static const char *const names[] = {
+ "rt_ta_en",
+ "sc_sw",
+ "sc_sw_dl",
+ "sc_sw_rt",
+ };
+
+ return names[atom];
+}
+
+enum ltl_buchi_state {
+ S0,
+ S1,
+ S2,
+ S3,
+ S4,
+ RV_NUM_BA_STATES
+};
+static_assert(RV_NUM_BA_STATES <= RV_MAX_BA_STATES);
+
+static void ltl_start(unsigned int cpu, struct ltl_monitor *mon)
+{
+ bool sched_switch_rt = test_bit(LTL_SCHED_SWITCH_RT, mon->atoms);
+ bool sched_switch_dl = test_bit(LTL_SCHED_SWITCH_DL, mon->atoms);
+ bool sched_switch = test_bit(LTL_SCHED_SWITCH, mon->atoms);
+ bool rt_task_enqueued = test_bit(LTL_RT_TASK_ENQUEUED, mon->atoms);
+ bool val13 = !rt_task_enqueued;
+ bool val8 = sched_switch_dl || val13;
+ bool val9 = sched_switch_rt || val8;
+ bool val6 = !sched_switch;
+ bool val1 = !rt_task_enqueued;
+
+ if (val1)
+ __set_bit(S0, mon->states);
+ if (val6)
+ __set_bit(S1, mon->states);
+ if (val9)
+ __set_bit(S4, mon->states);
+}
+
+static void
+ltl_possible_next_states(struct ltl_monitor *mon, unsigned int state, unsigned long *next)
+{
+ bool sched_switch_rt = test_bit(LTL_SCHED_SWITCH_RT, mon->atoms);
+ bool sched_switch_dl = test_bit(LTL_SCHED_SWITCH_DL, mon->atoms);
+ bool sched_switch = test_bit(LTL_SCHED_SWITCH, mon->atoms);
+ bool rt_task_enqueued = test_bit(LTL_RT_TASK_ENQUEUED, mon->atoms);
+ bool val13 = !rt_task_enqueued;
+ bool val8 = sched_switch_dl || val13;
+ bool val9 = sched_switch_rt || val8;
+ bool val6 = !sched_switch;
+ bool val1 = !rt_task_enqueued;
+
+ switch (state) {
+ case S0:
+ if (val1)
+ __set_bit(S0, next);
+ if (val6)
+ __set_bit(S1, next);
+ if (val9)
+ __set_bit(S4, next);
+ break;
+ case S1:
+ if (val6)
+ __set_bit(S1, next);
+ if (val1 && val6)
+ __set_bit(S2, next);
+ if (val1 && val9)
+ __set_bit(S3, next);
+ if (val9)
+ __set_bit(S4, next);
+ break;
+ case S2:
+ if (val6)
+ __set_bit(S1, next);
+ if (val1 && val6)
+ __set_bit(S2, next);
+ if (val1 && val9)
+ __set_bit(S3, next);
+ if (val9)
+ __set_bit(S4, next);
+ break;
+ case S3:
+ if (val1)
+ __set_bit(S0, next);
+ if (val6)
+ __set_bit(S1, next);
+ if (val9)
+ __set_bit(S4, next);
+ break;
+ case S4:
+ if (val1)
+ __set_bit(S0, next);
+ if (val6)
+ __set_bit(S1, next);
+ if (val9)
+ __set_bit(S4, next);
+ break;
+ }
+}
diff --git a/kernel/trace/rv/monitors/rts/rts_trace.h b/kernel/trace/rv/monitors/rts/rts_trace.h
new file mode 100644
index 000000000000..0ac9e112a8b0
--- /dev/null
+++ b/kernel/trace/rv/monitors/rts/rts_trace.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * Snippet to be included in rv_trace.h
+ */
+
+#ifdef CONFIG_RV_MON_RTS
+DEFINE_EVENT(event_ltl_monitor, event_rts,
+ TP_PROTO(unsigned int cpu, char *states, char *atoms, char *next),
+ TP_ARGS(cpu, states, atoms, next));
+
+DEFINE_EVENT(error_ltl_monitor, error_rts,
+ TP_PROTO(unsigned int cpu),
+ TP_ARGS(cpu));
+#endif /* CONFIG_RV_MON_RTS */
diff --git a/tools/verification/models/sched/rts.ltl b/tools/verification/models/sched/rts.ltl
new file mode 100644
index 000000000000..90872bca46b1
--- /dev/null
+++ b/tools/verification/models/sched/rts.ltl
@@ -0,0 +1,5 @@
+RULE = always (RT_TASK_ENQUEUED imply SCHEDULE_RT_NEXT)
+
+SCHEDULE_RT_NEXT = (not SCHED_SWITCH) until (SCHED_SWITCH_RT or EXCEPTIONS)
+
+EXCEPTIONS = SCHED_SWITCH_DL or not RT_TASK_ENQUEUED
--
2.39.5
^ permalink raw reply related [flat|nested] 32+ messages in thread
* Re: [PATCH 4/5] sched: Add rt task enqueue/dequeue trace points
2025-07-30 12:45 ` [PATCH 4/5] sched: Add rt task enqueue/dequeue trace points Nam Cao
@ 2025-07-30 13:53 ` Gabriele Monaco
2025-07-30 15:18 ` Nam Cao
0 siblings, 1 reply; 32+ messages in thread
From: Gabriele Monaco @ 2025-07-30 13:53 UTC (permalink / raw)
To: Nam Cao, Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
linux-trace-kernel, linux-kernel
Cc: Ingo Molnar, Peter Zijlstra, Juri Lelli, Vincent Guittot,
Dietmar Eggemann, Ben Segall, Mel Gorman, Valentin Schneider
On Wed, 2025-07-30 at 14:45 +0200, Nam Cao wrote:
> Add trace points into enqueue_task_rt() and dequeue_task_rt(). They
> are useful to implement RV monitor which validates RT scheduling.
>
I get it's much simpler this way, but is it that different to follow
the task's existing tracepoints?
* task going to sleep (switch:prev_state != RUNNING) is dequeued
* task waking up is enqueued
* changing the task's policy (setpolicy and setattr syscalls) should
enqueue/dequeue as well
This is more thinking out loud, but I'm doing right now something
rather similar with the deadline tasks and this seems reasonable, at
least on paper.
What do you think?
Thanks,
Gabriele
> Signed-off-by: Nam Cao <namcao@linutronix.de>
> ---
> Cc: Ingo Molnar <mingo@redhat.com>
> Cc: Peter Zijlstra <peterz@infradead.org>
> Cc: Juri Lelli <juri.lelli@redhat.com>
> Cc: Vincent Guittot <vincent.guittot@linaro.org>
> Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
> Cc: Ben Segall <bsegall@google.com>
> Cc: Mel Gorman <mgorman@suse.de>
> Cc: Valentin Schneider <vschneid@redhat.com>
> ---
> include/trace/events/sched.h | 8 ++++++++
> kernel/sched/rt.c | 4 ++++
> 2 files changed, 12 insertions(+)
>
> diff --git a/include/trace/events/sched.h
> b/include/trace/events/sched.h
> index c08893bde255..c38f12f7f903 100644
> --- a/include/trace/events/sched.h
> +++ b/include/trace/events/sched.h
> @@ -898,6 +898,14 @@ DECLARE_TRACE(sched_set_need_resched,
> TP_PROTO(struct task_struct *tsk, int cpu, int tif),
> TP_ARGS(tsk, cpu, tif));
>
> +DECLARE_TRACE(enqueue_task_rt,
> + TP_PROTO(int cpu, struct task_struct *task),
> + TP_ARGS(cpu, task));
> +
> +DECLARE_TRACE(dequeue_task_rt,
> + TP_PROTO(int cpu, struct task_struct *task),
> + TP_ARGS(cpu, task));
> +
> #endif /* _TRACE_SCHED_H */
>
> /* This part must be outside protection */
> diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
> index e40422c37033..f4d3f5e7fbec 100644
> --- a/kernel/sched/rt.c
> +++ b/kernel/sched/rt.c
> @@ -1480,6 +1480,8 @@ enqueue_task_rt(struct rq *rq, struct
> task_struct *p, int flags)
> {
> struct sched_rt_entity *rt_se = &p->rt;
>
> + trace_enqueue_task_rt_tp(rq->cpu, p);
> +
> if (flags & ENQUEUE_WAKEUP)
> rt_se->timeout = 0;
>
> @@ -1501,6 +1503,8 @@ static bool dequeue_task_rt(struct rq *rq,
> struct task_struct *p, int flags)
>
> dequeue_pushable_task(rq, p);
>
> + trace_dequeue_task_rt_tp(rq->cpu, p);
> +
> return true;
> }
>
^ permalink raw reply [flat|nested] 32+ messages in thread
* Re: [PATCH 4/5] sched: Add rt task enqueue/dequeue trace points
2025-07-30 13:53 ` Gabriele Monaco
@ 2025-07-30 15:18 ` Nam Cao
2025-07-30 16:18 ` Gabriele Monaco
0 siblings, 1 reply; 32+ messages in thread
From: Nam Cao @ 2025-07-30 15:18 UTC (permalink / raw)
To: Gabriele Monaco
Cc: Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
linux-trace-kernel, linux-kernel, Ingo Molnar, Peter Zijlstra,
Juri Lelli, Vincent Guittot, Dietmar Eggemann, Ben Segall,
Mel Gorman, Valentin Schneider
On Wed, Jul 30, 2025 at 03:53:14PM +0200, Gabriele Monaco wrote:
> On Wed, 2025-07-30 at 14:45 +0200, Nam Cao wrote:
> > Add trace points into enqueue_task_rt() and dequeue_task_rt(). They
> > are useful to implement RV monitor which validates RT scheduling.
> >
>
> I get it's much simpler this way, but is it that different to follow
> the task's existing tracepoints?
>
> * task going to sleep (switch:prev_state != RUNNING) is dequeued
> * task waking up is enqueued
> * changing the tasks's policy (setpolicy and setattr syscalls) should
> enqueue/dequeue as well
>
> This is more thinking out loud, but I'm doing right now something
> rather similar with the deadline tasks and this seems reasonable, at
> least on paper.
>
> What do you think?
I think more or less the same. The fewer tracepoints, the better. But the
monitor is way more obvious this way.
Let me see how hard it is to use the existing tracepoints...
Nam
^ permalink raw reply [flat|nested] 32+ messages in thread
* Re: [PATCH 4/5] sched: Add rt task enqueue/dequeue trace points
2025-07-30 15:18 ` Nam Cao
@ 2025-07-30 16:18 ` Gabriele Monaco
2025-07-31 7:35 ` Nam Cao
0 siblings, 1 reply; 32+ messages in thread
From: Gabriele Monaco @ 2025-07-30 16:18 UTC (permalink / raw)
To: Nam Cao
Cc: Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
linux-trace-kernel, linux-kernel, Ingo Molnar, Peter Zijlstra,
Juri Lelli, Vincent Guittot, Dietmar Eggemann, Ben Segall,
Mel Gorman, Valentin Schneider
On Wed, 2025-07-30 at 17:18 +0200, Nam Cao wrote:
> On Wed, Jul 30, 2025 at 03:53:14PM +0200, Gabriele Monaco wrote:
> > On Wed, 2025-07-30 at 14:45 +0200, Nam Cao wrote:
> > > Add trace points into enqueue_task_rt() and dequeue_task_rt().
> > > They
> > > are useful to implement RV monitor which validates RT scheduling.
> > >
> >
> > I get it's much simpler this way, but is it that different to
> > follow
> > the task's existing tracepoints?
> >
> > * task going to sleep (switch:prev_state != RUNNING) is dequeued
> > * task waking up is enqueued
> > * changing the tasks's policy (setpolicy and setattr syscalls)
> > should
> > enqueue/dequeue as well
> >
> > This is more thinking out loud, but I'm doing right now something
> > rather similar with the deadline tasks and this seems reasonable,
> > at
> > least on paper.
> >
> > What do you think?
>
> I think more or less the same. The fewer tracepoints, the better. But
> the
> monitor is way more obvious this way.
>
> Let me see how hard it is to use the existing tracepoints...
Well, thinking about it again, these tracepoints might simplify things
considerably when tasks change policy..
Syscalls may fail, for that you could register to sys_exit and check
the return value, but at that point the policy changed already, so you
cannot tell if it's a relevant event or not (e.g. same policy).
Also sched_setscheduler_nocheck would be out of the picture here, not
sure how recurrent that is though (and might not matter if you only
focus on userspace tasks).
If you go down the route of adding tracepoints, why not have other
classes benefit too? I believe calling them from the enqueue_task /
dequeue_task in sched/core.c would allow you to easily filter out by
policy anyway (haven't tested).
Thanks,
Gabriele
^ permalink raw reply [flat|nested] 32+ messages in thread
* Re: [PATCH 4/5] sched: Add rt task enqueue/dequeue trace points
2025-07-30 16:18 ` Gabriele Monaco
@ 2025-07-31 7:35 ` Nam Cao
2025-07-31 8:39 ` Gabriele Monaco
2025-08-01 3:42 ` K Prateek Nayak
0 siblings, 2 replies; 32+ messages in thread
From: Nam Cao @ 2025-07-31 7:35 UTC (permalink / raw)
To: Gabriele Monaco
Cc: Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
linux-trace-kernel, linux-kernel, Ingo Molnar, Peter Zijlstra,
Juri Lelli, Vincent Guittot, Dietmar Eggemann, Ben Segall,
Mel Gorman, Valentin Schneider
On Wed, Jul 30, 2025 at 06:18:45PM +0200, Gabriele Monaco wrote:
> Well, thinking about it again, these tracepoints might simplify things
> considerably when tasks change policy..
>
> Syscalls may fail, for that you could register to sys_exit and check
> the return value, but at that point the policy changed already, so you
> cannot tell if it's a relevant event or not (e.g. same policy).
> Also sched_setscheduler_nocheck would be out of the picture here, not
> sure how recurrent that is though (and might not matter if you only
> focus on userspace tasks).
>
> If you go down the route of adding tracepoints, why not have other
> classes benefit too? I believe calling them from the enqueue_task /
> dequeue_task in sched/core.c would allow you to easily filter out by
> policy anyway (haven't tested).
Something like the untested patch below?
Will you have a use case for it too? Then I will try to accommodate your
use case, otherwise I will do just enough for my case.
Nam
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index c38f12f7f903..b50668052f99 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -906,6 +906,14 @@ DECLARE_TRACE(dequeue_task_rt,
TP_PROTO(int cpu, struct task_struct *task),
TP_ARGS(cpu, task));
+DECLARE_TRACE(enqueue_task,
+ TP_PROTO(int cpu, struct task_struct *task),
+ TP_ARGS(cpu, task));
+
+DECLARE_TRACE(dequeue_task,
+ TP_PROTO(int cpu, struct task_struct *task),
+ TP_ARGS(cpu, task));
+
#endif /* _TRACE_SCHED_H */
/* This part must be outside protection */
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index b485e0639616..2af90532982a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2077,6 +2077,8 @@ unsigned long get_wchan(struct task_struct *p)
void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
{
+ trace_enqueue_task_tp(rq->cpu, p);
+
if (!(flags & ENQUEUE_NOCLOCK))
update_rq_clock(rq);
@@ -2103,6 +2105,8 @@ void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
*/
inline bool dequeue_task(struct rq *rq, struct task_struct *p, int flags)
{
+ trace_dequeue_task_tp(rq->cpu, p);
+
if (sched_core_enabled(rq))
sched_core_dequeue(rq, p, flags);
^ permalink raw reply related [flat|nested] 32+ messages in thread
* Re: [PATCH 5/5] rv: Add rts monitor
2025-07-30 12:45 ` [PATCH 5/5] rv: Add rts monitor Nam Cao
@ 2025-07-31 7:47 ` Gabriele Monaco
2025-08-01 7:58 ` Nam Cao
2025-08-05 8:40 ` Gabriele Monaco
1 sibling, 1 reply; 32+ messages in thread
From: Gabriele Monaco @ 2025-07-31 7:47 UTC (permalink / raw)
To: Nam Cao, Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
linux-trace-kernel, linux-kernel
Cc: Ingo Molnar, Peter Zijlstra, Juri Lelli, Vincent Guittot,
Dietmar Eggemann, Ben Segall, Mel Gorman, Valentin Schneider
On Wed, 2025-07-30 at 14:45 +0200, Nam Cao wrote:
> Add "real-time scheduling" monitor, which validates that SCHED_RR and
> SCHED_FIFO tasks are scheduled before tasks with normal and
> extensible
> scheduling policies
>
Looks a very interesting monitor!
A few questions:
I assume this works with rt-throttle because it implies a dequeue,
right?
And you probably won't see that without explicit tracepoints..
> + /*
> + * This may not be accurate, there may be enqueued RT tasks.
> But
> that's
> + * okay, the worst we get is a false negative. It will be
> accurate
> as
> + * soon as the CPU no longer has any queued RT task.
> + */
> + ltl_atom_set(mon, LTL_RT_TASK_ENQUEUED, false);
>
As far as I understand here the monitor would just miss RT tasks
already running but would perfectly enforce the ones starting after
initialisation, right?
> +RULE = always (RT_TASK_ENQUEUED imply SCHEDULE_RT_NEXT)
> +
> +SCHEDULE_RT_NEXT = (not SCHED_SWITCH) until (SCHED_SWITCH_RT or
> EXCEPTIONS)
> +
> +EXCEPTIONS = SCHED_SWITCH_DL or not RT_TASK_ENQUEUED
This monitor allows non-RT tasks to run indefinitely before the switch,
only when it happens, RT must run, right?
Not sure you can do much about it though. (without falling into the
need resched rabbithole I was trying to untangle)
Thanks,
Gabriele
^ permalink raw reply [flat|nested] 32+ messages in thread
* Re: [PATCH 2/5] rv/ltl: Support per-cpu monitors
2025-07-30 12:45 ` [PATCH 2/5] rv/ltl: Support per-cpu monitors Nam Cao
@ 2025-07-31 8:02 ` Gabriele Monaco
2025-08-01 6:26 ` Nam Cao
0 siblings, 1 reply; 32+ messages in thread
From: Gabriele Monaco @ 2025-07-31 8:02 UTC (permalink / raw)
To: Nam Cao
Cc: Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
linux-trace-kernel, linux-kernel
On Wed, 2025-07-30 at 14:45 +0200, Nam Cao wrote:
> Add support for per-cpu run-time verification linear temporal logic
> monitors. This is analogous to deterministic automaton per-cpu
> monitors.
>
> Signed-off-by: Nam Cao <namcao@linutronix.de>
> ---
>
> diff --git a/kernel/trace/rv/rv_trace.h b/kernel/trace/rv/rv_trace.h
> index 4a6faddac614..f9e5fd044c45 100644
> --- a/kernel/trace/rv/rv_trace.h
> +++ b/kernel/trace/rv/rv_trace.h
> @@ -177,6 +177,53 @@ DECLARE_EVENT_CLASS(error_ltl_monitor_id,
> #include <monitors/pagefault/pagefault_trace.h>
> #include <monitors/sleep/sleep_trace.h>
> // Add new monitors based on CONFIG_LTL_MON_EVENTS_ID here
> +
> +#ifdef CONFIG_LTL_MON_EVENTS_IMPLICIT
> +DECLARE_EVENT_CLASS(event_ltl_monitor,
> +
> + TP_PROTO(unsigned int cpu, char *states, char *atoms, char
> *next),
> +
You don't really need to follow the ID/IMPLICIT convention here.
These LTL per-cpu monitors are, in fact, not implicit since they do
have an id (the CPU), implicit makes sense with the current
implementation of da_get_monitor that uses the current CPU (doesn't
have to stay that way, but there was no need to change so far).
If you don't want to get rid of the task's comm in the tracepoint (and
unify both with an integer id, like with DA), I'd suggest you use
different names like CONFIG_LTL_MON_EVENTS_TASK (in fact that doesn't
just have an ID) and CONFIG_LTL_MON_EVENTS_CPU (or even
CONFIG_LTL_MON_EVENTS_ID, for this it actually makes sense).
I'd prefer it as general as possible to ease new monitor types, but to
be real picky the LTLs per-task are not ID and the per-cpu are not
IMPLICIT.
The id field is what the rv userspace tool uses to differentiate
monitor types, by the way.
> +#endif /* CONFIG_LTL_MON_EVENTS_IMPLICIT */
> +
> #endif /* CONFIG_LTL_MON_EVENTS_ID */
Also, I'm not sure if that was intended, but
CONFIG_LTL_MON_EVENTS_IMPLICIT gets compiled only with
CONFIG_LTL_MON_EVENTS_ID.
Thanks,
Gabriele
^ permalink raw reply [flat|nested] 32+ messages in thread
* Re: [PATCH 4/5] sched: Add rt task enqueue/dequeue trace points
2025-07-31 7:35 ` Nam Cao
@ 2025-07-31 8:39 ` Gabriele Monaco
2025-08-01 3:42 ` K Prateek Nayak
1 sibling, 0 replies; 32+ messages in thread
From: Gabriele Monaco @ 2025-07-31 8:39 UTC (permalink / raw)
To: Nam Cao
Cc: Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
linux-trace-kernel, linux-kernel, Ingo Molnar, Peter Zijlstra,
Juri Lelli, Vincent Guittot, Dietmar Eggemann, Ben Segall,
Mel Gorman, Valentin Schneider
On Thu, 2025-07-31 at 09:35 +0200, Nam Cao wrote:
> On Wed, Jul 30, 2025 at 06:18:45PM +0200, Gabriele Monaco wrote:
> > Well, thinking about it again, these tracepoints might simplify
> > things
> > considerably when tasks change policy..
> >
> > Syscalls may fail, for that you could register to sys_exit and
> > check
> > the return value, but at that point the policy changed already, so
> > you
> > cannot tell if it's a relevant event or not (e.g. same policy).
> > Also sched_setscheduler_nocheck would be out of the picture here,
> > not
> > sure how recurrent that is though (and might not matter if you only
> > focus on userspace tasks).
> >
> > If you go down the route of adding tracepoints, why not have other
> > classes benefit too? I believe calling them from the enqueue_task /
> > dequeue_task in sched/core.c would allow you to easily filter out
> > by
> > policy anyway (haven't tested).
>
> Something like the untested patch below?
>
> Will you have a use case for it too? Then I will try to accommodate
> your use case, otherwise I will do just enough for my case.
Well, I'm still defining the best set of tracepoints I need, if you see
it cleaner, then go ahead with the way you're currently doing it.
Unless anyone else complains let's keep it like this.
Thanks,
Gabriele
>
> Nam
>
> diff --git a/include/trace/events/sched.h
> b/include/trace/events/sched.h
> index c38f12f7f903..b50668052f99 100644
> --- a/include/trace/events/sched.h
> +++ b/include/trace/events/sched.h
> @@ -906,6 +906,14 @@ DECLARE_TRACE(dequeue_task_rt,
> TP_PROTO(int cpu, struct task_struct *task),
> TP_ARGS(cpu, task));
>
> +DECLARE_TRACE(enqueue_task,
> + TP_PROTO(int cpu, struct task_struct *task),
> + TP_ARGS(cpu, task));
> +
> +DECLARE_TRACE(dequeue_task,
> + TP_PROTO(int cpu, struct task_struct *task),
> + TP_ARGS(cpu, task));
> +
> #endif /* _TRACE_SCHED_H */
>
> /* This part must be outside protection */
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index b485e0639616..2af90532982a 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -2077,6 +2077,8 @@ unsigned long get_wchan(struct task_struct *p)
>
> void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
> {
> + trace_enqueue_task_tp(rq->cpu, p);
> +
> if (!(flags & ENQUEUE_NOCLOCK))
> update_rq_clock(rq);
>
> @@ -2103,6 +2105,8 @@ void enqueue_task(struct rq *rq, struct
> task_struct *p, int flags)
> */
> inline bool dequeue_task(struct rq *rq, struct task_struct *p, int
> flags)
> {
> + trace_dequeue_task_tp(rq->cpu, p);
> +
> if (sched_core_enabled(rq))
> sched_core_dequeue(rq, p, flags);
>
^ permalink raw reply [flat|nested] 32+ messages in thread
* Re: [PATCH 1/5] rv/ltl: Prepare for other monitor types
2025-07-30 12:45 ` [PATCH 1/5] rv/ltl: Prepare for other monitor types Nam Cao
@ 2025-07-31 9:04 ` Gabriele Monaco
2025-07-31 9:28 ` Nam Cao
0 siblings, 1 reply; 32+ messages in thread
From: Gabriele Monaco @ 2025-07-31 9:04 UTC (permalink / raw)
To: Nam Cao
Cc: Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
linux-trace-kernel, linux-kernel
On Wed, 2025-07-30 at 14:45 +0200, Nam Cao wrote:
> rv/ltl_monitor.h is the template file used by all LTL monitors. But
> only per-task monitor is supported.
>
> No functional change intended.
>
> Signed-off-by: Nam Cao <namcao@linutronix.de>
> ---
> include/linux/rv.h | 1 +
> include/rv/ltl_monitor.h | 85 +++++++++++------
> --
> .../trace/rv/monitors/pagefault/pagefault.h | 2 +
> kernel/trace/rv/monitors/sleep/sleep.h | 2 +
> 4 files changed, 55 insertions(+), 35 deletions(-)
>
> diff --git a/include/linux/rv.h b/include/linux/rv.h
> index 14410a42faef..175438a22641 100644
> --- a/include/linux/rv.h
> +++ b/include/linux/rv.h
> @@ -28,6 +28,7 @@ struct da_monitor {
>
> #ifdef CONFIG_RV_LTL_MONITOR
>
> +#define LTL_TASK_MONITOR 0
I stole your solution to get rid of macros for the DA as well (might
post it after this merge window or with the next changes) and I'm
currently running with this:
diff --git a/include/linux/rv.h b/include/linux/rv.h
index 14410a42faef..6a7594080db1 100644
--- a/include/linux/rv.h
+++ b/include/linux/rv.h
@@ -13,6 +13,10 @@
#define MAX_DA_NAME_LEN 32
#define MAX_DA_RETRY_RACING_EVENTS 3
+#define RV_MON_GLOBAL 0
+#define RV_MON_PER_CPU 1
+#define RV_MON_PER_TASK 2
+
The numbers don't really matter and you don't need to implement all, of
course.
I'm not sure how are our patches going to coordinate, but I think it
may make sense to share those values for all monitor types.
What do you think?
For the rest this patch looks good to me, nice use of the typedef.
Reviewed-by: Gabriele Monaco <gmonaco@redhat.com>
Thanks,
Gabriele
^ permalink raw reply related [flat|nested] 32+ messages in thread
* Re: [PATCH 1/5] rv/ltl: Prepare for other monitor types
2025-07-31 9:04 ` Gabriele Monaco
@ 2025-07-31 9:28 ` Nam Cao
2025-07-31 10:14 ` Gabriele Monaco
0 siblings, 1 reply; 32+ messages in thread
From: Nam Cao @ 2025-07-31 9:28 UTC (permalink / raw)
To: Gabriele Monaco
Cc: Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
linux-trace-kernel, linux-kernel
On Thu, Jul 31, 2025 at 11:04:44AM +0200, Gabriele Monaco wrote:
> I stole your solution to get rid of macros for the DA as well (might
> post it after this merge window or with the next changes) and I'm
> currently running with this:
Nice, glad you like it.
For global monitor, you could do
typedef struct {} monitor_target;
static monitor_target rv_global_target;
I didn't check clang, but gcc does not emit anything for this. So
effectively the compiled code does not have the "target" parameter.
> diff --git a/include/linux/rv.h b/include/linux/rv.h
> index 14410a42faef..6a7594080db1 100644
> --- a/include/linux/rv.h
> +++ b/include/linux/rv.h
> @@ -13,6 +13,10 @@
> #define MAX_DA_NAME_LEN 32
> #define MAX_DA_RETRY_RACING_EVENTS 3
>
> +#define RV_MON_GLOBAL 0
> +#define RV_MON_PER_CPU 1
> +#define RV_MON_PER_TASK 2
> +
>
> The numbers don't really matter and you don't need to implement all, of
> course.
That makes sense, will do.
> I'm not sure how are our patches going to coordinate,
Let's just post them. The one whose patches are not applied first will have
to rebase. It is a trivial rebase anyway.
Nam
^ permalink raw reply [flat|nested] 32+ messages in thread
* Re: [PATCH 1/5] rv/ltl: Prepare for other monitor types
2025-07-31 9:28 ` Nam Cao
@ 2025-07-31 10:14 ` Gabriele Monaco
0 siblings, 0 replies; 32+ messages in thread
From: Gabriele Monaco @ 2025-07-31 10:14 UTC (permalink / raw)
To: Nam Cao
Cc: Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
linux-trace-kernel, linux-kernel
On Thu, 2025-07-31 at 11:28 +0200, Nam Cao wrote:
> On Thu, Jul 31, 2025 at 11:04:44AM +0200, Gabriele Monaco wrote:
> > I stole your solution to get rid of macros for the DA as well
> > (might
> > post it after this merge window or with the next changes) and I'm
> > currently running with this:
>
> Nice, glad you like it.
>
> For global monitor, you could do
>
> typdef struct {} monitor_target;
>
> static monitor_target rv_global_target;
>
Well, implicit monitors (cpu and global for DA) don't really have a
target but I'll probably be using this for other types if necessary or
in case I'm unifying things. Which might be nice, if it didn't require
modifying all per-cpu monitors (where CPU is not passed because the
current one is assumed).
> I didn't check clang, but gcc does not emit anything for this. So
> effectively the compiled code does not have the "target" parameter.
>
> > diff --git a/include/linux/rv.h b/include/linux/rv.h
> > index 14410a42faef..6a7594080db1 100644
> > --- a/include/linux/rv.h
> > +++ b/include/linux/rv.h
> > @@ -13,6 +13,10 @@
> > #define MAX_DA_NAME_LEN 32
> > #define MAX_DA_RETRY_RACING_EVENTS 3
> >
> > +#define RV_MON_GLOBAL 0
> > +#define RV_MON_PER_CPU 1
> > +#define RV_MON_PER_TASK 2
> > +
> >
> > The numbers don't really matter and you don't need to implement
> > all, of
> > course.
>
> That makes sense, will do.
>
> > I'm not sure how are our patches going to coordinate,
>
> Let's just post them. The one whose patches are not applied first
> will have to rebase. It is a trivial rebase anyway.
Sure then, git may be smart enough to see there aren't conflicts.
Thanks,
Gabriele
^ permalink raw reply [flat|nested] 32+ messages in thread
* Re: [PATCH 4/5] sched: Add rt task enqueue/dequeue trace points
2025-07-31 7:35 ` Nam Cao
2025-07-31 8:39 ` Gabriele Monaco
@ 2025-08-01 3:42 ` K Prateek Nayak
2025-08-01 7:29 ` Nam Cao
1 sibling, 1 reply; 32+ messages in thread
From: K Prateek Nayak @ 2025-08-01 3:42 UTC (permalink / raw)
To: Nam Cao, Gabriele Monaco
Cc: Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
linux-trace-kernel, linux-kernel, Ingo Molnar, Peter Zijlstra,
Juri Lelli, Vincent Guittot, Dietmar Eggemann, Ben Segall,
Mel Gorman, Valentin Schneider
On 7/31/2025 1:05 PM, Nam Cao wrote:
> Something like the untested patch below?
>
> Will you have a use case for it too? Then I will try to accommodate your
> use case, otherwise I will do just enough for my case.
>
> Nam
>
> diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
> index c38f12f7f903..b50668052f99 100644
> --- a/include/trace/events/sched.h
> +++ b/include/trace/events/sched.h
> @@ -906,6 +906,14 @@ DECLARE_TRACE(dequeue_task_rt,
> TP_PROTO(int cpu, struct task_struct *task),
> TP_ARGS(cpu, task));
>
> +DECLARE_TRACE(enqueue_task,
> + TP_PROTO(int cpu, struct task_struct *task),
> + TP_ARGS(cpu, task));
> +
> +DECLARE_TRACE(dequeue_task,
> + TP_PROTO(int cpu, struct task_struct *task),
> + TP_ARGS(cpu, task));
> +
> #endif /* _TRACE_SCHED_H */
>
> /* This part must be outside protection */
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index b485e0639616..2af90532982a 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -2077,6 +2077,8 @@ unsigned long get_wchan(struct task_struct *p)
>
> void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
> {
> + trace_enqueue_task_tp(rq->cpu, p);
> +
> if (!(flags & ENQUEUE_NOCLOCK))
> update_rq_clock(rq);
>
> @@ -2103,6 +2105,8 @@ void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
> */
> inline bool dequeue_task(struct rq *rq, struct task_struct *p, int flags)
> {
> + trace_dequeue_task_tp(rq->cpu, p);
Just thinking out loud, putting this tracepoint here can lead to a
"dequeued -> dequeued" transition for fair task when they are in delayed
dequeue state.
dequeue_task(p)
trace_dequeue_task_tp(p) # First time
dequeue_task_fair(p)
p->se.delayed = 1
...
<sched_switch> # p is still delayed
...
sched_setscheduler(p)
if (prev_class != next_class && p->se.sched_delayed)
dequeue_task(p, DEQUEUE_DELAYED);
trace_dequeue_task_tp(p) # Second time
It is not an issue as such but it might come as a surprise if users are
expecting a behavior like below which would be the case for !fair task
currently (and for all tasks before v6.12):
digraph state_automaton {
center = true;
size = "7,11";
{node [shape = plaintext, style=invis, label=""] "__init_enqueue_dequeue_cycle"};
{node [shape = ellipse] "enqueued"};
{node [shape = ellipse] "dequeued"};
"__init_enqueue_dequeue_cycle" -> "enqueued";
"__init_enqueue_dequeue_cycle" -> "dequeued";
"enqueued" [label = "enqueued", color = green3];
"enqueued" -> "dequeued" [ label = "dequeue_task" ];
"dequeued" [label = "dequeued", color = red];
"dequeued" -> "enqueued" [ label = "enqueue_task" ];
{ rank = min ;
"__init_enqueue_dequeue_cycle";
"dequeued";
"enqueued";
}
}
Another:
"dequeued" -> "dequeued" [ label = "dequeue_task" ];
edge would be needed in that case for >= v6.12. It is probably nothing
and can be easily handled by the users if they run into it but just
putting it out there for the record in case you only want to consider a
complete dequeue as "dequeued". Feel free to ignore since I'm completely
out of my depth when it comes to the usage of RV in the field :)
> +
> if (sched_core_enabled(rq))
> sched_core_dequeue(rq, p, flags);
>
>
--
Thanks and Regards,
Prateek
^ permalink raw reply [flat|nested] 32+ messages in thread
* Re: [PATCH 2/5] rv/ltl: Support per-cpu monitors
2025-07-31 8:02 ` Gabriele Monaco
@ 2025-08-01 6:26 ` Nam Cao
0 siblings, 0 replies; 32+ messages in thread
From: Nam Cao @ 2025-08-01 6:26 UTC (permalink / raw)
To: Gabriele Monaco
Cc: Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
linux-trace-kernel, linux-kernel
On Thu, Jul 31, 2025 at 10:02:19AM +0200, Gabriele Monaco wrote:
> You don't really need to follow to the ID/IMPLICIT convention here.
>
> These LTL per-cpu monitors are, in fact, not implicit since they do
> have an id (the CPU), implicit makes sense with the current
> implementation of da_get_monitor that uses the current CPU (doesn't
> have to stay that way, but there was no need to change so far).
>
> If you don't want to get rid of the task's comm in the tracepoint (and
> unify both with an integer id, like with DA), I'd suggest you use
> different names like CONFIG_LTL_MON_EVENTS_TASK (in fact that doesn't
> just have an ID) and CONFIG_LTL_MON_EVENTS_CPU (or even
> CONFIG_LTL_MON_EVENTS_ID, for this it actually makes sense).
>
> I'd prefer it as general as possible to ease new monitor types, but to
> be real picky the LTLs per-task are not ID and the per-cpu are not
> IMPLICIT.
>
> The id field is what the rv userspace tool uses to differentiate
> monitor types, by the way.
Ah, these names didn't make sense to me. But DA use them, so I was
"whatever".
Thanks for the explanation, let's use the _CPU names instead.
> > +#endif /* CONFIG_LTL_MON_EVENTS_IMPLICIT */
> > +
> > #endif /* CONFIG_LTL_MON_EVENTS_ID */
>
> Also, I'm not sure if that was intended, but
> CONFIG_LTL_MON_EVENTS_IMPLICIT gets compiled only with
> CONFIG_LTL_MON_EVENTS_ID.
That was certainly not intended.
Nam
^ permalink raw reply [flat|nested] 32+ messages in thread
* Re: [PATCH 4/5] sched: Add rt task enqueue/dequeue trace points
2025-08-01 3:42 ` K Prateek Nayak
@ 2025-08-01 7:29 ` Nam Cao
2025-08-01 9:56 ` K Prateek Nayak
0 siblings, 1 reply; 32+ messages in thread
From: Nam Cao @ 2025-08-01 7:29 UTC (permalink / raw)
To: K Prateek Nayak
Cc: Gabriele Monaco, Steven Rostedt, Masami Hiramatsu,
Mathieu Desnoyers, linux-trace-kernel, linux-kernel, Ingo Molnar,
Peter Zijlstra, Juri Lelli, Vincent Guittot, Dietmar Eggemann,
Ben Segall, Mel Gorman, Valentin Schneider
On Fri, Aug 01, 2025 at 09:12:08AM +0530, K Prateek Nayak wrote:
> Just thinking out loud, putting this tracepoint here can lead to a
> "dequeued -> dequeued" transition for fair task when they are in delayed
> dequeue state.
>
> dequeue_task(p)
> trace_dequeue_task_tp(p) # First time
> dequeue_task_fair(p)
> p->se.delayed = 1
> ...
> <sched_switch> # p is still delayed
> ...
> sched_setscheduler(p)
> if (prev_class != next_class && p->se.sched_delayed)
> dequeue_task(p, DEQUEUE_DELAYED);
> trace_dequeue_task_tp(p) # Second time
>
> It is not an issue as such but it might come as a surprise if users are
> expecting a behavior like below which would be the case for !fair task
> currently (and for all tasks before v6.12):
>
> digraph state_automaton {
> center = true;
> size = "7,11";
> {node [shape = plaintext, style=invis, label=""] "__init_enqueue_dequeue_cycle"};
> {node [shape = ellipse] "enqueued"};
> {node [shape = ellipse] "dequeued"};
> "__init_enqueue_dequeue_cycle" -> "enqueued";
> "__init_enqueue_dequeue_cycle" -> "dequeued";
> "enqueued" [label = "enqueued", color = green3];
> "enqueued" -> "dequeued" [ label = "dequeue_task" ];
> "dequeued" [label = "dequeued", color = red];
> "dequeued" -> "enqueued" [ label = "enqueue_task" ];
> { rank = min ;
> "__init_enqueue_dequeue_cycle";
> "dequeued";
> "enqueued";
> }
> }
>
>
> Another:
>
> "dequeued" -> "dequeued" [ label = "dequeue_task" ];
>
> edge would be needed in that case for >= v6.12. It is probably nothing
> and can be easily handled by the users if they run into it but just
> putting it out there for the record in case you only want to consider a
> complete dequeue as "dequeued". Feel free to ignore since I'm completely
> out of my depth when it comes to the usage of RV in the field :)
Ah, thanks for pointing this out. I do want to only consider complete
dequeue as "dequeued".
These tracepoints are not visible from userspace, and RV does not care
about enqueue/dequeue of fair tasks at the moment, so it is not a problem
for now. But as a precaution, I trust the below patch will do.
Nam
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index c38f12f7f903..b50668052f99 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -906,6 +906,14 @@ DECLARE_TRACE(dequeue_task_rt,
TP_PROTO(int cpu, struct task_struct *task),
TP_ARGS(cpu, task));
+DECLARE_TRACE(enqueue_task,
+ TP_PROTO(int cpu, struct task_struct *task),
+ TP_ARGS(cpu, task));
+
+DECLARE_TRACE(dequeue_task,
+ TP_PROTO(int cpu, struct task_struct *task),
+ TP_ARGS(cpu, task));
+
#endif /* _TRACE_SCHED_H */
/* This part must be outside protection */
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index b485e0639616..553c08a63395 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2077,6 +2077,8 @@ unsigned long get_wchan(struct task_struct *p)
void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
{
+ trace_enqueue_task_tp(rq->cpu, p);
+
if (!(flags & ENQUEUE_NOCLOCK))
update_rq_clock(rq);
@@ -2119,7 +2121,11 @@ inline bool dequeue_task(struct rq *rq, struct task_struct *p, int flags)
* and mark the task ->sched_delayed.
*/
uclamp_rq_dec(rq, p);
- return p->sched_class->dequeue_task(rq, p, flags);
+ if (p->sched_class->dequeue_task(rq, p, flags)) {
+ trace_dequeue_task_tp(rq->cpu, p);
+ return true;
+ }
+ return false;
}
void activate_task(struct rq *rq, struct task_struct *p, int flags)
^ permalink raw reply related [flat|nested] 32+ messages in thread
* Re: [PATCH 5/5] rv: Add rts monitor
2025-07-31 7:47 ` Gabriele Monaco
@ 2025-08-01 7:58 ` Nam Cao
2025-08-01 9:14 ` Gabriele Monaco
0 siblings, 1 reply; 32+ messages in thread
From: Nam Cao @ 2025-08-01 7:58 UTC (permalink / raw)
To: Gabriele Monaco
Cc: Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
linux-trace-kernel, linux-kernel, Ingo Molnar, Peter Zijlstra,
Juri Lelli, Vincent Guittot, Dietmar Eggemann, Ben Segall,
Mel Gorman, Valentin Schneider
On Thu, Jul 31, 2025 at 09:47:10AM +0200, Gabriele Monaco wrote:
> On Wed, 2025-07-30 at 14:45 +0200, Nam Cao wrote:
> > Add "real-time scheduling" monitor, which validates that SCHED_RR and
> > SCHED_FIFO tasks are scheduled before tasks with normal and
> > extensible
> > scheduling policies
> >
>
> Looks a very interesting monitor!
> A few questions:
>
> I assume this works with rt-throttle because it implies a dequeue,
> right?
> And you probably won't see that without explicit tracepoints..
It does work properly with rt-throttling:
root@yellow:~# ./rt-loop
[ 74.357730] sched: RT throttling activated
[ 74.357745] rv: rts: 0: violation detected
Looking at rt-throttling code, it does not dequeue tasks, only does
rt_rq->rt_throttled = 1;
rt_rq->rt_queued = 0;
so we are fine.
> > + /*
> > + * This may not be accurate, there may be enqueued RT tasks.
> > But
> > that's
> > + * okay, the worst we get is a false negative. It will be
> > accurate
> > as
> > + * soon as the CPU no longer has any queued RT task.
> > + */
> > + ltl_atom_set(mon, LTL_RT_TASK_ENQUEUED, false);
> >
>
> As far as I understand here the monitor would just miss RT tasks
> already running but would perfectly enforce the ones starting after
> initialisation, right?
Not exactly. What could happen is that:
- RT task A already running
- monitor enabled. The monitor isn't aware of task A, therefore it allows
sched_switch to switch to non-RT task.
- RT task B is queued. The monitor now knows at least one RT task is
enqueued, so it disallows sched_switch to switch to non-RT.
- RT task A is dequeued. However, the monitor does not differentiate task
A and task B, therefore it thinks the only enqueued RT task is now gone.
- So now we have task B started after the monitor, but the monitor does
not check it.
The monitor will become accurate once the CPU has no enqueued RT task,
which should happen quite quickly on a sane setup where RT tasks do not
monopolize the CPU.
The monitor could be changed to be accurate from the get-go, by looking at
how many enqueued RT tasks are present. I *think* rt_rq->rt_nr_running
works. But I think the current implementation is fine, so not worth
thinking too much about it.
> > +RULE = always (RT_TASK_ENQUEUED imply SCHEDULE_RT_NEXT)
> > +
> > +SCHEDULE_RT_NEXT = (not SCHED_SWITCH) until (SCHED_SWITCH_RT or
> > EXCEPTIONS)
> > +
> > +EXCEPTIONS = SCHED_SWITCH_DL or not RT_TASK_ENQUEUED
>
> This monitor allows non-RT tasks to run indefinitely before the switch,
> only when it happens, RT must run, right?
Yes.
> Not sure you can do much about it though. (without falling into the
> need resched rabbithole I was trying to untangle)
I would need to look into scheduler code, maybe we could check that the
next scheduler tick implies a sched_switch. Another day.
Nam
^ permalink raw reply [flat|nested] 32+ messages in thread
* Re: [PATCH 5/5] rv: Add rts monitor
2025-08-01 7:58 ` Nam Cao
@ 2025-08-01 9:14 ` Gabriele Monaco
2025-08-04 6:05 ` Nam Cao
0 siblings, 1 reply; 32+ messages in thread
From: Gabriele Monaco @ 2025-08-01 9:14 UTC (permalink / raw)
To: Nam Cao
Cc: Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
linux-trace-kernel, linux-kernel, Ingo Molnar, Peter Zijlstra,
Juri Lelli, Vincent Guittot, Dietmar Eggemann, Ben Segall,
Mel Gorman, Valentin Schneider
On Fri, 2025-08-01 at 09:58 +0200, Nam Cao wrote:
> On Thu, Jul 31, 2025 at 09:47:10AM +0200, Gabriele Monaco wrote:
> > On Wed, 2025-07-30 at 14:45 +0200, Nam Cao wrote:
> > > Add "real-time scheduling" monitor, which validates that SCHED_RR
> > > and SCHED_FIFO tasks are scheduled before tasks with normal and
> > > extensible scheduling policies
> >
> > Looks a very interesting monitor!
> > A few questions:
> >
> > I assume this works with rt-throttle because it implies a dequeue,
> > right?
> > And you probably won't see that without explicit tracepoints..
>
> It does work properly with rt-throttling:
> root@yellow:~# ./rt-loop
> [ 74.357730] sched: RT throttling activated
> [ 74.357745] rv: rts: 0: violation detected
>
> Looking at rt-throlling code, it does not dequeue tasks, only does
> rt_rq->rt_throttled = 1;
> rt_rq->rt_queued = 0;
>
> so we are fine.
Wait, by /works properly/ you mean it reports a violation. I just
noticed you mention it in the description.
It's reasonable to request RT throttling disabled on sanely configured
RT systems. But throttling is a /valid/ kernel feature, I get you mark
it as /unwanted/ though.
I guess if that's the case, this monitor doesn't belong in the sched
collection because it's meant to validate the kernel behaviour, not its
configuration for a specific purpose (RT).
Isn't it better off with the rtapp ones (which validate the system
configuration to run in an RT scenario).
Does it make sense to you?
> >
> > As far as I understand here the monitor would just miss RT tasks
> > already running but would perfectly enforce the ones starting after
> > initialisation, right?
>
> Not exactly. What could happen is that:
>
> - RT task A already running
>
> - monitor enabled. The monitor isn't aware of task A, therefore it
> allows
> sched_switch to switch to non-RT task.
>
> - RT task B is queued. The monitor now knows at least one RT task is
> enqueued, so it disallows sched_switch to switch to non-RT.
>
> - RT task A is dequeued. However, the monitor does not differentiate
> task
> A and task B, therefore I thinks the only enqueued RT task is now
> gone.
>
> - So now we have task B started after the monitor, but the monitor
> does not check it.
>
> The monitor will become accurate once the CPU has no enqueued RT
> task, which should happen quite quickly on a sane setup where RT
> tasks do not monopoly the CPU.
>
> The monitor could be changed to be accurate from the get-go, by
> looking at how many enqueued RT tasks are present. I *think* rt_rq-
> >rt_nr_running works. But I think the current implementation is
> fine, so not worth thinking too much about it.
Yeah if it's something quickly reached it shouldn't be a problem, also
rt throttle would run in case there's an RT monopoly and you'd see a
violation already.
> >
> > Not sure you can do much about it though. (without falling into the
> > need resched rabbithole I was trying to untangle)
>
> I would need to look into scheduler code, maybe we could check that
> the next scheduler tick implies a sched_switch. Another day.
Agree, the monitor looks good for now.
I still want to give it a run when I have a bit more time, besides with
RT throttle, can the monitor really fail on a working system?
Thanks,
Gabriele
^ permalink raw reply [flat|nested] 32+ messages in thread
* Re: [PATCH 4/5] sched: Add rt task enqueue/dequeue trace points
2025-08-01 7:29 ` Nam Cao
@ 2025-08-01 9:56 ` K Prateek Nayak
2025-08-01 11:04 ` Gabriele Monaco
0 siblings, 1 reply; 32+ messages in thread
From: K Prateek Nayak @ 2025-08-01 9:56 UTC (permalink / raw)
To: Nam Cao
Cc: Gabriele Monaco, Steven Rostedt, Masami Hiramatsu,
Mathieu Desnoyers, linux-trace-kernel, linux-kernel, Ingo Molnar,
Peter Zijlstra, Juri Lelli, Vincent Guittot, Dietmar Eggemann,
Ben Segall, Mel Gorman, Valentin Schneider
Hello Nam,
On 8/1/2025 12:59 PM, Nam Cao wrote:
> On Fri, Aug 01, 2025 at 09:12:08AM +0530, K Prateek Nayak wrote:
>> Just thinking out loud, putting this tracepoint here can lead to a
>> "dequeued -> dequeued" transition for fair task when they are in delayed
>> dequeue state.
>>
>> dequeue_task(p)
>> trace_dequeue_task_tp(p) # First time
>> dequeue_task_fair(p)
>> p->se.delayed = 1
>> ...
>> <sched_switch> # p is still delayed
>> ...
>> sched_setscheduler(p)
>> if (prev_class != next_class && p->se.sched_delayed)
>> dequeue_task(p, DEQUEUE_DELAYED);
>> trace_dequeue_task_tp(p) # Second time
>>
>> It is not an issue as such but it might come as a surprise if users are
>> expecting a behavior like below which would be the case for !fair task
>> currently (and for all tasks before v6.12):
>>
>> digraph state_automaton {
>> center = true;
>> size = "7,11";
>> {node [shape = plaintext, style=invis, label=""] "__init_enqueue_dequeue_cycle"};
>> {node [shape = ellipse] "enqueued"};
>> {node [shape = ellipse] "dequeued"};
>> "__init_enqueue_dequeue_cycle" -> "enqueued";
>> "__init_enqueue_dequeue_cycle" -> "dequeued";
>> "enqueued" [label = "enqueued", color = green3];
>> "enqueued" -> "dequeued" [ label = "dequeue_task" ];
>> "dequeued" [label = "dequeued", color = red];
>> "dequeued" -> "enqueued" [ label = "enqueue_task" ];
>> { rank = min ;
>> "__init_enqueue_dequeue_cycle";
>> "dequeued";
>> "enqueued";
>> }
>> }
>>
>>
>> Another:
>>
>> "dequeued" -> "dequeued" [ label = "dequeue_task" ];
>>
>> edge would be needed in that case for >= v6.12. It is probably nothing
>> and can be easily handled by the users if they run into it but just
>> putting it out there for the record in case you only want to consider a
>> complete dequeue as "dequeued". Feel free to ignore since I'm completely
>> out of my depth when it comes to the usage of RV in the field :)
>
> Ah, thanks for pointing this out. I do want to only consider complete
> dequeue as "dequeued".
>
> These tracepoints are not visible from userspace, and RV does not care
> about enqueue/dequeue of fair tasks at the moment, so it is not a problem
> for now. But as a precaution, I trust the below patch will do.
There are a few more cases with delayed dequeue:
1. A delayed task can be reenqueued before it is completely dequeued and
will lead to a enqueue -> enqueue transition if we don't trace the
first dequeue.
2. There are cases in set_user_nice() and __sched_setscheduler() where
a delayed task is dequeued in delayed state and be put back in the
cfs_rq in delayed state - an attempt to handle 1. can trip this.
Best I could think of is the below diff on top of your suggestion where
a "delayed -> reenqueue" is skipped but the case 2. is captured in case
one needs to inspect some properties from set_user_nice() /
__sched_setscheduler():
(only build tested on top of the diff you had pasted)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 9598984bee8d..1fc5a97bba6b 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2071,7 +2071,8 @@ unsigned long get_wchan(struct task_struct *p)
void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
{
- trace_enqueue_task_tp(rq->cpu, p);
+ if (!p->se.sched_delayed || !move_entity(flags))
+ trace_enqueue_task_tp(rq->cpu, p);
if (!(flags & ENQUEUE_NOCLOCK))
update_rq_clock(rq);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index b173a059315c..1e2a636d6804 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5444,7 +5444,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
* put back on, and if we advance min_vruntime, we'll be placed back
* further than we started -- i.e. we'll be penalized.
*/
- if ((flags & (DEQUEUE_SAVE | DEQUEUE_MOVE)) != DEQUEUE_SAVE)
+ if (move_entity(flags))
update_min_vruntime(cfs_rq);
if (flags & DEQUEUE_DELAYED)
@@ -7054,6 +7054,7 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
/* Fix-up what dequeue_task_fair() skipped */
hrtick_update(rq);
+ trace_dequeue_task_tp(rq->cpu, p);
/*
* Fix-up what block_task() skipped.
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 7936d4333731..33897d35744a 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1196,19 +1196,6 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
dec_rt_group(rt_se, rt_rq);
}
-/*
- * Change rt_se->run_list location unless SAVE && !MOVE
- *
- * assumes ENQUEUE/DEQUEUE flags match
- */
-static inline bool move_entity(unsigned int flags)
-{
- if ((flags & (DEQUEUE_SAVE | DEQUEUE_MOVE)) == DEQUEUE_SAVE)
- return false;
-
- return true;
-}
-
static void __delist_rt_entity(struct sched_rt_entity *rt_se, struct rt_prio_array *array)
{
list_del_init(&rt_se->run_list);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index d3f33d10c58c..37730cd834ba 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2361,6 +2361,20 @@ extern const u32 sched_prio_to_wmult[40];
#define RETRY_TASK ((void *)-1UL)
+/*
+ * Checks for a SAVE/RESTORE without MOVE. Returns false if
+ * SAVE and !MOVE.
+ *
+ * Assumes ENQUEUE/DEQUEUE flags match.
+ */
+static inline bool move_entity(unsigned int flags)
+{
+ if ((flags & (DEQUEUE_SAVE | DEQUEUE_MOVE)) == DEQUEUE_SAVE)
+ return false;
+
+ return true;
+}
+
struct affinity_context {
const struct cpumask *new_mask;
struct cpumask *user_mask;
---
Thoughts?
--
Thanks and Regards,
Prateek
P.S. move_entity() probably requires a better naming and perhaps can
even be simplified. I wrote out the below table just to convince myself
to reuse move_entity() in fair.c
flags contains (SAVE | MOVE) (SAVE | MOVE)
== SAVE != SAVE
neither false true
SAVE true false
MOVE false true
SAVE | MOVE false true
>
> Nam
>
> diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
> index c38f12f7f903..b50668052f99 100644
> --- a/include/trace/events/sched.h
> +++ b/include/trace/events/sched.h
> @@ -906,6 +906,14 @@ DECLARE_TRACE(dequeue_task_rt,
> TP_PROTO(int cpu, struct task_struct *task),
> TP_ARGS(cpu, task));
>
> +DECLARE_TRACE(enqueue_task,
> + TP_PROTO(int cpu, struct task_struct *task),
> + TP_ARGS(cpu, task));
> +
> +DECLARE_TRACE(dequeue_task,
> + TP_PROTO(int cpu, struct task_struct *task),
> + TP_ARGS(cpu, task));
> +
> #endif /* _TRACE_SCHED_H */
>
> /* This part must be outside protection */
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index b485e0639616..553c08a63395 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -2077,6 +2077,8 @@ unsigned long get_wchan(struct task_struct *p)
>
> void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
> {
> + trace_enqueue_task_tp(rq->cpu, p);
> +
> if (!(flags & ENQUEUE_NOCLOCK))
> update_rq_clock(rq);
>
> @@ -2119,7 +2121,11 @@ inline bool dequeue_task(struct rq *rq, struct task_struct *p, int flags)
> * and mark the task ->sched_delayed.
> */
> uclamp_rq_dec(rq, p);
> - return p->sched_class->dequeue_task(rq, p, flags);
> + if (p->sched_class->dequeue_task(rq, p, flags)) {
> + trace_dequeue_task_tp(rq->cpu, p);
> + return true;
> + }
> + return false;
> }
>
> void activate_task(struct rq *rq, struct task_struct *p, int flags)
^ permalink raw reply related [flat|nested] 32+ messages in thread
* Re: [PATCH 4/5] sched: Add rt task enqueue/dequeue trace points
2025-08-01 9:56 ` K Prateek Nayak
@ 2025-08-01 11:04 ` Gabriele Monaco
2025-08-04 3:07 ` K Prateek Nayak
0 siblings, 1 reply; 32+ messages in thread
From: Gabriele Monaco @ 2025-08-01 11:04 UTC (permalink / raw)
To: K Prateek Nayak, Nam Cao
Cc: Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
linux-trace-kernel, linux-kernel, Ingo Molnar, Peter Zijlstra,
Juri Lelli, Vincent Guittot, Dietmar Eggemann, Ben Segall,
Mel Gorman, Valentin Schneider
On Fri, 2025-08-01 at 15:26 +0530, K Prateek Nayak wrote:
> There are a few more cases with delayed dequeue:
>
> 1. A delayed task can be reenqueued before it is completely dequeued
> and
> will lead to a enqueue -> enqueue transition if we don't trace the
> first dequeue.
>
> 2. There are cases in set_user_nice() and __sched_setscheduler()
> where
> a delayed task is dequeued in delayed state and be put back in the
> cfs_rq in delayed state - an attempt to handle 1. can trip this.
>
> Best I could think of is the below diff on top of your suggestion
> where
> a "delayed -> reenqueue" is skipped but the case 2. is captures in
> case
> one needs to inspect some properties from set_user_nice() /
> __sched_setscheduler():
>
> (only build tested on top of the diff you had pasted)
>
Hello Prateek,
thanks for the comments, this looks much more convoluted than I would
have expected.
As Nam pointed out, currently RV is not going to rely on those events
for fair tasks (existing monitors are fine with tracepoints like
wakeup/set_state/switch).
For the time being it might be better to just add the tracepoints in the
RT enqueue/dequeue (the only needed for this series) and not complicate
things.
At most we may want to keep tracepoint names general, allowing the
tracing call to be added later to other locations (or moved to a
general one) without changing too much, besides existing users.
And perhaps a comment saying the tracepoints are currently only
supported on RT would do.
Anyway, that's your call Nam, I'm fine with your initial proposal as
well, unless some scheduler guys complain.
Thanks,
Gabriele
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 9598984bee8d..1fc5a97bba6b 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -2071,7 +2071,8 @@ unsigned long get_wchan(struct task_struct *p)
>
> void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
> {
> - trace_enqueue_task_tp(rq->cpu, p);
> + if (!p->se.sched_delayed || !move_entity(flags))
> + trace_enqueue_task_tp(rq->cpu, p);
>
> if (!(flags & ENQUEUE_NOCLOCK))
> update_rq_clock(rq);
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index b173a059315c..1e2a636d6804 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -5444,7 +5444,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct
> sched_entity *se, int flags)
> * put back on, and if we advance min_vruntime, we'll be
> placed back
> * further than we started -- i.e. we'll be penalized.
> */
> - if ((flags & (DEQUEUE_SAVE | DEQUEUE_MOVE)) != DEQUEUE_SAVE)
> + if (move_entity(flags))
> update_min_vruntime(cfs_rq);
>
> if (flags & DEQUEUE_DELAYED)
> @@ -7054,6 +7054,7 @@ static int dequeue_entities(struct rq *rq,
> struct sched_entity *se, int flags)
>
> /* Fix-up what dequeue_task_fair() skipped */
> hrtick_update(rq);
> + trace_dequeue_task_tp(rq->cpu, p);
>
> /*
> * Fix-up what block_task() skipped.
> diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
> index 7936d4333731..33897d35744a 100644
> --- a/kernel/sched/rt.c
> +++ b/kernel/sched/rt.c
> @@ -1196,19 +1196,6 @@ void dec_rt_tasks(struct sched_rt_entity
> *rt_se, struct rt_rq *rt_rq)
> dec_rt_group(rt_se, rt_rq);
> }
>
> -/*
> - * Change rt_se->run_list location unless SAVE && !MOVE
> - *
> - * assumes ENQUEUE/DEQUEUE flags match
> - */
> -static inline bool move_entity(unsigned int flags)
> -{
> - if ((flags & (DEQUEUE_SAVE | DEQUEUE_MOVE)) == DEQUEUE_SAVE)
> - return false;
> -
> - return true;
> -}
> -
> static void __delist_rt_entity(struct sched_rt_entity *rt_se, struct
> rt_prio_array *array)
> {
> list_del_init(&rt_se->run_list);
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index d3f33d10c58c..37730cd834ba 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -2361,6 +2361,20 @@ extern const
> u32 sched_prio_to_wmult[40];
>
> #define RETRY_TASK ((void *)-1UL)
>
> +/*
> + * Checks for a SAVE/RESTORE without MOVE. Returns false if
> + * SAVE and !MOVE.
> + *
> + * Assumes ENQUEUE/DEQUEUE flags match.
> + */
> +static inline bool move_entity(unsigned int flags)
> +{
> + if ((flags & (DEQUEUE_SAVE | DEQUEUE_MOVE)) == DEQUEUE_SAVE)
> + return false;
> +
> + return true;
> +}
> +
> struct affinity_context {
> const struct cpumask *new_mask;
> struct cpumask *user_mask;
> ---
>
> Thoughts?
^ permalink raw reply [flat|nested] 32+ messages in thread
* Re: [PATCH 4/5] sched: Add rt task enqueue/dequeue trace points
2025-08-01 11:04 ` Gabriele Monaco
@ 2025-08-04 3:07 ` K Prateek Nayak
2025-08-04 5:49 ` Nam Cao
0 siblings, 1 reply; 32+ messages in thread
From: K Prateek Nayak @ 2025-08-04 3:07 UTC (permalink / raw)
To: Gabriele Monaco, Nam Cao
Cc: Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
linux-trace-kernel, linux-kernel, Ingo Molnar, Peter Zijlstra,
Juri Lelli, Vincent Guittot, Dietmar Eggemann, Ben Segall,
Mel Gorman, Valentin Schneider
Hello Gabriele,
On 8/1/2025 4:34 PM, Gabriele Monaco wrote:
> On Fri, 2025-08-01 at 15:26 +0530, K Prateek Nayak wrote:
>> There are a few more cases with delayed dequeue:
>>
>> 1. A delayed task can be reenqueued before it is completely dequeued
>> and
>> will lead to a enqueue -> enqueue transition if we don't trace the
>> first dequeue.
>>
>> 2. There are cases in set_user_nice() and __sched_setscheduler()
>> where
>> a delayed task is dequeued in delayed state and be put back in the
>> cfs_rq in delayed state - an attempt to handle 1. can trip this.
>>
>> Best I could think of is the below diff on top of your suggestion
>> where
>> a "delayed -> reenqueue" is skipped but the case 2. is captures in
>> case
>> one needs to inspect some properties from set_user_nice() /
>> __sched_setscheduler():
>>
>> (only build tested on top of the diff you had pasted)
>>
>
> Hello Prateek,
>
> thanks for the comments, this looks much more convoluted than I would
> have expected.
> As Nam pointed out, currently RV is not going to rely on those events
> for fair tasks (existing monitors are fine with tracepoints like
> wakeup/set_state/switch).
>
> For the time being it might be better just add the tracepoints in the
> RT enqueue/dequeue (the only needed for this series) and not complicate
> things.
>
> At most we may want to keep tracepoint names general, allowing the
> tracing call to be added later to other locations (or moved to a
> general one) without changing too much, besides existing users.
> And perhaps a comment saying the tracepoints are currently only
> supported on RT would do.
Most of my comments were just thinking out loud around fair tasks being
delayed on the dequeue path. If RV filters out RT tasks and the use-case
only concerns them, then Nam's suggestion is good.
I was just being cautious of folks expecting a "enqueued <--> dequeued"
transition for *all* tasks and finding it doesn't hold after delayed
dequeue. Since these are internal tracepoints, I'm sure folks using them
with RV would do their due diligence when testing these monitors before
deployment.
>
> Anyway, that's your call Nam, I'm fine with your initial proposal as
> well, unless some scheduler guys complain.
I would be happy to help test alternate approaches if others have
concerns around delayed dequeue but for all other cases, Nam's approach
looks good. Sorry if my paranoia caused you folks any trouble!
--
Thanks and Regards,
Prateek
^ permalink raw reply [flat|nested] 32+ messages in thread
* Re: [PATCH 4/5] sched: Add rt task enqueue/dequeue trace points
2025-08-04 3:07 ` K Prateek Nayak
@ 2025-08-04 5:49 ` Nam Cao
0 siblings, 0 replies; 32+ messages in thread
From: Nam Cao @ 2025-08-04 5:49 UTC (permalink / raw)
To: K Prateek Nayak, Gabriele Monaco
Cc: Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
linux-trace-kernel, linux-kernel, Ingo Molnar, Peter Zijlstra,
Juri Lelli, Vincent Guittot, Dietmar Eggemann, Ben Segall,
Mel Gorman, Valentin Schneider
K Prateek Nayak <kprateek.nayak@amd.com> writes:
> Hello Gabriele,
>
> On 8/1/2025 4:34 PM, Gabriele Monaco wrote:
>> Hello Prateek,
>>
>> thanks for the comments, this looks much more convoluted than I would
>> have expected.
>> As Nam pointed out, currently RV is not going to rely on those events
>> for fair tasks (existing monitors are fine with tracepoints like
>> wakeup/set_state/switch).
>>
>> For the time being it might be better just add the tracepoints in the
>> RT enqueue/dequeue (the only needed for this series) and not complicate
>> things.
>>
>> At most we may want to keep tracepoint names general, allowing the
>> tracing call to be added later to other locations (or moved to a
>> general one) without changing too much, besides existing users.
>> And perhaps a comment saying the tracepoints are currently only
>> supported on RT would do.
>
> Most of my comments was just thinking out loud around fair tasks being
> delayed on the dequeue path. If RV filters out RT tasks and the use-case
> one concerns them, then Nam's suggestion is good.
>
> I was just being cautious of folks expecting a "enqueued <--> dequeued"
> transition for *all* tasks and finding it doesn't hold after delayed
> dequeue. Since these are internal tracepoints, I'm sure folks using them
> with RV would do their due diligence when testing these monitors before
> deployment.
>
>>
>> Anyway, that's your call Nam, I'm fine with your initial proposal as
>> well, unless some scheduler guys complain.
>
> I would be happy to help test alternate approaches if others have
> concerns around delayed dequeue but for all other cases, Nam's approach
> looks good. Sorry if my paranoia caused you folks any trouble!
No trouble at all, it was all helpful comments.
I agree with Gabriele, it is not important right now, so I will stick to
the latest diff I sent. Leaving it to the poor soul who needs this for
fair tasks to figure it out (which will probably be future me).
Thanks for the insights,
Nam
^ permalink raw reply [flat|nested] 32+ messages in thread
* Re: [PATCH 5/5] rv: Add rts monitor
2025-08-01 9:14 ` Gabriele Monaco
@ 2025-08-04 6:05 ` Nam Cao
0 siblings, 0 replies; 32+ messages in thread
From: Nam Cao @ 2025-08-04 6:05 UTC (permalink / raw)
To: Gabriele Monaco
Cc: Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
linux-trace-kernel, linux-kernel, Ingo Molnar, Peter Zijlstra,
Juri Lelli, Vincent Guittot, Dietmar Eggemann, Ben Segall,
Mel Gorman, Valentin Schneider
Gabriele Monaco <gmonaco@redhat.com> writes:
> Wait, by /works properly/ you mean it reports a violation. I just
> noticed you mention it in the description.
>
> It's reasonable to request RT throttling disabled on sanely configured
> RT systems. But throttling is a /valid/ kernel feature, I get you mark
> it as /unwanted/ though.
True.
> I guess if that's the case, this monitor doesn't belong in the sched
> collection because it's meant to validate the kernel behaviour, not its
> configuration for a specific purpose (RT).
> Isn't it better off with the rtapp ones (which validate the system
> configuration to run in an RT scenario).
>
> Does it make sense to you?
Yeah I was a bit unsure where to put this monitor. But under rtapp makes
sense, if you prefer it there.
> I still want to give it a run when I have a bit more time, besides with
> RT throttle, can the monitor really fail on a working system?
RT throttling and fair deadline server are the only two known mechanisms
which would fail the monitor.
In the future, there may also be sched_ext deadline server:
https://lore.kernel.org/all/20250702232944.3221001-1-joelagnelf@nvidia.com/#t
They exist for good reasons, but they are also a problem to real-time
tasks. I am posting this monitor because we did a cyclic test the other
day and observed some big latencies, and we had no idea why. It turned
out it was the fair deadline server. So we need this monitor to tell us
if some similar mechanisms exist or will appear in the future.
If you try the monitor and see problems, let me know. Most likely it
would be a flaw in the monitor, but there is also a chance there is
another throttling mechanism we are not yet aware of.
Nam
^ permalink raw reply [flat|nested] 32+ messages in thread
* Re: [PATCH 5/5] rv: Add rts monitor
2025-07-30 12:45 ` [PATCH 5/5] rv: Add rts monitor Nam Cao
2025-07-31 7:47 ` Gabriele Monaco
@ 2025-08-05 8:40 ` Gabriele Monaco
2025-08-05 12:22 ` Nam Cao
1 sibling, 1 reply; 32+ messages in thread
From: Gabriele Monaco @ 2025-08-05 8:40 UTC (permalink / raw)
To: Nam Cao, Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
linux-trace-kernel, linux-kernel
On Wed, 2025-07-30 at 14:45 +0200, Nam Cao wrote:
> Add "real-time scheduling" monitor, which validates that SCHED_RR and
> SCHED_FIFO tasks are scheduled before tasks with normal and
> extensible
> scheduling policies
>
> Signed-off-by: Nam Cao <namcao@linutronix.de>
Hello Nam,
I just built and booted up the monitor in a VM (virtme-ng), the
configuration has preemptirq tracepoints and all monitors so far (as we
have seen earlier, it doesn't build if rtapp monitors are not there
because of the circular dependency in the tracepoints).
All I did was to enable the monitor and printk reactor, but I get a
whole lot of errors (as in, I need to quit the VM for it to stop):
[ 1537.699834] rv: rts: 7: violation detected
[ 1537.699930] rv: rts: 3: violation detected
[ 1537.701827] rv: rts: 6: violation detected
[ 1537.704894] rv: rts: 0: violation detected
[ 1537.704925] rv: rts: 0: violation detected
[ 1537.704988] rv: rts: 3: violation detected
[ 1537.705019] rv: rts: 3: violation detected
[ 1537.705998] rv: rts: 0: violation detected
[ 1537.706024] rv: rts: 0: violation detected
[ 1537.709875] rv: rts: 6: violation detected
[ 1537.709921] rv: rts: 6: violation detected
[ 1537.711241] rv: rts: 6: violation detected
Curiously enough, I only see those CPUs (0, 3, 6 and 7).
Other runs have different CPUs but always a small subset (e.g. 10-15,
6-7 only 2).
It doesn't always occur but enabling/disabling the monitor might help
triggering it.
Any idea what is happening?
Thanks,
Gabriele
> ---
> Cc: Ingo Molnar <mingo@redhat.com>
> Cc: Peter Zijlstra <peterz@infradead.org>
> Cc: Juri Lelli <juri.lelli@redhat.com>
> Cc: Vincent Guittot <vincent.guittot@linaro.org>
> Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
> Cc: Ben Segall <bsegall@google.com>
> Cc: Mel Gorman <mgorman@suse.de>
> Cc: Valentin Schneider <vschneid@redhat.com>
> ---
> Documentation/trace/rv/monitor_sched.rst | 19 ++++
> kernel/trace/rv/Kconfig | 1 +
> kernel/trace/rv/Makefile | 1 +
> kernel/trace/rv/monitors/rts/Kconfig | 17 +++
> kernel/trace/rv/monitors/rts/rts.c | 131
> +++++++++++++++++++++++
> kernel/trace/rv/monitors/rts/rts.h | 126
> ++++++++++++++++++++++
> kernel/trace/rv/monitors/rts/rts_trace.h | 15 +++
> tools/verification/models/sched/rts.ltl | 5 +
> 8 files changed, 315 insertions(+)
> create mode 100644 kernel/trace/rv/monitors/rts/Kconfig
> create mode 100644 kernel/trace/rv/monitors/rts/rts.c
> create mode 100644 kernel/trace/rv/monitors/rts/rts.h
> create mode 100644 kernel/trace/rv/monitors/rts/rts_trace.h
> create mode 100644 tools/verification/models/sched/rts.ltl
>
> diff --git a/Documentation/trace/rv/monitor_sched.rst
> b/Documentation/trace/rv/monitor_sched.rst
> index 3f8381ad9ec7..2f9d62a1af1f 100644
> --- a/Documentation/trace/rv/monitor_sched.rst
> +++ b/Documentation/trace/rv/monitor_sched.rst
> @@ -396,6 +396,25 @@ preemption is always disabled. On non-
> ``PREEMPT_RT`` kernels, the interrupts
> might invoke a softirq to set ``need_resched`` and wake up a task.
> This is
> another special case that is currently not supported by the monitor.
>
> +Monitor rts
> +-----------
> +
> +The real-time scheduling monitor validates that tasks with real-time
> scheduling
> +policies (`SCHED_FIFO` and `SCHED_RR`) are always scheduled before
> tasks with
> +normal and extensible scheduling policies (`SCHED_OTHER`,
> `SCHED_BATCH`,
> +`SCHED_IDLE`, `SCHED_EXT`):
> +
> +.. literalinclude:: ../../../tools/verification/models/sched/rts.ltl
> +
> +Note that this monitor may report errors if real-time throttling or
> fair
> +deadline server is enabled. These mechanisms prevent real-time tasks
> from
> +monopolizing the CPU by giving tasks with normal and extensible
> scheduling
> +policies a chance to run. They give system administrators a chance
> to kill a
> +misbehaving real-time task. However, they violate the scheduling
> priorities and
> +may cause latency to well-behaved real-time tasks. Thus, if you see
> errors from
> +this monitor, consider disabling real-time throttling and the fair
> deadline
> +server.
> +
> References
> ----------
>
> diff --git a/kernel/trace/rv/Kconfig b/kernel/trace/rv/Kconfig
> index 951c2e0cda25..3992ff6ac8b1 100644
> --- a/kernel/trace/rv/Kconfig
> +++ b/kernel/trace/rv/Kconfig
> @@ -62,6 +62,7 @@ source "kernel/trace/rv/monitors/sts/Kconfig"
> source "kernel/trace/rv/monitors/nrp/Kconfig"
> source "kernel/trace/rv/monitors/sssw/Kconfig"
> source "kernel/trace/rv/monitors/opid/Kconfig"
> +source "kernel/trace/rv/monitors/rts/Kconfig"
> # Add new sched monitors here
>
> source "kernel/trace/rv/monitors/rtapp/Kconfig"
> diff --git a/kernel/trace/rv/Makefile b/kernel/trace/rv/Makefile
> index 750e4ad6fa0f..d7bfc7ae6677 100644
> --- a/kernel/trace/rv/Makefile
> +++ b/kernel/trace/rv/Makefile
> @@ -17,6 +17,7 @@ obj-$(CONFIG_RV_MON_STS) += monitors/sts/sts.o
> obj-$(CONFIG_RV_MON_NRP) += monitors/nrp/nrp.o
> obj-$(CONFIG_RV_MON_SSSW) += monitors/sssw/sssw.o
> obj-$(CONFIG_RV_MON_OPID) += monitors/opid/opid.o
> +obj-$(CONFIG_RV_MON_RTS) += monitors/rts/rts.o
> # Add new monitors here
> obj-$(CONFIG_RV_REACTORS) += rv_reactors.o
> obj-$(CONFIG_RV_REACT_PRINTK) += reactor_printk.o
> diff --git a/kernel/trace/rv/monitors/rts/Kconfig
> b/kernel/trace/rv/monitors/rts/Kconfig
> new file mode 100644
> index 000000000000..1b780bce6133
> --- /dev/null
> +++ b/kernel/trace/rv/monitors/rts/Kconfig
> @@ -0,0 +1,17 @@
> +# SPDX-License-Identifier: GPL-2.0-only
> +#
> +config RV_MON_RTS
> + depends on RV
> + select RV_LTL_MONITOR
> + depends on RV_MON_SCHED
> + default y
> + select LTL_MON_EVENTS_IMPLICIT
> + bool "rts monitor"
> + help
> + Add support for RTS (real-time scheduling) monitor which
> validates
> + that real-time-priority tasks are scheduled before
> SCHED_OTHER tasks.
> +
> + This monitor may report an error if RT throttling or
> deadline server
> + is enabled.
> +
> + Say Y if you are debugging or testing a real-time system.
> diff --git a/kernel/trace/rv/monitors/rts/rts.c
> b/kernel/trace/rv/monitors/rts/rts.c
> new file mode 100644
> index 000000000000..e23563c47eb1
> --- /dev/null
> +++ b/kernel/trace/rv/monitors/rts/rts.c
> @@ -0,0 +1,131 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#include <linux/ftrace.h>
> +#include <linux/tracepoint.h>
> +#include <linux/kernel.h>
> +#include <linux/module.h>
> +#include <linux/init.h>
> +#include <linux/rv.h>
> +#include <linux/sched/deadline.h>
> +#include <linux/sched/rt.h>
> +#include <rv/instrumentation.h>
> +
> +#define MODULE_NAME "rts"
> +
> +#include <trace/events/sched.h>
> +#include <rv_trace.h>
> +#include <monitors/sched/sched.h>
> +
> +#include "rts.h"
> +#include <rv/ltl_monitor.h>
> +
> +static DEFINE_PER_CPU(unsigned int, nr_queued);
> +
> +static void ltl_atoms_fetch(unsigned int cpu, struct ltl_monitor
> *mon)
> +{
> +}
> +
> +static void ltl_atoms_init(unsigned int cpu, struct ltl_monitor
> *mon,
> + bool target_creation)
> +{
> + ltl_atom_set(mon, LTL_SCHED_SWITCH, false);
> + ltl_atom_set(mon, LTL_SCHED_SWITCH_DL, false);
> + ltl_atom_set(mon, LTL_SCHED_SWITCH_RT, false);
> +
> + /*
> + * This may not be accurate, there may be enqueued RT tasks.
> But that's
> + * okay, the worst we get is a false negative. It will be
> accurate as
> + * soon as the CPU no longer has any queued RT task.
> + */
> + ltl_atom_set(mon, LTL_RT_TASK_ENQUEUED, false);
> +}
> +
> +static void handle_enqueue_task_rt(void *data, int cpu, struct
> task_struct *task)
> +{
> + unsigned int *queued = per_cpu_ptr(&nr_queued, cpu);
> +
> + (*queued)++;
> + ltl_atom_update(cpu, LTL_RT_TASK_ENQUEUED, true);
> +}
> +
> +static void handle_dequeue_task_rt(void *data, int cpu, struct
> task_struct *task)
> +{
> + unsigned int *queued = per_cpu_ptr(&nr_queued, cpu);
> +
> + /*
> + * This may not be accurate for a short time after the
> monitor is
> + * enabled, because there may be enqueued RT tasks which are
> not counted
> + * toward nr_queued. But that's okay, the worst we get is a
> false
> + * negative. nr_queued will be accurate as soon as the CPU
> no longer has
> + * any queued RT task.
> + */
> + if (*queued)
> + (*queued)--;
> + if (!*queued)
> + ltl_atom_update(cpu, LTL_RT_TASK_ENQUEUED, false);
> +}
> +
> +static void handle_sched_switch(void *data, bool preempt, struct
> task_struct *prev,
> + struct task_struct *next, unsigned
> int prev_state)
> +{
> + unsigned int cpu = smp_processor_id();
> + struct ltl_monitor *mon = ltl_get_monitor(cpu);
> +
> + ltl_atom_set(mon, LTL_SCHED_SWITCH_RT, rt_task(next));
> + ltl_atom_set(mon, LTL_SCHED_SWITCH_DL, dl_task(next));
> + ltl_atom_update(cpu, LTL_SCHED_SWITCH, true);
> +
> + ltl_atom_set(mon, LTL_SCHED_SWITCH_RT, false);
> + ltl_atom_set(mon, LTL_SCHED_SWITCH_DL, false);
> + ltl_atom_update(cpu, LTL_SCHED_SWITCH, false);
> +}
> +
> +static int enable_rts(void)
> +{
> + int retval;
> +
> + retval = ltl_monitor_init();
> + if (retval)
> + return retval;
> +
> + rv_attach_trace_probe("rts", enqueue_task_rt_tp,
> handle_enqueue_task_rt);
> + rv_attach_trace_probe("rts", dequeue_task_rt_tp,
> handle_dequeue_task_rt);
> + rv_attach_trace_probe("rts", sched_switch,
> handle_sched_switch);
> +
> + return 0;
> +}
> +
> +static void disable_rts(void)
> +{
> + rv_detach_trace_probe("rts", enqueue_task_rt_tp,
> handle_enqueue_task_rt);
> + rv_detach_trace_probe("rts", dequeue_task_rt_tp,
> handle_dequeue_task_rt);
> + rv_detach_trace_probe("rts", sched_switch,
> handle_sched_switch);
> +
> + ltl_monitor_destroy();
> +}
> +
> +/*
> + * This is the monitor register section.
> + */
> +static struct rv_monitor rv_rts = {
> + .name = "rts",
> + .description = "Validate that real-time tasks are scheduled
> before lower-priority tasks",
> + .enable = enable_rts,
> + .disable = disable_rts,
> +};
> +
> +static int __init register_rts(void)
> +{
> + return rv_register_monitor(&rv_rts, &rv_sched);
> +}
> +
> +static void __exit unregister_rts(void)
> +{
> + rv_unregister_monitor(&rv_rts);
> +}
> +
> +module_init(register_rts);
> +module_exit(unregister_rts);
> +
> +MODULE_LICENSE("GPL");
> +MODULE_AUTHOR("Nam Cao <namcao@linutronix.de>");
> +MODULE_DESCRIPTION("rts: Validate that real-time tasks are scheduled
> before lower-priority tasks");
> diff --git a/kernel/trace/rv/monitors/rts/rts.h
> b/kernel/trace/rv/monitors/rts/rts.h
> new file mode 100644
> index 000000000000..9fbf0d97c1a7
> --- /dev/null
> +++ b/kernel/trace/rv/monitors/rts/rts.h
> @@ -0,0 +1,126 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +
> +/*
> + * C implementation of Buchi automaton, automatically generated by
> + * tools/verification/rvgen from the linear temporal logic
> specification.
> + * For further information, see kernel documentation:
> + * Documentation/trace/rv/linear_temporal_logic.rst
> + */
> +
> +#include <linux/rv.h>
> +
> +#define MONITOR_NAME rts
> +
> +#define LTL_MONITOR_TYPE LTL_CPU_MONITOR
> +
> +enum ltl_atom {
> + LTL_RT_TASK_ENQUEUED,
> + LTL_SCHED_SWITCH,
> + LTL_SCHED_SWITCH_DL,
> + LTL_SCHED_SWITCH_RT,
> + LTL_NUM_ATOM
> +};
> +static_assert(LTL_NUM_ATOM <= RV_MAX_LTL_ATOM);
> +
> +static const char *ltl_atom_str(enum ltl_atom atom)
> +{
> + static const char *const names[] = {
> + "rt_ta_en",
> + "sc_sw",
> + "sc_sw_dl",
> + "sc_sw_rt",
> + };
> +
> + return names[atom];
> +}
> +
> +enum ltl_buchi_state {
> + S0,
> + S1,
> + S2,
> + S3,
> + S4,
> + RV_NUM_BA_STATES
> +};
> +static_assert(RV_NUM_BA_STATES <= RV_MAX_BA_STATES);
> +
> +static void ltl_start(unsigned int cpu, struct ltl_monitor *mon)
> +{
> + bool sched_switch_rt = test_bit(LTL_SCHED_SWITCH_RT, mon-
> >atoms);
> + bool sched_switch_dl = test_bit(LTL_SCHED_SWITCH_DL, mon-
> >atoms);
> + bool sched_switch = test_bit(LTL_SCHED_SWITCH, mon->atoms);
> + bool rt_task_enqueued = test_bit(LTL_RT_TASK_ENQUEUED, mon-
> >atoms);
> + bool val13 = !rt_task_enqueued;
> + bool val8 = sched_switch_dl || val13;
> + bool val9 = sched_switch_rt || val8;
> + bool val6 = !sched_switch;
> + bool val1 = !rt_task_enqueued;
> +
> + if (val1)
> + __set_bit(S0, mon->states);
> + if (val6)
> + __set_bit(S1, mon->states);
> + if (val9)
> + __set_bit(S4, mon->states);
> +}
> +
> +static void
> +ltl_possible_next_states(struct ltl_monitor *mon, unsigned int
> state, unsigned long *next)
> +{
> + bool sched_switch_rt = test_bit(LTL_SCHED_SWITCH_RT, mon-
> >atoms);
> + bool sched_switch_dl = test_bit(LTL_SCHED_SWITCH_DL, mon-
> >atoms);
> + bool sched_switch = test_bit(LTL_SCHED_SWITCH, mon->atoms);
> + bool rt_task_enqueued = test_bit(LTL_RT_TASK_ENQUEUED, mon-
> >atoms);
> + bool val13 = !rt_task_enqueued;
> + bool val8 = sched_switch_dl || val13;
> + bool val9 = sched_switch_rt || val8;
> + bool val6 = !sched_switch;
> + bool val1 = !rt_task_enqueued;
> +
> + switch (state) {
> + case S0:
> + if (val1)
> + __set_bit(S0, next);
> + if (val6)
> + __set_bit(S1, next);
> + if (val9)
> + __set_bit(S4, next);
> + break;
> + case S1:
> + if (val6)
> + __set_bit(S1, next);
> + if (val1 && val6)
> + __set_bit(S2, next);
> + if (val1 && val9)
> + __set_bit(S3, next);
> + if (val9)
> + __set_bit(S4, next);
> + break;
> + case S2:
> + if (val6)
> + __set_bit(S1, next);
> + if (val1 && val6)
> + __set_bit(S2, next);
> + if (val1 && val9)
> + __set_bit(S3, next);
> + if (val9)
> + __set_bit(S4, next);
> + break;
> + case S3:
> + if (val1)
> + __set_bit(S0, next);
> + if (val6)
> + __set_bit(S1, next);
> + if (val9)
> + __set_bit(S4, next);
> + break;
> + case S4:
> + if (val1)
> + __set_bit(S0, next);
> + if (val6)
> + __set_bit(S1, next);
> + if (val9)
> + __set_bit(S4, next);
> + break;
> + }
> +}
> diff --git a/kernel/trace/rv/monitors/rts/rts_trace.h
> b/kernel/trace/rv/monitors/rts/rts_trace.h
> new file mode 100644
> index 000000000000..0ac9e112a8b0
> --- /dev/null
> +++ b/kernel/trace/rv/monitors/rts/rts_trace.h
> @@ -0,0 +1,15 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +
> +/*
> + * Snippet to be included in rv_trace.h
> + */
> +
> +#ifdef CONFIG_RV_MON_RTS
> +DEFINE_EVENT(event_ltl_monitor, event_rts,
> + TP_PROTO(unsigned int cpu, char *states, char *atoms, char
> *next),
> + TP_ARGS(cpu, states, atoms, next));
> +
> +DEFINE_EVENT(error_ltl_monitor, error_rts,
> + TP_PROTO(unsigned int cpu),
> + TP_ARGS(cpu));
> +#endif /* CONFIG_RV_MON_RTS */
> diff --git a/tools/verification/models/sched/rts.ltl
> b/tools/verification/models/sched/rts.ltl
> new file mode 100644
> index 000000000000..90872bca46b1
> --- /dev/null
> +++ b/tools/verification/models/sched/rts.ltl
> @@ -0,0 +1,5 @@
> +RULE = always (RT_TASK_ENQUEUED imply SCHEDULE_RT_NEXT)
> +
> +SCHEDULE_RT_NEXT = (not SCHED_SWITCH) until (SCHED_SWITCH_RT or
> EXCEPTIONS)
> +
> +EXCEPTIONS = SCHED_SWITCH_DL or not RT_TASK_ENQUEUED
^ permalink raw reply [flat|nested] 32+ messages in thread
* Re: [PATCH 5/5] rv: Add rts monitor
2025-08-05 8:40 ` Gabriele Monaco
@ 2025-08-05 12:22 ` Nam Cao
2025-08-05 15:45 ` Nam Cao
0 siblings, 1 reply; 32+ messages in thread
From: Nam Cao @ 2025-08-05 12:22 UTC (permalink / raw)
To: Gabriele Monaco
Cc: Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
linux-trace-kernel, linux-kernel
On Tue, Aug 05, 2025 at 10:40:30AM +0200, Gabriele Monaco wrote:
> Hello Nam,
>
> I just built and booted up the monitor in a VM (virtme-ng), the
> configuration has preemptirq tracepoints and all monitors so far (as we
> have seen earlier, it doesn't build if rtapp monitors are not there
> because of the circular dependency in the tracepoints).
>
> All I did was to enable the monitor and printk reactor, but I get a
> whole lot of errors (as in, I need to quit the VM for it to stop):
>
> [ 1537.699834] rv: rts: 7: violation detected
> [ 1537.699930] rv: rts: 3: violation detected
> [ 1537.701827] rv: rts: 6: violation detected
> [ 1537.704894] rv: rts: 0: violation detected
> [ 1537.704925] rv: rts: 0: violation detected
> [ 1537.704988] rv: rts: 3: violation detected
> [ 1537.705019] rv: rts: 3: violation detected
> [ 1537.705998] rv: rts: 0: violation detected
> [ 1537.706024] rv: rts: 0: violation detected
> [ 1537.709875] rv: rts: 6: violation detected
> [ 1537.709921] rv: rts: 6: violation detected
> [ 1537.711241] rv: rts: 6: violation detected
>
> Curiously enough, I only see those CPUs (0, 3, 6 and 7).
> Other runs have different CPUs but always a small subset (e.g. 10-15,
> 6-7 only 2).
> It doesn't always occur but enabling/disabling the monitor might help
> triggering it.
>
> Any idea what is happening?
Thanks for the report. I can reproduce the issue.
Looking at the tracepoints, it makes sense why the monitor complains, some
RT tasks are enqueued but are not dequeued. Then a sched_switch happens
which switches to a non-RT task.
Most likely the dequeue tracepoint misses some cases, let me investigate..
Nam
^ permalink raw reply [flat|nested] 32+ messages in thread
* Re: [PATCH 5/5] rv: Add rts monitor
2025-08-05 12:22 ` Nam Cao
@ 2025-08-05 15:45 ` Nam Cao
2025-08-06 8:15 ` Gabriele Monaco
0 siblings, 1 reply; 32+ messages in thread
From: Nam Cao @ 2025-08-05 15:45 UTC (permalink / raw)
To: Gabriele Monaco
Cc: Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
linux-trace-kernel, linux-kernel
On Tue, Aug 05, 2025 at 02:22:17PM +0200, Nam Cao wrote:
> On Tue, Aug 05, 2025 at 10:40:30AM +0200, Gabriele Monaco wrote:
> > Hello Nam,
> >
> > I just built and booted up the monitor in a VM (virtme-ng), the
> > configuration has preemptirq tracepoints and all monitors so far (as we
> > have seen earlier, it doesn't build if rtapp monitors are not there
> > because of the circular dependency in the tracepoints).
> >
> > All I did was to enable the monitor and printk reactor, but I get a
> > whole lot of errors (as in, I need to quit the VM for it to stop):
> >
> > [ 1537.699834] rv: rts: 7: violation detected
> > [ 1537.699930] rv: rts: 3: violation detected
> > [ 1537.701827] rv: rts: 6: violation detected
> > [ 1537.704894] rv: rts: 0: violation detected
> > [ 1537.704925] rv: rts: 0: violation detected
> > [ 1537.704988] rv: rts: 3: violation detected
> > [ 1537.705019] rv: rts: 3: violation detected
> > [ 1537.705998] rv: rts: 0: violation detected
> > [ 1537.706024] rv: rts: 0: violation detected
> > [ 1537.709875] rv: rts: 6: violation detected
> > [ 1537.709921] rv: rts: 6: violation detected
> > [ 1537.711241] rv: rts: 6: violation detected
> >
> > Curiously enough, I only see those CPUs (0, 3, 6 and 7).
> > Other runs have different CPUs but always a small subset (e.g. 10-15,
> > 6-7 only 2).
> > It doesn't always occur but enabling/disabling the monitor might help
> > triggering it.
> >
> > Any idea what is happening?
There are two issues:
- When the monitor is disabled then enabled, the number of queued tasks
does not reset. The monitor may mistakenly think there are queued RT
tasks, but there aren't.
- The enqueue tracepoint is registered before the dequeue tracepoint.
Therefore there may be an enqueue followed by a dequeue, but the monitor
misses the latter.
The first issue can be fixed by resetting the queued task number when
enabling the monitor.
For the second issue, LTL monitors need something similar to
da_monitor_enabled_##name(void). But a quick workaround is reordering the
tracepoint registrations.
So with the below diff, I no longer see the issue.
Thanks again for noticing this!
Nam
diff --git a/kernel/trace/rv/monitors/rts/rts.c b/kernel/trace/rv/monitors/rts/rts.c
index 473004b673c5..3ddbf09db0dd 100644
--- a/kernel/trace/rv/monitors/rts/rts.c
+++ b/kernel/trace/rv/monitors/rts/rts.c
@@ -81,14 +81,21 @@ static void handle_sched_switch(void *data, bool preempt, struct task_struct *pr
static int enable_rts(void)
{
+ unsigned int cpu;
int retval;
retval = ltl_monitor_init();
if (retval)
return retval;
- rv_attach_trace_probe("rts", enqueue_task_rt_tp, handle_enqueue_task_rt);
+ for_each_possible_cpu(cpu) {
+ unsigned int *queued = per_cpu_ptr(&nr_queued, cpu);
+
+ *queued = 0;
+ }
+
rv_attach_trace_probe("rts", dequeue_task_rt_tp, handle_dequeue_task_rt);
+ rv_attach_trace_probe("rts", enqueue_task_rt_tp, handle_enqueue_task_rt);
rv_attach_trace_probe("rts", sched_switch, handle_sched_switch);
return 0;
^ permalink raw reply related [flat|nested] 32+ messages in thread
* Re: [PATCH 5/5] rv: Add rts monitor
2025-08-05 15:45 ` Nam Cao
@ 2025-08-06 8:15 ` Gabriele Monaco
2025-08-06 8:46 ` Nam Cao
0 siblings, 1 reply; 32+ messages in thread
From: Gabriele Monaco @ 2025-08-06 8:15 UTC (permalink / raw)
To: Nam Cao
Cc: Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
linux-trace-kernel, linux-kernel
On Tue, 2025-08-05 at 17:45 +0200, Nam Cao wrote:
> On Tue, Aug 05, 2025 at 02:22:17PM +0200, Nam Cao wrote:
> > On Tue, Aug 05, 2025 at 10:40:30AM +0200, Gabriele Monaco wrote:
> > > Hello Nam,
> > >
> > > I just built and booted up the monitor in a VM (virtme-ng), the
> > > configuration has preemptirq tracepoints and all monitors so far
> > > (as we
> > > have seen earlier, it doesn't build if rtapp monitors are not
> > > there
> > > because of the circular dependency in the tracepoints).
> > >
> > > All I did was to enable the monitor and printk reactor, but I get
> > > a
> > > whole lot of errors (as in, I need to quit the VM for it to
> > > stop):
> > >
> > > [ 1537.699834] rv: rts: 7: violation detected
> > > [ 1537.699930] rv: rts: 3: violation detected
> > > [ 1537.701827] rv: rts: 6: violation detected
> > > [ 1537.704894] rv: rts: 0: violation detected
> > > [ 1537.704925] rv: rts: 0: violation detected
> > > [ 1537.704988] rv: rts: 3: violation detected
> > > [ 1537.705019] rv: rts: 3: violation detected
> > > [ 1537.705998] rv: rts: 0: violation detected
> > > [ 1537.706024] rv: rts: 0: violation detected
> > > [ 1537.709875] rv: rts: 6: violation detected
> > > [ 1537.709921] rv: rts: 6: violation detected
> > > [ 1537.711241] rv: rts: 6: violation detected
> > >
> > > Curiously enough, I only see those CPUs (0, 3, 6 and 7).
> > > Other runs have different CPUs but always a small subset (e.g.
> > > 10-15,
> > > 6-7 only 2).
> > > It doesn't always occur but enabling/disabling the monitor might
> > > help
> > > triggering it.
> > >
> > > Any idea what is happening?
>
> There are two issues:
>
> - When the monitor is disabled then enabled, the number of queued
> tasks does not reset. The monitor may mistakenly think there are
> queued RT tasks, but there aren't.
>
> - The enqueue tracepoint is registered before the dequeue
> tracepoint.
> Therefore there may be an enqueue followed by a dequeue, but the
> monitor missed the latter.
>
> The first issue can be fixed by resetting the queued task number at
> enabling.
Mmh good catch, indeed you have a counter separated from the LTL thing
here.
>
> For the second issue, LTL monitors need something similar to
> da_monitor_enabled_##name(void). But a quick workaround is reordering
> the tracepoint registerations.
I didn't make it on time before your V2, I assume you solved already so
you might ignore this.
You kinda have something like the da_monitor_enabled: the
rv_ltl_all_atoms_known
I wonder if you could define LTL_RT_TASK_ENQUEUED only when you
actually know it (or are reasonably sure based on your internal
counter). Or at least not set all atoms until the monitor is fully set
up.
Anyway reordering the tracepoints registration is likely necessary
whatever you do, but I'm afraid a problem like this can occur pretty
often with this type of monitors.
Thanks,
Gabriele
>
> So with the below diff, I no longer see the issue.
>
> Thanks again for noticing this!
>
> Nam
>
> diff --git a/kernel/trace/rv/monitors/rts/rts.c
> b/kernel/trace/rv/monitors/rts/rts.c
> index 473004b673c5..3ddbf09db0dd 100644
> --- a/kernel/trace/rv/monitors/rts/rts.c
> +++ b/kernel/trace/rv/monitors/rts/rts.c
> @@ -81,14 +81,21 @@ static void handle_sched_switch(void *data, bool
> preempt, struct task_struct *pr
>
> static int enable_rts(void)
> {
> + unsigned int cpu;
> int retval;
>
> retval = ltl_monitor_init();
> if (retval)
> return retval;
>
> - rv_attach_trace_probe("rts", enqueue_task_rt_tp,
> handle_enqueue_task_rt);
> + for_each_possible_cpu(cpu) {
> + unsigned int *queued = per_cpu_ptr(&nr_queued, cpu);
> +
> + *queued = 0;
> + }
> +
> rv_attach_trace_probe("rts", dequeue_task_rt_tp,
> handle_dequeue_task_rt);
> + rv_attach_trace_probe("rts", enqueue_task_rt_tp,
> handle_enqueue_task_rt);
> rv_attach_trace_probe("rts", sched_switch,
> handle_sched_switch);
>
> return 0;
^ permalink raw reply [flat|nested] 32+ messages in thread
* Re: [PATCH 5/5] rv: Add rts monitor
2025-08-06 8:15 ` Gabriele Monaco
@ 2025-08-06 8:46 ` Nam Cao
2025-08-06 9:03 ` Gabriele Monaco
0 siblings, 1 reply; 32+ messages in thread
From: Nam Cao @ 2025-08-06 8:46 UTC (permalink / raw)
To: Gabriele Monaco
Cc: Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
linux-trace-kernel, linux-kernel
On Wed, Aug 06, 2025 at 10:15:48AM +0200, Gabriele Monaco wrote:
> I didn't make it on time before your V2, I assume you solved already so
> you might ignore this.
>
> You kinda have something like the da_monitor_enabled: the
> rv_ltl_all_atoms_known
>
> I wonder if you could define LTL_RT_TASK_ENQUEUED only when you
> actually know it (or are reasonably sure based on your internal
> counter). Or at least not set all atoms until the monitor is fully set
> up.
The rv_ltl_all_atoms_known() thingy is for situation where relevant
tracepoints have not been hit yet.
This case is slightly different, the tracepoint has been hit. And it is not
clear how to implement the "reasonably sure based on your internal counter"
part.
> Anyway reordering the tracepoints registration is likely necessary
> whatever you do, but I'm afraid a problem like this can occur pretty
> often with this type of monitors.
What I have in v2 is a workaround only, by reordering the tracepoint
registrations.
The root problem is not specific to this monitor, but all LTL monitors. My
idea for the real fix is the untested patch below. I will send it
separately. It is not urgent, so I can wait for your DA macro removal patch
to be merged first.
As I'm sending the patch to you, I realized that the patch effectively
nullifies ltl_atoms_init(). So I will need to fix that up..
Nam
commit 7fbb9a99f1a95e5149d476fa3d83a60be1a9a579
Author: Nam Cao <namcao@linutronix.de>
Date: Tue Aug 5 22:47:49 2025 +0200
rv: Share the da_monitor_enabled_##name() function with LTL
The LTL monitors also need the functionality that
da_monitor_enabled_##name() offers.
This is useful to prevent the automaton from being executed before the
monitor is completely enabled, preventing the situation where the
monitors run before all tracepoints are registered. This situation can
cause a false positive error, because the monitors do not see some
events and do not validate properly.
Pull da_monitor_enabled_##name() to be in the common header, and use
it for both LTL and DA.
Signed-off-by: Nam Cao <namcao@linutronix.de>
diff --git a/include/linux/rv.h b/include/linux/rv.h
index 1aa01d98e390..8a885b3665a8 100644
--- a/include/linux/rv.h
+++ b/include/linux/rv.h
@@ -119,6 +119,14 @@ int rv_register_monitor(struct rv_monitor *monitor, struct rv_monitor *parent);
int rv_get_task_monitor_slot(void);
void rv_put_task_monitor_slot(int slot);
+static inline bool rv_monitor_enabled(struct rv_monitor *monitor)
+{
+ if (unlikely(!rv_monitoring_on()))
+ return 0;
+
+ return likely(monitor->enabled);
+}
+
#ifdef CONFIG_RV_REACTORS
bool rv_reacting_on(void);
int rv_unregister_reactor(struct rv_reactor *reactor);
diff --git a/include/rv/da_monitor.h b/include/rv/da_monitor.h
index 17fa4f6e5ea6..92b8a8c0b9b7 100644
--- a/include/rv/da_monitor.h
+++ b/include/rv/da_monitor.h
@@ -74,29 +74,12 @@ static inline bool da_monitoring_##name(struct da_monitor *da_mon) \
return da_mon->monitoring; \
} \
\
-/* \
- * da_monitor_enabled_##name - checks if the monitor is enabled \
- */ \
-static inline bool da_monitor_enabled_##name(void) \
-{ \
- /* global switch */ \
- if (unlikely(!rv_monitoring_on())) \
- return 0; \
- \
- /* monitor enabled */ \
- if (unlikely(!rv_##name.enabled)) \
- return 0; \
- \
- return 1; \
-} \
- \
/* \
* da_monitor_handling_event_##name - checks if the monitor is ready to handle events \
*/ \
static inline bool da_monitor_handling_event_##name(struct da_monitor *da_mon) \
{ \
- \
- if (!da_monitor_enabled_##name()) \
+ if (!rv_monitor_enabled(&rv_##name)) \
return 0; \
\
/* monitor is actually monitoring */ \
@@ -390,7 +373,7 @@ static inline bool da_handle_start_event_##name(enum events_##name event) \
{ \
struct da_monitor *da_mon; \
\
- if (!da_monitor_enabled_##name()) \
+ if (!rv_monitor_enabled(&rv_##name)) \
return 0; \
\
da_mon = da_get_monitor_##name(); \
@@ -415,7 +398,7 @@ static inline bool da_handle_start_run_event_##name(enum events_##name event)
{ \
struct da_monitor *da_mon; \
\
- if (!da_monitor_enabled_##name()) \
+ if (!rv_monitor_enabled(&rv_##name)) \
return 0; \
\
da_mon = da_get_monitor_##name(); \
@@ -475,7 +458,7 @@ da_handle_start_event_##name(struct task_struct *tsk, enum events_##name event)
{ \
struct da_monitor *da_mon; \
\
- if (!da_monitor_enabled_##name()) \
+ if (!rv_monitor_enabled(&rv_##name)) \
return 0; \
\
da_mon = da_get_monitor_##name(tsk); \
@@ -501,7 +484,7 @@ da_handle_start_run_event_##name(struct task_struct *tsk, enum events_##name eve
{ \
struct da_monitor *da_mon; \
\
- if (!da_monitor_enabled_##name()) \
+ if (!rv_monitor_enabled(&rv_##name)) \
return 0; \
\
da_mon = da_get_monitor_##name(tsk); \
diff --git a/include/rv/ltl_monitor.h b/include/rv/ltl_monitor.h
index 29bbf86d1a52..85a3d07a0303 100644
--- a/include/rv/ltl_monitor.h
+++ b/include/rv/ltl_monitor.h
@@ -16,6 +16,8 @@
#error "Please include $(MODEL_NAME).h generated by rvgen"
#endif
+#define RV_MONITOR_NAME CONCATENATE(rv_, MONITOR_NAME)
+
#if LTL_MONITOR_TYPE == LTL_TASK_MONITOR
#define TARGET_PRINT_FORMAT "%s[%d]"
@@ -33,7 +35,6 @@ typedef unsigned int monitor_target;
#endif
#ifdef CONFIG_RV_REACTORS
-#define RV_MONITOR_NAME CONCATENATE(rv_, MONITOR_NAME)
static struct rv_monitor RV_MONITOR_NAME;
static struct ltl_monitor *ltl_get_monitor(monitor_target target);
@@ -156,6 +157,9 @@ static void ltl_attempt_start(monitor_target target, struct ltl_monitor *mon)
static inline void ltl_atom_set(struct ltl_monitor *mon, enum ltl_atom atom, bool value)
{
+ if (!rv_monitor_enabled(&RV_MONITOR_NAME))
+ return;
+
__clear_bit(atom, mon->unknown_atoms);
if (value)
__set_bit(atom, mon->atoms);
^ permalink raw reply related [flat|nested] 32+ messages in thread
* Re: [PATCH 5/5] rv: Add rts monitor
2025-08-06 8:46 ` Nam Cao
@ 2025-08-06 9:03 ` Gabriele Monaco
0 siblings, 0 replies; 32+ messages in thread
From: Gabriele Monaco @ 2025-08-06 9:03 UTC (permalink / raw)
To: Nam Cao
Cc: Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
linux-trace-kernel, linux-kernel
On Wed, 2025-08-06 at 10:46 +0200, Nam Cao wrote:
> On Wed, Aug 06, 2025 at 10:15:48AM +0200, Gabriele Monaco wrote:
> > I didn't make it on time before your V2; I assume you solved it
> > already, so
> > you might ignore this.
> >
> >
> > You kinda have something like the da_monitor_enabled: the
> > rv_ltl_all_atoms_known
> >
> > I wonder if you could define LTL_RT_TASK_ENQUEUED only when you
> > actually know it (or are reasonably sure based on your internal
> > counter). Or at least not set all atoms until the monitor is fully
> > set
> > up.
>
> The rv_ltl_all_atoms_known() thingy is for situations where relevant
> tracepoints have not been hit yet.
>
> This case is slightly different, the tracepoint has been hit. And it
> is not clear how to implement the "reasonably sure based on your
> internal counter" part.
>
> > Anyway reordering the tracepoints registration is likely necessary
> > whatever you do, but I'm afraid a problem like this can occur
> > pretty
> > often with this type of monitors.
>
> What I have in v2 is a workaround only, by reordering the tracepoint
> registrations.
>
> The root problem is not specific to this monitor, but all LTL
> monitors. My idea for the real fix is the untested patch below. I
> will send it separately. It is not urgent, so I can wait for your DA
> macro removal patch to be merged first.
>
Alright, I get it, let's continue with the workaround for now, I'm
going to have a look at your V2.
Thanks,
Gabriele
> As I'm sending the patch to you, I realized that the patch
> effectively nullifies ltl_atoms_init(). So I will need to fix that
> up.
>
> Nam
>
> commit 7fbb9a99f1a95e5149d476fa3d83a60be1a9a579
> Author: Nam Cao <namcao@linutronix.de>
> Date: Tue Aug 5 22:47:49 2025 +0200
>
> rv: Share the da_monitor_enabled_##name() function with LTL
>
> The LTL monitors also need the functionality that
> da_monitor_enabled_##name() offers.
>
> This is useful to prevent the automaton from being executed
> before the
> monitor is completely enabled, preventing the situation where the
> monitors run before all tracepoints are registered. This
> situation can
> cause a false positive error, because the monitors do not see
> some
> events and do not validate properly.
>
> Pull da_monitor_enabled_##name() to be in the common header, and
> use
> it for both LTL and DA.
>
> Signed-off-by: Nam Cao <namcao@linutronix.de>
>
> diff --git a/include/linux/rv.h b/include/linux/rv.h
> index 1aa01d98e390..8a885b3665a8 100644
> --- a/include/linux/rv.h
> +++ b/include/linux/rv.h
> @@ -119,6 +119,14 @@ int rv_register_monitor(struct rv_monitor
> *monitor, struct rv_monitor *parent);
> int rv_get_task_monitor_slot(void);
> void rv_put_task_monitor_slot(int slot);
>
> +static inline bool rv_monitor_enabled(struct rv_monitor *monitor)
> +{
> + if (unlikely(!rv_monitoring_on()))
> + return 0;
> +
> + return likely(monitor->enabled);
> +}
> +
> #ifdef CONFIG_RV_REACTORS
> bool rv_reacting_on(void);
> int rv_unregister_reactor(struct rv_reactor *reactor);
> diff --git a/include/rv/da_monitor.h b/include/rv/da_monitor.h
> index 17fa4f6e5ea6..92b8a8c0b9b7 100644
> --- a/include/rv/da_monitor.h
> +++ b/include/rv/da_monitor.h
> @@ -74,29 +74,12 @@ static inline bool da_monitoring_##name(struct
> da_monitor *da_mon) \
> return da_mon-
> >monitoring; \
> }
> \
>
> \
> -
> /* \
> - * da_monitor_enabled_##name - checks if the monitor is
> enabled \
> -
> */ \
> -static inline bool
> da_monitor_enabled_##name(void) \
> -
> { \
> - /* global switch
> */ \
> - if
> (unlikely(!rv_monitoring_on())) \
> - return
> 0; \
> -
> \
> - /* monitor enabled
> */ \
> - if
> (unlikely(!rv_##name.enabled)) \
> - return
> 0; \
> -
> \
> - return
> 1; \
> -
> } \
> -
> \
> /*
> \
> * da_monitor_handling_event_##name - checks if the monitor is ready
> to handle events \
>
> */ \
> static inline bool da_monitor_handling_event_##name(struct
> da_monitor *da_mon) \
> {
> \
> -
> \
> - if
> (!da_monitor_enabled_##name()) \
> + if
> (!rv_monitor_enabled(&rv_##name)) \
> return
> 0; \
>
> \
> /* monitor is actually monitoring
> */ \
> @@ -390,7 +373,7 @@ static inline bool
> da_handle_start_event_##name(enum events_##name
> event) \
> {
> \
> struct da_monitor
> *da_mon; \
>
> \
> - if
> (!da_monitor_enabled_##name()) \
> + if
> (!rv_monitor_enabled(&rv_##name)) \
> return
> 0; \
>
> \
> da_mon =
> da_get_monitor_##name(); \
> @@ -415,7 +398,7 @@ static inline bool
> da_handle_start_run_event_##name(enum events_##name event)
> {
> \
> struct da_monitor
> *da_mon; \
>
> \
> - if
> (!da_monitor_enabled_##name()) \
> + if
> (!rv_monitor_enabled(&rv_##name)) \
> return
> 0; \
>
> \
> da_mon =
> da_get_monitor_##name(); \
> @@ -475,7 +458,7 @@ da_handle_start_event_##name(struct task_struct
> *tsk, enum events_##name event)
> {
> \
> struct da_monitor
> *da_mon; \
>
> \
> - if
> (!da_monitor_enabled_##name()) \
> + if
> (!rv_monitor_enabled(&rv_##name)) \
> return
> 0; \
>
> \
> da_mon =
> da_get_monitor_##name(tsk); \
> @@ -501,7 +484,7 @@ da_handle_start_run_event_##name(struct
> task_struct *tsk, enum events_##name eve
> {
> \
> struct da_monitor
> *da_mon; \
>
> \
> - if
> (!da_monitor_enabled_##name()) \
> + if
> (!rv_monitor_enabled(&rv_##name)) \
> return
> 0; \
>
> \
> da_mon =
> da_get_monitor_##name(tsk); \
> diff --git a/include/rv/ltl_monitor.h b/include/rv/ltl_monitor.h
> index 29bbf86d1a52..85a3d07a0303 100644
> --- a/include/rv/ltl_monitor.h
> +++ b/include/rv/ltl_monitor.h
> @@ -16,6 +16,8 @@
> #error "Please include $(MODEL_NAME).h generated by rvgen"
> #endif
>
> +#define RV_MONITOR_NAME CONCATENATE(rv_, MONITOR_NAME)
> +
> #if LTL_MONITOR_TYPE == LTL_TASK_MONITOR
>
> #define TARGET_PRINT_FORMAT "%s[%d]"
> @@ -33,7 +35,6 @@ typedef unsigned int monitor_target;
> #endif
>
> #ifdef CONFIG_RV_REACTORS
> -#define RV_MONITOR_NAME CONCATENATE(rv_, MONITOR_NAME)
> static struct rv_monitor RV_MONITOR_NAME;
>
> static struct ltl_monitor *ltl_get_monitor(monitor_target target);
> @@ -156,6 +157,9 @@ static void ltl_attempt_start(monitor_target
> target, struct ltl_monitor *mon)
>
> static inline void ltl_atom_set(struct ltl_monitor *mon, enum
> ltl_atom atom, bool value)
> {
> + if (!rv_monitor_enabled(&RV_MONITOR_NAME))
> + return;
> +
> __clear_bit(atom, mon->unknown_atoms);
> if (value)
> __set_bit(atom, mon->atoms);
^ permalink raw reply [flat|nested] 32+ messages in thread
end of thread, other threads:[~2025-08-06 9:03 UTC | newest]
Thread overview: 32+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-07-30 12:45 [PATCH 0/5] rv: LTL per-cpu monitor type and real-time scheduling monitor Nam Cao
2025-07-30 12:45 ` [PATCH 1/5] rv/ltl: Prepare for other monitor types Nam Cao
2025-07-31 9:04 ` Gabriele Monaco
2025-07-31 9:28 ` Nam Cao
2025-07-31 10:14 ` Gabriele Monaco
2025-07-30 12:45 ` [PATCH 2/5] rv/ltl: Support per-cpu monitors Nam Cao
2025-07-31 8:02 ` Gabriele Monaco
2025-08-01 6:26 ` Nam Cao
2025-07-30 12:45 ` [PATCH 3/5] verification/rvgen/ltl: Support per-cpu monitor generation Nam Cao
2025-07-30 12:45 ` [PATCH 4/5] sched: Add rt task enqueue/dequeue trace points Nam Cao
2025-07-30 13:53 ` Gabriele Monaco
2025-07-30 15:18 ` Nam Cao
2025-07-30 16:18 ` Gabriele Monaco
2025-07-31 7:35 ` Nam Cao
2025-07-31 8:39 ` Gabriele Monaco
2025-08-01 3:42 ` K Prateek Nayak
2025-08-01 7:29 ` Nam Cao
2025-08-01 9:56 ` K Prateek Nayak
2025-08-01 11:04 ` Gabriele Monaco
2025-08-04 3:07 ` K Prateek Nayak
2025-08-04 5:49 ` Nam Cao
2025-07-30 12:45 ` [PATCH 5/5] rv: Add rts monitor Nam Cao
2025-07-31 7:47 ` Gabriele Monaco
2025-08-01 7:58 ` Nam Cao
2025-08-01 9:14 ` Gabriele Monaco
2025-08-04 6:05 ` Nam Cao
2025-08-05 8:40 ` Gabriele Monaco
2025-08-05 12:22 ` Nam Cao
2025-08-05 15:45 ` Nam Cao
2025-08-06 8:15 ` Gabriele Monaco
2025-08-06 8:46 ` Nam Cao
2025-08-06 9:03 ` Gabriele Monaco
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).